rley 0.2.03 → 0.2.04
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/examples/parsers/parsing_err_expr.rb +85 -0
- data/examples/parsers/parsing_groucho.rb +99 -0
- data/examples/parsers/parsing_tricky.rb +53 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +27 -4
- data/lib/rley/parser/earley_parser.rb +54 -16
- data/lib/rley/parser/parse_state_tracker.rb +1 -0
- data/lib/rley/parser/parse_tracer.rb +100 -0
- data/lib/rley/parser/parsing.rb +18 -6
- data/lib/rley/parser/state_set.rb +9 -1
- data/spec/rley/parser/chart_spec.rb +71 -4
- data/spec/rley/parser/earley_parser_spec.rb +33 -0
- data/spec/rley/parser/parse_tracer_spec.rb +193 -0
- data/spec/rley/parser/parsing_spec.rb +28 -11
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YmIzNmI1ZmM0N2QyOWM5NmQyYjlmOWRlNzllZmZjMmMxZmNmNmQ4Yg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MmJjMDU3ZTMwYTA2NzY1YzJjOWQ3ZDk1MGZjYmFmMGMyMjgzOWZhYQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OTgxZTIxZWZhMWRlZTU1ZmVmZDhlYjllOTk2YjYwOTE5NDZjMDgzNzVlMmE3
|
10
|
+
YTIyYzNlNDU3MWE2OTZjM2I4MzAxNzhmMDFjNWU5YmI2N2QyNzQ2NTcxYjg1
|
11
|
+
ZjZkOTU2MWU4ZjM0NWUyMWM5ZDdiNDE1NzM2YTk0NDdlOThhMmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NDRhZmM5Mzc2ZmFhOWI1OTBiNzMwODA0OTE5NTk4ZjUzYzQ0ZjgyOTAzYTg2
|
14
|
+
YTE0YjFjZDRjM2M0NDYwZDk4Nzg3NGM0OTM4NWRjMzk4NTY4Nzg0OTdkNzAx
|
15
|
+
NjgxOTAxMmIyZWFjMzY0Y2M3MTU4NzRhZjA5MzdlMjUzYzdhNmI=
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Purpose: to demonstrate how to handle parsing errors
|
2
|
+
# and render a parse tree
|
3
|
+
require 'pp' # TODO remove this dependency
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a grammar for a very simple arithmetic expression language
|
17
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
18
|
+
|
19
|
+
# Let's create the grammar piece by piece
|
20
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
21
|
+
builder.add_terminals('+', '*', 'integer')
|
22
|
+
builder.add_production('P' => 'S')
|
23
|
+
builder.add_production('S' => %w(S + M))
|
24
|
+
builder.add_production('S' => 'M')
|
25
|
+
builder.add_production('M' => %w(M * T))
|
26
|
+
builder.add_production('M' => 'T')
|
27
|
+
builder.add_production('T' => 'integer')
|
28
|
+
|
29
|
+
# And now build the grammar...
|
30
|
+
grammar_s_expr = builder.grammar
|
31
|
+
|
32
|
+
|
33
|
+
########################################
|
34
|
+
# 2. Create a tokenizer for the language
|
35
|
+
# The tokenizer transforms the input into an array of tokens
|
36
|
+
def tokenizer(aText, aGrammar)
|
37
|
+
tokens = aText.scan(/\S+/).map do |lexeme|
|
38
|
+
case lexeme
|
39
|
+
when '+', '*'
|
40
|
+
terminal = aGrammar.name2symbol[lexeme]
|
41
|
+
when /^[-+]?\d+$/
|
42
|
+
terminal = aGrammar.name2symbol['integer']
|
43
|
+
else
|
44
|
+
msg = "Unknown input text '#{lexeme}'"
|
45
|
+
fail StandardError, msg
|
46
|
+
end
|
47
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
48
|
+
end
|
49
|
+
|
50
|
+
return tokens
|
51
|
+
end
|
52
|
+
|
53
|
+
########################################
|
54
|
+
# Step 3. Create a parser for that grammar
|
55
|
+
parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
|
56
|
+
|
57
|
+
########################################
|
58
|
+
# Step 4. Tokenize the invalid input
|
59
|
+
invalid_input = '2 + 3 * * 4'
|
60
|
+
tokens = tokenizer(invalid_input, grammar_s_expr)
|
61
|
+
|
62
|
+
########################################
|
63
|
+
# Step 5. Let the parser process the input
|
64
|
+
result = parser.parse(tokens)
|
65
|
+
puts "Parse successful? #{result.success?}"
|
66
|
+
pp result
|
67
|
+
|
68
|
+
########################################
|
69
|
+
# Step 6. Generate a parse tree from the parse result
|
70
|
+
ptree = result.parse_tree
|
71
|
+
pp ptree
|
72
|
+
|
73
|
+
########################################
|
74
|
+
# Step 7. Render the parse tree (in JSON)
|
75
|
+
# Let's create a parse tree visitor
|
76
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
77
|
+
|
78
|
+
# Here we create a renderer object...
|
79
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
80
|
+
|
81
|
+
# Now emit the parse tree as JSON on the console output
|
82
|
+
puts "JSON rendering of the parse tree for '#{invalid_input}' input:"
|
83
|
+
renderer.render(visitor)
|
84
|
+
|
85
|
+
# End of file
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# Purpose: to demonstrate how to parse an emblematic ambiguous sentence
|
2
|
+
# Based on example found at: http://www.nltk.org/book_1ed/ch08.html
|
3
|
+
|
4
|
+
require 'pp'
|
5
|
+
require 'rley' # Load the gem
|
6
|
+
|
7
|
+
# Steps to render a parse tree (of a valid parsed input):
|
8
|
+
# 1. Define a grammar
|
9
|
+
# 2. Create a tokenizer for the language
|
10
|
+
# 3. Create a parser for that grammar
|
11
|
+
# 4. Tokenize the input
|
12
|
+
# 5. Let the parser process the input
|
13
|
+
# 6. Generate a parse tree from the parse result
|
14
|
+
# 7. Render the parse tree (in JSON)
|
15
|
+
|
16
|
+
########################################
|
17
|
+
# Step 1. Define a grammar for a micro English-like language
|
18
|
+
# based on Jurafsky & Martin L0 language (chapter 12 of the book).
|
19
|
+
# It defines the syntax of a sentence in a language with a
|
20
|
+
# very limited syntax and lexicon in the context of airline reservation.
|
21
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
22
|
+
builder.add_terminals('N', 'V', 'Pro') # N(oun), V(erb), Pro(noun)
|
23
|
+
builder.add_terminals('Det', 'P') # Det(erminer), P(reposition)
|
24
|
+
builder.add_production('S' => %w[NP VP])
|
25
|
+
builder.add_production('NP' => %w[Det N])
|
26
|
+
builder.add_production('NP' => %w[Det N PP])
|
27
|
+
builder.add_production('NP' => 'Pro')
|
28
|
+
builder.add_production('VP' => %w[V NP])
|
29
|
+
builder.add_production('VP' => %w[VP PP])
|
30
|
+
builder.add_production('PP' => %w[P NP])
|
31
|
+
|
32
|
+
# And now build the grammar...
|
33
|
+
groucho_grammar = builder.grammar
|
34
|
+
|
35
|
+
|
36
|
+
########################################
|
37
|
+
# 2. Create a tokenizer for the language
|
38
|
+
# The tokenizer transforms the input into an array of tokens
|
39
|
+
# This is a very simplistic implementation for demo purposes.
|
40
|
+
|
41
|
+
# The lexicon is just a Hash with pairs of the form:
|
42
|
+
# word => terminal symbol name
|
43
|
+
Groucho_lexicon = {
|
44
|
+
'elephant' => 'N',
|
45
|
+
'pajamas' => 'N',
|
46
|
+
'shot' => 'V',
|
47
|
+
'I' => 'Pro',
|
48
|
+
'an' => 'Det',
|
49
|
+
'my' => 'Det',
|
50
|
+
'in' => 'P',
|
51
|
+
}
|
52
|
+
|
53
|
+
# Highly simplified tokenizer implementation.
|
54
|
+
def tokenizer(aText, aGrammar)
|
55
|
+
tokens = aText.scan(/\S+/).map do |word|
|
56
|
+
term_name = Groucho_lexicon[word]
|
57
|
+
if term_name.nil?
|
58
|
+
fail StandardError, "Word '#{word}' not found in lexicon"
|
59
|
+
end
|
60
|
+
terminal = aGrammar.name2symbol[term_name]
|
61
|
+
Rley::Parser::Token.new(word, terminal)
|
62
|
+
end
|
63
|
+
|
64
|
+
return tokens
|
65
|
+
end
|
66
|
+
|
67
|
+
########################################
|
68
|
+
# Step 3. Create a parser for that grammar
|
69
|
+
parser = Rley::Parser::EarleyParser.new(groucho_grammar)
|
70
|
+
|
71
|
+
########################################
|
72
|
+
# Step 4. Tokenize the input
|
73
|
+
valid_input = 'I shot an elephant in my pajamas'
|
74
|
+
tokens = tokenizer(valid_input, groucho_grammar)
|
75
|
+
|
76
|
+
########################################
|
77
|
+
# Step 5. Let the parser process the input
|
78
|
+
result = parser.parse(tokens)
|
79
|
+
|
80
|
+
puts "Parsing success? #{result.success?}"
|
81
|
+
|
82
|
+
#=begin
|
83
|
+
########################################
|
84
|
+
# Step 6. Generate a parse tree from the parse result
|
85
|
+
ptree = result.parse_tree
|
86
|
+
|
87
|
+
########################################
|
88
|
+
# Step 7. Render the parse tree (in JSON)
|
89
|
+
# Let's create a parse tree visitor
|
90
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
91
|
+
|
92
|
+
# Here we create a renderer object...
|
93
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
94
|
+
|
95
|
+
# Now emit the parse tree as JSON on the console output
|
96
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
97
|
+
renderer.render(visitor)
|
98
|
+
#=end
|
99
|
+
# End of file
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Purpose: to use a grammar that causes some Earley parsers to fail.
|
2
|
+
# See: http://stackoverflow.com/questions/22311323/earley-parser-recursion
|
3
|
+
require 'rley' # Load the gem
|
4
|
+
|
5
|
+
# Steps to parse some valid input:
|
6
|
+
# 1. Define a grammar
|
7
|
+
# 2. Create a tokenizer for the language
|
8
|
+
# 3. Create a parser for that grammar
|
9
|
+
# 4. Tokenize the input
|
10
|
+
# 5. Let the parser process the input & trace its progress
|
11
|
+
|
12
|
+
|
13
|
+
########################################
|
14
|
+
# Step 1. Define a grammar that might cause infinite recursion
|
15
|
+
# Let's create the grammar step-by-step with the grammar builder:
|
16
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
17
|
+
builder.add_terminals('ident')
|
18
|
+
builder.add_production('S' => 'E')
|
19
|
+
builder.add_production('E' => ['E', 'E'] )
|
20
|
+
builder.add_production('E' => 'ident')
|
21
|
+
|
22
|
+
# And now build the grammar...
|
23
|
+
grammar_tricky = builder.grammar
|
24
|
+
|
25
|
+
|
26
|
+
########################################
|
27
|
+
# 2. Create a tokenizer for the language
|
28
|
+
# The tokenizer transforms the input into an array of tokens
|
29
|
+
def tokenizer(aText, aGrammar)
|
30
|
+
terminal = aGrammar.name2symbol['ident']
|
31
|
+
|
32
|
+
tokens = aText.chars.map do |ch|
|
33
|
+
Rley::Parser::Token.new(ch, terminal)
|
34
|
+
end
|
35
|
+
|
36
|
+
return tokens
|
37
|
+
end
|
38
|
+
|
39
|
+
########################################
|
40
|
+
# Step 3. Create a parser for that grammar
|
41
|
+
parser = Rley::Parser::EarleyParser.new(grammar_tricky)
|
42
|
+
|
43
|
+
########################################
|
44
|
+
# Step 4. Tokenize the input
|
45
|
+
valid_input = 'abcdefg'
|
46
|
+
tokens = tokenizer(valid_input, grammar_tricky)
|
47
|
+
|
48
|
+
########################################
|
49
|
+
# Step 5. Let the parser process the input, set trace level to 1
|
50
|
+
result = parser.parse(tokens, 1)
|
51
|
+
puts "Parsing success? #{result.success?}"
|
52
|
+
|
53
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/parser/chart.rb
CHANGED
@@ -7,10 +7,19 @@ module Rley # This module is used as a namespace
|
|
7
7
|
# A one-dimensional array with n + 1 entries (n = number of input tokens).
|
8
8
|
class Chart
|
9
9
|
attr_reader(:state_sets)
|
10
|
+
|
11
|
+
# The level of trace details reported on stdout during the parse.
|
12
|
+
# The possible values are:
|
13
|
+
# 0: No trace output (default case)
|
14
|
+
# 1: Show trace of scanning and completion rules
|
15
|
+
# 2: Same as 1, with the addition of the prediction rules
|
16
|
+
attr_reader(:tracer)
|
10
17
|
|
11
|
-
|
18
|
+
# @param aTracer [ParseTracer] A tracer object.
|
19
|
+
def initialize(startDottedItem, tokenCount, aTracer)
|
20
|
+
@tracer = aTracer
|
12
21
|
@state_sets = Array.new(tokenCount + 1) { |_| StateSet.new }
|
13
|
-
push_state(startDottedItem, 0, 0)
|
22
|
+
push_state(startDottedItem, 0, 0, :start_rule)
|
14
23
|
end
|
15
24
|
|
16
25
|
# The dotted item/rule used to seed the parse chart.
|
@@ -38,9 +47,23 @@ module Rley # This module is used as a namespace
|
|
38
47
|
end
|
39
48
|
|
40
49
|
# Push a parse state for the chart entry with given index
|
41
|
-
def push_state(aDottedItem, anOrigin, anIndex)
|
50
|
+
def push_state(aDottedItem, anOrigin, anIndex, aReason)
|
42
51
|
new_state = ParseState.new(aDottedItem, anOrigin)
|
43
|
-
self[anIndex].push_state(new_state)
|
52
|
+
pushed = self[anIndex].push_state(new_state)
|
53
|
+
if pushed && tracer.level > 0
|
54
|
+
case aReason
|
55
|
+
when :start_rule, :prediction
|
56
|
+
tracer.trace_prediction(anIndex, new_state)
|
57
|
+
|
58
|
+
when :scanning
|
59
|
+
tracer.trace_scanning(anIndex, new_state)
|
60
|
+
|
61
|
+
when :completion
|
62
|
+
tracer.trace_completion(anIndex, new_state)
|
63
|
+
else
|
64
|
+
raise NotImplementedError, "Unknown push_state mode #{aReason}"
|
65
|
+
end
|
66
|
+
end
|
44
67
|
end
|
45
68
|
end # class
|
46
69
|
end # module
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative '../syntax/grammar'
|
2
|
+
require_relative 'parse_tracer'
|
2
3
|
require_relative 'dotted_item'
|
3
4
|
require_relative 'parsing'
|
4
5
|
|
@@ -19,9 +20,7 @@ module Rley # This module is used as a namespace
|
|
19
20
|
# In other words, the 'next_mapping' allows to find the dotted item
|
20
21
|
# after "advancing" the dot
|
21
22
|
attr_reader(:next_mapping)
|
22
|
-
|
23
|
-
# @param aGrammar [Grammar] The grammar of the language
|
24
|
-
# (to use by the parser).
|
23
|
+
|
25
24
|
def initialize(aGrammar)
|
26
25
|
@grammar = aGrammar
|
27
26
|
@dotted_items = build_dotted_items(grammar)
|
@@ -29,29 +28,57 @@ module Rley # This module is used as a namespace
|
|
29
28
|
@next_mapping = build_next_mapping(dotted_items)
|
30
29
|
end
|
31
30
|
|
31
|
+
=begin
|
32
|
+
You can optionally specify a tracing level, for how much output you
|
33
|
+
want to see:
|
34
|
+
|
35
|
+
0: No output.
|
36
|
+
1: Show edges from scanner and completer rules (not predictor).
|
37
|
+
2: Show all edges as they are added to the chart.
|
38
|
+
|
39
|
+
- For each index I{end} in [0, 1, ..., N]:
|
40
|
+
- For each I{edge} s.t. I{edge}.end = I{end}:
|
41
|
+
- If I{edge} is incomplete, and I{edge}.next is not a part
|
42
|
+
of speech:
|
43
|
+
- Apply PredictorRule to I{edge}
|
44
|
+
- If I{edge} is incomplete, and I{edge}.next is a part of
|
45
|
+
speech:
|
46
|
+
- Apply ScannerRule to I{edge}
|
47
|
+
- If I{edge} is complete:
|
48
|
+
- Apply CompleterRule to I{edge}
|
49
|
+
- Return any complete parses in the chart
|
50
|
+
=end
|
51
|
+
|
32
52
|
# Parse a sequence of input tokens.
|
33
53
|
# @param aTokenSequence [Array] Array of Tokens objects returned by a
|
34
54
|
# tokenizer/scanner/lexer.
|
55
|
+
# @param aGrammar [Grammar] The grammar of the language
|
56
|
+
# (to use by the parser).
|
57
|
+
# @param aTraceLevel [Fixnum] The specified trace level.
|
58
|
+
# The possible values are:
|
59
|
+
# 0: No trace output (default case)
|
60
|
+
# 1: Show trace of scanning and completion rules
|
61
|
+
# 2: Same as 1, with the addition of the prediction rules
|
35
62
|
# @return [Parsing] an object that embeds the parse results.
|
36
|
-
def parse(aTokenSequence)
|
37
|
-
|
63
|
+
def parse(aTokenSequence, aTraceLevel = 0)
|
64
|
+
tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
|
65
|
+
result = Parsing.new(start_dotted_item, aTokenSequence, tracer)
|
38
66
|
last_token_index = aTokenSequence.size
|
39
67
|
(0..last_token_index).each do |i|
|
40
68
|
predicted = Set.new
|
41
69
|
result.chart[i].each do |state|
|
42
|
-
if state.complete?
|
43
|
-
|
44
|
-
completion(result, state, i)
|
70
|
+
if state.complete? # End of production reached?
|
71
|
+
completion(result, state, i, tracer)
|
45
72
|
else
|
46
73
|
next_symbol = state.next_symbol
|
47
74
|
if next_symbol.kind_of?(Syntax::NonTerminal)
|
48
75
|
unless predicted.include? next_symbol
|
49
|
-
prediction(result, state, next_symbol, i)
|
76
|
+
prediction(result, state, next_symbol, i, tracer)
|
50
77
|
predicted << next_symbol # Avoid repeated predictions
|
51
78
|
end
|
52
79
|
elsif i < last_token_index
|
53
80
|
# Expecting a terminal symbol
|
54
|
-
scanning(result, next_symbol, i)
|
81
|
+
scanning(result, next_symbol, i, tracer)
|
55
82
|
end
|
56
83
|
end
|
57
84
|
end
|
@@ -134,18 +161,21 @@ module Rley # This module is used as a namespace
|
|
134
161
|
# immediately follows a dot
|
135
162
|
# (= is expected/predicted by the production rule)
|
136
163
|
# @param aPosition [Fixnum] position in the input token sequence.
|
137
|
-
def prediction(aParsing, aState, aNonTerminal, aPosition)
|
164
|
+
def prediction(aParsing, aState, aNonTerminal, aPosition, aTracer)
|
165
|
+
if aTracer.level > 1
|
166
|
+
puts "Chart[#{aPosition}] Prediction(s) from #{aState}:"
|
167
|
+
end
|
138
168
|
# Retrieve all start dotted items for productions
|
139
169
|
# with aNonTerminal as its lhs
|
140
170
|
items = start_mapping[aNonTerminal]
|
141
171
|
items.each do |an_item|
|
142
|
-
aParsing.push_state(an_item, aPosition, aPosition)
|
172
|
+
aParsing.push_state(an_item, aPosition, aPosition, :prediction)
|
143
173
|
end
|
144
174
|
|
145
175
|
return unless aNonTerminal.nullable?
|
146
176
|
# Aycock-Horspool trick for nullable rules
|
147
177
|
next_item = next_mapping[aState.dotted_rule]
|
148
|
-
aParsing.push_state(next_item, aState.origin, aPosition)
|
178
|
+
aParsing.push_state(next_item, aState.origin, aPosition, :prediction)
|
149
179
|
end
|
150
180
|
|
151
181
|
# This method is called when a parse state for chart entry at position
|
@@ -162,7 +192,12 @@ module Rley # This module is used as a namespace
|
|
162
192
|
# @param aTerminal [Terminal] a terminal symbol that
|
163
193
|
# immediately follows a dot
|
164
194
|
# @param aPosition [Fixnum] position in the input token sequence.
|
165
|
-
def scanning(aParsing, aTerminal, aPosition)
|
195
|
+
def scanning(aParsing, aTerminal, aPosition, aTracer)
|
196
|
+
if aTracer.level > 1
|
197
|
+
prefix = "Chart[#{aPosition}] Scanning of terminal "
|
198
|
+
suffix = "#{aTerminal.name}:"
|
199
|
+
puts prefix + suffix
|
200
|
+
end
|
166
201
|
aParsing.scanning(aTerminal, aPosition) do |item|
|
167
202
|
next_mapping[item]
|
168
203
|
end
|
@@ -173,11 +208,14 @@ module Rley # This module is used as a namespace
|
|
173
208
|
# For every state in chart[aPosition] that is
|
174
209
|
# complete (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
|
175
210
|
# Find states s in chart[j] of the
|
176
|
-
# form {dotted_rule: Y -> α • X β, origin: i}
|
211
|
+
# form { dotted_rule: Y -> α • X β, origin: i}
|
177
212
|
# In other words, rules that predicted the non-terminal X.
|
178
213
|
# For each s, add to chart[aPosition] a state of the form
|
179
214
|
# { dotted_rule: Y → α X • β, origin: i})
|
180
|
-
def completion(aParsing, aState, aPosition)
|
215
|
+
def completion(aParsing, aState, aPosition, aTracer)
|
216
|
+
if aTracer.level > 1
|
217
|
+
puts "Chart[#{aPosition}] Completion of state #{aState}:"
|
218
|
+
end
|
181
219
|
aParsing.completion(aState, aPosition) do |item|
|
182
220
|
next_mapping[item]
|
183
221
|
end
|
@@ -25,6 +25,7 @@ module Rley # This module is used as a namespace
|
|
25
25
|
|
26
26
|
# Write accessor. Set the given parse state as the current one.
|
27
27
|
def parse_state=(aParseState)
|
28
|
+
fail StandardError, "Nil parse state" if aParseState.nil?
|
28
29
|
@parse_state = aParseState
|
29
30
|
processed_states[parse_state] = true
|
30
31
|
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Parser # This module is used as a namespace
|
5
|
+
# Utility class used to trace the parsing of a token sequence.
|
6
|
+
class ParseTracer
|
7
|
+
# The stream where the trace output is sent
|
8
|
+
attr_reader(:ostream)
|
9
|
+
|
10
|
+
# The trace level
|
11
|
+
attr_reader(:level)
|
12
|
+
|
13
|
+
attr_reader(:lexemes)
|
14
|
+
|
15
|
+
attr_reader(:col_width)
|
16
|
+
|
17
|
+
def initialize(aTraceLevel, anIO, aTokenSequence)
|
18
|
+
@level = aTraceLevel <= 0 ? 0 : [aTraceLevel, 2].min
|
19
|
+
@ostream = anIO
|
20
|
+
@lexemes = aTokenSequence.map(&:lexeme)
|
21
|
+
|
22
|
+
emit_tokens
|
23
|
+
emit_heading
|
24
|
+
end
|
25
|
+
|
26
|
+
# Emit the trace text to the output IO
|
27
|
+
# if the given trace level is less than or equal to the
|
28
|
+
# trace level of the tracer instance.
|
29
|
+
def print_if(aLevel, text)
|
30
|
+
ostream.print(text) if level >= aLevel
|
31
|
+
end
|
32
|
+
|
33
|
+
# Emit the trace of a scanning step.
|
34
|
+
def trace_scanning(aStatesetIndex, aParseState)
|
35
|
+
return unless level
|
36
|
+
|
37
|
+
scan_picture = '[' + '-' * (col_width-1) + ']'
|
38
|
+
org = OpenStruct.new(origin: aStatesetIndex - 1,
|
39
|
+
dotted_rule: aParseState.dotted_rule)
|
40
|
+
trace_diagram(aStatesetIndex, org, scan_picture)
|
41
|
+
end
|
42
|
+
|
43
|
+
def trace_prediction(aStatesetIndex, aParseState)
|
44
|
+
return unless level
|
45
|
+
|
46
|
+
trace_diagram(aStatesetIndex, aParseState, '>')
|
47
|
+
end
|
48
|
+
|
49
|
+
def trace_completion(aStatesetIndex, aParseState)
|
50
|
+
return unless level
|
51
|
+
|
52
|
+
if aStatesetIndex == lexemes.size && aParseState.origin == 0 && aParseState.complete?
|
53
|
+
picture = '=' * (col_width * lexemes.size - 1)
|
54
|
+
else
|
55
|
+
count = col_width * (aStatesetIndex - aParseState.origin) - 1
|
56
|
+
picture = '-' * count
|
57
|
+
end
|
58
|
+
completion_picture = '[' + picture + (aParseState.complete? ? ']' : '>')
|
59
|
+
trace_diagram(aStatesetIndex, aParseState, completion_picture)
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
def emit_tokens()
|
65
|
+
literals = lexemes.map { |lx| "'#{lx}'" }
|
66
|
+
print_if 1, '[' + literals.join(', ') + "]\n"
|
67
|
+
end
|
68
|
+
|
69
|
+
def emit_heading()
|
70
|
+
longest = lexemes.map(&:length).max
|
71
|
+
@col_width = longest + 3
|
72
|
+
headers = lexemes.map { |l| "#{l.center(col_width-1, ' ')}" }
|
73
|
+
print_if 1, '|.' + headers.join('.') + ".|\n"
|
74
|
+
end
|
75
|
+
|
76
|
+
def padding(aStatesetIndex, aParseState, aPicture)
|
77
|
+
l_pad_pattern = '.' + ' ' * (col_width-1)
|
78
|
+
left_padding = l_pad_pattern * ([0, aParseState.origin].max)
|
79
|
+
r_pad_pattern = ' ' * (col_width-1) + '.'
|
80
|
+
right_padding = r_pad_pattern * (lexemes.size - aStatesetIndex)
|
81
|
+
return left_padding + aPicture + right_padding
|
82
|
+
end
|
83
|
+
|
84
|
+
def parse_state_str(aStatesetIndex, aParseState)
|
85
|
+
"[#{aParseState.origin}:#{aStatesetIndex}] #{aParseState.dotted_rule}"
|
86
|
+
end
|
87
|
+
|
88
|
+
def trace_diagram(aStatesetIndex, aParseState, aPicture)
|
89
|
+
diagram = padding(aStatesetIndex, aParseState, aPicture)
|
90
|
+
prefix = '|'
|
91
|
+
suffix = '| ' + parse_state_str(aStatesetIndex, aParseState)
|
92
|
+
trace = prefix + diagram + suffix
|
93
|
+
|
94
|
+
print_if 1, trace + "\n"
|
95
|
+
end
|
96
|
+
end # class
|
97
|
+
end # module
|
98
|
+
end # module
|
99
|
+
|
100
|
+
# End of file
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -11,9 +11,14 @@ module Rley # This module is used as a namespace
|
|
11
11
|
# The sequence of input token to parse
|
12
12
|
attr_reader(:tokens)
|
13
13
|
|
14
|
-
|
14
|
+
# @param aTraceLevel [Fixnum] The specified trace level.
|
15
|
+
# The possible values are:
|
16
|
+
# 0: No trace output (default case)
|
17
|
+
# 1: Show trace of scanning and completion rules
|
18
|
+
# 2: Same as 1, with the addition of the prediction rules
|
19
|
+
def initialize(startDottedRule, theTokens, aTracer)
|
15
20
|
@tokens = theTokens.dup
|
16
|
-
@chart = Chart.new(startDottedRule, tokens.size)
|
21
|
+
@chart = Chart.new(startDottedRule, tokens.size, aTracer)
|
17
22
|
end
|
18
23
|
|
19
24
|
# Return true if the parse was successful (= input tokens
|
@@ -36,6 +41,13 @@ module Rley # This module is used as a namespace
|
|
36
41
|
builder = tree_builder(state_tracker.state_set_index)
|
37
42
|
|
38
43
|
loop do
|
44
|
+
match_symbol = state_tracker.symbol_on_left
|
45
|
+
# puts '--------------------'
|
46
|
+
# puts "Active parse state: #{state_tracker.parse_state}"
|
47
|
+
# puts "Matching symbol: #{match_symbol}"
|
48
|
+
# puts 'Parse tree:'
|
49
|
+
# puts builder.root.to_string(0)
|
50
|
+
|
39
51
|
# Place the symbol on left of the dot in the parse tree
|
40
52
|
done = insert_matched_symbol(state_tracker, builder)
|
41
53
|
break if done
|
@@ -47,9 +59,9 @@ module Rley # This module is used as a namespace
|
|
47
59
|
|
48
60
|
# Push a parse state (dotted item + origin) to the
|
49
61
|
# chart entry with given index if it isn't yet in the chart entry.
|
50
|
-
def push_state(aDottedItem, anOrigin, aChartIndex)
|
62
|
+
def push_state(aDottedItem, anOrigin, aChartIndex, aReason)
|
51
63
|
fail StandardError, 'Dotted item may not be nil' if aDottedItem.nil?
|
52
|
-
chart.push_state(aDottedItem, anOrigin, aChartIndex)
|
64
|
+
chart.push_state(aDottedItem, anOrigin, aChartIndex, aReason)
|
53
65
|
end
|
54
66
|
|
55
67
|
|
@@ -74,7 +86,7 @@ module Rley # This module is used as a namespace
|
|
74
86
|
states = states_expecting(aTerminal, aPosition, false)
|
75
87
|
states.each do |s|
|
76
88
|
next_item = nextMapping.call(s.dotted_rule)
|
77
|
-
push_state(next_item, s.origin, aPosition + 1)
|
89
|
+
push_state(next_item, s.origin, aPosition + 1, :scanning)
|
78
90
|
end
|
79
91
|
end
|
80
92
|
|
@@ -95,7 +107,7 @@ module Rley # This module is used as a namespace
|
|
95
107
|
states = states_expecting(curr_lhs, curr_origin, false)
|
96
108
|
states.each do |s|
|
97
109
|
next_item = nextMapping.call(s.dotted_rule)
|
98
|
-
push_state(next_item, s.origin, aPosition)
|
110
|
+
push_state(next_item, s.origin, aPosition, :completion)
|
99
111
|
end
|
100
112
|
end
|
101
113
|
|
@@ -17,8 +17,16 @@ module Rley # This module is used as a namespace
|
|
17
17
|
# Append the given state (if it isn't yet in the set)
|
18
18
|
# to the list of states
|
19
19
|
# @param aState [ParseState] the state to push.
|
20
|
+
# @return [TrueClass/FalseClass] true when the state is really added
|
20
21
|
def push_state(aState)
|
21
|
-
|
22
|
+
if include?(aState)
|
23
|
+
result = false
|
24
|
+
else
|
25
|
+
@states << aState
|
26
|
+
result = true
|
27
|
+
end
|
28
|
+
|
29
|
+
return result
|
22
30
|
end
|
23
31
|
|
24
32
|
# The list of ParseState that expect the given symbol.
|
@@ -1,5 +1,13 @@
|
|
1
1
|
require_relative '../../spec_helper'
|
2
|
+
require 'stringio'
|
2
3
|
|
4
|
+
require_relative '../../../lib/rley/syntax/terminal'
|
5
|
+
require_relative '../../../lib/rley/syntax/non_terminal'
|
6
|
+
require_relative '../../../lib/rley/syntax/production'
|
7
|
+
require_relative '../../../lib/rley/parser/token'
|
8
|
+
require_relative '../../../lib/rley/parser/dotted_item'
|
9
|
+
require_relative '../../../lib/rley/parser/parse_state'
|
10
|
+
require_relative '../../../lib/rley/parser/parse_tracer'
|
3
11
|
|
4
12
|
# Load the class under test
|
5
13
|
require_relative '../../../lib/rley/parser/chart'
|
@@ -10,12 +18,21 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
10
18
|
let(:count_token) { 20 }
|
11
19
|
let(:dotted_rule) { double('fake-dotted-item') }
|
12
20
|
|
21
|
+
let(:output) { StringIO.new('', 'w') }
|
22
|
+
|
23
|
+
let(:token_seq) do
|
24
|
+
literals = ['I', 'saw', 'John', 'with', 'a', 'dog']
|
25
|
+
literals.map {|lexeme| Token.new(lexeme, nil)}
|
26
|
+
end
|
27
|
+
|
28
|
+
let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
|
29
|
+
|
13
30
|
context 'Initialization:' do
|
14
31
|
# Default instantiation rule
|
15
|
-
subject { Chart.new(dotted_rule, count_token) }
|
32
|
+
subject { Chart.new(dotted_rule, count_token, sample_tracer) }
|
16
33
|
|
17
|
-
it 'should be created with
|
18
|
-
expect { Chart.new(dotted_rule, count_token) }.not_to raise_error
|
34
|
+
it 'should be created with start dotted rule, token count, tracer' do
|
35
|
+
expect { Chart.new(dotted_rule, count_token, sample_tracer) }.not_to raise_error
|
19
36
|
end
|
20
37
|
|
21
38
|
it 'should have a seed state in first state_set' do
|
@@ -33,10 +50,60 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
33
50
|
it 'should know the start dotted rule' do
|
34
51
|
expect(subject.start_dotted_rule).to eq(dotted_rule)
|
35
52
|
end
|
36
|
-
|
53
|
+
|
37
54
|
it 'should have at least one non-empty state set' do
|
38
55
|
expect(subject.last_index).to eq(0)
|
39
56
|
end
|
57
|
+
|
58
|
+
it 'should reference a tracer' do
|
59
|
+
expect(subject.tracer).to eq(sample_tracer)
|
60
|
+
end
|
61
|
+
end # context
|
62
|
+
|
63
|
+
context 'Provided services:' do
|
64
|
+
let(:t_a) { Syntax::Terminal.new('A') }
|
65
|
+
let(:t_b) { Syntax::Terminal.new('B') }
|
66
|
+
let(:t_c) { Syntax::Terminal.new('C') }
|
67
|
+
let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
|
68
|
+
|
69
|
+
let(:sample_prod) do
|
70
|
+
Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
|
71
|
+
end
|
72
|
+
|
73
|
+
let(:origin_val) { 3 }
|
74
|
+
let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
|
75
|
+
let(:complete_rule) { DottedItem.new(sample_prod, 3) }
|
76
|
+
let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
|
77
|
+
let(:sample_tracer) { ParseTracer.new(1, output, token_seq) }
|
78
|
+
|
79
|
+
# Factory method.
|
80
|
+
def parse_state(origin, aDottedRule)
|
81
|
+
ParseState.new(aDottedRule, origin)
|
82
|
+
end
|
83
|
+
|
84
|
+
subject { Chart.new(dotted_rule, count_token, sample_tracer) }
|
85
|
+
|
86
|
+
it 'should trace its initialization' do
|
87
|
+
subject[0] # Force constructor call here
|
88
|
+
expectation = <<-SNIPPET
|
89
|
+
['I', 'saw', 'John', 'with', 'a', 'dog']
|
90
|
+
|. I . saw . John . with . a . dog .|
|
91
|
+
|> . . . . . .| [0:0] sentence => A B . C
|
92
|
+
SNIPPET
|
93
|
+
expect(output.string).to eq(expectation)
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'should trace parse state pushing' do
|
98
|
+
subject[0] # Force constructor call here
|
99
|
+
output.string = ''
|
100
|
+
|
101
|
+
subject.push_state(dotted_rule, 3, 5, :prediction)
|
102
|
+
expectation = <<-SNIPPET
|
103
|
+
|. . . > .| [3:5] sentence => A B . C
|
104
|
+
SNIPPET
|
105
|
+
expect(output.string).to eq(expectation)
|
106
|
+
end
|
40
107
|
end # context
|
41
108
|
end # describe
|
42
109
|
end # module
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative '../../spec_helper'
|
2
|
+
require 'stringio'
|
2
3
|
|
3
4
|
require_relative '../../../lib/rley/syntax/verbatim_symbol'
|
4
5
|
require_relative '../../../lib/rley/syntax/non_terminal'
|
@@ -226,6 +227,38 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
226
227
|
compare_state_texts(state_set_5, expected)
|
227
228
|
end
|
228
229
|
|
230
|
+
it 'should trace a parse with level 1' do
|
231
|
+
# Substitute temporarily $stdout by a StringIO
|
232
|
+
prev_ostream = $stdout
|
233
|
+
$stdout = StringIO.new('', 'w')
|
234
|
+
|
235
|
+
trace_level = 1
|
236
|
+
parse_result = subject.parse(grm1_tokens, trace_level)
|
237
|
+
expectations = <<-SNIPPET
|
238
|
+
['a', 'a', 'b', 'c', 'c']
|
239
|
+
|. a . a . b . c . c .|
|
240
|
+
|> . . . . .| [0:0] S => . A
|
241
|
+
|> . . . . .| [0:0] A => . 'a' A 'c'
|
242
|
+
|> . . . . .| [0:0] A => . 'b'
|
243
|
+
|[---] . . . .| [0:1] A => 'a' . A 'c'
|
244
|
+
|. > . . . .| [1:1] A => . 'a' A 'c'
|
245
|
+
|. > . . . .| [1:1] A => . 'b'
|
246
|
+
|. [---] . . .| [1:2] A => 'a' . A 'c'
|
247
|
+
|. . > . . .| [2:2] A => . 'a' A 'c'
|
248
|
+
|. . > . . .| [2:2] A => . 'b'
|
249
|
+
|. . [---] . .| [2:3] A => 'b' .
|
250
|
+
|. [-------> . .| [1:3] A => 'a' A . 'c'
|
251
|
+
|. . . [---] .| [3:4] A => 'a' A 'c' .
|
252
|
+
|[---------------> .| [0:4] A => 'a' A . 'c'
|
253
|
+
|. . . . [---]| [4:5] A => 'a' A 'c' .
|
254
|
+
|[===================]| [0:5] S => A .
|
255
|
+
SNIPPET
|
256
|
+
expect($stdout.string).to eq(expectations)
|
257
|
+
|
258
|
+
# Restore standard output stream
|
259
|
+
$stdout = prev_ostream
|
260
|
+
end
|
261
|
+
|
229
262
|
it 'should parse a valid simple expression' do
|
230
263
|
instance = EarleyParser.new(grammar_expr)
|
231
264
|
parse_result = instance.parse(grm2_tokens)
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
require_relative '../../../lib/rley/syntax/terminal'
|
5
|
+
require_relative '../../../lib/rley/syntax/non_terminal'
|
6
|
+
require_relative '../../../lib/rley/syntax/production'
|
7
|
+
require_relative '../../../lib/rley/parser/dotted_item'
|
8
|
+
require_relative '../../../lib/rley/parser/parse_state'
|
9
|
+
require_relative '../../../lib/rley/parser/token'
|
10
|
+
|
11
|
+
# Load the class under test
|
12
|
+
require_relative '../../../lib/rley/parser/parse_tracer'
|
13
|
+
|
14
|
+
module Rley # Open this namespace to avoid module qualifier prefixes
|
15
|
+
module Parser # Open this namespace to avoid module qualifier prefixes
|
16
|
+
describe ParseTracer do
|
17
|
+
let(:output) { StringIO.new('', 'w') }
|
18
|
+
|
19
|
+
let(:token_seq) do
|
20
|
+
literals = ['I', 'saw', 'John', 'with', 'a', 'dog']
|
21
|
+
literals.map {|lexeme| Token.new(lexeme, nil)}
|
22
|
+
end
|
23
|
+
|
24
|
+
subject { ParseTracer.new(1, output, token_seq) }
|
25
|
+
|
26
|
+
context 'Creation & initialization:' do
|
27
|
+
it 'should accept trace level 0' do
|
28
|
+
expect { ParseTracer.new(0, output, token_seq) }.not_to raise_error
|
29
|
+
expect(output.string).to eq('')
|
30
|
+
end
|
31
|
+
|
32
|
+
# |. I . saw . John . with . a . dog .|
|
33
|
+
|
34
|
+
it 'should accept trace level 1' do
|
35
|
+
expect { ParseTracer.new(1, output, token_seq) }.not_to raise_error
|
36
|
+
expectations = <<-SNIPPET
|
37
|
+
['I', 'saw', 'John', 'with', 'a', 'dog']
|
38
|
+
|. I . saw . John . with . a . dog .|
|
39
|
+
SNIPPET
|
40
|
+
expect(output.string).to eq(expectations)
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'should accept trace level 2' do
|
44
|
+
expect { ParseTracer.new(2, output, token_seq) }.not_to raise_error
|
45
|
+
expectations = <<-SNIPPET
|
46
|
+
['I', 'saw', 'John', 'with', 'a', 'dog']
|
47
|
+
|. I . saw . John . with . a . dog .|
|
48
|
+
SNIPPET
|
49
|
+
expect(output.string).to eq(expectations)
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'should know the trace level' do
|
53
|
+
expect(subject.level).to eq(1)
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'should know the output stream' do
|
57
|
+
expect(subject.ostream).to eq(output)
|
58
|
+
end
|
59
|
+
end # context
|
60
|
+
|
61
|
+
context 'Provided services:' do
|
62
|
+
let(:t_a) { Syntax::Terminal.new('A') }
|
63
|
+
let(:t_b) { Syntax::Terminal.new('B') }
|
64
|
+
let(:t_c) { Syntax::Terminal.new('C') }
|
65
|
+
let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
|
66
|
+
|
67
|
+
let(:sample_prod) do
|
68
|
+
Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
|
69
|
+
end
|
70
|
+
|
71
|
+
let(:origin_val) { 3 }
|
72
|
+
let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
|
73
|
+
let(:complete_rule) { DottedItem.new(sample_prod, 3) }
|
74
|
+
let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
|
75
|
+
|
76
|
+
# Factory method.
|
77
|
+
def parse_state(origin, aDottedRule)
|
78
|
+
ParseState.new(aDottedRule, origin)
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'should render a scanning step' do
|
82
|
+
# Case: token at the beginning
|
83
|
+
subject.ostream.string = ''
|
84
|
+
subject.trace_scanning(1, parse_state(0, dotted_rule))
|
85
|
+
expectations = <<-SNIPPET
|
86
|
+
|[------] . . . . .| [0:1] sentence => A B . C
|
87
|
+
SNIPPET
|
88
|
+
|
89
|
+
# Case: token in the middle
|
90
|
+
subject.ostream.string = ''
|
91
|
+
subject.trace_scanning(4, sample_parse_state)
|
92
|
+
expectations = <<-SNIPPET
|
93
|
+
|. . . [------] . .| [3:4] sentence => A B . C
|
94
|
+
SNIPPET
|
95
|
+
|
96
|
+
# Case: token at the end
|
97
|
+
subject.ostream.string = ''
|
98
|
+
subject.trace_scanning(6, parse_state(5, dotted_rule))
|
99
|
+
expectations = <<-SNIPPET
|
100
|
+
|. . . . . [------]| [5:6] sentence => A B . C
|
101
|
+
SNIPPET
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
it 'should render a prediction step' do
|
106
|
+
# Case: initial stateset
|
107
|
+
subject.ostream.string = ''
|
108
|
+
subject.trace_prediction(0, parse_state(0, dotted_rule))
|
109
|
+
expectations = <<-SNIPPET
|
110
|
+
|> . . . . . .| [0:0] sentence => A B . C
|
111
|
+
SNIPPET
|
112
|
+
expect(output.string).to eq(expectations)
|
113
|
+
|
114
|
+
# Case: stateset in the middle
|
115
|
+
subject.ostream.string = ''
|
116
|
+
subject.trace_prediction(3, sample_parse_state)
|
117
|
+
expectations = <<-SNIPPET
|
118
|
+
|. . . > . . .| [3:3] sentence => A B . C
|
119
|
+
SNIPPET
|
120
|
+
expect(output.string).to eq(expectations)
|
121
|
+
|
122
|
+
# Case: final stateset
|
123
|
+
subject.ostream.string = ''
|
124
|
+
subject.trace_prediction(6, parse_state(6, dotted_rule))
|
125
|
+
expectations = <<-SNIPPET
|
126
|
+
|. . . . . . >| [6:6] sentence => A B . C
|
127
|
+
SNIPPET
|
128
|
+
expect(output.string).to eq(expectations)
|
129
|
+
end
|
130
|
+
|
131
|
+
it 'should render a completion step' do
|
132
|
+
# Case: full parse completed
|
133
|
+
subject.ostream.string = ''
|
134
|
+
subject.trace_completion(6, parse_state(0, complete_rule))
|
135
|
+
expectations = <<-SNIPPET
|
136
|
+
|[=========================================]| [0:6] sentence => A B C .
|
137
|
+
SNIPPET
|
138
|
+
expect(output.string).to eq(expectations)
|
139
|
+
|
140
|
+
# Case: step at the start (complete)
|
141
|
+
subject.ostream.string = ''
|
142
|
+
subject.trace_completion(1, parse_state(0, complete_rule))
|
143
|
+
expectations = <<-SNIPPET
|
144
|
+
|[------] . . . . .| [0:1] sentence => A B C .
|
145
|
+
SNIPPET
|
146
|
+
expect(output.string).to eq(expectations)
|
147
|
+
|
148
|
+
# Case: step at the start (not complete)
|
149
|
+
subject.ostream.string = ''
|
150
|
+
subject.trace_completion(1, parse_state(0, dotted_rule))
|
151
|
+
expectations = <<-SNIPPET
|
152
|
+
|[------> . . . . .| [0:1] sentence => A B . C
|
153
|
+
SNIPPET
|
154
|
+
expect(output.string).to eq(expectations)
|
155
|
+
|
156
|
+
# Case: step at the middle (complete)
|
157
|
+
subject.ostream.string = ''
|
158
|
+
subject.trace_completion(4, parse_state(2, complete_rule))
|
159
|
+
expectations = <<-SNIPPET
|
160
|
+
|. . [-------------] . .| [2:4] sentence => A B C .
|
161
|
+
SNIPPET
|
162
|
+
expect(output.string).to eq(expectations)
|
163
|
+
|
164
|
+
# Case: step at the middle (not complete)
|
165
|
+
subject.ostream.string = ''
|
166
|
+
subject.trace_completion(4, parse_state(2, dotted_rule))
|
167
|
+
expectations = <<-SNIPPET
|
168
|
+
|. . [-------------> . .| [2:4] sentence => A B . C
|
169
|
+
SNIPPET
|
170
|
+
expect(output.string).to eq(expectations)
|
171
|
+
|
172
|
+
# Case: step at the end (complete)
|
173
|
+
subject.ostream.string = ''
|
174
|
+
subject.trace_completion(6, parse_state(3, complete_rule))
|
175
|
+
expectations = <<-SNIPPET
|
176
|
+
|. . . [--------------------]| [3:6] sentence => A B C .
|
177
|
+
SNIPPET
|
178
|
+
expect(output.string).to eq(expectations)
|
179
|
+
|
180
|
+
# Case: step at the end (not complete)
|
181
|
+
subject.ostream.string = ''
|
182
|
+
subject.trace_completion(6, parse_state(3, dotted_rule))
|
183
|
+
expectations = <<-SNIPPET
|
184
|
+
|. . . [-------------------->| [3:6] sentence => A B . C
|
185
|
+
SNIPPET
|
186
|
+
expect(output.string).to eq(expectations)
|
187
|
+
end
|
188
|
+
end # context
|
189
|
+
end # describe
|
190
|
+
end # module
|
191
|
+
end # module
|
192
|
+
|
193
|
+
# End of file
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative '../../spec_helper'
|
2
|
+
require 'stringio'
|
2
3
|
|
3
4
|
require_relative '../../../lib/rley/syntax/non_terminal'
|
4
5
|
require_relative '../../../lib/rley/syntax/verbatim_symbol'
|
@@ -6,6 +7,7 @@ require_relative '../../../lib/rley/syntax/production'
|
|
6
7
|
require_relative '../../../lib/rley/syntax/grammar_builder'
|
7
8
|
require_relative '../../../lib/rley/parser/dotted_item'
|
8
9
|
require_relative '../../../lib/rley/parser/token'
|
10
|
+
require_relative '../../../lib/rley/parser/parse_tracer'
|
9
11
|
require_relative '../../../lib/rley/parser/earley_parser'
|
10
12
|
require_relative '../support/grammar_abc_helper'
|
11
13
|
require_relative '../support/grammar_b_expr_helper'
|
@@ -48,15 +50,18 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
48
50
|
|
49
51
|
|
50
52
|
let(:start_dotted_rule) { DottedItem.new(prod_S, 0) }
|
53
|
+
let(:output) { StringIO.new('', 'w') }
|
54
|
+
let(:sample_tracer) { ParseTracer.new(0, output, grm1_tokens) }
|
51
55
|
|
52
56
|
# Default instantiation rule
|
53
|
-
subject { Parsing.new(start_dotted_rule, grm1_tokens) }
|
57
|
+
subject { Parsing.new(start_dotted_rule, grm1_tokens, sample_tracer) }
|
54
58
|
|
55
59
|
context 'Initialization:' do
|
56
|
-
it 'should be created with list of tokens
|
60
|
+
it 'should be created with list of tokens, start dotted rule, trace' do
|
57
61
|
start_rule = start_dotted_rule
|
58
62
|
tokens = grm1_tokens
|
59
|
-
|
63
|
+
tracer = sample_tracer
|
64
|
+
expect { Parsing.new(start_rule, tokens, tracer) }.not_to raise_error
|
60
65
|
end
|
61
66
|
|
62
67
|
it 'should know the input tokens' do
|
@@ -66,6 +71,17 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
66
71
|
it 'should know its chart object' do
|
67
72
|
expect(subject.chart).to be_kind_of(Chart)
|
68
73
|
end
|
74
|
+
|
75
|
+
it 'should emit trace level 1 info' do
|
76
|
+
tracer = ParseTracer.new(1, output, grm1_tokens)
|
77
|
+
instance = Parsing.new(start_dotted_rule, grm1_tokens, tracer)
|
78
|
+
expectations = <<-SNIPPET
|
79
|
+
['a', 'a', 'b', 'c', 'c']
|
80
|
+
|. a . a . b . c . c .|
|
81
|
+
|> . . . . .| [0:0] S => . A
|
82
|
+
SNIPPET
|
83
|
+
expect(output.string).to eq(expectations)
|
84
|
+
end
|
69
85
|
end # context
|
70
86
|
|
71
87
|
context 'Parsing:' do
|
@@ -73,27 +89,27 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
73
89
|
expect(subject.chart[1]).to be_empty
|
74
90
|
item = DottedItem.new(prod_A1, 1)
|
75
91
|
|
76
|
-
subject.push_state(item, 1, 1)
|
92
|
+
subject.push_state(item, 1, 1, :scanning)
|
77
93
|
expect(subject.chart[1]).not_to be_empty
|
78
94
|
expect(subject.chart[1].first.dotted_rule).to eq(item)
|
79
95
|
|
80
96
|
# Pushing twice the same state must be no-op
|
81
|
-
subject.push_state(item, 1, 1)
|
97
|
+
subject.push_state(item, 1, 1, :scanning)
|
82
98
|
expect(subject.chart[1].size).to eq(1)
|
83
99
|
end
|
84
100
|
|
85
101
|
it 'should complain when trying to push a nil dotted item' do
|
86
102
|
err = StandardError
|
87
103
|
msg = 'Dotted item may not be nil'
|
88
|
-
expect
|
104
|
+
expect{ subject.push_state(nil, 1, 1, :prediction) }.to raise_error(err, msg)
|
89
105
|
end
|
90
106
|
|
91
107
|
|
92
108
|
it 'should retrieve the parse states that expect a given terminal' do
|
93
109
|
item1 = DottedItem.new(prod_A1, 2)
|
94
110
|
item2 = DottedItem.new(prod_A1, 1)
|
95
|
-
subject.push_state(item1, 2, 2)
|
96
|
-
subject.push_state(item2, 2, 2)
|
111
|
+
subject.push_state(item1, 2, 2, :scanning)
|
112
|
+
subject.push_state(item2, 2, 2, :scanning)
|
97
113
|
states = subject.states_expecting(c_, 2, false)
|
98
114
|
expect(states.size).to eq(1)
|
99
115
|
expect(states[0].dotted_rule).to eq(item1)
|
@@ -106,8 +122,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
106
122
|
|
107
123
|
item1 = DottedItem.new(prod_A1, 0)
|
108
124
|
item2 = DottedItem.new(prod_A2, 0)
|
109
|
-
subject.push_state(item1, 0, 0)
|
110
|
-
subject.push_state(item2, 0, 0)
|
125
|
+
subject.push_state(item1, 0, 0, :completion)
|
126
|
+
subject.push_state(item2, 0, 0, :completion)
|
111
127
|
subject.scanning(a_, 0) { |i| i } # Code block is mock
|
112
128
|
|
113
129
|
# Expected side effect: a new state at chart[1]
|
@@ -117,7 +133,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
117
133
|
expect(new_state.origin).to eq(0)
|
118
134
|
end
|
119
135
|
end # context
|
120
|
-
|
136
|
+
=begin
|
121
137
|
context 'Parse tree building:' do
|
122
138
|
let(:sample_grammar1) do
|
123
139
|
builder = grammar_abc_builder
|
@@ -385,6 +401,7 @@ SNIPPET
|
|
385
401
|
expect(actual).to eq(expected_text.chomp)
|
386
402
|
end
|
387
403
|
end # context
|
404
|
+
=end
|
388
405
|
end # describe
|
389
406
|
end # module
|
390
407
|
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.03
|
4
|
+
version: 0.2.04
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -80,7 +80,7 @@ dependencies:
|
|
80
80
|
- - ! '>='
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 2.0.0
|
83
|
-
description: A
|
83
|
+
description: A general parser using the Earley algorithm.
|
84
84
|
email: famished.tiger@yahoo.com
|
85
85
|
executables: []
|
86
86
|
extensions: []
|
@@ -104,8 +104,11 @@ files:
|
|
104
104
|
- examples/parsers/parsing_abc.rb
|
105
105
|
- examples/parsers/parsing_ambig.rb
|
106
106
|
- examples/parsers/parsing_b_expr.rb
|
107
|
+
- examples/parsers/parsing_err_expr.rb
|
108
|
+
- examples/parsers/parsing_groucho.rb
|
107
109
|
- examples/parsers/parsing_L0.rb
|
108
110
|
- examples/parsers/parsing_L1.rb
|
111
|
+
- examples/parsers/parsing_tricky.rb
|
109
112
|
- examples/recognizers/recognizer_abc.rb
|
110
113
|
- lib/rley.rb
|
111
114
|
- lib/rley/constants.rb
|
@@ -117,6 +120,7 @@ files:
|
|
117
120
|
- lib/rley/parser/earley_parser.rb
|
118
121
|
- lib/rley/parser/parse_state.rb
|
119
122
|
- lib/rley/parser/parse_state_tracker.rb
|
123
|
+
- lib/rley/parser/parse_tracer.rb
|
120
124
|
- lib/rley/parser/parse_tree_builder.rb
|
121
125
|
- lib/rley/parser/parsing.rb
|
122
126
|
- lib/rley/parser/state_set.rb
|
@@ -142,6 +146,7 @@ files:
|
|
142
146
|
- spec/rley/parser/dotted_item_spec.rb
|
143
147
|
- spec/rley/parser/earley_parser_spec.rb
|
144
148
|
- spec/rley/parser/parse_state_spec.rb
|
149
|
+
- spec/rley/parser/parse_tracer_spec.rb
|
145
150
|
- spec/rley/parser/parse_tree_builder_spec.rb
|
146
151
|
- spec/rley/parser/parsing_spec.rb
|
147
152
|
- spec/rley/parser/state_set_spec.rb
|
@@ -203,6 +208,7 @@ test_files:
|
|
203
208
|
- spec/rley/parser/dotted_item_spec.rb
|
204
209
|
- spec/rley/parser/earley_parser_spec.rb
|
205
210
|
- spec/rley/parser/parse_state_spec.rb
|
211
|
+
- spec/rley/parser/parse_tracer_spec.rb
|
206
212
|
- spec/rley/parser/parse_tree_builder_spec.rb
|
207
213
|
- spec/rley/parser/parsing_spec.rb
|
208
214
|
- spec/rley/parser/state_set_spec.rb
|