dendroid 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -1
- data/CHANGELOG.md +16 -0
- data/dendroid.gemspec +2 -2
- data/lib/dendroid/grm_analysis/alternative_item.rb +1 -2
- data/lib/dendroid/grm_analysis/choice_items.rb +2 -1
- data/lib/dendroid/grm_analysis/dotted_item.rb +2 -0
- data/lib/dendroid/grm_analysis/grm_analyzer.rb +26 -9
- data/lib/dendroid/grm_analysis/production_items.rb +2 -1
- data/lib/dendroid/lexical/token.rb +1 -1
- data/lib/dendroid/lexical/token_position.rb +9 -1
- data/lib/dendroid/recognizer/chart.rb +53 -0
- data/lib/dendroid/recognizer/e_item.rb +48 -0
- data/lib/dendroid/recognizer/item_set.rb +37 -0
- data/lib/dendroid/recognizer/recognizer.rb +282 -0
- data/lib/dendroid/syntax/grm_symbol.rb +1 -1
- data/lib/dendroid/utils/base_tokenizer.rb +54 -15
- data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb +7 -77
- data/spec/dendroid/lexical/token_position_spec.rb +1 -1
- data/spec/dendroid/recognizer/chart_spec.rb +2 -0
- data/spec/dendroid/recognizer/e_item_spec.rb +55 -0
- data/spec/dendroid/recognizer/item_set_spec.rb +63 -0
- data/spec/dendroid/recognizer/recognizer_spec.rb +186 -0
- data/spec/dendroid/support/sample_grammars.rb +76 -0
- data/spec/dendroid/utils/base_tokenizer_spec.rb +4 -4
- data/version.txt +1 -1
- metadata +13 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2564f1269225e08732a9f995b10ebbbbf4710b0a1b0aea73e7fe4b486c34a1aa
|
4
|
+
data.tar.gz: db15f965e9365276ffc576435d514cd6c9170a8727c7fafe1425a9de7ed3e0cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2517fd57cca364571e19ddd183d53fcd4fd642f0cf83ecc58ef0f62e5c7512c343cc3db4f31ee621dad5009386db0161e7c2f67944820dd36cb2a253a4d7af80
|
7
|
+
data.tar.gz: 24b77e7c0c5e97df315102c3434dddd251eacab96efaa3d194006c3874f6d260aeafa076ec5b6bd1bed296fbc675bef00e2fcf49a1e4516c20cc6b3e3b0aefdb
|
data/.rubocop.yml
CHANGED
@@ -20,7 +20,7 @@ Metrics/CyclomaticComplexity:
|
|
20
20
|
|
21
21
|
Metrics/MethodLength:
|
22
22
|
Enabled: true
|
23
|
-
Max:
|
23
|
+
Max: 60
|
24
24
|
|
25
25
|
Metrics/PerceivedComplexity:
|
26
26
|
Enabled: true
|
@@ -32,5 +32,8 @@ Naming/MethodParameterName:
|
|
32
32
|
Naming/VariableName:
|
33
33
|
Enabled: false
|
34
34
|
|
35
|
+
Style/AccessorGrouping:
|
36
|
+
Enabled: false
|
37
|
+
|
35
38
|
Style/CommentedKeyword:
|
36
39
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,22 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [0.0.11] - 2023-11-02
|
6
|
+
Added Earley recognizer and its ancillary classes.
|
7
|
+
|
8
|
+
### Added
|
9
|
+
- Class `Chart` and its spec file
|
10
|
+
- Class `EItem` and its spec file
|
11
|
+
- Class `ItemSet` and its spec file
|
12
|
+
- Class `Recognizer` and its spec file
|
13
|
+
|
14
|
+
### Changed
|
15
|
+
- RSpec tests: moved module `SampleGrammars` to separate file in folder `support`
|
16
|
+
|
17
|
+
## [0.0.10] - 2023-11-01
|
18
|
+
Added missing class and method documentation, fixed some `Rubocop` offenses.
|
19
|
+
|
20
|
+
|
5
21
|
## [0.0.9] - 2023-11-01
|
6
22
|
Added classes for tokenization and grammar analysis.
|
7
23
|
|
data/dendroid.gemspec
CHANGED
@@ -7,8 +7,8 @@ Gem::Specification.new do |s|
|
|
7
7
|
path = ::File.dirname(libpath) + ::File::SEPARATOR
|
8
8
|
::File.read("#{path}version.txt").strip
|
9
9
|
end
|
10
|
-
s.summary = 'WIP. A Ruby implementation of
|
11
|
-
s.description = 'WIP. A Ruby implementation of
|
10
|
+
s.summary = 'WIP. A Ruby implementation of an Earley parser'
|
11
|
+
s.description = 'WIP. A Ruby implementation of an Earley parser'
|
12
12
|
s.authors = ['Dimitri Geshef']
|
13
13
|
s.email = 'famished.tiger@yahoo.com'
|
14
14
|
s.files = Dir['bin/dendroid',
|
@@ -61,11 +61,10 @@ module Dendroid
|
|
61
61
|
private
|
62
62
|
|
63
63
|
def valid_position(aPosition)
|
64
|
-
raise
|
64
|
+
raise StandardError if aPosition.negative? || aPosition > rule.alternatives[alt_index].size
|
65
65
|
|
66
66
|
aPosition
|
67
67
|
end
|
68
68
|
end # class
|
69
69
|
end # module
|
70
70
|
end # module
|
71
|
-
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Dendroid
|
4
|
+
# This module contains classes that from the analysis of grammar rules help to build objects
|
5
|
+
# needed by a recognizer or a parser for the language.
|
4
6
|
module GrmAnalysis
|
5
7
|
# For a given production rule, a dotted item represents a recognition state.
|
6
8
|
# The dot partitions the rhs of the rule in two parts:
|
@@ -5,19 +5,34 @@ require_relative '../grm_analysis/choice_items'
|
|
5
5
|
|
6
6
|
module Dendroid
|
7
7
|
module GrmAnalysis
|
8
|
-
# An analyzer performs an analysis of the rules
|
9
|
-
#
|
8
|
+
# An analyzer performs an analysis of the grammar rules and
|
9
|
+
# build objects (dotted items, first and follow sets) to be used
|
10
|
+
# by a recognizer or a parser.
|
10
11
|
class GrmAnalyzer
|
12
|
+
# @return [Dendroid::Syntax::Grammar] The grammar subjected to analysis
|
11
13
|
attr_reader :grammar
|
12
14
|
attr_reader :items
|
13
15
|
attr_reader :production2items
|
14
16
|
attr_reader :symbol2productions
|
17
|
+
|
18
|
+
# @return [Dendroid::Syntax::Terminal] The pseudo-terminal `__epsilon` (for empty string)
|
15
19
|
attr_reader :epsilon
|
20
|
+
|
21
|
+
# @return [Dendroid::Syntax::Terminal] The pseudo-terminal `$$` for end of input stream
|
16
22
|
attr_reader :endmarker
|
23
|
+
|
24
|
+
# @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to FIRST SETS mapping
|
17
25
|
attr_reader :first_sets
|
26
|
+
|
27
|
+
# @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to PREDICT SETS mapping
|
18
28
|
attr_reader :predict_sets
|
29
|
+
|
30
|
+
# @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to FOLLOW SETS mapping
|
19
31
|
attr_reader :follow_sets
|
20
32
|
|
33
|
+
# Constructor.
|
34
|
+
# Build dotted items, first, follow sets for the given grammar
|
35
|
+
# @param aGrammar [Dendroid::Syntax::Grammar]
|
21
36
|
def initialize(aGrammar)
|
22
37
|
@grammar = aGrammar
|
23
38
|
@items = []
|
@@ -34,6 +49,8 @@ module Dendroid
|
|
34
49
|
build_follow_sets
|
35
50
|
end
|
36
51
|
|
52
|
+
# The next item of a given dotted item
|
53
|
+
# @param aDottedItem [DottedItem]
|
37
54
|
def next_item(aDottedItem)
|
38
55
|
prod = aDottedItem.rule
|
39
56
|
prod.next_item(aDottedItem)
|
@@ -72,7 +89,7 @@ module Dendroid
|
|
72
89
|
else
|
73
90
|
first_head.merge(sequence_first(prod.body.members))
|
74
91
|
end
|
75
|
-
changed = true if
|
92
|
+
changed = true if first_head.size > pre_first_size
|
76
93
|
end
|
77
94
|
end until !changed
|
78
95
|
end
|
@@ -84,7 +101,7 @@ module Dendroid
|
|
84
101
|
elsif symb.nullable?
|
85
102
|
first_sets[symb] = Set.new([epsilon])
|
86
103
|
else
|
87
|
-
first_sets[symb]
|
104
|
+
first_sets[symb] = Set.new
|
88
105
|
end
|
89
106
|
end
|
90
107
|
end
|
@@ -115,7 +132,7 @@ module Dendroid
|
|
115
132
|
|
116
133
|
head = prod.head
|
117
134
|
head_follow = follow_sets[head]
|
118
|
-
trailer = Set.new
|
135
|
+
# trailer = Set.new
|
119
136
|
last = true
|
120
137
|
last_index = body.size - 1
|
121
138
|
last_index.downto(0) do |i|
|
@@ -130,7 +147,7 @@ module Dendroid
|
|
130
147
|
follow_sets[symbol].merge(head_follow) if symbol.nullable?
|
131
148
|
last = false
|
132
149
|
else
|
133
|
-
symbol_seq = body.slice(i+1, last_index - i)
|
150
|
+
symbol_seq = body.slice(i + 1, last_index - i)
|
134
151
|
trailer_first = sequence_first(symbol_seq)
|
135
152
|
contains_epsilon = trailer_first.include? epsilon
|
136
153
|
trailer_first.delete(epsilon) if contains_epsilon
|
@@ -146,7 +163,7 @@ module Dendroid
|
|
146
163
|
|
147
164
|
head = prod.head
|
148
165
|
head_follow = follow_sets[head]
|
149
|
-
trailer = Set.new
|
166
|
+
# trailer = Set.new
|
150
167
|
last = true
|
151
168
|
last_index = body.size - 1
|
152
169
|
last_index.downto(0) do |i|
|
@@ -161,7 +178,7 @@ module Dendroid
|
|
161
178
|
follow_sets[symbol].merge(head_follow) if symbol.nullable?
|
162
179
|
last = false
|
163
180
|
else
|
164
|
-
symbol_seq = body.slice(i+1, last_index - i)
|
181
|
+
symbol_seq = body.slice(i + 1, last_index - i)
|
165
182
|
trailer_first = sequence_first(symbol_seq)
|
166
183
|
contains_epsilon = trailer_first.include? epsilon
|
167
184
|
trailer_first.delete(epsilon) if contains_epsilon
|
@@ -179,7 +196,7 @@ module Dendroid
|
|
179
196
|
grammar.symbols.each do |symb|
|
180
197
|
next if symb.terminal?
|
181
198
|
|
182
|
-
follow_sets[symb]
|
199
|
+
follow_sets[symb] = Set.new
|
183
200
|
end
|
184
201
|
|
185
202
|
# Initialize FOLLOW(start symbol) with end marker
|
@@ -14,7 +14,7 @@ module Dendroid
|
|
14
14
|
@items = if empty?
|
15
15
|
[DottedItem.new(self, 0)]
|
16
16
|
else
|
17
|
-
(0..body.size).reduce([]) do |
|
17
|
+
(0..body.size).reduce([]) do |result, pos|
|
18
18
|
result << GrmAnalysis::DottedItem.new(self, pos)
|
19
19
|
end
|
20
20
|
end
|
@@ -47,6 +47,7 @@ module Dendroid
|
|
47
47
|
# @return [GrmAnalysis::DottedItem|NilClass]
|
48
48
|
def next_item(anItem)
|
49
49
|
return nil if anItem == @items.last
|
50
|
+
|
50
51
|
@items[anItem.position + 1]
|
51
52
|
end
|
52
53
|
end # module
|
@@ -2,19 +2,27 @@
|
|
2
2
|
|
3
3
|
module Dendroid
|
4
4
|
module Lexical
|
5
|
+
# Keeps track of the position of a token in the input stream.
|
5
6
|
class TokenPosition
|
7
|
+
# @return [Integer] The line number where the token begins
|
6
8
|
attr_reader :lineno
|
9
|
+
|
10
|
+
# @return [Integer] The column number where the token begins
|
7
11
|
attr_reader :column
|
8
12
|
|
13
|
+
# Constructor
|
14
|
+
# @param line [Integer] The line number where the token begins
|
15
|
+
# @param col [Integer] The column number where the token begins
|
9
16
|
def initialize(line, col)
|
10
17
|
@lineno = line
|
11
18
|
@column = col
|
12
19
|
end
|
13
20
|
|
21
|
+
# Return the position of the start of the token in line:col format
|
22
|
+
# @return [String]
|
14
23
|
def to_s
|
15
24
|
"#{lineno}:#{column}"
|
16
25
|
end
|
17
26
|
end # class
|
18
27
|
end # module
|
19
28
|
end # module
|
20
|
-
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'item_set'
|
4
|
+
|
5
|
+
module Dendroid
|
6
|
+
module Recognizer
|
7
|
+
# Also called a parse table.
|
8
|
+
# Assuming that n == number of input tokens,
|
9
|
+
# then the chart is an array with n + 1 entry sets.
|
10
|
+
class Chart
|
11
|
+
extend Forwardable
|
12
|
+
|
13
|
+
# @return [Array<Recognizer::ItemSet>] The array of item sets
|
14
|
+
attr_reader :item_sets
|
15
|
+
|
16
|
+
# @return [Boolean] Indicates whether the recognizer successfully processed the whole input
|
17
|
+
attr_writer :success
|
18
|
+
|
19
|
+
# @return [StandardError] The exception class in case of an error found by the recognizer
|
20
|
+
attr_accessor :failure_class
|
21
|
+
|
22
|
+
# @return [String] The error message
|
23
|
+
attr_accessor :failure_reason
|
24
|
+
|
25
|
+
def_delegators :@item_sets, :[], :last, :size
|
26
|
+
|
27
|
+
# Constructor
|
28
|
+
# Initialize the chart with one empty item set.
|
29
|
+
def initialize
|
30
|
+
@item_sets = []
|
31
|
+
@success = false
|
32
|
+
append_new_set
|
33
|
+
end
|
34
|
+
|
35
|
+
# Add a new empty item set at the end of the array of item sets
|
36
|
+
def append_new_set()
|
37
|
+
item_sets << ItemSet.new
|
38
|
+
end
|
39
|
+
|
40
|
+
# Add an EItem to the last item set
|
41
|
+
# @param e_item [EItem]
|
42
|
+
def seed_last_set(e_item)
|
43
|
+
item_sets.last.add_item(e_item)
|
44
|
+
end
|
45
|
+
|
46
|
+
# Return true if the input text is valid according to the grammar.
|
47
|
+
# @return [Boolean]
|
48
|
+
def successful?
|
49
|
+
@success
|
50
|
+
end
|
51
|
+
end # class
|
52
|
+
end # module
|
53
|
+
end # module
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Dendroid
|
6
|
+
module Recognizer
|
7
|
+
# An Earley item is essentially a pair consisting of a dotted item and the rank of a token.
|
8
|
+
# It helps to keep track the progress of an Earley recognizer.
|
9
|
+
class EItem
|
10
|
+
extend Forwardable
|
11
|
+
|
12
|
+
# @return [Dendroid::GrmAnalysis::DottedItem]
|
13
|
+
attr_reader :dotted_item
|
14
|
+
|
15
|
+
# @return [Integer] the rank of the token that correspond to the start of the rule.
|
16
|
+
attr_reader :origin
|
17
|
+
|
18
|
+
def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?
|
19
|
+
|
20
|
+
# @param aDottedItem [Dendroid::GrmAnalysis::DottedItem]
|
21
|
+
# @param origin [Integer]
|
22
|
+
def initialize(aDottedItem, origin)
|
23
|
+
@dotted_item = aDottedItem
|
24
|
+
@origin = origin
|
25
|
+
end
|
26
|
+
|
27
|
+
# @return [Dendroid::Syntax::NonTerminal] the head of the production rule
|
28
|
+
def lhs
|
29
|
+
dotted_item.rule.lhs
|
30
|
+
end
|
31
|
+
|
32
|
+
# Equality test.
|
33
|
+
# @return [Boolean] true iff dotted items and origins are equal
|
34
|
+
def ==(other)
|
35
|
+
return true if eql?(other)
|
36
|
+
|
37
|
+
di = dotted_item
|
38
|
+
(origin == other.origin) && (di == other.dotted_item)
|
39
|
+
end
|
40
|
+
|
41
|
+
# @return [String] the text representation of the Earley item
|
42
|
+
def to_s
|
43
|
+
"#{dotted_item} @ #{origin}"
|
44
|
+
end
|
45
|
+
end # class
|
46
|
+
end # module
|
47
|
+
end # module
|
48
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Dendroid
|
4
|
+
module Recognizer
|
5
|
+
# Holds the EItem identified by the recognizer when processing at token at given rank.
|
6
|
+
class ItemSet
|
7
|
+
extend Forwardable
|
8
|
+
|
9
|
+
# @return [Recognizer::EItem]
|
10
|
+
attr_reader :items
|
11
|
+
def_delegators :@items, :clear, :each, :empty?, :select, :size
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
@items = []
|
15
|
+
end
|
16
|
+
|
17
|
+
# Add an Early item to the set
|
18
|
+
# @param anItem [Recognizer::EItem]
|
19
|
+
def add_item(anItem)
|
20
|
+
@items << anItem unless items.include? anItem
|
21
|
+
end
|
22
|
+
|
23
|
+
# Find the items that expect a given grammar symbol
|
24
|
+
# @param aSymbol [Denroid::Syntax::GrmSymbol]
|
25
|
+
# @return [void]
|
26
|
+
def items_expecting(aSymbol)
|
27
|
+
items.select { |itm| itm.expecting?(aSymbol) }
|
28
|
+
end
|
29
|
+
|
30
|
+
# Return a text representation of the item set
|
31
|
+
# @return [String]
|
32
|
+
def to_s
|
33
|
+
items.join("\n")
|
34
|
+
end
|
35
|
+
end # class
|
36
|
+
end # module
|
37
|
+
end # module
|
@@ -0,0 +1,282 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../grm_analysis/grm_analyzer'
|
4
|
+
require_relative 'e_item'
|
5
|
+
require_relative 'chart'
|
6
|
+
|
7
|
+
module Dendroid
|
8
|
+
# This module host classes needed to implement an Earley recognizer
|
9
|
+
module Recognizer
|
10
|
+
# A recognizer determines whether the input text complies to the grammar (syntax) rules.
|
11
|
+
# This class implements the Earley recognition algorithm.
|
12
|
+
class Recognizer
|
13
|
+
# @return [GrmAnalysis::GrmAnalyzer]
|
14
|
+
attr_reader :grm_analysis
|
15
|
+
|
16
|
+
# @return [Object]
|
17
|
+
attr_reader :tokenizer
|
18
|
+
|
19
|
+
def initialize(grammar, tokenizer)
|
20
|
+
@grm_analysis = GrmAnalysis::GrmAnalyzer.new(grammar)
|
21
|
+
@tokenizer = tokenizer
|
22
|
+
end
|
23
|
+
|
24
|
+
def run(source)
|
25
|
+
tokenizer.input = source
|
26
|
+
tok = tokenizer.next_token
|
27
|
+
if tok.nil? && !grm_analysis.grammar.start_symbol.nullable?
|
28
|
+
chart = new_chart
|
29
|
+
chart.failure_class = StandardError
|
30
|
+
chart.failure_reason = 'Error: Input may not be empty nor blank.'
|
31
|
+
chart
|
32
|
+
else
|
33
|
+
earley_parse(tok)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def earley_parse(initial_token)
|
38
|
+
chart = new_chart
|
39
|
+
tokens = [initial_token]
|
40
|
+
predicted_symbols = [Set.new]
|
41
|
+
eos_reached = initial_token.nil?
|
42
|
+
rank = 0
|
43
|
+
|
44
|
+
loop do
|
45
|
+
eos_reached = advance_next_token(tokens, predicted_symbols) unless eos_reached
|
46
|
+
|
47
|
+
advance = false
|
48
|
+
curr_rank = rank
|
49
|
+
curr_set = chart[curr_rank]
|
50
|
+
curr_set.each do |entry|
|
51
|
+
# For each entry, do either completer, scanner or predictor action
|
52
|
+
tick = do_entry_action(chart, entry, curr_rank, tokens, :genuine, predicted_symbols)
|
53
|
+
advance ||= tick
|
54
|
+
end
|
55
|
+
|
56
|
+
rank += 1 if advance
|
57
|
+
break if eos_reached && !advance
|
58
|
+
break if ! advance
|
59
|
+
end
|
60
|
+
|
61
|
+
determine_outcome(chart, tokens)
|
62
|
+
chart
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
def new_chart
|
68
|
+
top_symbol = grm_analysis.grammar.start_symbol
|
69
|
+
|
70
|
+
# Reminder: there might be multiple rules for the start symbol
|
71
|
+
prods = grm_analysis.grammar.nonterm2productions[top_symbol]
|
72
|
+
chart = Chart.new
|
73
|
+
prods.each do |prd|
|
74
|
+
seed_items = prd.predicted_items
|
75
|
+
seed_items.each { |item| chart.seed_last_set(EItem.new(item, 0)) }
|
76
|
+
end
|
77
|
+
|
78
|
+
chart
|
79
|
+
end
|
80
|
+
|
81
|
+
def advance_next_token(tokens, predicted_symbols)
|
82
|
+
eos_reached = false
|
83
|
+
tok = tokenizer.next_token
|
84
|
+
if tok
|
85
|
+
tokens << tok
|
86
|
+
else
|
87
|
+
eos_reached = true
|
88
|
+
end
|
89
|
+
|
90
|
+
predicted_symbols << Set.new unless eos_reached
|
91
|
+
eos_reached
|
92
|
+
end
|
93
|
+
|
94
|
+
def do_entry_action(chart, entry, rank, tokens, mode, predicted_symbols)
|
95
|
+
advance = false
|
96
|
+
|
97
|
+
if entry.completed?
|
98
|
+
completer(chart, entry, rank, tokens, mode)
|
99
|
+
else
|
100
|
+
if entry.next_symbol.terminal?
|
101
|
+
advance = scanner(chart, entry, rank, tokens)
|
102
|
+
else
|
103
|
+
predictor(chart, entry, rank, tokens, mode, predicted_symbols)
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
advance
|
108
|
+
end
|
109
|
+
=begin
|
110
|
+
procedure PREDICTOR((A → α•Bβ, j), k)
|
111
|
+
for each (B → γ) in GRAMMAR_RULES_FOR(B) do
|
112
|
+
ADD_TO_SET((B → •γ, k), S[k])
|
113
|
+
end
|
114
|
+
Assuming next symbol is a non-terminal
|
115
|
+
|
116
|
+
Error case: next actual token matches none of the expected tokens.
|
117
|
+
=end
|
118
|
+
def predictor(chart, item, rank, tokens, mode, predicted_symbols)
|
119
|
+
next_symbol = item.next_symbol
|
120
|
+
if mode == :genuine
|
121
|
+
predicted_symbols << Set.new if rank == predicted_symbols.size
|
122
|
+
predicted = predicted_symbols[rank]
|
123
|
+
return if predicted.include?(next_symbol)
|
124
|
+
|
125
|
+
predicted.add(next_symbol)
|
126
|
+
end
|
127
|
+
|
128
|
+
prods = grm_analysis.symbol2productions[next_symbol]
|
129
|
+
curr_set = chart[rank]
|
130
|
+
next_token = tokens[rank]
|
131
|
+
prods.each do |prd|
|
132
|
+
entry_items = prd.predicted_items
|
133
|
+
entry_items.each do |entry|
|
134
|
+
member = entry.next_symbol
|
135
|
+
if member&.terminal?
|
136
|
+
next unless next_token
|
137
|
+
next if (member.name != next_token.terminal) && mode == :genuine
|
138
|
+
end
|
139
|
+
|
140
|
+
new_item = EItem.new(entry, rank)
|
141
|
+
curr_set.add_item(new_item)
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
# Use trick from paper John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
|
146
|
+
if next_symbol.nullable?
|
147
|
+
next_item = grm_analysis.next_item(item.dotted_item)
|
148
|
+
if next_item
|
149
|
+
new_item = EItem.new(next_item, item.origin)
|
150
|
+
curr_set.add_item(new_item)
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
=begin
|
156
|
+
procedure SCANNER((A → α•aβ, j), k, words)
|
157
|
+
if j < LENGTH(words) and a ⊂ PARTS_OF_SPEECH(words[k]) then
|
158
|
+
ADD_TO_SET((A → αa•β, j), S[k+1])
|
159
|
+
end
|
160
|
+
Assuming next symbol is a terminal
|
161
|
+
=end
|
162
|
+
def scanner(chart, scan_item, rank, tokens)
|
163
|
+
advance = false
|
164
|
+
dit = scan_item.dotted_item
|
165
|
+
if rank < tokens.size && dit.next_symbol.name == tokens[rank].terminal
|
166
|
+
new_rank = rank + 1
|
167
|
+
chart.append_new_set if chart[new_rank].nil?
|
168
|
+
next_dotted_item = grm_analysis.next_item(dit)
|
169
|
+
new_item = EItem.new(next_dotted_item, scan_item.origin)
|
170
|
+
chart[new_rank].add_item(new_item)
|
171
|
+
advance = true
|
172
|
+
end
|
173
|
+
|
174
|
+
advance
|
175
|
+
end
|
176
|
+
|
177
|
+
=begin
|
178
|
+
procedure COMPLETER((B → γ•, x), k)
|
179
|
+
for each (A → α•Bβ, j) in S[x] do
|
180
|
+
ADD_TO_SET((A → αB•β, j), S[k])
|
181
|
+
end
|
182
|
+
=end
|
183
|
+
def completer(chart, item, rank, tokens, mode)
|
184
|
+
origin = item.origin
|
185
|
+
|
186
|
+
curr_set = chart[rank]
|
187
|
+
set_at_origin = chart[origin]
|
188
|
+
next_token = tokens[rank]
|
189
|
+
callers = set_at_origin.items_expecting(item.lhs)
|
190
|
+
callers.each do |call_item|
|
191
|
+
return_item = grm_analysis.next_item(call_item.dotted_item)
|
192
|
+
next unless return_item
|
193
|
+
member = return_item.next_symbol
|
194
|
+
if member&.terminal? && (mode == :genuine)
|
195
|
+
next unless next_token
|
196
|
+
next if member.name != next_token.terminal
|
197
|
+
end
|
198
|
+
|
199
|
+
new_item = EItem.new(return_item, call_item.origin)
|
200
|
+
curr_set.add_item(new_item)
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
def seed_set(chart, rank)
|
205
|
+
curr_set = chart[rank]
|
206
|
+
previous_set = chart[rank - 1]
|
207
|
+
curr_set.clear
|
208
|
+
scan_entries = previous_set.select { |ent| ent.dotted_item.next_symbol&.terminal? }
|
209
|
+
scan_entries.map do |ent|
|
210
|
+
new_item = grm_analysis.next_item(ent.dotted_item)
|
211
|
+
curr_set.add_item(EItem.new(new_item, ent.origin))
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
def determine_outcome(chart, tokens)
|
216
|
+
success = false
|
217
|
+
if chart.size == tokens.size + 1
|
218
|
+
top_symbol = grm_analysis.grammar.start_symbol
|
219
|
+
top_rules = grm_analysis.grammar.nonterm2productions[top_symbol]
|
220
|
+
final_items = top_rules.reduce([]) do |items, rule|
|
221
|
+
items.concat(rule.reduce_items)
|
222
|
+
end
|
223
|
+
last_set = chart.item_sets.last
|
224
|
+
last_set.each do |entry|
|
225
|
+
next if ((!entry.origin.zero?) || ! final_items.include?(entry.dotted_item))
|
226
|
+
success = true
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
if !success
|
231
|
+
# Error detected...
|
232
|
+
replay_last_set(chart, tokens)
|
233
|
+
if chart.size < tokens.size + 1
|
234
|
+
# Recognizer stopped prematurely...
|
235
|
+
offending_token = tokens[chart.size - 1]
|
236
|
+
pos = offending_token.position
|
237
|
+
(line, col) = [pos.lineno, pos.column]
|
238
|
+
last_set = chart.last
|
239
|
+
terminals = last_set.items.reduce([]) do |result, ent|
|
240
|
+
result << ent.next_symbol if ent.pre_scan?
|
241
|
+
result
|
242
|
+
end
|
243
|
+
terminals.uniq!
|
244
|
+
prefix = "Syntax error at or near token line #{line}, column #{col} >>>#{offending_token.source}<<<"
|
245
|
+
expectation = terminals.size == 1 ? "#{terminals[0].name}" : "one of: [#{terminals.map(&:name).join(', ')}]"
|
246
|
+
err_msg = "#{prefix} Expected #{expectation}, found a #{offending_token.terminal} instead."
|
247
|
+
chart.failure_class = StandardError
|
248
|
+
chart.failure_reason = err_msg
|
249
|
+
elsif chart.size == tokens.size + 1
|
250
|
+
# EOS unexpected...
|
251
|
+
last_token = tokens.last
|
252
|
+
pos = last_token.position
|
253
|
+
(line, col) = [pos.lineno, pos.column]
|
254
|
+
last_set = chart.last
|
255
|
+
terminals = last_set.items.reduce([]) do |result, ent|
|
256
|
+
result << ent.next_symbol if ent.pre_scan?
|
257
|
+
result
|
258
|
+
end
|
259
|
+
terminals.uniq!
|
260
|
+
|
261
|
+
prefix = "Line #{line}, column #{col}: Premature end of input after '#{last_token.source}'"
|
262
|
+
expectation = terminals.size == 1 ? "#{terminals[0].name}" : "one of: [#{terminals.map(&:name).join(', ')}]"
|
263
|
+
err_msg = "#{prefix}, expected: #{expectation}."
|
264
|
+
chart.failure_class = StandardError
|
265
|
+
chart.failure_reason = err_msg
|
266
|
+
end
|
267
|
+
end
|
268
|
+
chart.success = success
|
269
|
+
end
|
270
|
+
|
271
|
+
def replay_last_set(chart, tokens)
|
272
|
+
rank = chart.size - 1
|
273
|
+
seed_set(chart, rank) # Re-initialize last set with scan entries
|
274
|
+
|
275
|
+
# Replay in full the actions for last set
|
276
|
+
chart[rank].each do |entry|
|
277
|
+
do_entry_action(chart, entry, rank, tokens, :error, [Set.new])
|
278
|
+
end
|
279
|
+
end
|
280
|
+
end # class
|
281
|
+
end # module
|
282
|
+
end # module
|