dendroid 0.0.12 → 0.2.00
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/dendroid/formatters/ascii_tree.rb +142 -0
- data/lib/dendroid/formatters/base_formatter.rb +25 -0
- data/lib/dendroid/formatters/bracket_notation.rb +50 -0
- data/lib/dendroid/grm_analysis/dotted_item.rb +46 -30
- data/lib/dendroid/grm_analysis/grm_analyzer.rb +24 -61
- data/lib/dendroid/grm_analysis/{choice_items.rb → rule_items.rb} +10 -10
- data/lib/dendroid/grm_dsl/base_grm_builder.rb +3 -4
- data/lib/dendroid/parsing/and_node.rb +56 -0
- data/lib/dendroid/parsing/chart_walker.rb +293 -0
- data/lib/dendroid/parsing/composite_parse_node.rb +21 -0
- data/lib/dendroid/parsing/empty_rule_node.rb +28 -0
- data/lib/dendroid/parsing/or_node.rb +51 -0
- data/lib/dendroid/parsing/parse_node.rb +26 -0
- data/lib/dendroid/parsing/parse_tree_visitor.rb +127 -0
- data/lib/dendroid/parsing/parser.rb +185 -0
- data/lib/dendroid/parsing/terminal_node.rb +32 -0
- data/lib/dendroid/parsing/walk_progress.rb +117 -0
- data/lib/dendroid/recognizer/chart.rb +18 -2
- data/lib/dendroid/recognizer/e_item.rb +21 -2
- data/lib/dendroid/recognizer/item_set.rb +7 -2
- data/lib/dendroid/recognizer/recognizer.rb +69 -69
- data/lib/dendroid/syntax/grammar.rb +72 -60
- data/lib/dendroid/syntax/rule.rb +71 -13
- data/spec/dendroid/grm_analysis/dotted_item_spec.rb +59 -47
- data/spec/dendroid/grm_analysis/{choice_items_spec.rb → rule_items_spec.rb} +5 -6
- data/spec/dendroid/parsing/chart_walker_spec.rb +223 -0
- data/spec/dendroid/parsing/terminal_node_spec.rb +36 -0
- data/spec/dendroid/recognizer/e_item_spec.rb +5 -5
- data/spec/dendroid/recognizer/item_set_spec.rb +16 -8
- data/spec/dendroid/recognizer/recognizer_spec.rb +57 -5
- data/spec/dendroid/support/sample_grammars.rb +2 -0
- data/spec/dendroid/syntax/grammar_spec.rb +44 -34
- data/spec/dendroid/syntax/rule_spec.rb +56 -7
- data/version.txt +1 -1
- metadata +20 -13
- data/lib/dendroid/grm_analysis/alternative_item.rb +0 -70
- data/lib/dendroid/grm_analysis/production_items.rb +0 -55
- data/lib/dendroid/syntax/choice.rb +0 -95
- data/lib/dendroid/syntax/production.rb +0 -82
- data/spec/dendroid/grm_analysis/alternative_item_spec.rb +0 -12
- data/spec/dendroid/grm_analysis/production_items_spec.rb +0 -68
- data/spec/dendroid/syntax/choice_spec.rb +0 -68
- data/spec/dendroid/syntax/production_spec.rb +0 -92
@@ -0,0 +1,185 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'weakref'
|
4
|
+
require_relative '../grm_dsl/base_grm_builder'
|
5
|
+
require_relative '../utils/base_tokenizer'
|
6
|
+
require_relative '../recognizer/recognizer'
|
7
|
+
require_relative 'chart_walker'
|
8
|
+
require_relative 'parse_tree_visitor'
|
9
|
+
require_relative '../formatters/bracket_notation'
|
10
|
+
require_relative '../formatters/ascii_tree'
|
11
|
+
|
12
|
+
module Dendroid
  # Namespace for the classes involved in parsing.
  module Parsing
    # Parser class; its body is currently empty (no methods defined yet).
    class Parser
    end
  end
end
|
18
|
+
|
19
|
+
# Build sample grammar L1: arithmetic expressions with PLUS and STAR operators.
# Grammar inspired from Wikipedia entry on Earley parsing.
# @return [Object] whatever the builder's `grammar` method returns
def grammar_l1
  Dendroid::GrmDSL::BaseGrmBuilder.new {
    declare_terminals('PLUS', 'STAR', 'INTEGER')

    rule('p' => 's')
    rule('s' => ['s PLUS m', 'm'])
    rule('m' => ['m STAR t', 't'])
    rule('t' => 'INTEGER')
  }.grammar
end
|
31
|
+
|
32
|
+
# Build sample grammar L31: an ambiguous arithmetical expression language.
# This language is compatible with tokenizer L1.
# @return [Object] whatever the builder's `grammar` method returns
def grammar_l31
  Dendroid::GrmDSL::BaseGrmBuilder.new {
    declare_terminals('PLUS', 'STAR', 'INTEGER')

    rule('p' => 's')
    rule('s' => ['s PLUS s', 's STAR s', 'INTEGER'])
  }.grammar
end
|
43
|
+
|
44
|
+
# Build the tokenizer for language L1: '+' and '*' are scanned verbatim,
# runs of digits are scanned as INTEGER tokens converted with `to_i`.
# @return [Dendroid::Utils::BaseTokenizer]
def tokenizer_l1
  Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })

    scan_verbatim(['+', '*'])

    to_integer = lambda { |txt| txt.to_i }
    scan_value(/\d+/, :INTEGER, to_integer)
  end
end
|
52
|
+
|
53
|
+
# Search the last item set of the chart, from its last item backwards, for an
# entry that starts at origin 0, is completed and whose rule has the grammar's
# start symbol as left-hand side.
# @param chart [#item_sets] the chart produced by the recognizer
# @param grammar [#start_symbol] the grammar used by the recognizer
# @return [Object, nil] the first matching item (scanning backwards), or nil
def retrieve_success_item(chart, grammar)
  chart.item_sets.last.items.reverse_each.find do |candidate|
    next false unless candidate.origin.zero?

    candidate.dotted_item.completed? && candidate.dotted_item.rule.lhs == grammar.start_symbol
  end
end
|
65
|
+
|
66
|
+
# Build sample grammar L10: a grammar with a left recursive rule.
# @return [Object] whatever the builder's `grammar` method returns
def grammar_l10
  Dendroid::GrmDSL::BaseGrmBuilder.new {
    declare_terminals('a')

    rule('A' => ['A a', ''])
  }.grammar
end
|
76
|
+
|
77
|
+
# Build the tokenizer for language L10: the text 'a' is scanned verbatim
# and mapped to terminal :a.
# @return [Dendroid::Utils::BaseTokenizer]
def tokenizer_l10
  Dendroid::Utils::BaseTokenizer.new {
    map_verbatim2terminal({ 'a' => :a })
    scan_verbatim(['a'])
  }
end
|
84
|
+
|
85
|
+
# Build sample grammar L11: a grammar with a right-recursive rule.
# @return [Object] whatever the builder's `grammar` method returns
def grammar_l11
  Dendroid::GrmDSL::BaseGrmBuilder.new {
    declare_terminals('a')

    rule('A' => ['a A', ''])
  }.grammar
end
|
95
|
+
|
96
|
+
# Build the tokenizer for language L11: the text 'a' is scanned verbatim
# and mapped to terminal :a.
# @return [Dendroid::Utils::BaseTokenizer]
def tokenizer_l11
  Dendroid::Utils::BaseTokenizer.new {
    map_verbatim2terminal({ 'a' => :a })
    scan_verbatim(['a'])
  }
end
|
103
|
+
|
104
|
+
# Build sample grammar L8.
# (based on grammar G2 from paper Masaru Tomita "An Efficient Context-Free Parsing Algorithm
# for Natural Languages")
# @return [Object] whatever the builder's `grammar` method returns
def grammar_l8
  Dendroid::GrmDSL::BaseGrmBuilder.new {
    declare_terminals('x')

    rule('S' => ['S S', 'x'])
  }.grammar
end
|
115
|
+
|
116
|
+
# Build the tokenizer for language L8: the text 'x' is scanned verbatim
# and mapped to terminal :x.
# @return [Dendroid::Utils::BaseTokenizer]
def tokenizer_l8
  Dendroid::Utils::BaseTokenizer.new {
    map_verbatim2terminal({ 'x' => :x })
    scan_verbatim(['x'])
  }
end
|
123
|
+
|
124
|
+
########################################
|
125
|
+
# Entry point
|
126
|
+
########################################
|
127
|
+
# Selects which demo to run (see the table below).
kode = 3

# Demo table: kode => [grammar factory, tokenizer factory, input text].
# Factories are referenced by name so that only the selected grammar and
# tokenizer get built.
demos = {
  0 => [:grammar_l1, :tokenizer_l1, '2 + 3 * 4'],
  1 => [:grammar_l10, :tokenizer_l10, 'a a a a a'],
  2 => [:grammar_l11, :tokenizer_l11, 'a a a a a'],
  3 => [:grammar_l31, :tokenizer_l1, '2 + 3 * 4'],
  4 => [:grammar_l8, :tokenizer_l8, 'x x x x']
}

grammar_factory, tokenizer_factory, source_text = demos[kode]

# An unknown kode runs nothing.
if grammar_factory
  # Recognize the input, then walk the resulting chart from the success item
  # to obtain the root of the parse tree/forest.
  recognizer = Dendroid::Recognizer::Recognizer.new(send(grammar_factory), send(tokenizer_factory))
  chart = recognizer.run(source_text)
  succ_item = retrieve_success_item(chart, recognizer.grm_analysis.grammar)
  walker = Dendroid::Parsing::ChartWalker.new(chart)
  root = walker.walk(succ_item)

  # Render the result on standard output.
  # formatter = BracketNotation.new($stdout)
  # NOTE(review): Asciitree and ParseTreeVisitor are referenced without a
  # namespace at top level; confirm they resolve from here.
  formatter = Asciitree.new($stdout)
  visitor = ParseTreeVisitor.new(root)
  formatter.render(visitor)
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'parse_node'
|
4
|
+
|
5
|
+
module Dendroid
  module Parsing
    # Parse node that pairs a terminal symbol with the input token matching it.
    class TerminalNode < ParseNode
      # @return [Dendroid::Syntax::Terminal] Terminal symbol of matching token.
      attr_reader :symbol

      # @return [Dendroid::Lexical::Token] Matching input token object.
      attr_reader :token

      # @param sym [Dendroid::Syntax::Terminal] the terminal symbol
      # @param tok [Dendroid::Lexical::Token] the matching token
      # @param rank [Integer] passed to the superclass together with rank + 1
      def initialize(sym, tok, rank)
        super(rank, rank + 1)
        @symbol = sym
        @token = tok
      end

      # Part of the 'visitee' role in Visitor design pattern.
      # @param aVisitor [ParseTreeVisitor] the visitor
      def accept(aVisitor)
        aVisitor.visit_terminal(self)
      end

      # Text representation: the symbol name, followed by the token value when
      # the token is a literal, followed by the superclass text.
      # @return [String]
      def to_s
        value_part = ''
        value_part = ": #{token.value}" if token.is_a?(Dendroid::Lexical::Literal)

        "#{symbol.name}#{value_part} #{super}"
      end
    end
  end
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'and_node'
|
4
|
+
require_relative 'or_node'
|
5
|
+
require_relative 'terminal_node'
|
6
|
+
require_relative 'empty_rule_node'
|
7
|
+
|
8
|
+
module Dendroid
  module Parsing
    # Bookkeeping for one walk: the current state, the current rank, the
    # current item, an optional predecessor and the stack of parent nodes.
    class WalkProgress
      # @return [Symbol] current state (:New initially, :Forking after #fork)
      attr_accessor :state

      # @return [Integer] current rank
      attr_accessor :curr_rank

      # @return [Object] current item (never nil when set through #curr_item=)
      attr_reader :curr_item

      # @return [Object, nil] predecessor recorded by #fork
      attr_accessor :predecessor

      # @return [Array] stack of parent nodes; the last one receives new children
      attr_reader :parents

      # @param start_rank [Integer] initial rank
      # @param start_item [Object] initial item
      # @param parents [Array] initial stack of parent nodes
      def initialize(start_rank, start_item, parents)
        @state = :New
        @curr_rank = start_rank
        @curr_item = start_item
        @predecessor = nil
        @parents = parents
      end

      # Hook invoked by `dup`/`clone`: the copy keeps state, rank and item of
      # the original, gets its own (shallow) copy of the parents stack and
      # starts without predecessor.
      # @param orig [WalkProgress] the object being copied
      def initialize_copy(orig)
        @state = orig.state
        @curr_rank = orig.curr_rank
        @curr_item = orig.curr_item
        @predecessor = nil
        @parents = orig.parents.dup
      end

      # Put this progress in :Forking state and remember the given predecessor.
      # @param thePredecessor [Object]
      def fork(thePredecessor)
        @state = :Forking
        @predecessor = thePredecessor
      end

      # Set the current item.
      # @param anEntry [Object] the new current item
      # @raise [StandardError] when anEntry is nil
      def curr_item=(anEntry)
        raise StandardError, 'Current item may not be nil' if anEntry.nil?

        @curr_item = anEntry
      end

      # Add an empty rule node for given entry as a child of last parent
      # @param anEntry [Object]
      # @return [EmptyRuleNode] the added node
      def add_node_empty(anEntry)
        node_empty = EmptyRuleNode.new(anEntry, curr_rank)
        add_child_node(node_empty)
      end

      # Decrement the current rank, then add a terminal node for the previous
      # symbol of the current item as a child of last parent
      # @param token [Object] the matching token
      # @return [TerminalNode] the added node
      def add_terminal_node(token)
        @curr_rank -= 1
        term_node = TerminalNode.new(curr_item.prev_symbol, token, curr_rank)
        add_child_node(term_node)
      end

      # Add an AND node for given entry as a child of last parent, then make
      # it the new last parent.
      # @param anEntry [Object]
      # @return [ANDNode] the added node
      # @raise [StandardError] when the node's rule differs from the entry's rule
      def push_and_node(anEntry)
        node = ANDNode.new(anEntry, curr_rank)
        # Consistency check (the original code marked this line with '# Fails').
        raise StandardError, 'Rule mismatch between entry and AND node' unless anEntry.rule == node.rule

        add_child_node(node)
        parents.push(node)

        node
      end

      # Add an OR node for the previous symbol of the current item as a child
      # of last parent, then make it the new last parent.
      # @param origin [Integer]
      # @param arity [Integer]
      # @return [OrNode] the added node
      def push_or_node(origin, arity)
        node = OrNode.new(curr_item.prev_symbol, origin, curr_rank, arity)
        add_child_node(node)
        parents.push(node)

        node
      end

      # Add the given node as a child of the last parent, at the index given by
      # the position of the current item minus one.
      # @param aNode [Object]
      # @return [Object] the given node
      def add_child_node(aNode)
        parents.last.add_child(aNode, curr_item.position - 1)
        aNode
      end

      # Do the given EItems match one of the parent?
      # Matching is delegated to the `match` method of each non-OR parent.
      # Parents are visited from last to first; OR nodes are skipped but still
      # counted in the offset.
      # @param entries [Array<EItem>] candidate entries
      # @param stop_at_first [Boolean] when true, stop at the first match found
      # @return [Array<Array>] couples of the form [matching entry, offset of
      #   the parent counted from the top of the stack]; empty when no match
      #   or when entries is empty
      def match_parent?(entries, stop_at_first)
        matching = []
        return matching if entries.empty?

        min_origin = entries.map(&:origin).min
        offset = 0

        parents.reverse_each do |node|
          unless node.is_a?(OrNode)
            entries.each do |ent|
              next unless node.match(ent)

              matching << [ent, offset]
              break if stop_at_first
            end
            break if stop_at_first && !matching.empty?

            # Stop loop when parent.origin < min(entries.origin)
            break if node.range[0] < min_origin
          end
          offset += 1
        end

        matching
      end
    end
  end
end
|
@@ -15,14 +15,17 @@ module Dendroid
|
|
15
15
|
# @return [Array<Recognizer::ItemSet>] The array of item sets
|
16
16
|
attr_reader :item_sets
|
17
17
|
|
18
|
+
# @return [Array<Dendroid::Lexical::Token>] The input tokens
|
19
|
+
attr_reader :tokens
|
20
|
+
|
18
21
|
# @return [Boolean] Indicates whether the recognizer successfully processed the whole input
|
19
22
|
attr_writer :success
|
20
23
|
|
21
24
|
# @return [StandardError] The exception class in case of an error found by the recognizer
|
22
|
-
|
25
|
+
attr_reader :failure_class
|
23
26
|
|
24
27
|
# @return [String] The error message
|
25
|
-
|
28
|
+
attr_reader :failure_reason
|
26
29
|
|
27
30
|
def_delegators :@item_sets, :[], :last, :size
|
28
31
|
|
@@ -45,11 +48,24 @@ module Dendroid
|
|
45
48
|
item_sets.last.add_item(e_item)
|
46
49
|
end
|
47
50
|
|
51
|
+
# @param input_tokens [Array<Dendroid::Lexical::Token>] The input tokens
|
52
|
+
def tokens=(input_tokens)
|
53
|
+
@tokens = input_tokens
|
54
|
+
end
|
55
|
+
|
48
56
|
# Return true if the input text is valid according to the grammar.
|
49
57
|
# @return [Boolean]
|
50
58
|
def successful?
|
51
59
|
@success
|
52
60
|
end
|
61
|
+
|
62
|
+
# Set the error cause.
|
63
|
+
# @param exception_class [StandardError] Exception class
|
64
|
+
# @param message [String] Error message
|
65
|
+
def failure(exception_class, message)
|
66
|
+
@failure_class = exception_class
|
67
|
+
@failure_reason = message
|
68
|
+
end
|
53
69
|
end # class
|
54
70
|
end # module
|
55
71
|
end # module
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'forwardable'
|
4
|
+
require 'weakref'
|
4
5
|
|
5
6
|
module Dendroid
|
6
7
|
module Recognizer
|
@@ -9,19 +10,27 @@ module Dendroid
|
|
9
10
|
class EItem
|
10
11
|
extend Forwardable
|
11
12
|
|
13
|
+
# (Weak) reference to the dotted item
|
12
14
|
# @return [Dendroid::GrmAnalysis::DottedItem]
|
13
15
|
attr_reader :dotted_item
|
14
16
|
|
15
17
|
# @return [Integer] the rank of the token that correspond to the start of the rule.
|
16
18
|
attr_reader :origin
|
17
19
|
|
18
|
-
|
20
|
+
# TODO: :predictor, :completer, :scanner
|
21
|
+
attr_accessor :algo
|
22
|
+
|
23
|
+
# @return [Array<WeakRef>] predecessors sorted by decreasing origin value
|
24
|
+
attr_accessor :predecessors
|
25
|
+
|
26
|
+
def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?, :position, :prev_symbol, :rule
|
19
27
|
|
20
28
|
# @param aDottedItem [Dendroid::GrmAnalysis::DottedItem]
|
21
29
|
# @param origin [Integer]
|
22
30
|
def initialize(aDottedItem, origin)
|
23
|
-
@dotted_item = aDottedItem
|
31
|
+
@dotted_item = WeakRef.new(aDottedItem)
|
24
32
|
@origin = origin
|
33
|
+
@predecessors = []
|
25
34
|
end
|
26
35
|
|
27
36
|
# @return [Dendroid::Syntax::NonTerminal] the head of the production rule
|
@@ -42,6 +51,16 @@ module Dendroid
|
|
42
51
|
def to_s
|
43
52
|
"#{dotted_item} @ #{origin}"
|
44
53
|
end
|
54
|
+
|
55
|
+
alias inspect to_s
|
56
|
+
|
57
|
+
# Register a predecessor of this item, wrapped in a WeakRef.
# When there are already at least two predecessors and the origin of the new
# one is lower than the origin of the first one, it is inserted at index 2;
# otherwise it is put in front.
# NOTE(review): this does not obviously keep the list sorted by decreasing
# origin as stated for the `predecessors` attribute - confirm the intent.
# @param pred [#origin] the predecessor to register
# @return [Array<WeakRef>] the updated predecessors
def add_predecessor(pred)
  behind_first = predecessors.size > 1 && pred.origin < predecessors[0].origin
  slot = behind_first ? 2 : 0
  predecessors.insert(slot, WeakRef.new(pred))
end
|
45
64
|
end # class
|
46
65
|
end # module
|
47
66
|
end # module
|
@@ -15,10 +15,15 @@ module Dendroid
|
|
15
15
|
@items = []
|
16
16
|
end
|
17
17
|
|
18
|
-
# Add an
|
18
|
+
# Add an Earley item to the set if not yet present.
|
19
19
|
# @param anItem [Recognizer::EItem]
|
20
|
+
# @return [Recognizer::EItem] the item in the set
|
20
21
|
# Add an Earley item to the set unless an equal item is already present.
# @param anItem [Recognizer::EItem] the item to add
# @return [Recognizer::EItem] the item in the set: the already present equal
#   item if any, otherwise the given item
def add_item(anItem)
  position = items.index(anItem)
  if position
    items[position]
  else
    @items << anItem
    anItem
  end
end
|
23
28
|
|
24
29
|
# Find the items that expect a given grammar symbol
|
@@ -31,8 +31,7 @@ module Dendroid
|
|
31
31
|
tok = tokenizer.next_token
|
32
32
|
if tok.nil? && !grm_analysis.grammar.start_symbol.nullable?
|
33
33
|
chart = new_chart
|
34
|
-
chart.
|
35
|
-
chart.failure_reason = 'Error: Input may not be empty nor blank.'
|
34
|
+
chart.failure(StandardError, 'Error: Input may not be empty nor blank.')
|
36
35
|
chart
|
37
36
|
else
|
38
37
|
earley_parse(tok)
|
@@ -40,7 +39,7 @@ module Dendroid
|
|
40
39
|
end
|
41
40
|
|
42
41
|
# Run the Earley algorithm
|
43
|
-
# @param initial_token [
|
42
|
+
# @param initial_token [Dendroid::Lexical::Token]
|
44
43
|
def earley_parse(initial_token)
|
45
44
|
chart = new_chart
|
46
45
|
tokens = [initial_token]
|
@@ -62,10 +61,11 @@ module Dendroid
|
|
62
61
|
|
63
62
|
rank += 1 if advance
|
64
63
|
break if eos_reached && !advance
|
65
|
-
break
|
64
|
+
break unless advance
|
66
65
|
end
|
67
66
|
|
68
|
-
|
67
|
+
chart.tokens = tokens
|
68
|
+
determine_outcome(chart)
|
69
69
|
chart
|
70
70
|
end
|
71
71
|
|
@@ -74,12 +74,13 @@ module Dendroid
|
|
74
74
|
def new_chart
|
75
75
|
top_symbol = grm_analysis.grammar.start_symbol
|
76
76
|
|
77
|
-
|
78
|
-
prods = grm_analysis.grammar.nonterm2productions[top_symbol]
|
77
|
+
prd = grm_analysis.grammar.nonterm2production[top_symbol]
|
79
78
|
chart = Chart.new
|
80
|
-
|
81
|
-
|
82
|
-
|
79
|
+
seed_items = prd.predicted_items
|
80
|
+
seed_items.each do |item|
|
81
|
+
entry = EItem.new(item, 0)
|
82
|
+
entry.algo = :predictor
|
83
|
+
chart.seed_last_set(entry)
|
83
84
|
end
|
84
85
|
|
85
86
|
chart
|
@@ -103,12 +104,10 @@ module Dendroid
|
|
103
104
|
|
104
105
|
if entry.completed?
|
105
106
|
completer(chart, entry, rank, tokens, mode)
|
107
|
+
elsif entry.next_symbol.terminal?
|
108
|
+
advance = scanner(chart, entry, rank, tokens)
|
106
109
|
else
|
107
|
-
|
108
|
-
advance = scanner(chart, entry, rank, tokens)
|
109
|
-
else
|
110
|
-
predictor(chart, entry, rank, tokens, mode, predicted_symbols)
|
111
|
-
end
|
110
|
+
predictor(chart, entry, rank, tokens, mode, predicted_symbols)
|
112
111
|
end
|
113
112
|
|
114
113
|
advance
|
@@ -123,39 +122,36 @@ module Dendroid
|
|
123
122
|
# Error case: next actual token matches none of the expected tokens.
|
124
123
|
def predictor(chart, item, rank, tokens, mode, predicted_symbols)
|
125
124
|
next_symbol = item.next_symbol
|
126
|
-
if mode == :genuine
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
end
|
125
|
+
# if mode == :genuine
|
126
|
+
# predicted_symbols << Set.new if rank == predicted_symbols.size
|
127
|
+
# predicted = predicted_symbols[rank]
|
128
|
+
# return if predicted.include?(next_symbol)
|
129
|
+
#
|
130
|
+
# predicted.add(next_symbol)
|
131
|
+
# end
|
133
132
|
|
134
|
-
prods = grm_analysis.symbol2productions[next_symbol]
|
135
133
|
curr_set = chart[rank]
|
136
134
|
next_token = tokens[rank]
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
new_item = EItem.new(entry, rank)
|
147
|
-
curr_set.add_item(new_item)
|
135
|
+
prd = grm_analysis.symbol2production(next_symbol)
|
136
|
+
entry_items = prd.predicted_items
|
137
|
+
added = []
|
138
|
+
entry_items.each do |entry|
|
139
|
+
member = entry.next_symbol
|
140
|
+
if member&.terminal?
|
141
|
+
next unless next_token
|
142
|
+
next if (member.name != next_token.terminal) && mode == :genuine
|
148
143
|
end
|
144
|
+
added << add_item(curr_set, entry, rank, item, :predictor)
|
149
145
|
end
|
150
|
-
|
151
146
|
# Use trick from paper John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
147
|
+
return unless next_symbol.nullable?
|
148
|
+
|
149
|
+
next_item = grm_analysis.next_item(item.dotted_item)
|
150
|
+
return unless next_item
|
151
|
+
|
152
|
+
special = add_item(curr_set, next_item, item.origin, nil, :predictor)
|
153
|
+
# special = add_item(curr_set, next_item, item.origin, added.shift, :predictor)
|
154
|
+
# added.each { |e| special.add_predecessor(e) }
|
159
155
|
end
|
160
156
|
|
161
157
|
# procedure SCANNER((A → α•aβ, j), k, words)
|
@@ -170,8 +166,7 @@ module Dendroid
|
|
170
166
|
new_rank = rank + 1
|
171
167
|
chart.append_new_set if chart[new_rank].nil?
|
172
168
|
next_dotted_item = grm_analysis.next_item(dit)
|
173
|
-
|
174
|
-
chart[new_rank].add_item(new_item)
|
169
|
+
add_item(chart[new_rank], next_dotted_item, scan_item.origin, scan_item, :scanner)
|
175
170
|
advance = true
|
176
171
|
end
|
177
172
|
|
@@ -199,8 +194,7 @@ module Dendroid
|
|
199
194
|
next if member.name != next_token.terminal
|
200
195
|
end
|
201
196
|
|
202
|
-
|
203
|
-
curr_set.add_item(new_item)
|
197
|
+
add_item(curr_set, return_item, call_item.origin, item, :completer)
|
204
198
|
end
|
205
199
|
end
|
206
200
|
|
@@ -215,17 +209,25 @@ module Dendroid
|
|
215
209
|
end
|
216
210
|
end
|
217
211
|
|
218
|
-
def
|
212
|
+
# Create an Earley item for the given dotted item and origin and add it to the
# item set. The predecessor (if any) is registered on the item actually held
# by the set; the procedure is recorded on the freshly created item only.
# @param item_set [#add_item] the set receiving the item
# @param dotted_item [Object] the dotted item of the new entry
# @param origin [Integer] the origin of the new entry
# @param predecessor [Object, nil] optional predecessor of the entry
# @param procedure [Symbol] the procedure that produced the entry
# @return [EItem] the item returned by the item set
def add_item(item_set, dotted_item, origin, predecessor, procedure)
  candidate = EItem.new(dotted_item, origin)
  item_set.add_item(candidate).tap do |stored|
    stored.add_predecessor(predecessor) if predecessor
    candidate.algo = procedure
  end
end
|
220
|
+
|
221
|
+
def determine_outcome(chart)
|
219
222
|
success = false
|
223
|
+
tokens = chart.tokens
|
220
224
|
if chart.size == tokens.size + 1
|
221
225
|
top_symbol = grm_analysis.grammar.start_symbol
|
222
|
-
|
223
|
-
final_items =
|
224
|
-
items.concat(rule.reduce_items)
|
225
|
-
end
|
226
|
+
top_rule = grm_analysis.grammar.nonterm2production[top_symbol]
|
227
|
+
final_items = top_rule.reduce_items
|
226
228
|
last_set = chart.item_sets.last
|
227
229
|
last_set.each do |entry|
|
228
|
-
next if
|
230
|
+
next if !entry.origin.zero? || !final_items.include?(entry.dotted_item)
|
229
231
|
|
230
232
|
success = true
|
231
233
|
end
|
@@ -239,39 +241,37 @@ module Dendroid
|
|
239
241
|
offending_token = tokens[chart.size - 1]
|
240
242
|
pos = offending_token.position
|
241
243
|
(line, col) = [pos.lineno, pos.column]
|
242
|
-
|
243
|
-
terminals = last_set.items.reduce([]) do |result, ent|
|
244
|
-
result << ent.next_symbol if ent.pre_scan?
|
245
|
-
result
|
246
|
-
end
|
247
|
-
terminals.uniq!
|
244
|
+
terminals = expected_terminals(chart)
|
248
245
|
prefix = "Syntax error at or near token line #{line}, column #{col} >>>#{offending_token.source}<<<"
|
249
246
|
expectation = terminals.size == 1 ? terminals[0].name.to_s : "one of: [#{terminals.map(&:name).join(', ')}]"
|
250
247
|
err_msg = "#{prefix} Expected #{expectation}, found a #{offending_token.terminal} instead."
|
251
|
-
chart.
|
252
|
-
chart.failure_reason = err_msg
|
248
|
+
chart.failure(StandardError, err_msg)
|
253
249
|
elsif chart.size == tokens.size + 1
|
254
250
|
# EOS unexpected...
|
255
251
|
last_token = tokens.last
|
256
252
|
pos = last_token.position
|
257
253
|
(line, col) = [pos.lineno, pos.column]
|
258
|
-
|
259
|
-
terminals = last_set.items.reduce([]) do |result, ent|
|
260
|
-
result << ent.next_symbol if ent.pre_scan?
|
261
|
-
result
|
262
|
-
end
|
263
|
-
terminals.uniq!
|
264
|
-
|
254
|
+
terminals = expected_terminals(chart)
|
265
255
|
prefix = "Line #{line}, column #{col}: Premature end of input after '#{last_token.source}'"
|
266
256
|
expectation = terminals.size == 1 ? terminals[0].name.to_s : "one of: [#{terminals.map(&:name).join(', ')}]"
|
267
257
|
err_msg = "#{prefix}, expected: #{expectation}."
|
268
|
-
chart.
|
269
|
-
chart.failure_reason = err_msg
|
258
|
+
chart.failure(StandardError, err_msg)
|
270
259
|
end
|
271
260
|
end
|
272
261
|
chart.success = success
|
273
262
|
end
|
274
263
|
|
264
|
+
# Collect, without duplicates, the next symbols of the entries of the last
# item set that answer true to `pre_scan?`.
# @param chart [#last] the chart; its last item set is inspected
# @return [Array] the distinct next symbols, in order of first occurrence
def expected_terminals(chart)
  chart.last.items.select(&:pre_scan?).map(&:next_symbol).uniq
end
|
274
|
+
|
275
275
|
def replay_last_set(chart, tokens)
|
276
276
|
rank = chart.size - 1
|
277
277
|
seed_set(chart, rank) # Re-initialize last set with scan entries
|