dendroid 0.1.00 → 0.2.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +6 -0
- data/lib/dendroid/formatters/ascii_tree.rb +142 -0
- data/lib/dendroid/formatters/base_formatter.rb +24 -0
- data/lib/dendroid/formatters/bracket_notation.rb +50 -0
- data/lib/dendroid/grm_analysis/dotted_item.rb +45 -30
- data/lib/dendroid/grm_analysis/grm_analyzer.rb +10 -4
- data/lib/dendroid/grm_analysis/{choice_items.rb → rule_items.rb} +10 -10
- data/lib/dendroid/grm_dsl/base_grm_builder.rb +3 -4
- data/lib/dendroid/parsing/and_node.rb +54 -0
- data/lib/dendroid/parsing/chart_walker.rb +301 -0
- data/lib/dendroid/parsing/composite_parse_node.rb +21 -0
- data/lib/dendroid/parsing/empty_rule_node.rb +28 -0
- data/lib/dendroid/parsing/or_node.rb +46 -0
- data/lib/dendroid/parsing/parse_node.rb +26 -0
- data/lib/dendroid/parsing/parse_tree_visitor.rb +127 -0
- data/lib/dendroid/parsing/parser.rb +185 -0
- data/lib/dendroid/parsing/terminal_node.rb +32 -0
- data/lib/dendroid/parsing/walk_progress.rb +121 -0
- data/lib/dendroid/recognizer/chart.rb +3 -0
- data/lib/dendroid/recognizer/e_item.rb +21 -2
- data/lib/dendroid/recognizer/item_set.rb +7 -2
- data/lib/dendroid/recognizer/recognizer.rb +42 -23
- data/lib/dendroid/syntax/grammar.rb +5 -1
- data/lib/dendroid/syntax/rule.rb +71 -13
- data/spec/dendroid/grm_analysis/dotted_item_spec.rb +59 -47
- data/spec/dendroid/grm_analysis/{choice_items_spec.rb → rule_items_spec.rb} +5 -6
- data/spec/dendroid/parsing/chart_walker_spec.rb +250 -0
- data/spec/dendroid/parsing/terminal_node_spec.rb +36 -0
- data/spec/dendroid/recognizer/e_item_spec.rb +5 -5
- data/spec/dendroid/recognizer/item_set_spec.rb +16 -8
- data/spec/dendroid/recognizer/recognizer_spec.rb +56 -5
- data/spec/dendroid/support/sample_grammars.rb +2 -2
- data/spec/dendroid/syntax/grammar_spec.rb +16 -21
- data/spec/dendroid/syntax/rule_spec.rb +56 -7
- data/version.txt +1 -1
- metadata +20 -13
- data/lib/dendroid/grm_analysis/alternative_item.rb +0 -70
- data/lib/dendroid/grm_analysis/production_items.rb +0 -55
- data/lib/dendroid/syntax/choice.rb +0 -95
- data/lib/dendroid/syntax/production.rb +0 -82
- data/spec/dendroid/grm_analysis/alternative_item_spec.rb +0 -12
- data/spec/dendroid/grm_analysis/production_items_spec.rb +0 -68
- data/spec/dendroid/syntax/choice_spec.rb +0 -68
- data/spec/dendroid/syntax/production_spec.rb +0 -92
@@ -0,0 +1,185 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'weakref'
|
4
|
+
require_relative '../grm_dsl/base_grm_builder'
|
5
|
+
require_relative '../utils/base_tokenizer'
|
6
|
+
require_relative '../recognizer/recognizer'
|
7
|
+
require_relative 'chart_walker'
|
8
|
+
require_relative 'parse_tree_visitor'
|
9
|
+
require_relative '../formatters/bracket_notation'
|
10
|
+
require_relative '../formatters/ascii_tree'
|
11
|
+
|
12
|
+
module Dendroid
  # Namespace of the parsing-related classes.
  module Parsing
    # Empty placeholder: no behaviour is defined on this class in this file.
    class Parser
    end
  end
end
|
18
|
+
|
19
|
+
# Build the sample grammar L1: sums ('s') of products ('m') of INTEGER terms ('t').
# @return the object returned by BaseGrmBuilder#grammar
def grammar_l1
  # Grammar inspired from Wikipedia entry on Earley parsing
  grm_builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
    declare_terminals('PLUS', 'STAR', 'INTEGER')

    rule('p' => 's')
    rule('s' => ['s PLUS m', 'm'])
    rule('m' => ['m STAR t', 't'])
    rule('t' => 'INTEGER')
  end
  grm_builder.grammar
end
|
31
|
+
|
32
|
+
# Build the sample grammar L31: an ambiguous arithmetical expression language
# whose terminals are the same as the ones of grammar L1.
# @return the object returned by BaseGrmBuilder#grammar
def grammar_l31
  # Ambiguous arithmetical expression language
  # This language is compatible with tokenizer L1
  grm_builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
    declare_terminals('PLUS', 'STAR', 'INTEGER')

    rule('p' => 's')
    rule('s' => ['s PLUS s', 's STAR s', 'INTEGER'])
  end
  grm_builder.grammar
end
|
43
|
+
|
44
|
+
# Build the tokenizer for the arithmetic sample languages:
# '+' and '*' are associated with the PLUS and STAR terminals, and digit runs
# are scanned as INTEGER with their text converted through String#to_i.
# @return [Dendroid::Utils::BaseTokenizer]
def tokenizer_l1
  Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })

    scan_verbatim(['+', '*'])
    scan_value(/\d+/, :INTEGER, ->(lexeme) { lexeme.to_i })
  end
end
|
52
|
+
|
53
|
+
# Find the chart entry that proves the whole input was recognized.
#
# The last item set is scanned from its end towards its beginning; the first
# entry met that starts at rank 0, has a completed dotted item and whose rule
# has the grammar start symbol as left-hand side is returned.
#
# @param chart [#item_sets] chart filled by the recognizer
# @param grammar [#start_symbol] grammar used for the recognition
# @return [Object, nil] the matching entry; nil when there is none or when
#   the chart holds no item set at all
def retrieve_success_item(chart, grammar)
  final_set = chart.item_sets.last
  return nil unless final_set # Chart without item set: nothing to search

  final_set.items.reverse_each.find do |itm|
    itm.origin.zero? && itm.dotted_item.completed? && itm.dotted_item.rule.lhs == grammar.start_symbol
  end
end
|
65
|
+
|
66
|
+
# Build the sample grammar L10: a single left-recursive rule 'A' that has
# an empty alternative.
# @return the object returned by BaseGrmBuilder#grammar
def grammar_l10
  # Grammar with left recursive rule
  grm_builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
    declare_terminals('a')

    rule 'A' => ['A a', '']
  end
  grm_builder.grammar
end
|
76
|
+
|
77
|
+
# Build the tokenizer paired with grammar L10: the verbatim text 'a' is
# associated with the terminal :a.
# @return [Dendroid::Utils::BaseTokenizer]
def tokenizer_l10
  Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ 'a' => :a })
    scan_verbatim(['a'])
  end
end
|
84
|
+
|
85
|
+
# Build the sample grammar L11: a single right-recursive rule 'A' that has
# an empty alternative.
# @return the object returned by BaseGrmBuilder#grammar
def grammar_l11
  # Grammar with right-recursive rule
  grm_builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
    declare_terminals('a')

    rule 'A' => ['a A', '']
  end
  grm_builder.grammar
end
|
95
|
+
|
96
|
+
# Build the tokenizer paired with grammar L11: the verbatim text 'a' is
# associated with the terminal :a.
# @return [Dendroid::Utils::BaseTokenizer]
def tokenizer_l11
  Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ 'a' => :a })
    scan_verbatim(['a'])
  end
end
|
103
|
+
|
104
|
+
# Build the sample grammar L8: rule 'S' is either two 'S' in sequence or the
# terminal 'x'.
# @return the object returned by BaseGrmBuilder#grammar
def grammar_l8
  # (based on grammar G2 from paper Masaru Tomita "An Efficient Context-Free Parsing Algorithm
  # for Natural Languages")
  grm_builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
    declare_terminals('x')

    rule 'S' => ['S S', 'x']
  end
  grm_builder.grammar
end
|
115
|
+
|
116
|
+
# Build the tokenizer paired with grammar L8: the verbatim text 'x' is
# associated with the terminal :x.
# @return [Dendroid::Utils::BaseTokenizer]
def tokenizer_l8
  Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ 'x' => :x })
    scan_verbatim(['x'])
  end
end
|
123
|
+
|
124
|
+
########################################
# Entry point
########################################
# NOTE(review): this demo is executed whenever the file is loaded (there is
# no `$PROGRAM_NAME == __FILE__` guard) -- confirm this is intended.

# Number of the demo scenario to run; a number absent from the table below
# runs nothing.
kode = 3

# Scenario number => [grammar factory, tokenizer factory, input text].
# Factories are referenced by name so that only the selected scenario builds
# its grammar and its tokenizer.
scenarios = {
  0 => [:grammar_l1, :tokenizer_l1, '2 + 3 * 4'],
  1 => [:grammar_l10, :tokenizer_l10, 'a a a a a'],
  2 => [:grammar_l11, :tokenizer_l11, 'a a a a a'],
  3 => [:grammar_l31, :tokenizer_l1, '2 + 3 * 4'],
  4 => [:grammar_l8, :tokenizer_l8, 'x x x x']
}

scenario = scenarios[kode]
if scenario
  grammar_factory, tokenizer_factory, input_text = scenario

  # Recognize the input, then walk the chart from the success entry
  recognizer = Dendroid::Recognizer::Recognizer.new(send(grammar_factory), send(tokenizer_factory))
  chart = recognizer.run(input_text)
  succ_item = retrieve_success_item(chart, recognizer.grm_analysis.grammar)
  walker = Dendroid::Parsing::ChartWalker.new(chart)
  root = walker.walk(succ_item)

  # Render the walk result on the standard output
  # formatter = BracketNotation.new($stdout)
  formatter = Asciitree.new($stdout)
  visitor = ParseTreeVisitor.new(root)
  formatter.render(visitor)
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'parse_node'
|
4
|
+
|
5
|
+
module Dendroid
  module Parsing
    # Parse node that pairs a terminal symbol with the input token it matched.
    class TerminalNode < ParseNode
      # @return [Dendroid::Syntax::Terminal] Terminal symbol of matching token.
      attr_reader :symbol

      # @return [Dendroid::Lexical::Token] Matching input token object.
      attr_reader :token

      # @param sym [Dendroid::Syntax::Terminal] terminal symbol of the token
      # @param tok [Dendroid::Lexical::Token] the matching token
      # @param rank [Integer] forwarded, together with rank + 1, to the
      #   ParseNode constructor (presumably the bounds of the covered range -- confirm)
      def initialize(sym, tok, rank)
        super(rank, rank + 1)
        @token = tok
        @symbol = sym
      end

      # Part of the 'visitee' role in Visitor design pattern.
      # @param aVisitor[ParseTreeVisitor] the visitor
      def accept(aVisitor)
        aVisitor.visit_terminal(self)
      end

      # Text representation: the symbol name, then ": value" when the token
      # is a Dendroid::Lexical::Literal, then the text built by the superclass.
      # @return [String]
      def to_s
        literal_part = ''
        literal_part = ": #{token.value}" if token.is_a?(Dendroid::Lexical::Literal)

        "#{symbol.name}#{literal_part} #{super}"
      end
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'and_node'
|
4
|
+
require_relative 'or_node'
|
5
|
+
require_relative 'terminal_node'
|
6
|
+
require_relative 'empty_rule_node'
|
7
|
+
|
8
|
+
module Dendroid
  module Parsing
    # Mutable bookkeeping for one walk: a state tag, the current rank, the
    # current entry, an optional predecessor and the stack of parent nodes
    # to which new child nodes get attached.
    class WalkProgress
      # @return [Symbol] :New at creation, :Forking once #fork was called
      attr_accessor :state

      # @return [Integer] current rank; decremented by #add_terminal_node
      attr_accessor :curr_rank

      # @return [Object] current entry (never nil when set through #curr_item=)
      attr_reader :curr_item

      # @return [Object, nil] predecessor recorded by #fork; nil otherwise
      attr_accessor :predecessor

      # @return [Array] stack of parent nodes, the innermost one being last
      attr_reader :parents

      # @param start_rank [Integer] initial value of the current rank
      # @param start_item [Object] initial current entry
      # @param parents [Array] initial stack of parent nodes (stored as is, not copied)
      def initialize(start_rank, start_item, parents)
        @state = :New
        @curr_rank = start_rank
        @curr_item = start_item
        @predecessor = nil
        @parents = parents
      end

      # Hook invoked by #dup and #clone on the copy being built.
      # The copy takes over state, rank and current entry; it gets its own
      # (shallow) copy of the parents stack and its predecessor is reset.
      # @param orig [WalkProgress] the object being copied
      def initialize_copy(orig)
        @state = orig.state
        @curr_rank = orig.curr_rank
        @curr_item = orig.curr_item
        @predecessor = nil
        @parents = orig.parents.dup
      end

      # Flag this progress as forking and remember the given predecessor.
      # @param thePredecessor [Object]
      def fork(thePredecessor)
        @state = :Forking
        @predecessor = thePredecessor
      end

      # Replace the current entry.
      # @param anEntry [Object] the new current entry
      # @raise [StandardError] when anEntry is nil
      def curr_item=(anEntry)
        raise StandardError, 'Current item cannot be nil' if anEntry.nil?

        @curr_item = anEntry
      end

      # Add an empty rule node for given entry as a child of last parent
      # @return [EmptyRuleNode] the added node
      def add_node_empty(anEntry)
        node_empty = EmptyRuleNode.new(anEntry, curr_rank)
        add_child_node(node_empty)
      end

      # Move the current rank one step back, then add a terminal node for
      # the terminal at that rank as a child of last parent
      # @param token [Object] the token attached to the new node
      # @return [TerminalNode] the added node
      def add_terminal_node(token)
        @curr_rank -= 1
        term_node = TerminalNode.new(curr_item.prev_symbol, token, curr_rank)
        add_child_node(term_node)
      end

      # Add an AND node for given entry as a child of last parent;
      # the new node becomes the last parent.
      # @return [ANDNode] the added node
      # @raise [StandardError] when the rule of the node differs from the rule of the entry
      def push_and_node(anEntry)
        node = ANDNode.new(anEntry, curr_rank)
        raise StandardError unless anEntry.rule == node.rule # Fails

        add_child_node(node)
        parents.push(node)

        node
      end

      # Add an OR node for the previous symbol of the current entry as a
      # child of last parent; the new node becomes the last parent.
      # @return [OrNode] the added node
      def push_or_node(origin, arity)
        node = OrNode.new(curr_item.prev_symbol, origin, curr_rank, arity)
        add_child_node(node)
        parents.push(node)

        node
      end

      # Attach the given node to the last parent, at the index that precedes
      # the position of the current entry.
      # @return [Object] the node passed as argument
      def add_child_node(aNode)
        parents.last.add_child(aNode, curr_item.position - 1)
        aNode
      end

      # rubocop: disable Metrics/CyclomaticComplexity
      # rubocop: disable Metrics/PerceivedComplexity

      # Do the given entries match one of the parents?
      # Parents are visited from the last one to the first one; OR nodes are
      # never matched but do count in the offset. The visit stops after the
      # first parent whose range starts before the smallest origin of the entries.
      # Despite the trailing '?', the result is a list, not a boolean.
      # @param entries [Array] candidate entries
      # @param stop_at_first [Boolean] when true, stop at the very first match
      # @return [Array<Array>] [entry, offset] pairs, where offset is the
      #   number of parents visited before the matching one
      def match_parent?(entries, stop_at_first)
        return [] if entries.empty? # No candidate => no match

        matching = []
        min_origin = entries.map(&:origin).min
        offset = 0

        parents.reverse_each do |node|
          if node.is_a?(OrNode)
            offset += 1
            next
          end

          entries.each do |ent|
            next unless node.match(ent)

            matching << [ent, offset]
            break if stop_at_first
          end
          break if stop_at_first && !matching.empty?

          # Stop loop when parent.origin < min(entries.origin)
          break if node.range[0] < min_origin

          offset += 1
        end

        matching
      end

      # rubocop: enable Metrics/CyclomaticComplexity
      # rubocop: enable Metrics/PerceivedComplexity
    end
  end
end
|
@@ -15,6 +15,9 @@ module Dendroid
|
|
15
15
|
# @return [Array<Recognizer::ItemSet>] The array of item sets
|
16
16
|
attr_reader :item_sets
|
17
17
|
|
18
|
+
# @return [Array<Dendroid::Lexical::Token>] The input tokens
|
19
|
+
attr_accessor :tokens
|
20
|
+
|
18
21
|
# @return [Boolean] Indicates whether the recognizer successfully processed the whole input
|
19
22
|
attr_writer :success
|
20
23
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'forwardable'
|
4
|
+
require 'weakref'
|
4
5
|
|
5
6
|
module Dendroid
|
6
7
|
module Recognizer
|
@@ -9,19 +10,27 @@ module Dendroid
|
|
9
10
|
class EItem
|
10
11
|
extend Forwardable
|
11
12
|
|
13
|
+
# (Weak) reference to the dotted item
|
12
14
|
# @return [Dendroid::GrmAnalysis::DottedItem]
|
13
15
|
attr_reader :dotted_item
|
14
16
|
|
15
17
|
# @return [Integer] the rank of the token that corresponds to the start of the rule.
|
16
18
|
attr_reader :origin
|
17
19
|
|
18
|
-
|
20
|
+
# TODO: :predictor, :completer, :scanner
|
21
|
+
attr_accessor :algo
|
22
|
+
|
23
|
+
# @return [Array<WeakRef>] predecessors sorted by decreasing origin value
|
24
|
+
attr_accessor :predecessors
|
25
|
+
|
26
|
+
def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?, :position, :prev_symbol, :rule
|
19
27
|
|
20
28
|
# @param aDottedItem [Dendroid::GrmAnalysis::DottedItem]
|
21
29
|
# @param origin [Integer]
|
22
30
|
def initialize(aDottedItem, origin)
|
23
|
-
@dotted_item = aDottedItem
|
31
|
+
@dotted_item = WeakRef.new(aDottedItem)
|
24
32
|
@origin = origin
|
33
|
+
@predecessors = []
|
25
34
|
end
|
26
35
|
|
27
36
|
# @return [Dendroid::Syntax::NonTerminal] the head of the production rule
|
@@ -42,6 +51,16 @@ module Dendroid
|
|
42
51
|
def to_s
|
43
52
|
"#{dotted_item} @ #{origin}"
|
44
53
|
end
|
54
|
+
|
55
|
+
alias inspect to_s
|
56
|
+
|
57
|
+
# Record a predecessor of this item; it is kept as a weak reference.
# With at most one predecessor already recorded, or when the newcomer's
# origin is not below the origin of the first recorded one, the newcomer
# goes to the front. Otherwise it is inserted at index 2.
# NOTE(review): inserting at the fixed index 2 does not obviously keep the
# list sorted by decreasing origin -- confirm the intended ordering.
# @param pred [#origin] the predecessor item
# @return [Array<WeakRef>] the updated list of predecessors
def add_predecessor(pred)
  weak_pred = WeakRef.new(pred)
  below_head = predecessors.size > 1 && pred.origin < predecessors[0].origin

  below_head ? predecessors.insert(2, weak_pred) : predecessors.unshift(weak_pred)
end
|
45
64
|
end # class
|
46
65
|
end # module
|
47
66
|
end # module
|
@@ -15,10 +15,15 @@ module Dendroid
|
|
15
15
|
@items = []
|
16
16
|
end
|
17
17
|
|
18
|
-
# Add an
|
18
|
+
# Add an Earley item to the set if not yet present.
|
19
19
|
# @param anItem [Recognizer::EItem]
|
20
|
+
# @return [Recognizer::EItem] the item in the set
|
20
21
|
def add_item(anItem)
|
21
|
-
|
22
|
+
idx = items.find_index anItem
|
23
|
+
return items[idx] if idx
|
24
|
+
|
25
|
+
@items << anItem
|
26
|
+
anItem
|
22
27
|
end
|
23
28
|
|
24
29
|
# Find the items that expect a given grammar symbol
|
@@ -5,7 +5,7 @@ require_relative 'e_item'
|
|
5
5
|
require_relative 'chart'
|
6
6
|
|
7
7
|
module Dendroid
|
8
|
-
# This module host classes needed to implement an Earley recognizer
|
8
|
+
# This module hosts the classes needed to implement an Earley recognizer.
|
9
9
|
module Recognizer
|
10
10
|
# A recognizer determines whether the input text complies to the grammar (syntax) rules.
|
11
11
|
# This class implements the Earley recognition algorithm.
|
@@ -16,6 +16,10 @@ module Dendroid
|
|
16
16
|
# @return [Object]
|
17
17
|
attr_reader :tokenizer
|
18
18
|
|
19
|
+
# rubocop: disable Metrics/AbcSize
|
20
|
+
# rubocop: disable Metrics/CyclomaticComplexity
|
21
|
+
# rubocop: disable Metrics/PerceivedComplexity
|
22
|
+
|
19
23
|
# @param grammar [Dendroid::Syntax::Grammar]
|
20
24
|
# @param tokenizer [Object]
|
21
25
|
def initialize(grammar, tokenizer)
|
@@ -39,7 +43,7 @@ module Dendroid
|
|
39
43
|
end
|
40
44
|
|
41
45
|
# Run the Earley algorithm
|
42
|
-
# @param initial_token [
|
46
|
+
# @param initial_token [Dendroid::Lexical::Token]
|
43
47
|
def earley_parse(initial_token)
|
44
48
|
chart = new_chart
|
45
49
|
tokens = [initial_token]
|
@@ -64,7 +68,8 @@ module Dendroid
|
|
64
68
|
break unless advance
|
65
69
|
end
|
66
70
|
|
67
|
-
|
71
|
+
chart.tokens = tokens
|
72
|
+
determine_outcome(chart)
|
68
73
|
chart
|
69
74
|
end
|
70
75
|
|
@@ -76,7 +81,11 @@ module Dendroid
|
|
76
81
|
prd = grm_analysis.grammar.nonterm2production[top_symbol]
|
77
82
|
chart = Chart.new
|
78
83
|
seed_items = prd.predicted_items
|
79
|
-
seed_items.each
|
84
|
+
seed_items.each do |item|
|
85
|
+
entry = EItem.new(item, 0)
|
86
|
+
entry.algo = :predictor
|
87
|
+
chart.seed_last_set(entry)
|
88
|
+
end
|
80
89
|
|
81
90
|
chart
|
82
91
|
end
|
@@ -115,29 +124,28 @@ module Dendroid
|
|
115
124
|
# Assuming next symbol is a non-terminal
|
116
125
|
#
|
117
126
|
# Error case: next actual token matches none of the expected tokens.
|
118
|
-
def predictor(chart, item, rank, tokens, mode,
|
127
|
+
def predictor(chart, item, rank, tokens, mode, _predicted_symbols)
|
119
128
|
next_symbol = item.next_symbol
|
120
|
-
if mode == :genuine
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
end
|
129
|
+
# if mode == :genuine
|
130
|
+
# predicted_symbols << Set.new if rank == predicted_symbols.size
|
131
|
+
# predicted = predicted_symbols[rank]
|
132
|
+
# return if predicted.include?(next_symbol)
|
133
|
+
#
|
134
|
+
# predicted.add(next_symbol)
|
135
|
+
# end
|
127
136
|
|
128
137
|
curr_set = chart[rank]
|
129
138
|
next_token = tokens[rank]
|
130
139
|
prd = grm_analysis.symbol2production(next_symbol)
|
131
140
|
entry_items = prd.predicted_items
|
141
|
+
added = []
|
132
142
|
entry_items.each do |entry|
|
133
143
|
member = entry.next_symbol
|
134
144
|
if member&.terminal?
|
135
145
|
next unless next_token
|
136
146
|
next if (member.name != next_token.terminal) && mode == :genuine
|
137
147
|
end
|
138
|
-
|
139
|
-
new_item = EItem.new(entry, rank)
|
140
|
-
curr_set.add_item(new_item)
|
148
|
+
added << add_item(curr_set, entry, rank, item, :predictor)
|
141
149
|
end
|
142
150
|
# Use trick from paper John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
|
143
151
|
return unless next_symbol.nullable?
|
@@ -145,8 +153,7 @@ module Dendroid
|
|
145
153
|
next_item = grm_analysis.next_item(item.dotted_item)
|
146
154
|
return unless next_item
|
147
155
|
|
148
|
-
|
149
|
-
curr_set.add_item(new_item)
|
156
|
+
add_item(curr_set, next_item, item.origin, nil, :predictor)
|
150
157
|
end
|
151
158
|
|
152
159
|
# procedure SCANNER((A → α•aβ, j), k, words)
|
@@ -161,8 +168,7 @@ module Dendroid
|
|
161
168
|
new_rank = rank + 1
|
162
169
|
chart.append_new_set if chart[new_rank].nil?
|
163
170
|
next_dotted_item = grm_analysis.next_item(dit)
|
164
|
-
|
165
|
-
chart[new_rank].add_item(new_item)
|
171
|
+
add_item(chart[new_rank], next_dotted_item, scan_item.origin, scan_item, :scanner)
|
166
172
|
advance = true
|
167
173
|
end
|
168
174
|
|
@@ -190,8 +196,7 @@ module Dendroid
|
|
190
196
|
next if member.name != next_token.terminal
|
191
197
|
end
|
192
198
|
|
193
|
-
|
194
|
-
curr_set.add_item(new_item)
|
199
|
+
add_item(curr_set, return_item, call_item.origin, item, :completer)
|
195
200
|
end
|
196
201
|
end
|
197
202
|
|
@@ -206,8 +211,18 @@ module Dendroid
|
|
206
211
|
end
|
207
212
|
end
|
208
213
|
|
209
|
-
def
|
214
|
+
# Build an Earley item and hand it over to the given item set.
# The predecessor, when there is one, is recorded on the item that the set
# gives back. The procedure tag is written on the freshly built item, so
# an item that the set already contained keeps its own tag.
# @param item_set [Recognizer::ItemSet] the set to update
# @param dotted_item [Dendroid::GrmAnalysis::DottedItem]
# @param origin [Integer]
# @param predecessor [Recognizer::EItem, nil] item from which the new one derives
# @param procedure [Symbol] one of: :predictor, :completer, :scanner
# @return [Recognizer::EItem] the item given back by the set
def add_item(item_set, dotted_item, origin, predecessor, procedure)
  candidate = EItem.new(dotted_item, origin)

  item_set.add_item(candidate).tap do |stored|
    stored.add_predecessor(predecessor) if predecessor
    candidate.algo = procedure
  end
end
|
222
|
+
|
223
|
+
def determine_outcome(chart)
|
210
224
|
success = false
|
225
|
+
tokens = chart.tokens
|
211
226
|
if chart.size == tokens.size + 1
|
212
227
|
top_symbol = grm_analysis.grammar.start_symbol
|
213
228
|
top_rule = grm_analysis.grammar.nonterm2production[top_symbol]
|
@@ -250,7 +265,7 @@ module Dendroid
|
|
250
265
|
|
251
266
|
def expected_terminals(chart)
|
252
267
|
last_set = chart.last
|
253
|
-
terminals = last_set.items.
|
268
|
+
terminals = last_set.items.each_with_object([]) do |ent, result|
|
254
269
|
result << ent.next_symbol if ent.pre_scan?
|
255
270
|
result
|
256
271
|
end
|
@@ -269,5 +284,9 @@ module Dendroid
|
|
269
284
|
end
|
270
285
|
end
|
271
286
|
end # class
|
287
|
+
|
288
|
+
# rubocop: enable Metrics/AbcSize
|
289
|
+
# rubocop: enable Metrics/CyclomaticComplexity
|
290
|
+
# rubocop: enable Metrics/PerceivedComplexity
|
272
291
|
end # module
|
273
292
|
end # module
|
@@ -39,6 +39,8 @@ module Dendroid
|
|
39
39
|
add_terminals(terminals)
|
40
40
|
end
|
41
41
|
|
42
|
+
# rubocop: disable Style/IfUnlessModifier
|
43
|
+
|
42
44
|
# Add a rule to the grammar.
|
43
45
|
# @param rule [Dendroid::Syntax::Rule]
|
44
46
|
def add_rule(rule)
|
@@ -56,6 +58,8 @@ module Dendroid
|
|
56
58
|
nonterm2production[rule.head] = rule
|
57
59
|
end
|
58
60
|
|
61
|
+
# rubocop: enable Style/IfUnlessModifier
|
62
|
+
|
59
63
|
# Return the start symbol for the language, that is,
|
60
64
|
# the non-terminal symbol used to denote the top-level
|
61
65
|
# construct of the language being defined.
|
@@ -211,7 +215,7 @@ module Dendroid
|
|
211
215
|
backlog.subtract(to_remove)
|
212
216
|
end
|
213
217
|
|
214
|
-
backlog.each { |i| rules[i].non_productive }
|
218
|
+
# backlog.each { |i| rules[i].non_productive }
|
215
219
|
non_productive = symbols.reject(&:productive?)
|
216
220
|
non_productive.each { |symb| symb.productive = false }
|
217
221
|
non_productive
|