dendroid 0.1.00 → 0.2.00
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/dendroid/formatters/ascii_tree.rb +142 -0
- data/lib/dendroid/formatters/base_formatter.rb +25 -0
- data/lib/dendroid/formatters/bracket_notation.rb +50 -0
- data/lib/dendroid/grm_analysis/dotted_item.rb +46 -30
- data/lib/dendroid/grm_analysis/grm_analyzer.rb +2 -4
- data/lib/dendroid/grm_analysis/{choice_items.rb → rule_items.rb} +10 -10
- data/lib/dendroid/grm_dsl/base_grm_builder.rb +3 -4
- data/lib/dendroid/parsing/and_node.rb +56 -0
- data/lib/dendroid/parsing/chart_walker.rb +293 -0
- data/lib/dendroid/parsing/composite_parse_node.rb +21 -0
- data/lib/dendroid/parsing/empty_rule_node.rb +28 -0
- data/lib/dendroid/parsing/or_node.rb +51 -0
- data/lib/dendroid/parsing/parse_node.rb +26 -0
- data/lib/dendroid/parsing/parse_tree_visitor.rb +127 -0
- data/lib/dendroid/parsing/parser.rb +185 -0
- data/lib/dendroid/parsing/terminal_node.rb +32 -0
- data/lib/dendroid/parsing/walk_progress.rb +117 -0
- data/lib/dendroid/recognizer/chart.rb +8 -0
- data/lib/dendroid/recognizer/e_item.rb +21 -2
- data/lib/dendroid/recognizer/item_set.rb +7 -2
- data/lib/dendroid/recognizer/recognizer.rb +33 -20
- data/lib/dendroid/syntax/grammar.rb +1 -1
- data/lib/dendroid/syntax/rule.rb +71 -13
- data/spec/dendroid/grm_analysis/dotted_item_spec.rb +59 -47
- data/spec/dendroid/grm_analysis/{choice_items_spec.rb → rule_items_spec.rb} +5 -6
- data/spec/dendroid/parsing/chart_walker_spec.rb +223 -0
- data/spec/dendroid/parsing/terminal_node_spec.rb +36 -0
- data/spec/dendroid/recognizer/e_item_spec.rb +5 -5
- data/spec/dendroid/recognizer/item_set_spec.rb +16 -8
- data/spec/dendroid/recognizer/recognizer_spec.rb +57 -5
- data/spec/dendroid/support/sample_grammars.rb +2 -0
- data/spec/dendroid/syntax/grammar_spec.rb +16 -21
- data/spec/dendroid/syntax/rule_spec.rb +56 -7
- data/version.txt +1 -1
- metadata +20 -13
- data/lib/dendroid/grm_analysis/alternative_item.rb +0 -70
- data/lib/dendroid/grm_analysis/production_items.rb +0 -55
- data/lib/dendroid/syntax/choice.rb +0 -95
- data/lib/dendroid/syntax/production.rb +0 -82
- data/spec/dendroid/grm_analysis/alternative_item_spec.rb +0 -12
- data/spec/dendroid/grm_analysis/production_items_spec.rb +0 -68
- data/spec/dendroid/syntax/choice_spec.rb +0 -68
- data/spec/dendroid/syntax/production_spec.rb +0 -92
@@ -0,0 +1,185 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'weakref'
|
4
|
+
require_relative '../grm_dsl/base_grm_builder'
|
5
|
+
require_relative '../utils/base_tokenizer'
|
6
|
+
require_relative '../recognizer/recognizer'
|
7
|
+
require_relative 'chart_walker'
|
8
|
+
require_relative 'parse_tree_visitor'
|
9
|
+
require_relative '../formatters/bracket_notation'
|
10
|
+
require_relative '../formatters/ascii_tree'
|
11
|
+
|
12
|
+
module Dendroid
  module Parsing
    # Parser class; its body is currently empty (no methods are defined here).
    class Parser; end
  end
end
|
18
|
+
|
19
|
+
# Build the sample grammar L1 (arithmetic expressions with PLUS and STAR).
# @return the grammar produced by the builder
def grammar_l1
  Dendroid::GrmDSL::BaseGrmBuilder.new do
    # Grammar inspired from Wikipedia entry on Earley parsing
    declare_terminals('PLUS', 'STAR', 'INTEGER')

    rule('p' => 's')
    rule('s' => ['s PLUS m', 'm'])
    rule('m' => ['m STAR t', 't'])
    rule('t' => 'INTEGER')
  end.grammar
end
|
31
|
+
|
32
|
+
# Build the sample grammar L31.
# @return the grammar produced by the builder
def grammar_l31
  Dendroid::GrmDSL::BaseGrmBuilder.new do
    # Ambiguous arithmetical expression language
    # This language is compatible with tokenizer L1
    declare_terminals('PLUS', 'STAR', 'INTEGER')

    rule('p' => 's')
    rule('s' => ['s PLUS s', 's STAR s', 'INTEGER'])
  end.grammar
end
|
43
|
+
|
44
|
+
# Build a tokenizer for the L1 family of languages:
# '+' and '*' are scanned verbatim, runs of digits become INTEGER tokens
# whose value is converted with String#to_i.
# @return the configured tokenizer
def tokenizer_l1
  lexer = Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })

    scan_verbatim(['+', '*'])
    scan_value(/\d+/, :INTEGER, lambda { |text| text.to_i })
  end

  lexer
end
|
52
|
+
|
53
|
+
# Search the last item set of the chart, from its last item backwards, for an
# entry that starts at origin 0, has a completed dotted item and whose rule
# has the grammar's start symbol as left-hand side.
# @param chart the chart whose last item set is inspected
# @param grammar the grammar providing the start symbol
# @return the first matching item found (scanning in reverse), or nil
def retrieve_success_item(chart, grammar)
  chart.item_sets.last.items.reverse_each.find do |candidate|
    candidate.origin.zero? &&
      candidate.dotted_item.completed? &&
      candidate.dotted_item.rule.lhs == grammar.start_symbol
  end
end
|
65
|
+
|
66
|
+
# Build the sample grammar L10.
# @return the grammar produced by the builder
def grammar_l10
  # Grammar with left recursive rule
  Dendroid::GrmDSL::BaseGrmBuilder.new do
    declare_terminals('a')

    rule('A' => ['A a', ''])
  end.grammar
end
|
76
|
+
|
77
|
+
# Build a tokenizer for language L10: only the verbatim text 'a' is scanned.
# @return the configured tokenizer
def tokenizer_l10
  lexer = Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ 'a' => :a })

    scan_verbatim(['a'])
  end

  lexer
end
|
84
|
+
|
85
|
+
# Build the sample grammar L11.
# @return the grammar produced by the builder
def grammar_l11
  Dendroid::GrmDSL::BaseGrmBuilder.new do
    # Grammar with right-recursive rule
    declare_terminals('a')

    rule('A' => ['a A', ''])
  end.grammar
end
|
95
|
+
|
96
|
+
# Build a tokenizer for language L11: only the verbatim text 'a' is scanned.
# @return the configured tokenizer
def tokenizer_l11
  lexer = Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ 'a' => :a })

    scan_verbatim(['a'])
  end

  lexer
end
|
103
|
+
|
104
|
+
# Build the sample grammar L8.
# @return the grammar produced by the builder
def grammar_l8
  Dendroid::GrmDSL::BaseGrmBuilder.new do
    # (based on grammar G2 from paper Masaru Tomita "An Efficient Context-Free Parsing Algorithm
    # for Natural Languages")
    declare_terminals('x')

    rule('S' => ['S S', 'x'])
  end.grammar
end
|
115
|
+
|
116
|
+
# Build a tokenizer for language L8: only the verbatim text 'x' is scanned.
# @return the configured tokenizer
def tokenizer_l8
  lexer = Dendroid::Utils::BaseTokenizer.new do
    map_verbatim2terminal({ 'x' => :x })

    scan_verbatim(['x'])
  end

  lexer
end
|
123
|
+
|
124
|
+
########################################
|
125
|
+
# Entry point
|
126
|
+
########################################
|
127
|
+
# Number of the demo scenario to run (see the table below).
kode = 3

# Demo scenarios, keyed by number:
# [name of grammar factory, name of tokenizer factory, input text]
scenarios = {
  0 => [:grammar_l1, :tokenizer_l1, '2 + 3 * 4'],
  1 => [:grammar_l10, :tokenizer_l10, 'a a a a a'],
  2 => [:grammar_l11, :tokenizer_l11, 'a a a a a'],
  3 => [:grammar_l31, :tokenizer_l1, '2 + 3 * 4'],
  4 => [:grammar_l8, :tokenizer_l8, 'x x x x']
}

scenario = scenarios[kode]
if scenario
  grammar_factory, tokenizer_factory, input_text = scenario

  # Recognize the input, then walk the resulting chart from the success item
  recognizer = Dendroid::Recognizer::Recognizer.new(send(grammar_factory), send(tokenizer_factory))
  chart = recognizer.run(input_text)
  succ_item = retrieve_success_item(chart, recognizer.grm_analysis.grammar)
  root = Dendroid::Parsing::ChartWalker.new(chart).walk(succ_item)

  # Render the walk result on standard output
  # formatter = BracketNotation.new($stdout)
  Asciitree.new($stdout).render(ParseTreeVisitor.new(root))
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'parse_node'
|
4
|
+
|
5
|
+
module Dendroid
|
6
|
+
module Parsing
|
7
|
+
class TerminalNode < ParseNode
  # @return [Dendroid::Syntax::Terminal] Terminal symbol of matching token.
  attr_reader :symbol

  # @return [Dendroid::Lexical::Token] Matching input token object.
  attr_reader :token

  # @param sym [Dendroid::Syntax::Terminal] Terminal symbol of the token
  # @param tok [Dendroid::Lexical::Token] The matching token
  # @param rank [Integer] passed to ParseNode together with rank + 1
  #   (presumably the lower and upper bounds of the token range)
  def initialize(sym, tok, rank)
    super(rank, rank + 1)
    @symbol = sym
    @token = tok
  end

  # Part of the 'visitee' role in Visitor design pattern.
  # @param aVisitor[ParseTreeVisitor] the visitor
  def accept(aVisitor)
    aVisitor.visit_terminal(self)
  end

  # Text representation: symbol name, the token value when the token is a
  # literal, followed by the text representation from the superclass.
  # @return [String]
  def to_s
    value_suffix =
      if token.is_a?(Dendroid::Lexical::Literal)
        ": #{token.value}"
      else
        ''
      end

    "#{symbol.name}#{value_suffix} #{super}"
  end
end # class
|
31
|
+
end # module
|
32
|
+
end # module
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'and_node'
|
4
|
+
require_relative 'or_node'
|
5
|
+
require_relative 'terminal_node'
|
6
|
+
require_relative 'empty_rule_node'
|
7
|
+
|
8
|
+
module Dendroid
|
9
|
+
module Parsing
|
10
|
+
# Keeps track of one walk through the chart: a state, the current rank,
# the current item and the stack of parent nodes under construction.
class WalkProgress
  # @return [Symbol] state of the walk (:New initially, :Forking after #fork)
  attr_accessor :state

  # @return [Integer] current rank; decremented when a terminal node is added
  attr_accessor :curr_rank

  # @return [Object] the current item (never set to nil through the writer)
  attr_reader :curr_item

  # @return [Object, nil] predecessor recorded by #fork
  attr_accessor :predecessor

  # @return [Array] stack of parent nodes; the last element receives new children
  attr_reader :parents

  # @param start_rank [Integer] initial rank
  # @param start_item [Object] initial current item
  # @param parents [Array] initial stack of parent nodes
  def initialize(start_rank, start_item, parents)
    @state = :New
    @curr_rank = start_rank
    @curr_item = start_item
    @predecessor = nil
    @parents = parents
  end

  # Copy hook invoked by #dup and #clone.
  # The copy gets its own parents array (shallow copy) and no predecessor.
  # @param orig [WalkProgress] the object being copied
  def initialize_copy(orig)
    @state = orig.state
    @curr_rank = orig.curr_rank
    @curr_item = orig.curr_item
    @predecessor = nil
    @parents = orig.parents.dup
  end

  # Put the progress in :Forking state and record the given predecessor.
  # @param thePredecessor [Object]
  def fork(thePredecessor)
    @state = :Forking
    @predecessor = thePredecessor
  end

  # Change the current item.
  # @param anEntry [Object] the new current item
  # @raise [StandardError] when anEntry is nil
  def curr_item=(anEntry)
    raise StandardError, 'Current item of a walk progress cannot be nil' if anEntry.nil?

    @curr_item = anEntry
  end

  # Add an empty-rule node for the given entry as a child of last parent
  # @param anEntry [Object]
  # @return [EmptyRuleNode] the added node
  def add_node_empty(anEntry)
    node_empty = EmptyRuleNode.new(anEntry, curr_rank)
    add_child_node(node_empty)
  end

  # Add a terminal node for terminal at current rank as a child of last parent
  # The current rank is decremented before the node is created.
  # @param token [Object] the token attached to the node
  # @return [TerminalNode] the added node
  def add_terminal_node(token)
    @curr_rank -= 1
    term_node = TerminalNode.new(curr_item.prev_symbol, token, curr_rank)
    add_child_node(term_node)
  end

  # Add an AND node for given entry as a child of last parent
  # and make it the new last parent.
  # @param anEntry [Object]
  # @return [ANDNode] the pushed node
  # @raise [StandardError] when the node's rule differs from the entry's rule
  def push_and_node(anEntry)
    node = ANDNode.new(anEntry, curr_rank)
    # NOTE(review): the original code tagged this check with "# Fails" -- confirm intent
    unless anEntry.rule == node.rule
      raise StandardError, "Rule mismatch between entry (#{anEntry}) and its AND node"
    end

    add_child_node(node)
    parents.push(node)

    node
  end

  # Add an OR node as a child of last parent and make it the new last parent.
  # @param origin [Integer]
  # @param arity [Integer]
  # @return [OrNode] the pushed node
  def push_or_node(origin, arity)
    node = OrNode.new(curr_item.prev_symbol, origin, curr_rank, arity)
    add_child_node(node)
    parents.push(node)

    node
  end

  # Add the given node to the last parent, at index (current item position - 1).
  # @param aNode [Object] the node to add
  # @return [Object] the node itself
  def add_child_node(aNode)
    parents.last.add_child(aNode, curr_item.position - 1)
    aNode
  end

  # Do the given EItems match one of the parent?
  # Matching is delegated to the parent node (node.match(entry)).
  # Parents are scanned from last to first; OrNode parents are never matched
  # but still count in the offset.
  # @param entries [Array<EItem>] candidate entries
  # @param stop_at_first [Boolean] when true, stop at the first match
  # @return [Array<Array>] pairs [entry, offset] where offset is the number of
  #   parents scanned before the matching one; empty when entries is empty
  def match_parent?(entries, stop_at_first)
    matching = []
    # Nothing to match: avoid calling #origin on nil below
    return matching if entries.empty?

    min_origin = entries[0].origin
    first_iteration = true
    offset = 0

    parents.reverse_each do |node|
      if node.is_a?(OrNode)
        offset += 1
        next
      end
      entries.each do |ent|
        # The smallest origin among entries is determined during the first pass
        min_origin = ent.origin if first_iteration && ent.origin < min_origin
        next unless node.match(ent)

        matching << [ent, offset]
        break if stop_at_first
      end
      first_iteration = false
      break if stop_at_first && !matching.empty?

      # Stop loop when parent.origin < min(entries.origin)
      break if node.range[0] < min_origin

      offset += 1
    end

    matching
  end
end # class
|
116
|
+
end # module
|
117
|
+
end # module
|
@@ -15,6 +15,9 @@ module Dendroid
|
|
15
15
|
# @return [Array<Recognizer::ItemSet>] The array of item sets
|
16
16
|
attr_reader :item_sets
|
17
17
|
|
18
|
+
# @return [Array<Dendroid::Lexical::Token>] The input tokens
|
19
|
+
attr_reader :tokens
|
20
|
+
|
18
21
|
# @return [Boolean] Indicates whether the recognizer successfully processed the whole input
|
19
22
|
attr_writer :success
|
20
23
|
|
@@ -45,6 +48,11 @@ module Dendroid
|
|
45
48
|
item_sets.last.add_item(e_item)
|
46
49
|
end
|
47
50
|
|
51
|
+
# @param input_tokens [Array<Dendroid::Lexical::Token>] The input tokens
|
52
|
+
def tokens=(input_tokens)
|
53
|
+
@tokens = input_tokens
|
54
|
+
end
|
55
|
+
|
48
56
|
# Return true if the input text is valid according to the grammar.
|
49
57
|
# @return [Boolean]
|
50
58
|
def successful?
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'forwardable'
|
4
|
+
require 'weakref'
|
4
5
|
|
5
6
|
module Dendroid
|
6
7
|
module Recognizer
|
@@ -9,19 +10,27 @@ module Dendroid
|
|
9
10
|
class EItem
|
10
11
|
extend Forwardable
|
11
12
|
|
13
|
+
# (Weak) reference to the dotted item
|
12
14
|
# @return [Dendroid::GrmAnalysis::DottedItem]
|
13
15
|
attr_reader :dotted_item
|
14
16
|
|
15
17
|
# @return [Integer] the rank of the token that correspond to the start of the rule.
|
16
18
|
attr_reader :origin
|
17
19
|
|
18
|
-
|
20
|
+
# TODO: :predictor, :completer, :scanner
|
21
|
+
attr_accessor :algo
|
22
|
+
|
23
|
+
# @return [Array<WeakRef>] predecessors sorted by decreasing origin value
|
24
|
+
attr_accessor :predecessors
|
25
|
+
|
26
|
+
def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?, :position, :prev_symbol, :rule
|
19
27
|
|
20
28
|
# @param aDottedItem [Dendroid::GrmAnalysis::DottedItem]
|
21
29
|
# @param origin [Integer]
|
22
30
|
def initialize(aDottedItem, origin)
|
23
|
-
@dotted_item = aDottedItem
|
31
|
+
@dotted_item = WeakRef.new(aDottedItem)
|
24
32
|
@origin = origin
|
33
|
+
@predecessors = []
|
25
34
|
end
|
26
35
|
|
27
36
|
# @return [Dendroid::Syntax::NonTerminal] the head of the production rule
|
@@ -42,6 +51,16 @@ module Dendroid
|
|
42
51
|
def to_s
|
43
52
|
"#{dotted_item} @ #{origin}"
|
44
53
|
end
|
54
|
+
|
55
|
+
alias inspect to_s
|
56
|
+
|
57
|
+
# Register a predecessor, wrapped in a WeakRef.
# When there are already at least two predecessors and the new one has a
# smaller origin than the first one, it is inserted at index 2;
# otherwise it is put at the front.
# NOTE(review): the attribute is documented as "sorted by decreasing origin";
# this placement logic does not obviously guarantee that order -- confirm intent.
# @param pred [#origin] the predecessor to register
def add_predecessor(pred)
  behind_head = predecessors.size > 1 && pred.origin < predecessors[0].origin
  ref = WeakRef.new(pred)

  behind_head ? predecessors.insert(2, ref) : predecessors.unshift(ref)
end
|
45
64
|
end # class
|
46
65
|
end # module
|
47
66
|
end # module
|
@@ -15,10 +15,15 @@ module Dendroid
|
|
15
15
|
@items = []
|
16
16
|
end
|
17
17
|
|
18
|
-
# Add an
|
18
|
+
# Add an Earley item to the set if not yet present.
|
19
19
|
# @param anItem [Recognizer::EItem]
|
20
|
+
# @return [Recognizer::EItem] the item in the set
|
20
21
|
# Add an Earley item to the set unless an equal item is already present.
# @param anItem [Recognizer::EItem] the candidate item
# @return [Recognizer::EItem] the item stored in the set: the pre-existing
#   equal item when there is one, otherwise anItem itself
def add_item(anItem)
  position = items.index(anItem)

  if position
    items[position]
  else
    @items << anItem
    anItem
  end
end
|
23
28
|
|
24
29
|
# Find the items that expect a given grammar symbol
|
@@ -39,7 +39,7 @@ module Dendroid
|
|
39
39
|
end
|
40
40
|
|
41
41
|
# Run the Earley algorithm
|
42
|
-
# @param initial_token [
|
42
|
+
# @param initial_token [Dendroid::Lexical::Token]
|
43
43
|
def earley_parse(initial_token)
|
44
44
|
chart = new_chart
|
45
45
|
tokens = [initial_token]
|
@@ -64,7 +64,8 @@ module Dendroid
|
|
64
64
|
break unless advance
|
65
65
|
end
|
66
66
|
|
67
|
-
|
67
|
+
chart.tokens = tokens
|
68
|
+
determine_outcome(chart)
|
68
69
|
chart
|
69
70
|
end
|
70
71
|
|
@@ -76,7 +77,11 @@ module Dendroid
|
|
76
77
|
prd = grm_analysis.grammar.nonterm2production[top_symbol]
|
77
78
|
chart = Chart.new
|
78
79
|
seed_items = prd.predicted_items
|
79
|
-
seed_items.each
|
80
|
+
seed_items.each do |item|
|
81
|
+
entry = EItem.new(item, 0)
|
82
|
+
entry.algo = :predictor
|
83
|
+
chart.seed_last_set(entry)
|
84
|
+
end
|
80
85
|
|
81
86
|
chart
|
82
87
|
end
|
@@ -117,27 +122,26 @@ module Dendroid
|
|
117
122
|
# Error case: next actual token matches none of the expected tokens.
|
118
123
|
def predictor(chart, item, rank, tokens, mode, predicted_symbols)
|
119
124
|
next_symbol = item.next_symbol
|
120
|
-
if mode == :genuine
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
end
|
125
|
+
# if mode == :genuine
|
126
|
+
# predicted_symbols << Set.new if rank == predicted_symbols.size
|
127
|
+
# predicted = predicted_symbols[rank]
|
128
|
+
# return if predicted.include?(next_symbol)
|
129
|
+
#
|
130
|
+
# predicted.add(next_symbol)
|
131
|
+
# end
|
127
132
|
|
128
133
|
curr_set = chart[rank]
|
129
134
|
next_token = tokens[rank]
|
130
135
|
prd = grm_analysis.symbol2production(next_symbol)
|
131
136
|
entry_items = prd.predicted_items
|
137
|
+
added = []
|
132
138
|
entry_items.each do |entry|
|
133
139
|
member = entry.next_symbol
|
134
140
|
if member&.terminal?
|
135
141
|
next unless next_token
|
136
142
|
next if (member.name != next_token.terminal) && mode == :genuine
|
137
143
|
end
|
138
|
-
|
139
|
-
new_item = EItem.new(entry, rank)
|
140
|
-
curr_set.add_item(new_item)
|
144
|
+
added << add_item(curr_set, entry, rank, item, :predictor)
|
141
145
|
end
|
142
146
|
# Use trick from paper John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
|
143
147
|
return unless next_symbol.nullable?
|
@@ -145,8 +149,9 @@ module Dendroid
|
|
145
149
|
next_item = grm_analysis.next_item(item.dotted_item)
|
146
150
|
return unless next_item
|
147
151
|
|
148
|
-
|
149
|
-
curr_set.
|
152
|
+
special = add_item(curr_set, next_item, item.origin, nil, :predictor)
|
153
|
+
# special = add_item(curr_set, next_item, item.origin, added.shift, :predictor)
|
154
|
+
# added.each { |e| special.add_predecessor(e) }
|
150
155
|
end
|
151
156
|
|
152
157
|
# procedure SCANNER((A → α•aβ, j), k, words)
|
@@ -161,8 +166,7 @@ module Dendroid
|
|
161
166
|
new_rank = rank + 1
|
162
167
|
chart.append_new_set if chart[new_rank].nil?
|
163
168
|
next_dotted_item = grm_analysis.next_item(dit)
|
164
|
-
|
165
|
-
chart[new_rank].add_item(new_item)
|
169
|
+
add_item(chart[new_rank], next_dotted_item, scan_item.origin, scan_item, :scanner)
|
166
170
|
advance = true
|
167
171
|
end
|
168
172
|
|
@@ -190,8 +194,7 @@ module Dendroid
|
|
190
194
|
next if member.name != next_token.terminal
|
191
195
|
end
|
192
196
|
|
193
|
-
|
194
|
-
curr_set.add_item(new_item)
|
197
|
+
add_item(curr_set, return_item, call_item.origin, item, :completer)
|
195
198
|
end
|
196
199
|
end
|
197
200
|
|
@@ -206,8 +209,18 @@ module Dendroid
|
|
206
209
|
end
|
207
210
|
end
|
208
211
|
|
209
|
-
def
|
212
|
+
# Create an Earley item and add it to the given item set.
# The predecessor (if any) is recorded on the item actually stored in the set,
# while the procedure name is assigned to the freshly created item only; so an
# item that was already in the set keeps its previous algo value.
# @param item_set [ItemSet] the set to update
# @param dotted_item [Dendroid::GrmAnalysis::DottedItem]
# @param origin [Integer]
# @param predecessor [EItem, nil] the item that led to this one
# @param procedure [Symbol] name of the procedure creating the item
# @return [EItem] the item stored in the set
def add_item(item_set, dotted_item, origin, predecessor, procedure)
  candidate = EItem.new(dotted_item, origin)
  stored = item_set.add_item(candidate)
  predecessor && stored.add_predecessor(predecessor)
  candidate.algo = procedure

  stored
end
|
220
|
+
|
221
|
+
def determine_outcome(chart)
|
210
222
|
success = false
|
223
|
+
tokens = chart.tokens
|
211
224
|
if chart.size == tokens.size + 1
|
212
225
|
top_symbol = grm_analysis.grammar.start_symbol
|
213
226
|
top_rule = grm_analysis.grammar.nonterm2production[top_symbol]
|
@@ -211,7 +211,7 @@ module Dendroid
|
|
211
211
|
backlog.subtract(to_remove)
|
212
212
|
end
|
213
213
|
|
214
|
-
backlog.each { |i| rules[i].non_productive }
|
214
|
+
# backlog.each { |i| rules[i].non_productive }
|
215
215
|
non_productive = symbols.reject(&:productive?)
|
216
216
|
non_productive.each { |symb| symb.productive = false }
|
217
217
|
non_productive
|
data/lib/dendroid/syntax/rule.rb
CHANGED
@@ -2,28 +2,60 @@
|
|
2
2
|
|
3
3
|
module Dendroid
|
4
4
|
module Syntax
|
5
|
-
#
|
6
|
-
#
|
7
|
-
# and the right-hand side (RHS) consists of one or more sequence of symbols.
|
8
|
-
# The symbols in RHS can be either terminal or non-terminal symbols.
|
9
|
-
# The rule stipulates that the LHS is equivalent to the RHS,
|
10
|
-
# in other words every occurrence of the LHS can be substituted to
|
11
|
-
# corresponding RHS.
|
5
|
+
# A grammar rule: a head (lhs) non-terminal together with
|
6
|
+
# one or more alternatives (each one a sequence of grammar symbols).
|
12
7
|
class Rule
|
13
8
|
# @return [Dendroid::Syntax::NonTerminal] The left-hand side of the rule.
|
14
9
|
attr_reader :head
|
15
10
|
alias lhs head
|
16
11
|
|
17
|
-
#
|
18
|
-
|
19
|
-
|
20
|
-
|
12
|
+
# @return [Array<Dendroid::Syntax::SymbolSeq>]
|
13
|
+
attr_reader :alternatives
|
14
|
+
|
15
|
+
# Create a Choice instance.
|
16
|
+
# @param theLhs [Dendroid::Syntax::NonTerminal] The left-hand side of the rule.
|
17
|
+
# @param alt [Array<Dendroid::Syntax::SymbolSeq>] the alternatives (each as a sequence of symbols).
|
18
|
+
def initialize(theLhs, alt)
|
19
|
+
@head = valid_head(theLhs)
|
20
|
+
@alternatives = valid_alternatives(alt)
|
21
21
|
end
|
22
22
|
|
23
|
-
# Return the text representation of the
|
23
|
+
# Return the text representation of the choice
|
24
24
|
# @return [String]
|
25
25
|
def to_s
|
26
|
-
head.
|
26
|
+
"#{head} => #{alternatives.join(' | ')}"
|
27
|
+
end
|
28
|
+
|
29
|
+
# Predicate method to check whether the choice rule body is productive.
|
30
|
+
# It is productive when at least one of its alternative is productive.
|
31
|
+
# @return [Boolean]
|
32
|
+
# Check whether at least one alternative of the rule is productive.
# When that is the case, the productive alternatives are remembered as a Set
# and the head is flagged as productive.
# @return [Boolean]
def productive?
  fruitful = alternatives.select { |alt| alt.productive? }

  if fruitful.empty?
    false
  else
    @productive = Set.new(fruitful)
    head.productive = true
  end
end
|
39
|
+
|
40
|
+
# Predicate method to check whether the rule has at least one empty alternative.
|
41
|
+
# @return [Boolean]
|
42
|
+
# Tell whether at least one of the alternatives is empty.
# @return [Boolean]
def empty?
  alternatives.any? { |alt| alt.empty? }
end
|
45
|
+
|
46
|
+
# Returns an array with the symbol sequence of its alternatives
|
47
|
+
# @return [Array<Dendroid::Syntax::SymbolSeq>]
|
48
|
+
def rhs
|
49
|
+
alternatives
|
50
|
+
end
|
51
|
+
|
52
|
+
# Equality operator
|
53
|
+
# Two production rules are equal when their head and alternatives are equal.
|
54
|
+
# @return [Boolean]
|
55
|
+
# Equality operator.
# A rule is equal to itself; otherwise two rules are equal when both their
# heads and their alternatives are equal.
# @param other [Rule] the rule to compare with
# @return [Boolean]
def ==(other)
  equal?(other) || ((head == other.head) && (alternatives == other.alternatives))
end
|
28
60
|
|
29
61
|
# The set of all grammar symbols that occur in the rhs.
|
@@ -70,6 +102,32 @@ module Dendroid
|
|
70
102
|
|
71
103
|
lhs
|
72
104
|
end
|
105
|
+
|
106
|
+
# Validate the alternatives given to the rule.
# @param alt [Array<Dendroid::Syntax::SymbolSeq>] the candidate alternatives
# @return [Array<Dendroid::Syntax::SymbolSeq>] the given array, when valid
# @raise [StandardError] when alt isn't an Array, is empty, or contains two
#   alternatives with the same text representation
def valid_alternatives(alt)
  # Report the class of the offending argument itself
  raise StandardError, "Expecting an Array, found a #{alt.class} instead." unless alt.is_a?(Array)

  # A rule must have at least one alternative
  raise StandardError, "The choice for `#{head}` must have at least one alternative." if alt.empty?

  # Verify that each array element is a valid symbol sequence
  alt.each { |elem| valid_sequence(elem) }

  # Fail on the first alternative whose text repeats an earlier one
  seen = {}
  alt.each do |elem|
    text = elem.to_s
    raise StandardError, "Duplicate alternatives: #{head} => #{text}" if seen.key?(text)

    seen[text] = true
  end

  alt
end
|
73
131
|
end # class
|
74
132
|
end # module
|
75
133
|
end # module
|