dendroid 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +19 -0
- data/lib/dendroid/recognizer/chart.rb +55 -0
- data/lib/dendroid/recognizer/e_item.rb +47 -0
- data/lib/dendroid/recognizer/item_set.rb +38 -0
- data/lib/dendroid/recognizer/recognizer.rb +286 -0
- data/lib/dendroid/syntax/grammar.rb +1 -1
- data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb +1 -72
- data/spec/dendroid/recognizer/chart_spec.rb +1 -0
- data/spec/dendroid/recognizer/e_item_spec.rb +59 -0
- data/spec/dendroid/recognizer/item_set_spec.rb +63 -0
- data/spec/dendroid/recognizer/recognizer_spec.rb +761 -0
- data/spec/dendroid/support/sample_grammars.rb +319 -0
- data/spec/dendroid/syntax/grammar_spec.rb +145 -0
- data/version.txt +1 -1
- metadata +11 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56842965215f0cef73b768223b5acb907fc1642b57528a9e616852ae6adab2cc
|
4
|
+
data.tar.gz: d53478ebcb86c89a407d648c67bfd34dd1f3333f41f7b6e0eac1dcb3e2a25cb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ef9e4766ad0c786471d08ba6cffcdffaec2d9acf734e25dafa796e714e1103ee838421b3b817c0c732b91c1a238c5fd32c9c3a6f2954926880336b70caab8b9
|
7
|
+
data.tar.gz: 7e389e83762cedfbdbdf23acbcf821f478a237d8e2b6da6c4299db17d0ade7e760f77328e8f9f59cb10c251f6b1711257793c2b94c12e7ddd8bde2bcad5add66
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,25 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [0.0.12] - 2023-11-02
|
6
|
+
Added more tests.
|
7
|
+
|
8
|
+
### Added
|
9
|
+
- Added more tests to spec file of `Grammar` class.
|
10
|
+
- Added more tests to spec file of `Recognizer` class.
|
11
|
+
|
12
|
+
## [0.0.11] - 2023-11-02
|
13
|
+
Added Earley recognizer and its ancillary classes.
|
14
|
+
|
15
|
+
### Added
|
16
|
+
- Class `Chart` and its spec file
|
17
|
+
- Class `EItem` and its spec file
|
18
|
+
- Class `ItemSet` and its spec file
|
19
|
+
- Class `Recognizer` and its spec file
|
20
|
+
|
21
|
+
### Changed
|
22
|
+
- RSpec tests: moved module `SampleGrammars` to separate file in folder `support`
|
23
|
+
|
5
24
|
## [0.0.10] - 2023-11-01
|
6
25
|
Added missing class and method documentation, fixed some `Rubocop` offenses.
|
7
26
|
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'item_set'
|
4
|
+
|
5
|
+
require 'forwardable'

module Dendroid
  module Recognizer
    # Also called a parse table. It records the progress of the
    # Earley recognizer when it verifies the compliance of the input text
    # to the language grammar rules.
    # It essentially consists of an array of item sets.
    # If n is the number of input tokens then the chart has n + 1 item sets.
    class Chart
      extend Forwardable

      # @return [Array<Recognizer::ItemSet>] The array of item sets
      attr_reader :item_sets

      # Set the flag telling whether the recognizer successfully processed the whole input.
      # The flag is read back through {#successful?}.
      attr_writer :success

      # @return [Class, nil] The exception class in case of an error found by the recognizer
      attr_accessor :failure_class

      # @return [String, nil] The error message
      attr_accessor :failure_reason

      # Indexed access, last set and number of sets are served by the underlying array
      def_delegators :@item_sets, :[], :last, :size

      # Constructor
      # Initialize the chart with one empty item set.
      # The chart starts in the 'not successful' state without any failure details.
      def initialize
        @item_sets = []
        @success = false
        @failure_class = nil
        @failure_reason = nil
        append_new_set
      end

      # Add a new empty item set at the end of the array of item sets
      # @return [Array<Recognizer::ItemSet>] the updated array of item sets
      def append_new_set
        item_sets << ItemSet.new
      end

      # Add an EItem to the last item set
      # @param e_item [EItem]
      def seed_last_set(e_item)
        item_sets.last.add_item(e_item)
      end

      # Return true if the input text is valid according to the grammar.
      # @return [Boolean]
      def successful?
        @success
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'forwardable'
|
4
|
+
|
5
|
+
module Dendroid
  module Recognizer
    # An Earley item is essentially a pair consisting of a dotted item and the rank of a token.
    # It helps to keep track of the progress of an Earley recognizer.
    class EItem
      extend Forwardable

      # @return [Dendroid::GrmAnalysis::DottedItem]
      attr_reader :dotted_item

      # @return [Integer] the rank of the token that corresponds to the start of the rule.
      attr_reader :origin

      # These queries are answered by the wrapped dotted item
      def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?

      # @param aDottedItem [Dendroid::GrmAnalysis::DottedItem]
      # @param origin [Integer]
      def initialize(aDottedItem, origin)
        @dotted_item = aDottedItem
        @origin = origin
      end

      # @return [Dendroid::Syntax::NonTerminal] the head of the production rule
      def lhs
        dotted_item.rule.lhs
      end

      # Equality test.
      # Comparing with an object that has no `origin` or no `dotted_item`
      # (e.g. nil) yields false instead of raising a NoMethodError.
      # @param other [Object]
      # @return [Boolean] true iff dotted items and origins are equal
      def ==(other)
        return true if equal?(other)
        return false unless other.respond_to?(:origin) && other.respond_to?(:dotted_item)

        (origin == other.origin) && (dotted_item == other.dotted_item)
      end

      # @return [String] the text representation of the Earley item
      def to_s
        "#{dotted_item} @ #{origin}"
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'forwardable'

module Dendroid
  module Recognizer
    # Holds the EItems identified by the recognizer when processing the token at a given rank.
    class ItemSet
      extend Forwardable

      # @return [Array<Recognizer::EItem>] the items in insertion order, without duplicates
      attr_reader :items

      # Basic collection queries are served by the underlying array
      def_delegators :@items, :clear, :each, :empty?, :select, :size

      # Constructor. Creates an empty item set.
      def initialize
        @items = []
      end

      # Add an Earley item to the set unless an equal item (as per `==`) is already present.
      # @param anItem [Recognizer::EItem]
      # @return [Array<Recognizer::EItem>, nil] the updated items; nil when the item was a duplicate
      def add_item(anItem)
        @items << anItem unless items.include? anItem
      end

      # Find the items that expect a given grammar symbol
      # @param aSymbol [Dendroid::Syntax::GrmSymbol]
      # @return [Array<Recognizer::EItem>] the items for which `expecting?(aSymbol)` holds
      def items_expecting(aSymbol)
        items.select { |itm| itm.expecting?(aSymbol) }
      end

      # Return a text representation of the item set (one item per line)
      # @return [String]
      def to_s
        items.join("\n")
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,286 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../grm_analysis/grm_analyzer'
|
4
|
+
require_relative 'e_item'
|
5
|
+
require_relative 'chart'
|
6
|
+
|
7
|
+
require 'set'

module Dendroid
  # This module hosts the classes needed to implement an Earley recognizer
  module Recognizer
    # A recognizer determines whether the input text complies to the grammar (syntax) rules.
    # This class implements the Earley recognition algorithm.
    class Recognizer
      # @return [GrmAnalysis::GrmAnalyzer]
      attr_reader :grm_analysis

      # @return [Object] an object that responds to `input=` and `next_token`
      attr_reader :tokenizer

      # @param grammar [Dendroid::Syntax::Grammar]
      # @param tokenizer [Object] must respond to `input=` and to `next_token`;
      #   `next_token` is expected to return nil once the input is exhausted.
      def initialize(grammar, tokenizer)
        @grm_analysis = GrmAnalysis::GrmAnalyzer.new(grammar)
        @tokenizer = tokenizer
      end

      # Try to read the `source` text and verify that it is syntactically correct.
      # An input without any token is rejected right away, unless the start symbol
      # of the grammar is nullable.
      # @param source [String] Input text to recognize
      # @return [Dendroid::Recognizer::Chart]
      def run(source)
        tokenizer.input = source
        tok = tokenizer.next_token
        if tok.nil? && !grm_analysis.grammar.start_symbol.nullable?
          chart = new_chart
          chart.failure_class = StandardError
          chart.failure_reason = 'Error: Input may not be empty nor blank.'
          chart
        else
          earley_parse(tok)
        end
      end

      # Run the Earley algorithm
      # @param initial_token [Dendroid::Lexical::Token, nil] first token of the input;
      #   nil when the input holds no token at all
      # @return [Dendroid::Recognizer::Chart]
      def earley_parse(initial_token)
        chart = new_chart
        # Never store nil in the token list: with an empty input, a nil entry
        # would make chart.size differ from tokens.size + 1 and would later be
        # dereferenced as if it were a genuine token.
        tokens = initial_token ? [initial_token] : []
        predicted_symbols = [Set.new]
        eos_reached = initial_token.nil?
        rank = 0

        loop do
          # Read one token ahead (until the tokenizer is exhausted)
          eos_reached ||= advance_next_token(tokens, predicted_symbols)

          advance = false
          # The set may grow while it is being iterated: the new entries are visited too
          chart[rank].each do |entry|
            # For each entry, do either completer, scanner or predictor action
            tick = do_entry_action(chart, entry, rank, tokens, :genuine, predicted_symbols)
            advance ||= tick
          end

          # Stop as soon as no entry could scan the token at current rank
          break unless advance

          rank += 1
        end

        determine_outcome(chart, tokens)
        chart
      end

      private

      # Create a chart whose first item set holds the predicted items
      # of the productions of the start symbol.
      # @return [Dendroid::Recognizer::Chart]
      def new_chart
        chart = Chart.new
        seed_start_items(chart.last)
        chart
      end

      # Add to the given item set the predicted items (with origin 0) of every
      # production of the start symbol.
      # Reminder: there might be multiple rules for the start symbol
      # @param item_set [Dendroid::Recognizer::ItemSet]
      def seed_start_items(item_set)
        grammar = grm_analysis.grammar
        prods = grammar.nonterm2productions[grammar.start_symbol]
        prods.each do |prd|
          prd.predicted_items.each { |item| item_set.add_item(EItem.new(item, 0)) }
        end
      end

      # Ask the tokenizer for the next token and record it when there is one.
      # @param tokens [Array] tokens read so far (updated in place)
      # @param predicted_symbols [Array<Set>] one set per token rank (updated in place)
      # @return [Boolean] true when the end of input was reached
      def advance_next_token(tokens, predicted_symbols)
        tok = tokenizer.next_token
        return true unless tok

        tokens << tok
        predicted_symbols << Set.new
        false
      end

      # Dispatch to the completer, scanner or predictor depending on the entry.
      # @return [Boolean] true only when the scanner consumed the token at `rank`
      def do_entry_action(chart, entry, rank, tokens, mode, predicted_symbols)
        if entry.completed?
          completer(chart, entry, rank, tokens, mode)
          false
        elsif entry.next_symbol.terminal?
          scanner(chart, entry, rank, tokens)
        else
          predictor(chart, entry, rank, tokens, mode, predicted_symbols)
          false
        end
      end

      # procedure PREDICTOR((A → α•Bβ, j), k)
      #   for each (B → γ) in GRAMMAR_RULES_FOR(B) do
      #     ADD_TO_SET((B → •γ, k), S[k])
      #   end
      # Assuming next symbol is a non-terminal
      #
      # In :genuine mode, a non-terminal is predicted at most once per rank and the
      # predicted items that expect a terminal other than the next token are left out.
      # NOTE(review): in :error mode the predicted items expecting a terminal are still
      # dropped when there is no token at `rank` (end of input), whereas the completer
      # keeps them; the 'expected' list of a premature-end message may therefore be
      # incomplete. Behaviour kept as is - to be confirmed.
      def predictor(chart, item, rank, tokens, mode, predicted_symbols)
        next_symbol = item.next_symbol
        if mode == :genuine
          predicted_symbols << Set.new if rank == predicted_symbols.size
          predicted = predicted_symbols[rank]
          return if predicted.include?(next_symbol)

          predicted.add(next_symbol)
        end

        prods = grm_analysis.symbol2productions[next_symbol]
        curr_set = chart[rank]
        next_token = tokens[rank]
        prods.each do |prd|
          prd.predicted_items.each do |entry|
            member = entry.next_symbol
            if member&.terminal?
              next unless next_token
              next if (member.name != next_token.terminal) && mode == :genuine
            end

            curr_set.add_item(EItem.new(entry, rank))
          end
        end

        # Use trick from paper John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
        # i.e. when the predicted symbol is nullable, also move the dot over it right away.
        return unless next_symbol.nullable?

        next_item = grm_analysis.next_item(item.dotted_item)
        curr_set.add_item(EItem.new(next_item, item.origin)) if next_item
      end

      # procedure SCANNER((A → α•aβ, j), k, words)
      #   if j < LENGTH(words) and a ⊂ PARTS_OF_SPEECH(words[k]) then
      #     ADD_TO_SET((A → αa•β, j), S[k+1])
      #   end
      # Assuming next symbol is a terminal
      # @return [Boolean] true when the token at `rank` matched the expected terminal
      def scanner(chart, scan_item, rank, tokens)
        dit = scan_item.dotted_item
        return false unless rank < tokens.size && dit.next_symbol.name == tokens[rank].terminal

        new_rank = rank + 1
        chart.append_new_set if chart[new_rank].nil?
        next_dotted_item = grm_analysis.next_item(dit)
        chart[new_rank].add_item(EItem.new(next_dotted_item, scan_item.origin))
        true
      end

      # procedure COMPLETER((B → γ•, x), k)
      #   for each (A → α•Bβ, j) in S[x] do
      #     ADD_TO_SET((A → αB•β, j), S[k])
      #   end
      # In :genuine mode, the advanced items that expect a terminal other than
      # the next token are left out.
      def completer(chart, item, rank, tokens, mode)
        curr_set = chart[rank]
        next_token = tokens[rank]
        callers = chart[item.origin].items_expecting(item.lhs)
        callers.each do |call_item|
          return_item = grm_analysis.next_item(call_item.dotted_item)
          next unless return_item

          member = return_item.next_symbol
          if member&.terminal? && (mode == :genuine)
            next unless next_token
            next if member.name != next_token.terminal
          end

          curr_set.add_item(EItem.new(return_item, call_item.origin))
        end
      end

      # Re-initialize the item set at `rank` with its seed items only:
      # - at rank 0, the predicted items of the start symbol;
      # - otherwise, the items of the previous set that expect a terminal,
      #   with their dot moved over that terminal.
      def seed_set(chart, rank)
        curr_set = chart[rank]
        if rank.zero?
          # chart[rank - 1] would be chart[-1], i.e. the very set being cleared
          curr_set.clear
          seed_start_items(curr_set)
          return
        end

        previous_set = chart[rank - 1]
        curr_set.clear
        scan_entries = previous_set.select { |ent| ent.dotted_item.next_symbol&.terminal? }
        scan_entries.each do |ent|
          new_item = grm_analysis.next_item(ent.dotted_item)
          curr_set.add_item(EItem.new(new_item, ent.origin))
        end
      end

      # Record in the chart whether the input was accepted and, if it was not,
      # the failure class and the error message.
      # @return [Boolean] true when the input complies to the grammar
      def determine_outcome(chart, tokens)
        success = input_accepted?(chart, tokens)
        report_failure(chart, tokens) unless success
        chart.success = success
      end

      # The input is accepted when all tokens were consumed and the last item set
      # holds a reduce item of a start symbol production with origin zero.
      # @return [Boolean]
      def input_accepted?(chart, tokens)
        return false unless chart.size == tokens.size + 1

        grammar = grm_analysis.grammar
        top_rules = grammar.nonterm2productions[grammar.start_symbol]
        final_items = top_rules.reduce([]) do |items, rule|
          items.concat(rule.reduce_items)
        end
        chart.last.items.any? do |entry|
          entry.origin.zero? && final_items.include?(entry.dotted_item)
        end
      end

      # Replay the last item set in :error mode, then store the error details in the chart.
      def report_failure(chart, tokens)
        replay_last_set(chart, tokens)
        err_msg = if chart.size < tokens.size + 1
                    # Recognizer stopped prematurely...
                    unexpected_token_message(chart, tokens[chart.size - 1])
                  elsif chart.size == tokens.size + 1
                    # EOS unexpected...
                    premature_end_message(chart, tokens.last)
                  end
        return unless err_msg

        chart.failure_class = StandardError
        chart.failure_reason = err_msg
      end

      # @return [String] the error message for a token that matches no expected terminal
      def unexpected_token_message(chart, offending_token)
        pos = offending_token.position
        (line, col) = [pos.lineno, pos.column]
        prefix = "Syntax error at or near token line #{line}, column #{col} >>>#{offending_token.source}<<<"
        "#{prefix} Expected #{expectation_text(chart)}, found a #{offending_token.terminal} instead."
      end

      # @param last_token [Object, nil] last token read; nil when the input had no token
      # @return [String] the error message for an input that stops too early
      def premature_end_message(chart, last_token)
        prefix = if last_token
                   pos = last_token.position
                   (line, col) = [pos.lineno, pos.column]
                   "Line #{line}, column #{col}: Premature end of input after '#{last_token.source}'"
                 else
                   'Premature end of input'
                 end
        "#{prefix}, expected: #{expectation_text(chart)}."
      end

      # Describe the terminals expected by the items of the last item set.
      # @return [String]
      def expectation_text(chart)
        terminals = chart.last.items.select(&:pre_scan?).map(&:next_symbol).uniq
        terminals.size == 1 ? terminals[0].name.to_s : "one of: [#{terminals.map(&:name).join(', ')}]"
      end

      # Rebuild the last item set in :error mode, in which the predicted and completed
      # items are not filtered against the next token as they are in :genuine mode.
      def replay_last_set(chart, tokens)
        rank = chart.size - 1
        seed_set(chart, rank) # Re-initialize last set with scan entries

        # Replay in full the actions for last set
        chart[rank].each do |entry|
          do_entry_action(chart, entry, rank, tokens, :error, [Set.new])
        end
      end
    end # class
  end # module
end # module
|
@@ -47,7 +47,7 @@ module Dendroid
|
|
47
47
|
end
|
48
48
|
# TODO: add test for duplicate productions
|
49
49
|
if nonterm2productions[rule.head]&.include? rule
|
50
|
-
raise StandardError, "Production rule '#{
|
50
|
+
raise StandardError, "Production rule '#{rule}' appears more than once in the grammar."
|
51
51
|
end
|
52
52
|
|
53
53
|
add_symbol(rule.head)
|
@@ -1,80 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative '../../spec_helper'
|
4
|
-
require_relative '
|
4
|
+
require_relative '../support/sample_grammars'
|
5
5
|
require_relative '../../../lib/dendroid/grm_analysis/grm_analyzer'
|
6
6
|
|
7
|
-
module SampleGrammars
|
8
|
-
def grammar_l1
|
9
|
-
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
10
|
-
# Grammar inspired from Wikipedia entry on Earley parsing
|
11
|
-
declare_terminals('PLUS', 'STAR', 'INTEGER')
|
12
|
-
|
13
|
-
rule('p' => 's')
|
14
|
-
rule('s' => ['s PLUS m', 'm'])
|
15
|
-
rule('m' => ['m STAR t', 't'])
|
16
|
-
rule('t' => 'INTEGER')
|
17
|
-
end
|
18
|
-
|
19
|
-
builder.grammar
|
20
|
-
end
|
21
|
-
|
22
|
-
def tokenizer_l1
|
23
|
-
Utils::BaseTokenizer.new do
|
24
|
-
map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
|
25
|
-
|
26
|
-
scan_verbatim(['+', '*'])
|
27
|
-
scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def grammar_l2
|
32
|
-
builder = GrmDSL::BaseGrmBuilder.new do
|
33
|
-
# Grammar inspired from Loup Vaillant's example
|
34
|
-
# https://loup-vaillant.fr/tutorials/earley-parsing/recogniser
|
35
|
-
declare_terminals('PLUS', 'MINUS', 'STAR', 'SLASH')
|
36
|
-
declare_terminals('LPAREN', 'RPAREN', 'NUMBER')
|
37
|
-
|
38
|
-
rule('p' => 'sum')
|
39
|
-
rule('sum' => ['sum PLUS product', 'sum MINUS product', 'product'])
|
40
|
-
rule('product' => ['product STAR factor', 'product SLASH factor', 'factor'])
|
41
|
-
rule('factor' => ['LPAREN sum RPAREN', 'NUMBER'])
|
42
|
-
end
|
43
|
-
|
44
|
-
builder.grammar
|
45
|
-
end
|
46
|
-
|
47
|
-
def tokenizer_l2
|
48
|
-
Utils::BaseTokenizer.new do
|
49
|
-
map_verbatim2terminal({
|
50
|
-
'+' => :PLUS,
|
51
|
-
'-' => :MINUS,
|
52
|
-
'*' => :STAR,
|
53
|
-
'/' => :SLASH,
|
54
|
-
'(' => :LPAREN,
|
55
|
-
')' => :RPAREN
|
56
|
-
})
|
57
|
-
|
58
|
-
scan_verbatim(['+', '-', '*', '/', '(', ')'])
|
59
|
-
scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def grammar_l3
|
64
|
-
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
65
|
-
# Grammar inspired from Andrew Appel's example
|
66
|
-
# Modern Compiler Implementation in Java
|
67
|
-
declare_terminals('a', 'c', 'd')
|
68
|
-
|
69
|
-
rule('Z' => ['d', 'X Y Z'])
|
70
|
-
rule('Y' => ['', 'c'])
|
71
|
-
rule('X' => %w[Y a])
|
72
|
-
end
|
73
|
-
|
74
|
-
builder.grammar
|
75
|
-
end
|
76
|
-
end # module
|
77
|
-
|
78
7
|
describe Dendroid::GrmAnalysis::GrmAnalyzer do
|
79
8
|
include SampleGrammars
|
80
9
|
let(:grammar) { grammar_l1 }
|
@@ -0,0 +1 @@
|
|
1
|
+
# frozen_string_literal: true
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../spec_helper'
|
4
|
+
require_relative '../../../lib/dendroid/syntax/terminal'
|
5
|
+
require_relative '../../../lib/dendroid/syntax/non_terminal'
|
6
|
+
require_relative '../../../lib/dendroid/syntax/symbol_seq'
|
7
|
+
require_relative '../../../lib/dendroid/syntax/production'
|
8
|
+
require_relative '../../../lib/dendroid/grm_analysis/dotted_item'
|
9
|
+
require_relative '../../../lib/dendroid/recognizer/e_item'
|
10
|
+
|
11
|
+
# Specification of the Earley item class: construction, accessors,
# access to the rule's lhs, equality and text representation.
describe Dendroid::Recognizer::EItem do
  # Fixture: two terminals and one non-terminal
  let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
  let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
  let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
  # Fixture: a production built from expr_symb and the NUMBER PLUS NUMBER sequence,
  # and another one built from expr_symb and an empty sequence
  let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
  let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
  let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
  let(:empty_prod) { Dendroid::Syntax::Production.new(expr_symb, empty_body) }
  # Fixture: two distinct dotted items (second argument is presumably the dot position - TODO confirm)
  let(:sample_dotted) { Dendroid::GrmAnalysis::DottedItem.new(prod, 1) }
  let(:other_dotted) { Dendroid::GrmAnalysis::DottedItem.new(empty_prod, 0) }
  let(:sample_origin) { 3 }

  subject { described_class.new(sample_dotted, sample_origin) }

  context 'Initialization:' do
    it 'is initialized with a dotted item and an origin position' do
      expect { described_class.new(sample_dotted, sample_origin) }.not_to raise_error
    end

    it 'knows its related dotted item' do
      expect(subject.dotted_item).to eq(sample_dotted)
    end

    it 'knows its origin value' do
      expect(subject.origin).to eq(sample_origin)
    end
  end # context

  context 'Provided service:' do
    it 'knows the lhs of related production' do
      expect(subject.lhs).to eq(expr_symb)
    end

    # rubocop: disable Lint/BinaryOperatorWithIdenticalOperands

    it 'can compare with another EItem' do
      # Same object, then equal content, then differing origin, then differing dotted item
      expect(subject == subject).to be_truthy
      expect(subject == described_class.new(sample_dotted, sample_origin)).to be_truthy
      expect(subject == described_class.new(sample_dotted, 2)).to be_falsey
      expect(subject == described_class.new(other_dotted, sample_origin)).to be_falsey
    end

    # rubocop: enable Lint/BinaryOperatorWithIdenticalOperands

    it 'can renders a String representation of itself' do
      expect(subject.to_s).to eq("#{sample_dotted} @ #{sample_origin}")
    end
  end # context
end # describe
|