dendroid 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +7 -0
- data/lib/dendroid/recognizer/chart.rb +6 -4
- data/lib/dendroid/recognizer/e_item.rb +0 -1
- data/lib/dendroid/recognizer/item_set.rb +1 -0
- data/lib/dendroid/recognizer/recognizer.rb +32 -28
- data/lib/dendroid/syntax/grammar.rb +1 -1
- data/spec/dendroid/recognizer/chart_spec.rb +0 -1
- data/spec/dendroid/recognizer/e_item_spec.rb +4 -0
- data/spec/dendroid/recognizer/item_set_spec.rb +1 -1
- data/spec/dendroid/recognizer/recognizer_spec.rb +594 -19
- data/spec/dendroid/support/sample_grammars.rb +249 -6
- data/spec/dendroid/syntax/grammar_spec.rb +145 -0
- data/version.txt +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56842965215f0cef73b768223b5acb907fc1642b57528a9e616852ae6adab2cc
|
4
|
+
data.tar.gz: d53478ebcb86c89a407d648c67bfd34dd1f3333f41f7b6e0eac1dcb3e2a25cb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ef9e4766ad0c786471d08ba6cffcdffaec2d9acf734e25dafa796e714e1103ee838421b3b817c0c732b91c1a238c5fd32c9c3a6f2954926880336b70caab8b9
|
7
|
+
data.tar.gz: 7e389e83762cedfbdbdf23acbcf821f478a237d8e2b6da6c4299db17d0ade7e760f77328e8f9f59cb10c251f6b1711257793c2b94c12e7ddd8bde2bcad5add66
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,13 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [0.0.12] - 2023-11-02
|
6
|
+
Added more tests.
|
7
|
+
|
8
|
+
### Added
|
9
|
+
- Added more tests to spec file of `Grammar` class.
|
10
|
+
- Added more tests to spec file of `Recognizer` class.
|
11
|
+
|
5
12
|
## [0.0.11] - 2023-11-02
|
6
13
|
Added Earley recognizer and its ancillary classes.
|
7
14
|
|
@@ -4,9 +4,11 @@ require_relative 'item_set'
|
|
4
4
|
|
5
5
|
module Dendroid
|
6
6
|
module Recognizer
|
7
|
-
# Also called a parse table.
|
8
|
-
#
|
9
|
-
#
|
7
|
+
# Also called a parse table. It records the progress of the
|
8
|
+
# Earley recognizer when it verifies the compliance of the input text
|
9
|
+
# to the language grammar rules.
|
10
|
+
# It essentially consists of an array of item sets.
|
11
|
+
# If n is the number of input tokens then the chart has n + 1 entry sets.
|
10
12
|
class Chart
|
11
13
|
extend Forwardable
|
12
14
|
|
@@ -33,7 +35,7 @@ module Dendroid
|
|
33
35
|
end
|
34
36
|
|
35
37
|
# Add a new empty item set at the end of the array of item sets
|
36
|
-
def append_new_set
|
38
|
+
def append_new_set
|
37
39
|
item_sets << ItemSet.new
|
38
40
|
end
|
39
41
|
|
@@ -16,11 +16,16 @@ module Dendroid
|
|
16
16
|
# @return [Object]
|
17
17
|
attr_reader :tokenizer
|
18
18
|
|
19
|
+
# @param grammar [Dendroid::Syntax::Grammar]
|
20
|
+
# @param tokenizer [Object]
|
19
21
|
def initialize(grammar, tokenizer)
|
20
22
|
@grm_analysis = GrmAnalysis::GrmAnalyzer.new(grammar)
|
21
23
|
@tokenizer = tokenizer
|
22
24
|
end
|
23
25
|
|
26
|
+
# Try to read the `source` text and verify that it is syntactically correct.
|
27
|
+
# @param source [String] Input text to recognize
|
28
|
+
# @return [Dendroid::Recognizer::Chart]
|
24
29
|
def run(source)
|
25
30
|
tokenizer.input = source
|
26
31
|
tok = tokenizer.next_token
|
@@ -34,6 +39,8 @@ module Dendroid
|
|
34
39
|
end
|
35
40
|
end
|
36
41
|
|
42
|
+
# Run the Earley algorithm
|
43
|
+
# @param initial_token [Dendroid::Lexical::Token]
|
37
44
|
def earley_parse(initial_token)
|
38
45
|
chart = new_chart
|
39
46
|
tokens = [initial_token]
|
@@ -42,7 +49,7 @@ module Dendroid
|
|
42
49
|
rank = 0
|
43
50
|
|
44
51
|
loop do
|
45
|
-
eos_reached
|
52
|
+
eos_reached ||= advance_next_token(tokens, predicted_symbols)
|
46
53
|
|
47
54
|
advance = false
|
48
55
|
curr_rank = rank
|
@@ -55,7 +62,7 @@ module Dendroid
|
|
55
62
|
|
56
63
|
rank += 1 if advance
|
57
64
|
break if eos_reached && !advance
|
58
|
-
break if !
|
65
|
+
break if !advance
|
59
66
|
end
|
60
67
|
|
61
68
|
determine_outcome(chart, tokens)
|
@@ -106,15 +113,14 @@ module Dendroid
|
|
106
113
|
|
107
114
|
advance
|
108
115
|
end
|
109
|
-
=begin
|
110
|
-
procedure PREDICTOR((A → α•Bβ, j), k)
|
111
|
-
for each (B → γ) in GRAMMAR_RULES_FOR(B) do
|
112
|
-
ADD_TO_SET((B → •γ, k), S[k])
|
113
|
-
end
|
114
|
-
Assuming next symbol is a non-terminal
|
115
116
|
|
116
|
-
|
117
|
-
|
117
|
+
# procedure PREDICTOR((A → α•Bβ, j), k)
|
118
|
+
# for each (B → γ) in GRAMMAR_RULES_FOR(B) do
|
119
|
+
# ADD_TO_SET((B → •γ, k), S[k])
|
120
|
+
# end
|
121
|
+
# Assuming next symbol is a non-terminal
|
122
|
+
#
|
123
|
+
# Error case: next actual token matches none of the expected tokens.
|
118
124
|
def predictor(chart, item, rank, tokens, mode, predicted_symbols)
|
119
125
|
next_symbol = item.next_symbol
|
120
126
|
if mode == :genuine
|
@@ -152,13 +158,11 @@ module Dendroid
|
|
152
158
|
end
|
153
159
|
end
|
154
160
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
Assuming next symbol is a terminal
|
161
|
-
=end
|
161
|
+
# procedure SCANNER((A → α•aβ, j), k, words)
|
162
|
+
# if j < LENGTH(words) and a ⊂ PARTS_OF_SPEECH(words[k]) then
|
163
|
+
# ADD_TO_SET((A → αa•β, j), S[k+1])
|
164
|
+
# end
|
165
|
+
# Assuming next symbol is a terminal
|
162
166
|
def scanner(chart, scan_item, rank, tokens)
|
163
167
|
advance = false
|
164
168
|
dit = scan_item.dotted_item
|
@@ -174,12 +178,10 @@ module Dendroid
|
|
174
178
|
advance
|
175
179
|
end
|
176
180
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
end
|
182
|
-
=end
|
181
|
+
# procedure COMPLETER((B → γ•, x), k)
|
182
|
+
# for each (A → α•Bβ, j) in S[x] do
|
183
|
+
# ADD_TO_SET((A → αB•β, j), S[k])
|
184
|
+
# end
|
183
185
|
def completer(chart, item, rank, tokens, mode)
|
184
186
|
origin = item.origin
|
185
187
|
|
@@ -190,6 +192,7 @@ module Dendroid
|
|
190
192
|
callers.each do |call_item|
|
191
193
|
return_item = grm_analysis.next_item(call_item.dotted_item)
|
192
194
|
next unless return_item
|
195
|
+
|
193
196
|
member = return_item.next_symbol
|
194
197
|
if member&.terminal? && (mode == :genuine)
|
195
198
|
next unless next_token
|
@@ -222,12 +225,13 @@ module Dendroid
|
|
222
225
|
end
|
223
226
|
last_set = chart.item_sets.last
|
224
227
|
last_set.each do |entry|
|
225
|
-
next if ((!entry.origin.zero?) || !
|
228
|
+
next if ((!entry.origin.zero?) || !final_items.include?(entry.dotted_item))
|
229
|
+
|
226
230
|
success = true
|
227
231
|
end
|
228
232
|
end
|
229
233
|
|
230
|
-
|
234
|
+
unless success
|
231
235
|
# Error detected...
|
232
236
|
replay_last_set(chart, tokens)
|
233
237
|
if chart.size < tokens.size + 1
|
@@ -242,7 +246,7 @@ module Dendroid
|
|
242
246
|
end
|
243
247
|
terminals.uniq!
|
244
248
|
prefix = "Syntax error at or near token line #{line}, column #{col} >>>#{offending_token.source}<<<"
|
245
|
-
expectation = terminals.size == 1 ?
|
249
|
+
expectation = terminals.size == 1 ? terminals[0].name.to_s : "one of: [#{terminals.map(&:name).join(', ')}]"
|
246
250
|
err_msg = "#{prefix} Expected #{expectation}, found a #{offending_token.terminal} instead."
|
247
251
|
chart.failure_class = StandardError
|
248
252
|
chart.failure_reason = err_msg
|
@@ -259,7 +263,7 @@ module Dendroid
|
|
259
263
|
terminals.uniq!
|
260
264
|
|
261
265
|
prefix = "Line #{line}, column #{col}: Premature end of input after '#{last_token.source}'"
|
262
|
-
expectation = terminals.size == 1 ?
|
266
|
+
expectation = terminals.size == 1 ? terminals[0].name.to_s : "one of: [#{terminals.map(&:name).join(', ')}]"
|
263
267
|
err_msg = "#{prefix}, expected: #{expectation}."
|
264
268
|
chart.failure_class = StandardError
|
265
269
|
chart.failure_reason = err_msg
|
@@ -279,4 +283,4 @@ module Dendroid
|
|
279
283
|
end
|
280
284
|
end # class
|
281
285
|
end # module
|
282
|
-
end # module
|
286
|
+
end # module
|
@@ -47,7 +47,7 @@ module Dendroid
|
|
47
47
|
end
|
48
48
|
# TODO: add test for duplicate productions
|
49
49
|
if nonterm2productions[rule.head]&.include? rule
|
50
|
-
raise StandardError, "Production rule '#{
|
50
|
+
raise StandardError, "Production rule '#{rule}' appears more than once in the grammar."
|
51
51
|
end
|
52
52
|
|
53
53
|
add_symbol(rule.head)
|
@@ -41,6 +41,8 @@ describe Dendroid::Recognizer::EItem do
|
|
41
41
|
expect(subject.lhs).to eq(expr_symb)
|
42
42
|
end # context
|
43
43
|
|
44
|
+
# rubocop: disable Lint/BinaryOperatorWithIdenticalOperands
|
45
|
+
|
44
46
|
it 'can compare with another EItem' do
|
45
47
|
expect(subject == subject).to be_truthy
|
46
48
|
expect(subject == described_class.new(sample_dotted, sample_origin)).to be_truthy
|
@@ -48,6 +50,8 @@ describe Dendroid::Recognizer::EItem do
|
|
48
50
|
expect(subject == described_class.new(other_dotted, sample_origin)).to be_falsey
|
49
51
|
end
|
50
52
|
|
53
|
+
# rubocop: enable Lint/BinaryOperatorWithIdenticalOperands
|
54
|
+
|
51
55
|
it 'can renders a String representation of itself' do
|
52
56
|
expect(subject.to_s).to eq("#{sample_dotted} @ #{sample_origin}")
|
53
57
|
end
|