dendroid 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 270fc74811d70652e19c4ed42cd11138a1fe9fc413e9b1856b982edfa28c5d51
4
- data.tar.gz: 280351b252bd5c4a63f3082375053ea7d3bf9a9d0d32acc055dc33cce91ed628
3
+ metadata.gz: 2564f1269225e08732a9f995b10ebbbbf4710b0a1b0aea73e7fe4b486c34a1aa
4
+ data.tar.gz: db15f965e9365276ffc576435d514cd6c9170a8727c7fafe1425a9de7ed3e0cd
5
5
  SHA512:
6
- metadata.gz: 7a34047f56f1f488377afd88c4049b935d03d8a0a902cd44f8ffba3d58578c212c5ef7f0b1229192a7f4606b1d683d70ca479273d45d716d98154a38663f233f
7
- data.tar.gz: 36578ffb40a0463a2e411000b24fa8005166c1ede8f6a856293c0122e44fdbb46d3758159042db0c9c4ccacf9c1bf071e49cfb86a64792b98fac8bb89447a85a
6
+ metadata.gz: 2517fd57cca364571e19ddd183d53fcd4fd642f0cf83ecc58ef0f62e5c7512c343cc3db4f31ee621dad5009386db0161e7c2f67944820dd36cb2a253a4d7af80
7
+ data.tar.gz: 24b77e7c0c5e97df315102c3434dddd251eacab96efaa3d194006c3874f6d260aeafa076ec5b6bd1bed296fbc675bef00e2fcf49a1e4516c20cc6b3e3b0aefdb
data/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.0.11] - 2023-11-02
6
+ Added Earley recognizer and its ancillary classes.
7
+
8
+ ### Added
9
+ - Class `Chart` and its spec file
10
+ - Class `EItem` and its spec file
11
+ - Class `ItemSet` and its spec file
12
+ - Class `Recognizer` and its spec file
13
+
14
+ ### Changed
15
+ - RSpec tests: moved module `SampleGrammars` to separate file in folder `support`
16
+
5
17
  ## [0.0.10] - 2023-11-01
6
18
  Added missing class and method documentation, fixed some `Rubocop` offenses.
7
19
 
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'item_set'
4
+
5
+ module Dendroid
6
+ module Recognizer
7
+ # Also called a parse table.
8
+ # Assuming that n == number of input tokens,
9
+ # then the chart is an array with n + 1 entry sets.
10
+ class Chart
11
+ extend Forwardable
12
+
13
+ # @return [Array<Recognizer::ItemSet>] The array of item sets
14
+ attr_reader :item_sets
15
+
16
+ # @return [Boolean] Indicates whether the recognizer successfully processed the whole input
17
+ attr_writer :success
18
+
19
+ # @return [StandardError] The exception class in case of an error found by the recognizer
20
+ attr_accessor :failure_class
21
+
22
+ # @return [String] The error message
23
+ attr_accessor :failure_reason
24
+
25
+ def_delegators :@item_sets, :[], :last, :size
26
+
27
+ # Constructor
28
+ # Initialize the chart with one empty item set.
29
+ def initialize
30
+ @item_sets = []
31
+ @success = false
32
+ append_new_set
33
+ end
34
+
35
+ # Add a new empty item set at the end of the array of item sets
36
+ def append_new_set()
37
+ item_sets << ItemSet.new
38
+ end
39
+
40
+ # Add an EItem to the last item set
41
+ # @param e_item [EItem]
42
+ def seed_last_set(e_item)
43
+ item_sets.last.add_item(e_item)
44
+ end
45
+
46
+ # Return true if the input text is valid according to the grammar.
47
+ # @return [Boolean]
48
+ def successful?
49
+ @success
50
+ end
51
+ end # class
52
+ end # module
53
+ end # module
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'forwardable'
4
+
5
+ module Dendroid
6
+ module Recognizer
7
+ # An Earley item is essentially a pair consisting of a dotted item and the rank of a token.
8
+ # It helps to keep track of the progress of an Earley recognizer.
9
+ class EItem
10
+ extend Forwardable
11
+
12
+ # @return [Dendroid::GrmAnalysis::DottedItem]
13
+ attr_reader :dotted_item
14
+
15
+ # @return [Integer] the rank of the token that corresponds to the start of the rule.
16
+ attr_reader :origin
17
+
18
+ def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?
19
+
20
+ # @param aDottedItem [Dendroid::GrmAnalysis::DottedItem]
21
+ # @param origin [Integer]
22
+ def initialize(aDottedItem, origin)
23
+ @dotted_item = aDottedItem
24
+ @origin = origin
25
+ end
26
+
27
+ # @return [Dendroid::Syntax::NonTerminal] the head of the production rule
28
+ def lhs
29
+ dotted_item.rule.lhs
30
+ end
31
+
32
+ # Equality test.
33
+ # @return [Boolean] true iff dotted items and origins are equal
34
+ def ==(other)
35
+ return true if eql?(other)
36
+
37
+ di = dotted_item
38
+ (origin == other.origin) && (di == other.dotted_item)
39
+ end
40
+
41
+ # @return [String] the text representation of the Earley item
42
+ def to_s
43
+ "#{dotted_item} @ #{origin}"
44
+ end
45
+ end # class
46
+ end # module
47
+ end # module
48
+
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Dendroid
4
+ module Recognizer
5
+ # Holds the EItems identified by the recognizer when processing the token at a given rank.
6
+ class ItemSet
7
+ extend Forwardable
8
+
9
+ # @return [Array<Recognizer::EItem>] the items in the set
10
+ attr_reader :items
11
+ def_delegators :@items, :clear, :each, :empty?, :select, :size
12
+
13
+ def initialize
14
+ @items = []
15
+ end
16
+
17
+ # Add an Earley item to the set, unless an equal item is already present
18
+ # @param anItem [Recognizer::EItem]
19
+ def add_item(anItem)
20
+ @items << anItem unless items.include? anItem
21
+ end
22
+
23
+ # Find the items that expect a given grammar symbol
24
+ # @param aSymbol [Dendroid::Syntax::GrmSymbol]
25
+ # @return [Array<Recognizer::EItem>] the items expecting the given symbol
26
+ def items_expecting(aSymbol)
27
+ items.select { |itm| itm.expecting?(aSymbol) }
28
+ end
29
+
30
+ # Return a text representation of the item set
31
+ # @return [String]
32
+ def to_s
33
+ items.join("\n")
34
+ end
35
+ end # class
36
+ end # module
37
+ end # module
@@ -0,0 +1,282 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../grm_analysis/grm_analyzer'
4
+ require_relative 'e_item'
5
+ require_relative 'chart'
6
+
7
+ module Dendroid
8
+ # This module hosts the classes needed to implement an Earley recognizer
9
+ module Recognizer
10
+ # A recognizer determines whether the input text complies with the grammar (syntax) rules.
11
+ # This class implements the Earley recognition algorithm.
12
+ class Recognizer
13
+ # @return [GrmAnalysis::GrmAnalyzer]
14
+ attr_reader :grm_analysis
15
+
16
+ # @return [Object]
17
+ attr_reader :tokenizer
18
+
19
+ def initialize(grammar, tokenizer)
20
+ @grm_analysis = GrmAnalysis::GrmAnalyzer.new(grammar)
21
+ @tokenizer = tokenizer
22
+ end
23
+
24
+ def run(source)
25
+ tokenizer.input = source
26
+ tok = tokenizer.next_token
27
+ if tok.nil? && !grm_analysis.grammar.start_symbol.nullable?
28
+ chart = new_chart
29
+ chart.failure_class = StandardError
30
+ chart.failure_reason = 'Error: Input may not be empty nor blank.'
31
+ chart
32
+ else
33
+ earley_parse(tok)
34
+ end
35
+ end
36
+
37
+ def earley_parse(initial_token)
38
+ chart = new_chart
39
+ tokens = [initial_token]
40
+ predicted_symbols = [Set.new]
41
+ eos_reached = initial_token.nil?
42
+ rank = 0
43
+
44
+ loop do
45
+ eos_reached = advance_next_token(tokens, predicted_symbols) unless eos_reached
46
+
47
+ advance = false
48
+ curr_rank = rank
49
+ curr_set = chart[curr_rank]
50
+ curr_set.each do |entry|
51
+ # For each entry, do either completer, scanner or predictor action
52
+ tick = do_entry_action(chart, entry, curr_rank, tokens, :genuine, predicted_symbols)
53
+ advance ||= tick
54
+ end
55
+
56
+ rank += 1 if advance
57
+ break if eos_reached && !advance
58
+ break if ! advance
59
+ end
60
+
61
+ determine_outcome(chart, tokens)
62
+ chart
63
+ end
64
+
65
+ private
66
+
67
+ def new_chart
68
+ top_symbol = grm_analysis.grammar.start_symbol
69
+
70
+ # Reminder: there might be multiple rules for the start symbol
71
+ prods = grm_analysis.grammar.nonterm2productions[top_symbol]
72
+ chart = Chart.new
73
+ prods.each do |prd|
74
+ seed_items = prd.predicted_items
75
+ seed_items.each { |item| chart.seed_last_set(EItem.new(item, 0)) }
76
+ end
77
+
78
+ chart
79
+ end
80
+
81
+ def advance_next_token(tokens, predicted_symbols)
82
+ eos_reached = false
83
+ tok = tokenizer.next_token
84
+ if tok
85
+ tokens << tok
86
+ else
87
+ eos_reached = true
88
+ end
89
+
90
+ predicted_symbols << Set.new unless eos_reached
91
+ eos_reached
92
+ end
93
+
94
+ def do_entry_action(chart, entry, rank, tokens, mode, predicted_symbols)
95
+ advance = false
96
+
97
+ if entry.completed?
98
+ completer(chart, entry, rank, tokens, mode)
99
+ else
100
+ if entry.next_symbol.terminal?
101
+ advance = scanner(chart, entry, rank, tokens)
102
+ else
103
+ predictor(chart, entry, rank, tokens, mode, predicted_symbols)
104
+ end
105
+ end
106
+
107
+ advance
108
+ end
109
+ =begin
110
+ procedure PREDICTOR((A → α•Bβ, j), k)
111
+ for each (B → γ) in GRAMMAR_RULES_FOR(B) do
112
+ ADD_TO_SET((B → •γ, k), S[k])
113
+ end
114
+ Assuming next symbol is a non-terminal
115
+
116
+ Error case: next actual token matches none of the expected tokens.
117
+ =end
118
+ def predictor(chart, item, rank, tokens, mode, predicted_symbols)
119
+ next_symbol = item.next_symbol
120
+ if mode == :genuine
121
+ predicted_symbols << Set.new if rank == predicted_symbols.size
122
+ predicted = predicted_symbols[rank]
123
+ return if predicted.include?(next_symbol)
124
+
125
+ predicted.add(next_symbol)
126
+ end
127
+
128
+ prods = grm_analysis.symbol2productions[next_symbol]
129
+ curr_set = chart[rank]
130
+ next_token = tokens[rank]
131
+ prods.each do |prd|
132
+ entry_items = prd.predicted_items
133
+ entry_items.each do |entry|
134
+ member = entry.next_symbol
135
+ if member&.terminal?
136
+ next unless next_token
137
+ next if (member.name != next_token.terminal) && mode == :genuine
138
+ end
139
+
140
+ new_item = EItem.new(entry, rank)
141
+ curr_set.add_item(new_item)
142
+ end
143
+ end
144
+
145
+ # Use the trick from the paper by John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
146
+ if next_symbol.nullable?
147
+ next_item = grm_analysis.next_item(item.dotted_item)
148
+ if next_item
149
+ new_item = EItem.new(next_item, item.origin)
150
+ curr_set.add_item(new_item)
151
+ end
152
+ end
153
+ end
154
+
155
+ =begin
156
+ procedure SCANNER((A → α•aβ, j), k, words)
157
+ if j < LENGTH(words) and a ⊂ PARTS_OF_SPEECH(words[k]) then
158
+ ADD_TO_SET((A → αa•β, j), S[k+1])
159
+ end
160
+ Assuming next symbol is a terminal
161
+ =end
162
+ def scanner(chart, scan_item, rank, tokens)
163
+ advance = false
164
+ dit = scan_item.dotted_item
165
+ if rank < tokens.size && dit.next_symbol.name == tokens[rank].terminal
166
+ new_rank = rank + 1
167
+ chart.append_new_set if chart[new_rank].nil?
168
+ next_dotted_item = grm_analysis.next_item(dit)
169
+ new_item = EItem.new(next_dotted_item, scan_item.origin)
170
+ chart[new_rank].add_item(new_item)
171
+ advance = true
172
+ end
173
+
174
+ advance
175
+ end
176
+
177
+ =begin
178
+ procedure COMPLETER((B → γ•, x), k)
179
+ for each (A → α•Bβ, j) in S[x] do
180
+ ADD_TO_SET((A → αB•β, j), S[k])
181
+ end
182
+ =end
183
+ def completer(chart, item, rank, tokens, mode)
184
+ origin = item.origin
185
+
186
+ curr_set = chart[rank]
187
+ set_at_origin = chart[origin]
188
+ next_token = tokens[rank]
189
+ callers = set_at_origin.items_expecting(item.lhs)
190
+ callers.each do |call_item|
191
+ return_item = grm_analysis.next_item(call_item.dotted_item)
192
+ next unless return_item
193
+ member = return_item.next_symbol
194
+ if member&.terminal? && (mode == :genuine)
195
+ next unless next_token
196
+ next if member.name != next_token.terminal
197
+ end
198
+
199
+ new_item = EItem.new(return_item, call_item.origin)
200
+ curr_set.add_item(new_item)
201
+ end
202
+ end
203
+
204
+ def seed_set(chart, rank)
205
+ curr_set = chart[rank]
206
+ previous_set = chart[rank - 1]
207
+ curr_set.clear
208
+ scan_entries = previous_set.select { |ent| ent.dotted_item.next_symbol&.terminal? }
209
+ scan_entries.map do |ent|
210
+ new_item = grm_analysis.next_item(ent.dotted_item)
211
+ curr_set.add_item(EItem.new(new_item, ent.origin))
212
+ end
213
+ end
214
+
215
+ def determine_outcome(chart, tokens)
216
+ success = false
217
+ if chart.size == tokens.size + 1
218
+ top_symbol = grm_analysis.grammar.start_symbol
219
+ top_rules = grm_analysis.grammar.nonterm2productions[top_symbol]
220
+ final_items = top_rules.reduce([]) do |items, rule|
221
+ items.concat(rule.reduce_items)
222
+ end
223
+ last_set = chart.item_sets.last
224
+ last_set.each do |entry|
225
+ next if ((!entry.origin.zero?) || ! final_items.include?(entry.dotted_item))
226
+ success = true
227
+ end
228
+ end
229
+
230
+ if !success
231
+ # Error detected...
232
+ replay_last_set(chart, tokens)
233
+ if chart.size < tokens.size + 1
234
+ # Recognizer stopped prematurely...
235
+ offending_token = tokens[chart.size - 1]
236
+ pos = offending_token.position
237
+ (line, col) = [pos.lineno, pos.column]
238
+ last_set = chart.last
239
+ terminals = last_set.items.reduce([]) do |result, ent|
240
+ result << ent.next_symbol if ent.pre_scan?
241
+ result
242
+ end
243
+ terminals.uniq!
244
+ prefix = "Syntax error at or near token line #{line}, column #{col} >>>#{offending_token.source}<<<"
245
+ expectation = terminals.size == 1 ? "#{terminals[0].name}" : "one of: [#{terminals.map(&:name).join(', ')}]"
246
+ err_msg = "#{prefix} Expected #{expectation}, found a #{offending_token.terminal} instead."
247
+ chart.failure_class = StandardError
248
+ chart.failure_reason = err_msg
249
+ elsif chart.size == tokens.size + 1
250
+ # EOS unexpected...
251
+ last_token = tokens.last
252
+ pos = last_token.position
253
+ (line, col) = [pos.lineno, pos.column]
254
+ last_set = chart.last
255
+ terminals = last_set.items.reduce([]) do |result, ent|
256
+ result << ent.next_symbol if ent.pre_scan?
257
+ result
258
+ end
259
+ terminals.uniq!
260
+
261
+ prefix = "Line #{line}, column #{col}: Premature end of input after '#{last_token.source}'"
262
+ expectation = terminals.size == 1 ? "#{terminals[0].name}" : "one of: [#{terminals.map(&:name).join(', ')}]"
263
+ err_msg = "#{prefix}, expected: #{expectation}."
264
+ chart.failure_class = StandardError
265
+ chart.failure_reason = err_msg
266
+ end
267
+ end
268
+ chart.success = success
269
+ end
270
+
271
+ def replay_last_set(chart, tokens)
272
+ rank = chart.size - 1
273
+ seed_set(chart, rank) # Re-initialize last set with scan entries
274
+
275
+ # Replay in full the actions for last set
276
+ chart[rank].each do |entry|
277
+ do_entry_action(chart, entry, rank, tokens, :error, [Set.new])
278
+ end
279
+ end
280
+ end # class
281
+ end # module
282
+ end # module
@@ -1,80 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative '../../spec_helper'
4
- require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'
4
+ require_relative '../support/sample_grammars'
5
5
  require_relative '../../../lib/dendroid/grm_analysis/grm_analyzer'
6
6
 
7
- module SampleGrammars
8
- def grammar_l1
9
- builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
10
- # Grammar inspired from Wikipedia entry on Earley parsing
11
- declare_terminals('PLUS', 'STAR', 'INTEGER')
12
-
13
- rule('p' => 's')
14
- rule('s' => ['s PLUS m', 'm'])
15
- rule('m' => ['m STAR t', 't'])
16
- rule('t' => 'INTEGER')
17
- end
18
-
19
- builder.grammar
20
- end
21
-
22
- def tokenizer_l1
23
- Utils::BaseTokenizer.new do
24
- map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
25
-
26
- scan_verbatim(['+', '*'])
27
- scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
28
- end
29
- end
30
-
31
- def grammar_l2
32
- builder = GrmDSL::BaseGrmBuilder.new do
33
- # Grammar inspired from Loup Vaillant's example
34
- # https://loup-vaillant.fr/tutorials/earley-parsing/recogniser
35
- declare_terminals('PLUS', 'MINUS', 'STAR', 'SLASH')
36
- declare_terminals('LPAREN', 'RPAREN', 'NUMBER')
37
-
38
- rule('p' => 'sum')
39
- rule('sum' => ['sum PLUS product', 'sum MINUS product', 'product'])
40
- rule('product' => ['product STAR factor', 'product SLASH factor', 'factor'])
41
- rule('factor' => ['LPAREN sum RPAREN', 'NUMBER'])
42
- end
43
-
44
- builder.grammar
45
- end
46
-
47
- def tokenizer_l2
48
- Utils::BaseTokenizer.new do
49
- map_verbatim2terminal({
50
- '+' => :PLUS,
51
- '-' => :MINUS,
52
- '*' => :STAR,
53
- '/' => :SLASH,
54
- '(' => :LPAREN,
55
- ')' => :RPAREN
56
- })
57
-
58
- scan_verbatim(['+', '-', '*', '/', '(', ')'])
59
- scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
60
- end
61
- end
62
-
63
- def grammar_l3
64
- builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
65
- # Grammar inspired from Andrew Appel's example
66
- # Modern Compiler Implementation in Java
67
- declare_terminals('a', 'c', 'd')
68
-
69
- rule('Z' => ['d', 'X Y Z'])
70
- rule('Y' => ['', 'c'])
71
- rule('X' => %w[Y a])
72
- end
73
-
74
- builder.grammar
75
- end
76
- end # module
77
-
78
7
  describe Dendroid::GrmAnalysis::GrmAnalyzer do
79
8
  include SampleGrammars
80
9
  let(:grammar) { grammar_l1 }
@@ -0,0 +1,2 @@
1
+ # frozen_string_literal: true
2
+
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/syntax/terminal'
5
+ require_relative '../../../lib/dendroid/syntax/non_terminal'
6
+ require_relative '../../../lib/dendroid/syntax/symbol_seq'
7
+ require_relative '../../../lib/dendroid/syntax/production'
8
+ require_relative '../../../lib/dendroid/grm_analysis/dotted_item'
9
+ require_relative '../../../lib/dendroid/recognizer/e_item'
10
+
11
+ describe Dendroid::Recognizer::EItem do
12
+ let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
13
+ let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
14
+ let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
15
+ let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
16
+ let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
17
+ let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
18
+ let(:empty_prod) { Dendroid::Syntax::Production.new(expr_symb, empty_body) }
19
+ let(:sample_dotted) { Dendroid::GrmAnalysis::DottedItem.new(prod, 1) }
20
+ let(:other_dotted) { Dendroid::GrmAnalysis::DottedItem.new(empty_prod, 0) }
21
+ let(:sample_origin) { 3 }
22
+
23
+ subject { described_class.new(sample_dotted, sample_origin) }
24
+
25
+ context 'Initialization:' do
26
+ it 'is initialized with a dotted item and an origin position' do
27
+ expect { described_class.new(sample_dotted, sample_origin) }.not_to raise_error
28
+ end
29
+
30
+ it 'knows its related dotted item' do
31
+ expect(subject.dotted_item).to eq(sample_dotted)
32
+ end
33
+
34
+ it 'knows its origin value' do
35
+ expect(subject.origin).to eq(sample_origin)
36
+ end
37
+ end # context
38
+
39
+ context 'Provided service:' do
40
+ it 'knows the lhs of related production' do
41
+ expect(subject.lhs).to eq(expr_symb)
42
+ end
43
+
44
+ it 'can compare with another EItem' do
45
+ expect(subject == subject).to be_truthy
46
+ expect(subject == described_class.new(sample_dotted, sample_origin)).to be_truthy
47
+ expect(subject == described_class.new(sample_dotted, 2)).to be_falsey
48
+ expect(subject == described_class.new(other_dotted, sample_origin)).to be_falsey
49
+ end
50
+
51
+ it 'can renders a String representation of itself' do
52
+ expect(subject.to_s).to eq("#{sample_dotted} @ #{sample_origin}")
53
+ end
54
+ end # context
55
+ end # describe
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../../../lib/dendroid/syntax/terminal'
5
+ require_relative '../../../lib/dendroid/syntax/non_terminal'
6
+ require_relative '../../../lib/dendroid/syntax/symbol_seq'
7
+ require_relative '../../../lib/dendroid/syntax/production'
8
+ require_relative '../../../lib/dendroid/grm_analysis/dotted_item'
9
+ require_relative '../../../lib/dendroid/recognizer/e_item'
10
+ require_relative '../../../lib/dendroid/recognizer/item_set'
11
+
12
+ describe Dendroid::Recognizer::ItemSet do
13
+ let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
14
+ let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
15
+ let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
16
+ let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
17
+ let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
18
+ let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
19
+ let(:empty_prod) { Dendroid::Syntax::Production.new(expr_symb, empty_body) }
20
+ let(:sample_dotted) { Dendroid::GrmAnalysis::DottedItem.new(prod, 1) }
21
+ let(:sample_origin) { 3 }
22
+ let(:other_dotted) { Dendroid::GrmAnalysis::DottedItem.new(empty_prod, 0) }
23
+ let(:first_element) { Dendroid::Recognizer::EItem.new(sample_dotted, sample_origin) }
24
+ let(:second_element) { Dendroid::Recognizer::EItem.new(other_dotted, 5) }
25
+
26
+ subject { described_class.new }
27
+
28
+ context 'Initialization:' do
29
+ it 'is initialized without argument' do
30
+ expect { described_class.new }.not_to raise_error
31
+ end
32
+
33
+ it 'is empty at creation' do
34
+ expect(subject).to be_empty
35
+ end
36
+ end # context
37
+
38
+ context 'Provided services:' do
39
+ it 'adds a new element' do
40
+ subject.add_item(first_element)
41
+ expect(subject.size).to eq(1)
42
+
43
+ # Trying a second time, doesn't change the set
44
+ subject.add_item(first_element)
45
+ expect(subject.size).to eq(1)
46
+
47
+ subject.add_item(second_element)
48
+ expect(subject.size).to eq(2)
49
+ end
50
+
51
+ it 'can render a String representation of itself' do
52
+ subject.add_item(first_element)
53
+ subject.add_item(second_element)
54
+
55
+ expectations = [
56
+ 'expression => NUMBER . PLUS NUMBER @ 3',
57
+ 'expression => . @ 5'
58
+ ].join("\n")
59
+
60
+ expect(subject.to_s).to eq(expectations)
61
+ end
62
+ end # context
63
+ end # describe
@@ -0,0 +1,186 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../spec_helper'
4
+ require_relative '../support/sample_grammars'
5
+ require_relative '../../../lib/dendroid/recognizer/recognizer'
6
+
7
+ describe Dendroid::Recognizer::Recognizer do
8
+ include SampleGrammars
9
+ let(:grammar1) { grammar_l1 }
10
+
11
+ # Recognizer for the sample grammar L1, fed by the matching tokenizer
12
+ subject { described_class.new(grammar1, tokenizer_l1) }
13
+
14
+ context 'Initialization:' do
15
+ it 'is initialized with a grammar' do
16
+ expect { described_class.new(grammar1, tokenizer_l1) }.not_to raise_error
17
+ end
18
+
19
+ it 'knows its grammar analyzer' do
20
+ expect(subject.grm_analysis).to be_kind_of(Dendroid::GrmAnalysis::GrmAnalyzer)
21
+ expect(subject.grm_analysis.grammar).to eq(grammar1)
22
+ end
23
+
24
+ it 'knows its tokenizer' do
25
+ expect(subject.grm_analysis).to be_kind_of(Dendroid::GrmAnalysis::GrmAnalyzer)
26
+ expect(subject.grm_analysis.grammar).to eq(grammar1)
27
+ end
28
+ end # context
29
+
30
+ context 'Recognizer at work:' do
31
+ it 'can recognize example from Wikipedia' do
32
+ chart = subject.run('2 + 3 * 4')
33
+ expect(chart).to be_successful
34
+
35
+ set0 = [ # . 2 + 3 * 4'
36
+ 'p => . s @ 0',
37
+ 's => . s PLUS m @ 0',
38
+ 's => . m @ 0',
39
+ 'm => . m STAR t @ 0',
40
+ 'm => . t @ 0',
41
+ 't => . INTEGER @ 0'
42
+ ]
43
+ set1 = [ # 2 . + 3 * 4'
44
+ 't => INTEGER . @ 0',
45
+ 'm => t . @ 0',
46
+ 's => m . @ 0',
47
+ #'m => m . STAR t @ 0',
48
+ 'p => s . @ 0',
49
+ 's => s . PLUS m @ 0'
50
+ ]
51
+ set2 = [ # 2 + . 3 * 4'
52
+ 's => s PLUS . m @ 0',
53
+ 'm => . m STAR t @ 2',
54
+ 'm => . t @ 2',
55
+ 't => . INTEGER @ 2'
56
+ ]
57
+ set3 = [ # 2 + 3 . * 4'
58
+ 't => INTEGER . @ 2',
59
+ 'm => t . @ 2',
60
+ 's => s PLUS m . @ 0',
61
+ 'm => m . STAR t @ 2',
62
+ 'p => s . @ 0',
63
+ # 's => s . PLUS m @ 0'
64
+ ]
65
+ set4 = [ # 2 + 3 * . 4'
66
+ 'm => m STAR . t @ 2',
67
+ 't => . INTEGER @ 4'
68
+ ]
69
+ set5 = [ # 2 + 3 * 4 .'
70
+ 't => INTEGER . @ 4',
71
+ 'm => m STAR t . @ 2',
72
+ 's => s PLUS m . @ 0',
73
+ # 'm => m . STAR t @ 2',
74
+ 'p => s . @ 0'
75
+ # 's => s . PLUS m @ 0'
76
+ ]
77
+ [set0, set1, set2, set3, set4, set5].each_with_index do |set, rank|
78
+ expect(chart[rank].to_s).to eq(set.join("\n"))
79
+ end
80
+ end
81
+
82
+ it 'can recognize example for L2 language' do
83
+ recognizer = described_class.new(grammar_l2, tokenizer_l2)
84
+ chart = recognizer.run('1 + (2 * 3 - 4)')
85
+ expect(chart).to be_successful
86
+
87
+ set0 = [ # . 1 + (2 * 3 - 4)
88
+ 'p => . sum @ 0',
89
+ 'sum => . sum PLUS product @ 0',
90
+ 'sum => . sum MINUS product @ 0',
91
+ 'sum => . product @ 0',
92
+ 'product => . product STAR factor @ 0',
93
+ 'product => . product SLASH factor @ 0',
94
+ 'product => . factor @ 0',
95
+ # 'factor => . LPAREN sum RPAREN @ 0',
96
+ 'factor => . NUMBER @ 0'
97
+ ]
98
+ set1 = [ # 1 . + (2 * 3 - 4)
99
+ 'factor => NUMBER . @ 0',
100
+ 'product => factor . @ 0',
101
+ 'sum => product . @ 0',
102
+ # 'product => product . STAR factor @ 0',
103
+ # 'product => product . SLASH factor @ 0',
104
+ 'p => sum . @ 0',
105
+ 'sum => sum . PLUS product @ 0',
106
+ # 'sum => sum . MINUS product @ 0'
107
+ ]
108
+ set2 = [ # 1 + . (2 * 3 - 4)
109
+ 'sum => sum PLUS . product @ 0',
110
+ 'product => . product STAR factor @ 2',
111
+ 'product => . product SLASH factor @ 2',
112
+ 'product => . factor @ 2',
113
+ 'factor => . LPAREN sum RPAREN @ 2',
114
+ # 'factor => . NUMBER @ 2'
115
+ ]
116
+ set3 = [ # 1 + (. 2 * 3 - 4)
117
+ 'factor => LPAREN . sum RPAREN @ 2',
118
+ 'sum => . sum PLUS product @ 3',
119
+ 'sum => . sum MINUS product @ 3',
120
+ 'sum => . product @ 3',
121
+ 'product => . product STAR factor @ 3',
122
+ 'product => . product SLASH factor @ 3',
123
+ 'product => . factor @ 3',
124
+ # 'factor => . LPAREN sum RPAREN @ 3',
125
+ 'factor => . NUMBER @ 3'
126
+ ]
127
+ set4 = [ # 1 + (2 . * 3 - 4)
128
+ 'factor => NUMBER . @ 3',
129
+ 'product => factor . @ 3',
130
+ 'sum => product . @ 3',
131
+ 'product => product . STAR factor @ 3',
132
+ # 'product => product . SLASH factor @ 3',
133
+ # 'factor => LPAREN sum . RPAREN @ 2',
134
+ # 'sum => sum . PLUS product @ 3',
135
+ # 'sum => sum . MINUS product @ 3'
136
+ ]
137
+ set5 = [ # 1 + (2 * . 3 - 4)
138
+ 'product => product STAR . factor @ 3',
139
+ # 'factor => . LPAREN sum RPAREN @ 5',
140
+ 'factor => . NUMBER @ 5'
141
+ ]
142
+ set6 = [ # 1 + (2 * 3 . - 4)
143
+ 'factor => NUMBER . @ 5',
144
+ 'product => product STAR factor . @ 3',
145
+ 'sum => product . @ 3',
146
+ # 'product => product . STAR factor @ 3',
147
+ # 'product => product . SLASH factor @ 3',
148
+ # 'factor => LPAREN sum . RPAREN @ 2',
149
+ # 'sum => sum . PLUS product @ 3',
150
+ 'sum => sum . MINUS product @ 3'
151
+ ]
152
+ set7 = [ # 1 + (2 * 3 - . 4)
153
+ 'sum => sum MINUS . product @ 3',
154
+ 'product => . product STAR factor @ 7',
155
+ 'product => . product SLASH factor @ 7',
156
+ 'product => . factor @ 7',
157
+ # 'factor => . LPAREN sum RPAREN @ 7',
158
+ 'factor => . NUMBER @ 7'
159
+ ]
160
+ set8 = [ # 1 + (2 * 3 - 4 .)
161
+ 'factor => NUMBER . @ 7',
162
+ 'product => factor . @ 7',
163
+ 'sum => sum MINUS product . @ 3',
164
+ # 'product => product . STAR factor @ 7',
165
+ # 'product => product . SLASH factor @ 7',
166
+ 'factor => LPAREN sum . RPAREN @ 2',
167
+ # 'sum => sum . PLUS product @ 3',
168
+ # 'sum => sum . MINUS product @ 3'
169
+ ]
170
+ set9 = [ # 1 + (2 * 3 - 4 ).
171
+ 'factor => LPAREN sum RPAREN . @ 2',
172
+ 'product => factor . @ 2',
173
+ 'sum => sum PLUS product . @ 0',
174
+ # 'product => product . STAR factor @ 2',
175
+ # 'product => product . SLASH factor @ 2',
176
+ 'p => sum . @ 0',
177
+ # 'sum => sum . PLUS product @ 0',
178
+ # 'sum => sum . MINUS product @ 0'
179
+ ]
180
+ expectations = [set0, set1, set2, set3, set4, set5, set6, set7, set8, set9]
181
+ expectations.each_with_index do |set, rank|
182
+ expect(chart[rank].to_s).to eq(set.join("\n"))
183
+ end
184
+ end
185
+ end # context
186
+ end # describe
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'
4
+ require_relative '../../../lib/dendroid/utils/base_tokenizer'
5
+
6
+ module SampleGrammars
7
+ def grammar_l1
8
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
9
+ # Grammar inspired by the Wikipedia entry on Earley parsing
10
+ declare_terminals('PLUS', 'STAR', 'INTEGER')
11
+ rule('p' => 's')
12
+ rule('s' => ['s PLUS m', 'm'])
13
+ # rule('s' => 'm')
14
+ rule('m' => ['m STAR t', 't'])
15
+ # rule('m' => 't')
16
+ rule('t' => 'INTEGER')
17
+ end
18
+
19
+ builder.grammar
20
+ end
21
+
22
+ def tokenizer_l1
23
+ Dendroid::Utils::BaseTokenizer.new do
24
+ map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
25
+
26
+ scan_verbatim(['+', '*'])
27
+ scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
28
+ end
29
+ end
30
+
31
+
32
+ def grammar_l2
33
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
34
+ # Grammar inspired by Loup Vaillant's example
35
+ # https://loup-vaillant.fr/tutorials/earley-parsing/recogniser
36
+ declare_terminals('PLUS', 'MINUS', 'STAR', 'SLASH')
37
+ declare_terminals('LPAREN', 'RPAREN', 'NUMBER')
38
+
39
+ rule('p' => 'sum')
40
+ rule('sum' => ['sum PLUS product', 'sum MINUS product', 'product'])
41
+ rule('product' => ['product STAR factor', 'product SLASH factor', 'factor'])
42
+ rule('factor' => ['LPAREN sum RPAREN', 'NUMBER'])
43
+ end
44
+
45
+ builder.grammar
46
+ end
47
+
48
+ def tokenizer_l2
49
+ Dendroid::Utils::BaseTokenizer.new do
50
+ map_verbatim2terminal({
51
+ '+' => :PLUS,
52
+ '-' => :MINUS,
53
+ '*' => :STAR,
54
+ '/' => :SLASH,
55
+ '(' => :LPAREN,
56
+ ')' => :RPAREN })
57
+
58
+ scan_verbatim(['+', '-', '*', '/', '(', ')'])
59
+ scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
60
+ end
61
+ end
62
+
63
+ def grammar_l3
64
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
65
+ # Grammar inspired by Andrew Appel's example
66
+ # Modern Compiler Implementation in Java
67
+ declare_terminals('a', 'c', 'd')
68
+
69
+ rule('Z' => ['d', 'X Y Z'])
70
+ rule('Y' => ['', 'c'])
71
+ rule('X' => ['Y', 'a'])
72
+ end
73
+
74
+ builder.grammar
75
+ end
76
+ end # module
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.0.10
1
+ 0.0.11
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dendroid
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-01 00:00:00.000000000 Z
11
+ date: 2023-11-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: WIP. A Ruby implementation of an Earley parser
14
14
  email: famished.tiger@yahoo.com
@@ -33,6 +33,10 @@ files:
33
33
  - lib/dendroid/lexical/literal.rb
34
34
  - lib/dendroid/lexical/token.rb
35
35
  - lib/dendroid/lexical/token_position.rb
36
+ - lib/dendroid/recognizer/chart.rb
37
+ - lib/dendroid/recognizer/e_item.rb
38
+ - lib/dendroid/recognizer/item_set.rb
39
+ - lib/dendroid/recognizer/recognizer.rb
36
40
  - lib/dendroid/syntax/choice.rb
37
41
  - lib/dendroid/syntax/grammar.rb
38
42
  - lib/dendroid/syntax/grm_symbol.rb
@@ -51,6 +55,11 @@ files:
51
55
  - spec/dendroid/lexical/literal_spec.rb
52
56
  - spec/dendroid/lexical/token_position_spec.rb
53
57
  - spec/dendroid/lexical/token_spec.rb
58
+ - spec/dendroid/recognizer/chart_spec.rb
59
+ - spec/dendroid/recognizer/e_item_spec.rb
60
+ - spec/dendroid/recognizer/item_set_spec.rb
61
+ - spec/dendroid/recognizer/recognizer_spec.rb
62
+ - spec/dendroid/support/sample_grammars.rb
54
63
  - spec/dendroid/syntax/choice_spec.rb
55
64
  - spec/dendroid/syntax/grammar_spec.rb
56
65
  - spec/dendroid/syntax/grm_symbol_spec.rb