dendroid 0.0.8 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -1
- data/CHANGELOG.md +17 -0
- data/dendroid.gemspec +2 -2
- data/lib/dendroid/grm_analysis/alternative_item.rb +70 -0
- data/lib/dendroid/grm_analysis/choice_items.rb +60 -0
- data/lib/dendroid/grm_analysis/dotted_item.rb +3 -1
- data/lib/dendroid/grm_analysis/grm_analyzer.rb +207 -0
- data/lib/dendroid/grm_analysis/production_items.rb +55 -0
- data/lib/dendroid/lexical/literal.rb +28 -0
- data/lib/dendroid/lexical/token.rb +46 -0
- data/lib/dendroid/lexical/token_position.rb +28 -0
- data/lib/dendroid/syntax/grm_symbol.rb +1 -1
- data/lib/dendroid/utils/base_tokenizer.rb +215 -0
- data/spec/dendroid/grm_analysis/alternative_item_spec.rb +12 -0
- data/spec/dendroid/grm_analysis/choice_items_spec.rb +74 -0
- data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb +165 -0
- data/spec/dendroid/grm_analysis/production_items_spec.rb +68 -0
- data/spec/dendroid/lexical/literal_spec.rb +23 -0
- data/spec/dendroid/lexical/token_position_spec.rb +31 -0
- data/spec/dendroid/lexical/token_spec.rb +31 -0
- data/spec/dendroid/utils/base_tokenizer_spec.rb +55 -0
- data/version.txt +1 -1
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 270fc74811d70652e19c4ed42cd11138a1fe9fc413e9b1856b982edfa28c5d51
+  data.tar.gz: 280351b252bd5c4a63f3082375053ea7d3bf9a9d0d32acc055dc33cce91ed628
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7a34047f56f1f488377afd88c4049b935d03d8a0a902cd44f8ffba3d58578c212c5ef7f0b1229192a7f4606b1d683d70ca479273d45d716d98154a38663f233f
+  data.tar.gz: 36578ffb40a0463a2e411000b24fa8005166c1ede8f6a856293c0122e44fdbb46d3758159042db0c9c4ccacf9c1bf071e49cfb86a64792b98fac8bb89447a85a
data/.rubocop.yml
CHANGED
@@ -20,7 +20,7 @@ Metrics/CyclomaticComplexity:
 
 Metrics/MethodLength:
   Enabled: true
-  Max:
+  Max: 60
 
 Metrics/PerceivedComplexity:
   Enabled: true
@@ -32,5 +32,8 @@ Naming/MethodParameterName:
 Naming/VariableName:
   Enabled: false
 
+Style/AccessorGrouping:
+  Enabled: false
+
 Style/CommentedKeyword:
   Enabled: false
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,23 @@
 
 ## [Unreleased]
 
+## [0.0.10] - 2023-11-01
+Added missing class and method documentation, fixed some `Rubocop` offenses.
+
+
+## [0.0.9] - 2023-11-01
+Added classes for tokenization and grammar analysis.
+
+### Added
+- Class `AlternativeItem` and its spec file
+- Class `BaseTokenizer` and its spec file
+- Module `ChoiceItems` and its spec file
+- Class `GrmAnalyzer` and its spec file
+- Class `Literal` and its spec file
+- Module `ProductionItems` and its spec file
+- Class `Token` and its spec file
+- Class `TokenPosition` and its spec file
+
 ## [0.0.8] - 2023-10-30
 ### Added
 - Class `DottedItem` and its spec file
data/dendroid.gemspec
CHANGED
@@ -7,8 +7,8 @@ Gem::Specification.new do |s|
     path = ::File.dirname(libpath) + ::File::SEPARATOR
     ::File.read("#{path}version.txt").strip
   end
-  s.summary = '
-  s.description = 'WIP. A Ruby implementation of
+  s.summary = 'WIP. A Ruby implementation of an Earley parser'
+  s.description = 'WIP. A Ruby implementation of an Earley parser'
   s.authors = ['Dimitri Geshef']
   s.email = 'famished.tiger@yahoo.com'
   s.files = Dir['bin/dendroid',
data/lib/dendroid/grm_analysis/alternative_item.rb
ADDED
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+require_relative 'dotted_item'
+
+module Dendroid
+  module GrmAnalysis
+    # A specialization of DottedItem specific for Choice (rule)
+    class AlternativeItem < DottedItem
+      # @return [Integer] the alternative number
+      attr_reader :alt_index
+
+      # Constructor.
+      # @param aChoice [Dendroid::Syntax::Choice]
+      # @param aPosition [Integer] Position of the dot in rhs of production.
+      # @param index [Integer] the rank of the alternative at hand
+      def initialize(aChoice, aPosition, index)
+        @alt_index = index
+        super(aChoice, aPosition)
+      end
+
+      # Return a String representation of the alternative item.
+      # @return [String]
+      def to_s
+        rhs_names = rule.alternatives[alt_index].members.map(&:to_s)
+        dotted_rhs = rhs_names.insert(position, '.')
+        "#{rule.head} => #{dotted_rhs.join(' ')}"
+      end
+
+      # Indicate whether the rhs of the alternative is empty
+      # @return [Boolean]
+      def empty?
+        rule.alternatives[alt_index].empty?
+      end
+
+      # Indicate whether the dot is at the start of rhs
+      # @return [Boolean]
+      def final_pos?
+        empty? || position == rule.alternatives[alt_index].size
+      end
+
+      alias completed? final_pos?
+
+      # Return the symbol right after the dot (if any)
+      # @return [Dendroid::Syntax::GrmSymbol, NilClass]
+      def next_symbol
+        return nil if empty? || completed?
+
+        rule.alternatives[alt_index].members[position]
+      end
+
+      # Test for equality with another dotted item.
+      # Two dotted items are equal if they refer to the same rule and
+      # have both the same rhs and dot positions.
+      # @return [Boolean]
+      def ==(other)
+        return true if eql?(other)
+
+        (position == other.position) && rule.eql?(other.rule) && (alt_index == other.alt_index)
+      end
+
+      private
+
+      def valid_position(aPosition)
+        raise StandardError if aPosition.negative? || aPosition > rule.alternatives[alt_index].size
+
+        aPosition
+      end
+    end # class
+  end # module
+end # module
data/lib/dendroid/grm_analysis/choice_items.rb
ADDED
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+require_relative 'alternative_item'
+
+module Dendroid
+  module GrmAnalysis
+    # Mix-in module for extending the Syntax::Choice class
+    # with dotted items manipulation methods
+    module ChoiceItems
+      # Build the alternative items for this choice and assign them
+      # to the `items` attributes
+      # @return [Array<Array<GrmAnalysis::AlternativeItem>>]
+      def build_items
+        # AlternativeItem
+        @items = Array.new(alternatives.size) { |_| [] }
+        alternatives.each_with_index do |alt_seq, index|
+          if alt_seq.empty?
+            @items[index] << AlternativeItem.new(self, 0, index)
+          else
+            (0..alt_seq.size).each do |pos|
+              @items[index] << AlternativeItem.new(self, pos, index)
+            end
+          end
+        end
+      end
+
+      # Read accessor for the `items` attribute.
+      # Return the dotted items for this production
+      # @return [Array<Array<GrmAnalysis::AlternativeItem>>]
+      def items
+        @items
+      end
+
+      # Return the predicted items (i.e. the alternative items with the dot at start)
+      # for this choice.
+      # @return [Array<GrmAnalysis::AlternativeItem>]
+      def predicted_items
+        @items.map(&:first)
+      end
+
+      # Return the reduce items (i.e. the alternative items with the dot at end)
+      # for this choice.
+      # @return [Array<GrmAnalysis::AlternativeItem>]
+      def reduce_items
+        @items.map(&:last)
+      end
+
+      # Return the next item given the provided item.
+      # In other words, advance the dot by one position.
+      # @param anItem [GrmAnalysis::AlternativeItem]
+      # @return [GrmAnalysis::AlternativeItem|NilClass]
+      def next_item(anItem)
+        items_arr = items[anItem.alt_index]
+        return nil if anItem == items_arr.last
+
+        items_arr[anItem.position + 1]
+      end
+    end # module
+  end # module
+end # module
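For orientation, here is a minimal usage sketch of the new `ChoiceItems` mix-in. It is not part of the package; it is condensed from choice_items_spec.rb further down in this diff and assumes the gem's lib/ directory is on the load path.

# Sketch only (assumption: lib/ on $LOAD_PATH); mirrors choice_items_spec.rb.
require 'dendroid/syntax/terminal'
require 'dendroid/syntax/non_terminal'
require 'dendroid/syntax/symbol_seq'
require 'dendroid/syntax/choice'
require 'dendroid/grm_analysis/choice_items'

num  = Dendroid::Syntax::Terminal.new('NUMBER')
plus = Dendroid::Syntax::Terminal.new('PLUS')
star = Dendroid::Syntax::Terminal.new('STAR')
expr = Dendroid::Syntax::NonTerminal.new('expression')
alt1 = Dendroid::Syntax::SymbolSeq.new([num, plus, num])
alt2 = Dendroid::Syntax::SymbolSeq.new([num, star, num])
alt3 = Dendroid::Syntax::SymbolSeq.new([])

choice = Dendroid::Syntax::Choice.new(expr, [alt1, alt2, alt3])
choice.extend(Dendroid::GrmAnalysis::ChoiceItems)
choice.build_items

# One array of AlternativeItem per alternative; to_s renders the dotted rule,
# e.g. "expression => . NUMBER PLUS NUMBER", "expression => NUMBER . PLUS NUMBER", ...
choice.items[0].map(&:to_s)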
data/lib/dendroid/grm_analysis/dotted_item.rb
CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 module Dendroid
+  # This module contains classes that from the analysis of grammar rules help to build objects
+  # needed by a recognizer or a parser for the language.
   module GrmAnalysis
     # For a given production rule, a dotted item represents a recognition state.
     # The dot partitions the rhs of the rule in two parts:
@@ -87,7 +89,7 @@ module Dendroid
       end
 
       # Check whether the given symbol is the same as after the dot.
-      # @param [Dendroid::Syntax::GrmSymbol]
+      # @param aSymbol [Dendroid::Syntax::GrmSymbol]
       # @return [Boolean]
       def expecting?(aSymbol)
        actual = next_symbol
data/lib/dendroid/grm_analysis/grm_analyzer.rb
ADDED
@@ -0,0 +1,207 @@
+# frozen_string_literal: true
+
+require_relative '../grm_analysis/production_items'
+require_relative '../grm_analysis/choice_items'
+
+module Dendroid
+  module GrmAnalysis
+    # An analyzer performs an analysis of the grammar rules and
+    # build objects (dotted items, first and follow sets) to be used
+    # by a recognizer or a parser.
+    class GrmAnalyzer
+      # @return [Dendroid::Syntax::Grammar] The grammar subjected to analysis
+      attr_reader :grammar
+      attr_reader :items
+      attr_reader :production2items
+      attr_reader :symbol2productions
+
+      # @return [Dendroid::Syntax::Terminal] The pseudo-terminal `__epsilon` (for empty string)
+      attr_reader :epsilon
+
+      # @return [Dendroid::Syntax::Terminal] The pseudo-terminal `$$` for end of input stream
+      attr_reader :endmarker
+
+      # @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to FIRST SETS mapping
+      attr_reader :first_sets
+
+      # @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to PREDICT SETS mapping
+      attr_reader :predict_sets
+
+      # @return [Hash{Syntax::NonTerminal, Array<Syntax::Terminal>}] non-terminal to FOLLOW SETS mapping
+      attr_reader :follow_sets
+
+      # Constructor.
+      # Build dotted items, first, follow sets for the given grammar
+      # @param aGrammar [Dendroid::Syntax::Grammar]
+      def initialize(aGrammar)
+        @grammar = aGrammar
+        @items = []
+        @production2items = {}
+        @symbol2productions = {}
+        @epsilon = Syntax::Terminal.new(:__epsilon)
+        @endmarker = Syntax::Terminal.new(:"$$")
+        @first_sets = {}
+        @predict_sets = {}
+        @follow_sets = {}
+
+        build_dotted_items
+        build_first_sets
+        build_follow_sets
+      end
+
+      # The next item of a given dotted item
+      # @param aDottedItem [DottedItem]
+      def next_item(aDottedItem)
+        prod = aDottedItem.rule
+        prod.next_item(aDottedItem)
+      end
+
+      private
+
+      def build_dotted_items
+        grammar.rules.each do |prod|
+          lhs = prod.head
+          symbol2productions[lhs] = [] unless symbol2productions.include? lhs
+          symbol2productions[lhs] << prod
+          # production2items[prod] = []
+          mixin = prod.choice? ? ChoiceItems : ProductionItems
+          prod.extend(mixin)
+          prod.build_items
+          rule_items = prod.items.flatten
+          items.concat(rule_items)
+          production2items[prod] = rule_items
+        end
+      end
+
+      def build_first_sets
+        initialize_first_sets
+
+        begin
+          changed = false
+          grammar.rules.each do |prod|
+            head = prod.head
+            first_head = first_sets[head]
+            pre_first_size = first_head.size
+            if prod.choice?
+              prod.alternatives.each do |alt|
+                first_head.merge(sequence_first(alt.members))
+              end
+            else
+              first_head.merge(sequence_first(prod.body.members))
+            end
+            changed = true if first_head.size > pre_first_size
+          end
+        end until !changed
+      end
+
+      def initialize_first_sets
+        grammar.symbols.each do |symb|
+          if symb.terminal?
+            first_sets[symb] = Set.new([symb])
+          elsif symb.nullable?
+            first_sets[symb] = Set.new([epsilon])
+          else
+            first_sets[symb] = Set.new
+          end
+        end
+      end
+
+      def sequence_first(symbol_seq)
+        result = Set.new
+        symbol_seq.each do |symb|
+          result.delete(epsilon)
+          result.merge(first_sets[symb])
+          break unless symb.nullable?
+        end
+
+        result
+      end
+
+      # FOLLOW(A): is the set of terminals (+ end marker) that may come after the
+      # non-terminal A.
+      def build_follow_sets
+        initialize_follow_sets
+
+        begin
+          changed = false
+          grammar.rules.each do |prod|
+            if prod.choice?
+              prod.alternatives.each do |alt|
+                body = alt.members
+                next if body.empty?
+
+                head = prod.head
+                head_follow = follow_sets[head]
+                # trailer = Set.new
+                last = true
+                last_index = body.size - 1
+                last_index.downto(0) do |i|
+                  symbol = body[i]
+                  next if symbol.terminal?
+
+                  follow_symbol = follow_sets[symbol]
+                  size_before = follow_symbol.size
+                  if last
+                    # Rule: if last non-terminal member (symbol) is nullable
+                    # then add FOLLOW(head) to FOLLOW(symbol)
+                    follow_sets[symbol].merge(head_follow) if symbol.nullable?
+                    last = false
+                  else
+                    symbol_seq = body.slice(i + 1, last_index - i)
+                    trailer_first = sequence_first(symbol_seq)
+                    contains_epsilon = trailer_first.include? epsilon
+                    trailer_first.delete(epsilon) if contains_epsilon
+                    follow_sets[symbol].merge(trailer_first)
+                    follow_sets[symbol].merge(head_follow) if contains_epsilon
+                  end
+                  changed = true if follow_sets[symbol].size > size_before
+                end
+              end
+            else
+              body = prod.body.members
+              next if body.empty?
+
+              head = prod.head
+              head_follow = follow_sets[head]
+              # trailer = Set.new
+              last = true
+              last_index = body.size - 1
+              last_index.downto(0) do |i|
+                symbol = body[i]
+                next if symbol.terminal?
+
+                follow_symbol = follow_sets[symbol]
+                size_before = follow_symbol.size
+                if last
+                  # Rule: if last non-terminal member (symbol) is nullable
+                  # then add FOLLOW(head) to FOLLOW(symbol)
+                  follow_sets[symbol].merge(head_follow) if symbol.nullable?
+                  last = false
+                else
+                  symbol_seq = body.slice(i + 1, last_index - i)
+                  trailer_first = sequence_first(symbol_seq)
+                  contains_epsilon = trailer_first.include? epsilon
+                  trailer_first.delete(epsilon) if contains_epsilon
+                  follow_sets[symbol].merge(trailer_first)
+                  follow_sets[symbol].merge(head_follow) if contains_epsilon
+                end
+                changed = true if follow_sets[symbol].size > size_before
+              end
+            end
+          end
+        end until !changed
+      end
+
+      def initialize_follow_sets
+        grammar.symbols.each do |symb|
+          next if symb.terminal?
+
+          follow_sets[symb] = Set.new
+        end
+
+        # Initialize FOLLOW(start symbol) with end marker
+        follow_sets[grammar.start_symbol].add(endmarker)
+      end
+    end # class
+  end # module
+end # module
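A minimal sketch of how the new GrmAnalyzer might be driven. It is not part of the package; the grammar is the one built with the GrmDSL builder in grm_analyzer_spec.rb further down in this diff, and lib/ is assumed to be on the load path.

# Sketch only (assumption: lib/ on $LOAD_PATH); grammar taken from grm_analyzer_spec.rb.
require 'dendroid/grm_dsl/base_grm_builder'
require 'dendroid/grm_analysis/grm_analyzer'

builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
  declare_terminals('PLUS', 'STAR', 'INTEGER')
  rule('p' => 's')
  rule('s' => ['s PLUS m', 'm'])
  rule('m' => ['m STAR t', 't'])
  rule('t' => 'INTEGER')
end

analyzer = Dendroid::GrmAnalysis::GrmAnalyzer.new(builder.grammar)

# Dotted items are built eagerly; the spec expects them to render as
# "p => . s", "p => s .", "s => . s PLUS m", ...
analyzer.items.map(&:to_s).first(4)

analyzer.first_sets   # Hash: non-terminal => Set of terminals (plus __epsilon for nullable symbols)
analyzer.follow_sets  # Hash: non-terminal => Set of terminals ($$ end marker for the start symbol)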
data/lib/dendroid/grm_analysis/production_items.rb
ADDED
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+require_relative 'dotted_item'
+
+module Dendroid
+  module GrmAnalysis
+    # Mix-in module for extending the Dendroid::Syntax::Production class
+    # with dotted items manipulation methods and an attribute named `items`.
+    module ProductionItems
+      # Build the dotted items for this production and assign them
+      # to the `items` attributes
+      # @return [Array<GrmAnalysis::DottedItem>]
+      def build_items
+        @items = if empty?
+                   [DottedItem.new(self, 0)]
+                 else
+                   (0..body.size).reduce([]) do |result, pos|
+                     result << GrmAnalysis::DottedItem.new(self, pos)
+                   end
+                 end
+      end
+
+      # Read accessor for the `items` attribute.
+      # Return the dotted items for this production
+      # @return [Array<GrmAnalysis::DottedItem>]
+      def items
+        @items
+      end
+
+      # Return the predicted item (i.e. the dotted item with the dot at start)
+      # for this production.
+      # @return [Array<GrmAnalysis::DottedItem>]
+      def predicted_items
+        [@items.first]
+      end
+
+      # Return the reduce item (i.e. the dotted item with the dot at end)
+      # for this production.
+      # @return [Array<GrmAnalysis::DottedItem>]
+      def reduce_items
+        [@items.last]
+      end
+
+      # Return the next item given the provided item.
+      # In other words, advance the dot by one position.
+      # @param anItem [GrmAnalysis::DottedItem]
+      # @return [GrmAnalysis::DottedItem|NilClass]
+      def next_item(anItem)
+        return nil if anItem == @items.last
+
+        @items[anItem.position + 1]
+      end
+    end # module
+  end # module
+end # module
data/lib/dendroid/lexical/literal.rb
ADDED
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+require_relative 'token'
+
+module Dendroid
+  module Lexical
+    # A literal (value) is a token that represents a data value in the parsed
+    # language. For instance, in Ruby data values such as strings, numbers,
+    # regular expression,... can appear directly in the source code as text.
+    # These are examples of literal values. One responsibility of a tokenizer/lexer is
+    # to convert the text representation into a corresponding value in a
+    # convenient format for the interpreter/compiler.
+    class Literal < Token
+      # @return [Object] The value expressed in one of the target datatype.
+      attr_reader :value
+
+      # Constructor.
+      # @param original [String] the piece of text from input
+      # @param pos [Dendroid::Lexical::TokenPosition] line, column position of token
+      # @param symbol [Dendroid::Syntax::Terminal, String]
+      # @param aValue [Object] value of the token in internal representation
+      def initialize(original, pos, symbol, aValue)
+        super(original, pos, symbol)
+        @value = aValue
+      end
+    end # class
+  end # module
+end # module
data/lib/dendroid/lexical/token.rb
ADDED
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Dendroid
+  # This module contains the core classes needed for lexical analysis.
+  # The lexical analysis (tokenization) aims to transform the input stream of characters
+  # into a sequence of tokens.
+  module Lexical
+    # A (lexical) token is an object created by a tokenizer (lexer)
+    # and passed to the parser. Such token object is created when a lexer
+    # detects that a sequence of characters(a lexeme) from the input stream
+    # is an instance of a terminal grammar symbol.
+    # Say, that in a particular language, the lexeme 'foo' is an occurrence
+    # of the terminal symbol IDENTIFIER. Then the lexer will return a Token
+    # object that states the fact that 'foo' is indeed an IDENTIFIER. Basically,
+    # a Token is a pair (lexeme, terminal): it asserts that a given piece of text
+    # is an instance of given terminal symbol.
+    class Token
+      # The sequence of character(s) from the input stream that is an occurrence
+      # of the related terminal symbol.
+      # @return [String] Input substring that is an instance of the terminal.
+      attr_reader :source
+
+      # @return [TokenPosition] The position -in "editor" coordinates- of the text in the source file.
+      attr_reader :position
+
+      # @return [String] The name of terminal symbol matching the text.
+      attr_reader :terminal
+
+      # Constructor.
+      # @param original [String] the piece of text from input
+      # @param pos [Dendroid::Lexical::TokenPosition] position of the token in source file
+      # @param symbol [Dendroid::Syntax::Terminal, String]
+      #   The terminal symbol corresponding to the matching text.
+      def initialize(original, pos, symbol)
+        @source = original.dup
+        @position = pos
+        @terminal = symbol
+      end
+
+      # @return [String] The text representation of the token position
+      def pos_to_s
+        position.to_s
+      end
+    end # class
+  end # module
+end # module
data/lib/dendroid/lexical/token_position.rb
ADDED
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module Dendroid
+  module Lexical
+    # Keeps track of the position of a token in the input stream.
+    class TokenPosition
+      # @return [Integer] The line number where the token begins
+      attr_reader :lineno
+
+      # @return [Integer] The column number where the token begins
+      attr_reader :column
+
+      # Constructor
+      # @param line [Integer] The line number where the token begins
+      # @param col [Integer] The column number where the token begins
+      def initialize(line, col)
+        @lineno = line
+        @column = col
+      end
+
+      # Return the position of the start of the token in line:col format
+      # @return [String]
+      def to_s
+        "#{lineno}:#{column}"
+      end
+    end # class
+  end # module
+end # module
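A minimal sketch tying the three new Lexical classes together, mirroring literal_spec.rb further down in this diff. It is not part of the package and assumes lib/ is on the load path.

# Sketch only (assumption: lib/ on $LOAD_PATH); values match literal_spec.rb.
require 'dendroid/lexical/token_position'
require 'dendroid/lexical/literal'

pos = Dendroid::Lexical::TokenPosition.new(2, 5)
lit = Dendroid::Lexical::Literal.new('42', pos, :INTEGER, 42)

lit.pos_to_s  # => "2:5"
lit.terminal  # => :INTEGER
lit.value     # => 42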
data/lib/dendroid/syntax/grm_symbol.rb
CHANGED
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module Dendroid
-  #
+  # This module contains for all classes representing elements of .
   module Syntax
     # Abstract class for grammar symbols.
     # A grammar symbol is an element that appears in grammar rules.
data/lib/dendroid/utils/base_tokenizer.rb
ADDED
@@ -0,0 +1,215 @@
+# frozen_string_literal: true
+
+require 'strscan'
+require_relative '../lexical/token_position'
+require_relative '../lexical/literal'
+
+module Dendroid
+  # This module contains helper classes (e.g. a tokenizer generator)
+  module Utils
+    # A basic tokenizer.
+    # Responsibility: break input into a sequence of token objects.
+    # This class defines a simple DSL to build a tokenizer.
+    class BaseTokenizer
+      # @return [StringScanner] Low-level input scanner
+      attr_reader :scanner
+
+      # @return [Integer] The current line number
+      attr_reader :lineno
+
+      # @return [Integer] Position of last start of line in the input string
+      attr_reader :line_start
+
+      # @return [Hash{Symbol, Array<Regexp>}]
+      attr_reader :actions
+
+      # Constructor
+      # @param aBlock [Proc]
+      def initialize(&aBlock)
+        @scanner = StringScanner.new('')
+        @actions = { skip: [], scan_verbatim: [], scan_value: [] }
+        defaults
+        return unless block_given?
+
+        instance_exec(&aBlock)
+      end
+
+      # Reset the tokenizer and set new text to tokenize
+      # @param source [String]
+      def input=(source)
+        reset
+        scanner.string = source
+      end
+
+      # Reset the tokenizer
+      def reset
+        @lineno = 1
+        @line_start = 0
+        scanner.reset
+      end
+
+      # action, pattern, terminal?, conversion?
+      # action = skip, skip_nl, scan
+
+      # Associate the provided pattern to the action of skipping a newline and
+      # incrementing the line counter.
+      # @param pattern [Regexp]
+      def skip_nl(pattern)
+        actions[:skip_nl] = pattern
+      end
+
+      # Associate the provided pattern with the action to skip whitespace(s).
+      # @param pattern [Regexp]
+      def skip_ws(pattern)
+        actions[:skip_ws] = pattern
+      end
+
+      # Associate the provided pattern with the action to skip the matching text.
+      # @param pattern [Regexp]
+      def skip(pattern)
+        if actions[:skip].empty?
+          actions[:skip] = pattern
+        else
+          new_pattern = actions[:skip].union(pattern)
+          actions[:skip] = new_pattern
+        end
+      end
+
+      # Associate the provided pattern with the action to tokenize the matching text
+      # @param pattern [Regexp]
+      def scan_verbatim(pattern)
+        patt = normalize_pattern(pattern)
+        if actions[:scan_verbatim].empty?
+          actions[:scan_verbatim] = patt
+        else
+          new_pattern = actions[:scan_verbatim].union(patt)
+          actions[:scan_verbatim] = new_pattern
+        end
+      end
+
+      # Associate the provided pattern with the action to tokenize the matching text
+      # as an instance of the given terminal symbol and convert the matching text into
+      # a value by using the given conversion.
+      # @param pattern [Regexp]
+      # @param terminal [Dendroid::Syntax::Terminal]
+      # @param conversion [Proc] a Proc (lambda) that takes a String as argument and return a value.
+      def scan_value(pattern, terminal, conversion)
+        patt = normalize_pattern(pattern)
+        tuple = [patt, terminal, conversion]
+        if actions[:scan_value].empty?
+          actions[:scan_value] = [tuple]
+        else
+          actions[:scan_verbatim] << tuple
+        end
+      end
+
+      # Set the mapping between a verbatim text to its corresponding terminal symbol name
+      # @param mapping [Hash{String, String}]
+      def map_verbatim2terminal(mapping)
+        @verbatim2terminal = mapping
+      end
+
+      # rubocop: disable Metrics/AbcSize
+
+      # Return the next token (if any) from the input stream.
+      # @return [Dendroid::Lexical::Token, NilClass]
+      def next_token
+        token = nil
+
+        # Loop until end of input reached or token found
+        until scanner.eos?
+          if scanner.skip(actions[:skip_nl])
+            next_line_scanned
+            next
+          end
+
+          next if scanner.skip(actions[:skip_ws]) # Skip whitespaces
+
+          if (text = scanner.scan(actions[:scan_verbatim]))
+            token = verbatim_scanned(text)
+            break
+          end
+
+          tuple = actions[:scan_value].find do |(pattern, _terminal, _conversion)|
+            scanner.check(pattern)
+          end
+          if tuple
+            (pattern, terminal, conversion) = tuple
+            text = scanner.scan(pattern)
+            token = value_scanned(text, terminal, conversion)
+            break
+          end
+
+          # Unknown token
+          col = scanner.pos - line_start + 1
+          erroneous = scanner.peek(1).nil? ? '' : scanner.scan(/./)
+          raise StandardError, "Error: [line #{lineno}:#{col}]: Unexpected character #{erroneous}."
+        end
+
+        token
+      end
+
+      # rubocop: enable Metrics/AbcSize
+
+      protected
+
+      def defaults
+        # Defaults
+        skip_nl(/(?:\r\n)|\r|\n/) # Skip newlines
+        skip_ws(/[ \t\f]+/) # Skip blanks
+      end
+
+      private
+
+      def normalize_pattern(pattern)
+        case pattern
+        when String
+          Regexp.new(Regexp.escape(pattern))
+        when Array
+          regexes = pattern.map { |patt| normalize_pattern(patt) }
+          Regexp.union(regexes)
+        else
+          pattern
+        end
+      end
+
+      def next_line_scanned
+        @lineno += 1
+        @line_start = scanner.pos
+      end
+
+      def verbatim_scanned(text)
+        symbol_name = @verbatim2terminal[text]
+        begin
+          lex_length = text ? text.size : 0
+          col = scanner.pos - lex_length - @line_start + 1
+          pos = Lexical::TokenPosition.new(@lineno, col)
+          token = Lexical::Token.new(text, pos, symbol_name)
+        rescue StandardError => e
+          puts "Failing with '#{symbol_name}' and '#{text}'"
+          raise e
+        end
+
+        token
+      end
+
+      def value_scanned(aText, aSymbolName, conversion)
+        value = conversion.call(aText)
+        lex_length = aText ? aText.size : 0
+        col = scanner.pos - lex_length - @line_start + 1
+        build_literal(aSymbolName, value, aText, col)
+      end
+
+      def build_literal(aSymbolName, aValue, aText, aPosition)
+        pos = if aPosition.is_a?(Integer)
+                col = aPosition
+                Lexical::TokenPosition.new(@lineno, col)
+              else
+                aPosition
+              end
+
+        Lexical::Literal.new(aText.dup, pos, aSymbolName, aValue)
+      end
+    end # class
+  end # module
+end # module
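A minimal sketch of the BaseTokenizer DSL, condensed from base_tokenizer_spec.rb further down in this diff. It is not part of the package and assumes lib/ is on the load path.

# Sketch only (assumption: lib/ on $LOAD_PATH); mirrors base_tokenizer_spec.rb.
require 'dendroid/utils/base_tokenizer'

tokenizer = Dendroid::Utils::BaseTokenizer.new do
  map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
  scan_verbatim(['+', '*'])
  scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
end

tokenizer.input = '2 + 3 * 4'
while (tok = tokenizer.next_token)
  puts "#{tok.pos_to_s} #{tok.terminal} #{tok.source}"
end
# Prints 1:1 INTEGER 2, 1:3 PLUS +, and so on; next_token returns nil once the input is exhausted.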
data/spec/dendroid/grm_analysis/alternative_item_spec.rb
ADDED
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+require_relative '..\..\spec_helper'
+require_relative '..\..\..\lib\dendroid\syntax\terminal'
+require_relative '..\..\..\lib\dendroid\syntax\non_terminal'
+require_relative '..\..\..\lib\dendroid\syntax\symbol_seq'
+require_relative '..\..\..\lib\dendroid\syntax\production'
+require_relative '..\..\..\lib\dendroid\grm_analysis\alternative_item'
+
+describe Dendroid::GrmAnalysis::DottedItem do
+  # TODO
+end # describe
data/spec/dendroid/grm_analysis/choice_items_spec.rb
ADDED
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+require_relative '..\..\spec_helper'
+require_relative '..\..\..\lib\dendroid\syntax\terminal'
+require_relative '..\..\..\lib\dendroid\syntax\non_terminal'
+require_relative '..\..\..\lib\dendroid\syntax\symbol_seq'
+require_relative '..\..\..\lib\dendroid\syntax\choice'
+# require_relative '..\..\..\lib\dendroid\grm_analysis\alternative_item'
+require_relative '..\..\..\lib\dendroid\grm_analysis\choice_items'
+
+describe Dendroid::GrmAnalysis::ChoiceItems do
+  let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
+  let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
+  let(:star_symb) { Dendroid::Syntax::Terminal.new('STAR') }
+  let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
+  let(:alt1) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
+  let(:alt2) { Dendroid::Syntax::SymbolSeq.new([num_symb, star_symb, num_symb]) }
+  let(:alt3) { Dendroid::Syntax::SymbolSeq.new([]) }
+  subject do
+    choice = Dendroid::Syntax::Choice.new(expr_symb, [alt1, alt2, alt3])
+    choice.extend(Dendroid::GrmAnalysis::ChoiceItems)
+    choice.build_items
+    choice
+  end
+
+  context 'Methods from mix-in' do
+    it 'builds items for given choice' do
+      expect(subject.items.size).to eq(subject.alternatives.size)
+      subject.items.each_with_index do |itemz, index|
+        expect(itemz.size).to eq(subject.alternatives[index].size + 1)
+      end
+      arr_items = subject.items[1]
+      arr_items.each_with_index do |item, pos|
+        expect(item.rule).to eq(subject)
+        expect(item.position).to eq(pos)
+        expect(item.alt_index).to eq(1)
+      end
+      sole_item = subject.items[2].first # empty alternative...
+      expect(sole_item.rule).to eq(subject)
+      expect(sole_item.position).to eq(0)
+      expect(sole_item.alt_index).to eq(2)
+    end
+
+    it 'returns the first (predicted) items of the choice' do
+      expect(subject.predicted_items.size).to eq(subject.alternatives.size)
+      expectations = [
+        subject.items[0].first,
+        subject.items[1].first,
+        subject.items[2].first
+      ]
+      expect(subject.predicted_items).to eq(expectations)
+    end
+
+    it 'returns the last (reduce) items of the choice' do
+      expect(subject.reduce_items.size).to eq(subject.alternatives.size)
+      expectations = [
+        subject.items[0].last,
+        subject.items[1].last,
+        subject.items[2].last
+      ]
+      expect(subject.reduce_items).to eq(expectations)
+    end
+
+    it 'returns the consecutive item to a given one' do
+      arr_items = subject.items[1]
+      (0..arr_items.size - 1).each do |pos|
+        curr_item = arr_items[pos]
+        next_one = subject.next_item(curr_item)
+        expect(next_one).to eq(arr_items[pos + 1])
+      end
+      expect(subject.next_item(arr_items.last)).to be_nil
+    end
+  end # context
+end # describe
data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb
ADDED
@@ -0,0 +1,165 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'
+require_relative '../../../lib/dendroid/grm_analysis/grm_analyzer'
+
+module SampleGrammars
+  def grammar_l1
+    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Wikipedia entry on Earley parsing
+      declare_terminals('PLUS', 'STAR', 'INTEGER')
+
+      rule('p' => 's')
+      rule('s' => ['s PLUS m', 'm'])
+      rule('m' => ['m STAR t', 't'])
+      rule('t' => 'INTEGER')
+    end
+
+    builder.grammar
+  end
+
+  def tokenizer_l1
+    Utils::BaseTokenizer.new do
+      map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
+
+      scan_verbatim(['+', '*'])
+      scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
+    end
+  end
+
+  def grammar_l2
+    builder = GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Loup Vaillant's example
+      # https://loup-vaillant.fr/tutorials/earley-parsing/recogniser
+      declare_terminals('PLUS', 'MINUS', 'STAR', 'SLASH')
+      declare_terminals('LPAREN', 'RPAREN', 'NUMBER')
+
+      rule('p' => 'sum')
+      rule('sum' => ['sum PLUS product', 'sum MINUS product', 'product'])
+      rule('product' => ['product STAR factor', 'product SLASH factor', 'factor'])
+      rule('factor' => ['LPAREN sum RPAREN', 'NUMBER'])
+    end
+
+    builder.grammar
+  end
+
+  def tokenizer_l2
+    Utils::BaseTokenizer.new do
+      map_verbatim2terminal({
+                              '+' => :PLUS,
+                              '-' => :MINUS,
+                              '*' => :STAR,
+                              '/' => :SLASH,
+                              '(' => :LPAREN,
+                              ')' => :RPAREN
+                            })
+
+      scan_verbatim(['+', '-', '*', '/', '(', ')'])
+      scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
+    end
+  end
+
+  def grammar_l3
+    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Andrew Appel's example
+      # Modern Compiler Implementation in Java
+      declare_terminals('a', 'c', 'd')
+
+      rule('Z' => ['d', 'X Y Z'])
+      rule('Y' => ['', 'c'])
+      rule('X' => %w[Y a])
+    end
+
+    builder.grammar
+  end
+end # module
+
+describe Dendroid::GrmAnalysis::GrmAnalyzer do
+  include SampleGrammars
+  let(:grammar) { grammar_l1 }
+
+  subject { described_class.new(grammar) }
+
+  context 'Initialization:' do
+    it 'is initialized with a grammar' do
+      expect { described_class.new(grammar) }.not_to raise_error
+    end
+
+    it 'knows its related grammar' do
+      expect(subject.grammar).to eq(grammar)
+    end
+
+    it 'knows the dotted items' do
+      item_count = subject.grammar.rules.reduce(0) do |count, prod|
+        count + prod.items.flatten.size
+      end
+      expect(subject.items.size).to eq(item_count)
+      expected_items = [
+        'p => . s',
+        'p => s .',
+        's => . s PLUS m',
+        's => s . PLUS m',
+        's => s PLUS . m',
+        's => s PLUS m .',
+        's => . m',
+        's => m .',
+        'm => . m STAR t',
+        'm => m . STAR t',
+        'm => m STAR . t',
+        'm => m STAR t .',
+        'm => . t',
+        'm => t .',
+        't => . INTEGER',
+        't => INTEGER .'
+      ]
+      expect(subject.items.map(&:to_s)).to eq(expected_items)
+    end
+
+    it 'knows the item that follows a given dotted item' do
+      first_item = subject.items.find { |itm| itm.to_s == 'm => . m STAR t' }
+      second = subject.next_item(first_item)
+      expect(second.to_s).to eq('m => m . STAR t')
+      third = subject.next_item(second)
+      expect(third.to_s).to eq('m => m STAR . t')
+      fourth = subject.next_item(third)
+      expect(fourth.to_s).to eq('m => m STAR t .')
+      expect(subject.next_item(fourth)).to be_nil
+    end
+  end # context
+
+  context 'Provided services:' do
+    subject { described_class.new(grammar_l3) }
+    it 'constructs the FIRST sets of grammar symbols' do
+      expectations = {
+        'a' => ['a'],
+        'c' => ['c'],
+        'd' => ['d'],
+        'X' => %w[a c], # Add epsilon
+        'Y' => ['c'], # Add epsilon
+        'Z' => %w[a c d]
+      }
+      expectations.each_pair do |sym_name, first_names|
+        symb = subject.grammar.name2symbol[sym_name]
+        expected_first = first_names.map { |name| subject.grammar.name2symbol[name] }
+        expected_first << subject.epsilon if sym_name =~ /[XY]/
+        expect(subject.first_sets[symb]).to eq(Set.new(expected_first))
+      end
+    end
+
+    it 'constructs the FOLLOW sets for non-terminal symbols' do
+      expectations = {
+        'Z' => [], # Add $$
+        'Y' => %w[a c d],
+        'X' => %w[a c d]
+      }
+      subject.send(:build_follow_sets)
+      expectations.each_pair do |sym_name, follow_names|
+        symb = subject.grammar.name2symbol[sym_name]
+        expected_follow = follow_names.map { |name| subject.grammar.name2symbol[name] }
+        expected_follow << subject.endmarker if sym_name == 'Z'
+        expect(subject.follow_sets[symb]).to eq(Set.new(expected_follow))
+      end
+    end
+  end # context
+end # describe
data/spec/dendroid/grm_analysis/production_items_spec.rb
ADDED
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/syntax/terminal'
+require_relative '../../../lib/dendroid/syntax/non_terminal'
+require_relative '../../../lib/dendroid/syntax/symbol_seq'
+require_relative '../../../lib/dendroid/syntax/production'
+require_relative '../../../lib/dendroid/grm_analysis/production_items'
+
+describe Dendroid::GrmAnalysis::ProductionItems do
+  let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
+  let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
+  let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
+  let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
+  let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
+  let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
+  let(:empty_prod) do
+    e = Dendroid::Syntax::Production.new(expr_symb, empty_body)
+    e.extend(Dendroid::GrmAnalysis::ProductionItems)
+    e.build_items
+    e
+  end
+
+  subject do
+    prod.extend(Dendroid::GrmAnalysis::ProductionItems)
+    prod.build_items
+    prod
+  end
+
+  context 'Methods from mix-in' do
+    it 'builds items for given non-empty production' do
+      expect(subject.items.size).to eq(subject.body.size + 1)
+      subject.items.each_with_index do |item, index|
+        expect(item.rule).to eq(subject)
+        expect(item.position).to eq(index)
+      end
+    end
+
+    it 'builds the item for given empty production' do
+      expect(empty_prod.items.size).to eq(1)
+      expect(empty_prod.items[0].rule).to eq(empty_prod)
+      expect(empty_prod.items[0].position).to eq(0)
+    end
+
+    it 'returns the first (predicted) item of the production' do
+      expect(subject.predicted_items).to eq([subject.items.first])
+      expect(empty_prod.predicted_items).to eq([empty_prod.items.first])
+    end
+
+    it 'returns the last (reduce) item of the production' do
+      expect(subject.reduce_items).to eq([subject.items.last])
+      expect(empty_prod.reduce_items).to eq([empty_prod.items.first])
+    end
+
+    # rubocop: disable Style/EachForSimpleLoop
+    it 'returns the consecutive item to a given one' do
+      (0..2).each do |pos|
+        curr_item = subject.items[pos]
+        next_one = subject.next_item(curr_item)
+        expect(next_one).to eq(subject.items[pos + 1])
+      end
+      expect(subject.next_item(subject.items[-1])).to be_nil
+
+      expect(empty_prod.next_item(empty_prod.items[-1])).to be_nil
+    end
+    # rubocop: enable Style/EachForSimpleLoop
+  end # context
+end # describe
data/spec/dendroid/lexical/literal_spec.rb
ADDED
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+require_relative '..\..\spec_helper'
+require_relative '..\..\..\lib\dendroid\lexical\token_position'
+require_relative '..\..\..\lib\dendroid\lexical\literal'
+
+describe Dendroid::Lexical::Literal do
+  let(:ex_source) { '42' }
+  let(:ex_pos) { Dendroid::Lexical::TokenPosition.new(2, 5) }
+  let(:ex_terminal) { :INTEGER }
+  let(:ex_value) { 42 }
+  subject { described_class.new(ex_source, ex_pos, ex_terminal, ex_value) }
+
+  context 'Initialization:' do
+    it 'is initialized with a text, position, symbol name and value' do
+      expect { described_class.new(ex_source, ex_pos, ex_terminal, ex_value) }.not_to raise_error
+    end
+
+    it 'knows its value' do
+      expect(subject.value).to eq(ex_value)
+    end
+  end # context
+end # describe
data/spec/dendroid/lexical/token_position_spec.rb
ADDED
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/lexical/token_position'
+
+describe Dendroid::Lexical::TokenPosition do
+  let(:ex_lineno) { 5 }
+  let(:ex_column) { 7 }
+
+  subject { described_class.new(ex_lineno, ex_column) }
+
+  context 'Initialization:' do
+    it 'is initialized with a line number and a column position' do
+      expect { described_class.new(ex_lineno, ex_column) }.not_to raise_error
+    end
+
+    it 'knows its line number' do
+      expect(subject.lineno).to eq(ex_lineno)
+    end
+
+    it 'knows its column number' do
+      expect(subject.column).to eq(ex_column)
+    end
+  end # context
+
+  context 'Provided services:' do
+    it 'renders a String representation of itself' do
+      expect(subject.to_s).to eq("#{ex_lineno}:#{ex_column}")
+    end
+  end # context
+end # describe
data/spec/dendroid/lexical/token_spec.rb
ADDED
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/lexical/token_position'
+require_relative '../../../lib/dendroid/lexical/token'
+
+describe Dendroid::Lexical::Token do
+  let(:ex_source) { 'else' }
+  let(:ex_pos) { Dendroid::Lexical::TokenPosition.new(2, 5) }
+  let(:ex_terminal) { 'ELSE' }
+  subject { described_class.new(ex_source, ex_pos, ex_terminal) }
+
+  context 'Initialization:' do
+    it 'is initialized with a text, position and symbol name' do
+      expect { described_class.new(ex_source, ex_pos, ex_terminal) }.not_to raise_error
+    end
+
+    it 'knows its source text' do
+      expect(subject.source).to eq(ex_source)
+    end
+
+    it 'knows its position' do
+      expect(subject.position).to eq(ex_pos)
+      expect(subject.pos_to_s).to eq('2:5')
+    end
+
+    it 'knows the terminal name' do
+      expect(subject.terminal).to eq(ex_terminal)
+    end
+  end # context
+end # describe
data/spec/dendroid/utils/base_tokenizer_spec.rb
ADDED
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/utils/base_tokenizer'
+
+describe Dendroid::Utils::BaseTokenizer do
+  # Implements a dotted item: expression => NUMBER . PLUS NUMBER
+  subject { described_class.new }
+
+  context 'Initialization:' do
+    it 'is initialized with an optional block' do
+      expect { described_class.new }.not_to raise_error
+    end
+
+    it 'has a scanner at start' do
+      expect(subject.scanner).to be_kind_of(StringScanner)
+    end
+
+    it 'initializes actions to defaults' do
+      expect(subject.actions).to be_member(:skip_nl)
+      expect(subject.actions).to be_member(:skip_ws)
+    end
+  end # context
+
+  context 'Tokenizing:' do
+    subject do
+      described_class.new do
+        scan_verbatim(['+', '*'])
+        scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
+        map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
+      end
+    end
+
+    it 'generates a sequence of tokens from a simple input' do
+      subject.input = '2 + 3 * 4'
+
+      expectations = [
+        ['1:1', '2', :INTEGER, 2],
+        ['1:3', '+', :PLUS, nil],
+        ['1:5', '3', :INTEGER, 3],
+        ['1:7', '*', :STAR, nil],
+        ['1:9', '4', :INTEGER, 4]
+      ]
+      expectations.each do |tuple|
+        tok = subject.next_token
+        %i[pos_to_s source terminal value].each_with_index do |message, index|
+          expect(tok.send(message)).to eq(tuple[index]) unless tuple[index].nil?
+        end
+      end
+
+      # No more token... 'next_token' method returns nil
+      expect(subject.next_token).to be_nil
+    end
+  end # context
+end # describe
data/version.txt
CHANGED
@@ -1 +1 @@
-0.0.
+0.0.10
metadata
CHANGED
@@ -1,16 +1,16 @@
 --- !ruby/object:Gem::Specification
 name: dendroid
 version: !ruby/object:Gem::Version
-  version: 0.0.
+  version: 0.0.10
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-
+date: 2023-11-01 00:00:00.000000000 Z
 dependencies: []
-description: WIP. A Ruby implementation of
+description: WIP. A Ruby implementation of an Earley parser
 email: famished.tiger@yahoo.com
 executables: []
 extensions: []
@@ -24,8 +24,15 @@ files:
 - bin/dendroid
 - dendroid.gemspec
 - lib/dendroid.rb
+- lib/dendroid/grm_analysis/alternative_item.rb
+- lib/dendroid/grm_analysis/choice_items.rb
 - lib/dendroid/grm_analysis/dotted_item.rb
+- lib/dendroid/grm_analysis/grm_analyzer.rb
+- lib/dendroid/grm_analysis/production_items.rb
 - lib/dendroid/grm_dsl/base_grm_builder.rb
+- lib/dendroid/lexical/literal.rb
+- lib/dendroid/lexical/token.rb
+- lib/dendroid/lexical/token_position.rb
 - lib/dendroid/syntax/choice.rb
 - lib/dendroid/syntax/grammar.rb
 - lib/dendroid/syntax/grm_symbol.rb
@@ -34,8 +41,16 @@ files:
 - lib/dendroid/syntax/rule.rb
 - lib/dendroid/syntax/symbol_seq.rb
 - lib/dendroid/syntax/terminal.rb
+- lib/dendroid/utils/base_tokenizer.rb
+- spec/dendroid/grm_analysis/alternative_item_spec.rb
+- spec/dendroid/grm_analysis/choice_items_spec.rb
 - spec/dendroid/grm_analysis/dotted_item_spec.rb
+- spec/dendroid/grm_analysis/grm_analyzer_spec.rb
+- spec/dendroid/grm_analysis/production_items_spec.rb
 - spec/dendroid/grm_dsl/base_grm_builder_spec.rb
+- spec/dendroid/lexical/literal_spec.rb
+- spec/dendroid/lexical/token_position_spec.rb
+- spec/dendroid/lexical/token_spec.rb
 - spec/dendroid/syntax/choice_spec.rb
 - spec/dendroid/syntax/grammar_spec.rb
 - spec/dendroid/syntax/grm_symbol_spec.rb
@@ -44,6 +59,7 @@ files:
 - spec/dendroid/syntax/rule_spec.rb
 - spec/dendroid/syntax/symbol_seq_spec.rb
 - spec/dendroid/syntax/terminal_spec.rb
+- spec/dendroid/utils/base_tokenizer_spec.rb
 - spec/spec_helper.rb
 - version.txt
 homepage: https://github.com/famished-tiger/Dendroid
@@ -68,5 +84,5 @@ requirements: []
 rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
-summary:
+summary: WIP. A Ruby implementation of an Earley parser
 test_files: []