dendroid 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/dendroid.gemspec +1 -1
- data/lib/dendroid/grm_analysis/alternative_item.rb +71 -0
- data/lib/dendroid/grm_analysis/choice_items.rb +59 -0
- data/lib/dendroid/grm_analysis/dotted_item.rb +1 -1
- data/lib/dendroid/grm_analysis/grm_analyzer.rb +190 -0
- data/lib/dendroid/grm_analysis/production_items.rb +54 -0
- data/lib/dendroid/lexical/literal.rb +28 -0
- data/lib/dendroid/lexical/token.rb +46 -0
- data/lib/dendroid/lexical/token_position.rb +20 -0
- data/lib/dendroid/utils/base_tokenizer.rb +176 -0
- data/spec/dendroid/grm_analysis/alternative_item_spec.rb +12 -0
- data/spec/dendroid/grm_analysis/choice_items_spec.rb +74 -0
- data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb +164 -0
- data/spec/dendroid/grm_analysis/production_items_spec.rb +68 -0
- data/spec/dendroid/lexical/literal_spec.rb +23 -0
- data/spec/dendroid/lexical/token_position_spec.rb +31 -0
- data/spec/dendroid/lexical/token_spec.rb +31 -0
- data/spec/dendroid/utils/base_tokenizer_spec.rb +55 -0
- data/version.txt +1 -1
- metadata +19 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 171ca5c202ea7a9d5c156086cecee352539c4f0551556175cf748328d3fa0983
+  data.tar.gz: a81434f0fe610433cce7d6f1213dc3a25d06f7264fda882ae4b5ec4867514b31
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7894b4b5abcabde582c9e16dc93df8e5aa77b9fed8691ea52901c802e957558c4353e009544b53d352e45d145807e2957e43468a1783cca92326b52eddddb140
+  data.tar.gz: b4a8415a997fb45005668b03087dc6347db11dd692c87e6da5c9fa6cd0d609ee31b630e51e896627cf95b1a57a8f60f919dde9cf18c8fba4a4f08191dc28004b
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,19 @@
 
 ## [Unreleased]
 
+## [0.0.9] - 2023-11-01
+Added classes for tokenization and grammar analysis.
+
+### Added
+- Class `AlternativeItem` and its spec file
+- Class `BaseTokenizer` and its spec file
+- Module `ChoiceItems` and its spec file
+- Class `GrmAnalyzer` and its spec file
+- Class `Literal` and its spec file
+- Module `ProductionItems` and its spec file
+- Class `Token` and its spec file
+- Class `TokenPosition` and its spec file
+
 ## [0.0.8] - 2023-10-30
 ### Added
 - Class `DottedItem` and its spec file
data/dendroid.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |s|
     path = ::File.dirname(libpath) + ::File::SEPARATOR
     ::File.read("#{path}version.txt").strip
   end
-  s.summary = '
+  s.summary = 'WIP. A Ruby implementation of a Earley parser'
   s.description = 'WIP. A Ruby implementation of a Earley parser'
   s.authors = ['Dimitri Geshef']
   s.email = 'famished.tiger@yahoo.com'
data/lib/dendroid/grm_analysis/alternative_item.rb
ADDED
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+require_relative 'dotted_item'
+
+module Dendroid
+  module GrmAnalysis
+    # A specialization of DottedItem specific for Choice (rule)
+    class AlternativeItem < DottedItem
+      # @return [Integer] the alternative number
+      attr_reader :alt_index
+
+      # Constructor.
+      # @param aChoice [Dendroid::Syntax::Choice]
+      # @param aPosition [Integer] Position of the dot in rhs of production.
+      # @param index [Integer] the rank of the alternative at hand
+      def initialize(aChoice, aPosition, index)
+        @alt_index = index
+        super(aChoice, aPosition)
+      end
+
+      # Return a String representation of the alternative item.
+      # @return [String]
+      def to_s
+        rhs_names = rule.alternatives[alt_index].members.map(&:to_s)
+        dotted_rhs = rhs_names.insert(position, '.')
+        "#{rule.head} => #{dotted_rhs.join(' ')}"
+      end
+
+      # Indicate whether the rhs of the alternative is empty
+      # @return [Boolean]
+      def empty?
+        rule.alternatives[alt_index].empty?
+      end
+
+      # Indicate whether the dot is at the start of rhs
+      # @return [Boolean]
+      def final_pos?
+        empty? || position == rule.alternatives[alt_index].size
+      end
+
+      alias completed? final_pos?
+
+      # Return the symbol right after the dot (if any)
+      # @return [Dendroid::Syntax::GrmSymbol, NilClass]
+      def next_symbol
+        return nil if empty? || completed?
+
+        rule.alternatives[alt_index].members[position]
+      end
+
+      # Test for equality with another dotted item.
+      # Two dotted items are equal if they refer to the same rule and
+      # have both the same rhs and dot positions.
+      # @return [Boolean]
+      def ==(other)
+        return true if eql?(other)
+
+        (position == other.position) && rule.eql?(other.rule) && (alt_index == other.alt_index)
+      end
+
+      private
+
+      def valid_position(aPosition)
+        raise Exception if aPosition < 0 || aPosition > rule.alternatives[alt_index].size
+
+        aPosition
+      end
+    end # class
+  end # module
+end # module
+
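For orientation, here is a short usage sketch of the new AlternativeItem class. It reuses the constructors exercised in choice_items_spec.rb further down, assumes the 0.0.8 syntax classes (Terminal, NonTerminal, SymbolSeq, Choice) and a run from the gem root; the rendered string depends on how those classes implement to_s.

require_relative 'lib/dendroid/syntax/terminal'
require_relative 'lib/dendroid/syntax/non_terminal'
require_relative 'lib/dendroid/syntax/symbol_seq'
require_relative 'lib/dendroid/syntax/choice'
require_relative 'lib/dendroid/grm_analysis/alternative_item'

num  = Dendroid::Syntax::Terminal.new('NUMBER')
plus = Dendroid::Syntax::Terminal.new('PLUS')
expr = Dendroid::Syntax::NonTerminal.new('expression')
alt0 = Dendroid::Syntax::SymbolSeq.new([num, plus, num])
alt1 = Dendroid::Syntax::SymbolSeq.new([])
choice = Dendroid::Syntax::Choice.new(expr, [alt0, alt1])

# Dot after the first member of alternative 0
item = Dendroid::GrmAnalysis::AlternativeItem.new(choice, 1, 0)
item.to_s        # expected to render as "expression => NUMBER . PLUS NUMBER"
item.next_symbol # the PLUS terminal sitting right after the dot
item.completed?  # false: the dot is not at the end of the alternative
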
data/lib/dendroid/grm_analysis/choice_items.rb
ADDED
@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+
+require_relative 'alternative_item'
+
+module Dendroid
+  module GrmAnalysis
+    # Mix-in module for extending the Syntax::Choice class
+    # with dotted items manipulation methods
+    module ChoiceItems
+      # Build the alternative items for this choice and assign them
+      # to the `items` attributes
+      # @return [Array<Array<GrmAnalysis::AlternativeItem>>]
+      def build_items
+        # AlternativeItem
+        @items = Array.new(alternatives.size) { |_| [] }
+        alternatives.each_with_index do |alt_seq, index|
+          if alt_seq.empty?
+            @items[index] << AlternativeItem.new(self, 0, index)
+          else
+            (0..alt_seq.size).each do |pos|
+              @items[index] << AlternativeItem.new(self, pos, index)
+            end
+          end
+        end
+      end
+
+      # Read accessor for the `items` attribute.
+      # Return the dotted items for this production
+      # @return [Array<Array<GrmAnalysis::AlternativeItem>>]
+      def items
+        @items
+      end
+
+      # Return the predicted items (i.e. the alternative items with the dot at start)
+      # for this choice.
+      # @return [Array<GrmAnalysis::AlternativeItem>]
+      def predicted_items
+        @items.map(&:first)
+      end
+
+      # Return the reduce items (i.e. the alternative items with the dot at end)
+      # for this choice.
+      # @return [Array<GrmAnalysis::AlternativeItem>]
+      def reduce_items
+        @items.map(&:last)
+      end
+
+      # Return the next item given the provided item.
+      # In other words, advance the dot by one position.
+      # @param anItem [GrmAnalysis::AlternativeItem]
+      # @return [GrmAnalysis::AlternativeItem|NilClass]
+      def next_item(anItem)
+        items_arr = items[anItem.alt_index]
+        return nil if anItem == items_arr.last
+        items_arr[anItem.position + 1]
+      end
+    end # module
+  end # module
+end # module
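ChoiceItems is meant to be mixed into an individual Syntax::Choice instance at analysis time (GrmAnalyzer does exactly that with prod.extend). A minimal sketch, continuing the objects built in the previous sketch:

require_relative 'lib/dendroid/grm_analysis/choice_items'

choice.extend(Dendroid::GrmAnalysis::ChoiceItems)
choice.build_items

choice.items.size       # 2: one array of AlternativeItem per alternative
choice.predicted_items  # the items with the dot at the start of each alternative
choice.reduce_items     # the items with the dot at the end of each alternative

# Walk the dot across alternative 0 (NUMBER PLUS NUMBER has three members).
item = choice.predicted_items[0]
3.times { item = choice.next_item(item) }
item.completed?          # true
choice.next_item(item)   # nil: no item after the reduce item
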
data/lib/dendroid/grm_analysis/grm_analyzer.rb
ADDED
@@ -0,0 +1,190 @@
+# frozen_string_literal: true
+
+require_relative '../grm_analysis/production_items'
+require_relative '../grm_analysis/choice_items'
+
+module Dendroid
+  module GrmAnalysis
+    # An analyzer performs an analysis of the rules of a given grammar
+    #
+    class GrmAnalyzer
+      attr_reader :grammar
+      attr_reader :items
+      attr_reader :production2items
+      attr_reader :symbol2productions
+      attr_reader :epsilon
+      attr_reader :endmarker
+      attr_reader :first_sets
+      attr_reader :predict_sets
+      attr_reader :follow_sets
+
+      def initialize(aGrammar)
+        @grammar = aGrammar
+        @items = []
+        @production2items = {}
+        @symbol2productions = {}
+        @epsilon = Syntax::Terminal.new(:__epsilon)
+        @endmarker = Syntax::Terminal.new(:"$$")
+        @first_sets = {}
+        @predict_sets = {}
+        @follow_sets = {}
+
+        build_dotted_items
+        build_first_sets
+        build_follow_sets
+      end
+
+      def next_item(aDottedItem)
+        prod = aDottedItem.rule
+        prod.next_item(aDottedItem)
+      end
+
+      private
+
+      def build_dotted_items
+        grammar.rules.each do |prod|
+          lhs = prod.head
+          symbol2productions[lhs] = [] unless symbol2productions.include? lhs
+          symbol2productions[lhs] << prod
+          # production2items[prod] = []
+          mixin = prod.choice? ? ChoiceItems : ProductionItems
+          prod.extend(mixin)
+          prod.build_items
+          rule_items = prod.items.flatten
+          items.concat(rule_items)
+          production2items[prod] = rule_items
+        end
+      end
+
+      def build_first_sets
+        initialize_first_sets
+
+        begin
+          changed = false
+          grammar.rules.each do |prod|
+            head = prod.head
+            first_head = first_sets[head]
+            pre_first_size = first_head.size
+            if prod.choice?
+              prod.alternatives.each do |alt|
+                first_head.merge(sequence_first(alt.members))
+              end
+            else
+              first_head.merge(sequence_first(prod.body.members))
+            end
+            changed = true if (first_head.size > pre_first_size)
+          end
+        end until !changed
+      end
+
+      def initialize_first_sets
+        grammar.symbols.each do |symb|
+          if symb.terminal?
+            first_sets[symb] = Set.new([symb])
+          elsif symb.nullable?
+            first_sets[symb] = Set.new([epsilon])
+          else
+            first_sets[symb] = Set.new
+          end
+        end
+      end
+
+      def sequence_first(symbol_seq)
+        result = Set.new
+        symbol_seq.each do |symb|
+          result.delete(epsilon)
+          result.merge(first_sets[symb])
+          break unless symb.nullable?
+        end
+
+        result
+      end
+
+      # FOLLOW(A): is the set of terminals (+ end marker) that may come after the
+      # non-terminal A.
+      def build_follow_sets
+        initialize_follow_sets
+
+        begin
+          changed = false
+          grammar.rules.each do |prod|
+            if prod.choice?
+              prod.alternatives.each do |alt|
+                body = alt.members
+                next if body.empty?
+
+                head = prod.head
+                head_follow = follow_sets[head]
+                trailer = Set.new
+                last = true
+                last_index = body.size - 1
+                last_index.downto(0) do |i|
+                  symbol = body[i]
+                  next if symbol.terminal?
+
+                  follow_symbol = follow_sets[symbol]
+                  size_before = follow_symbol.size
+                  if last
+                    # Rule: if last non-terminal member (symbol) is nullable
+                    # then add FOLLOW(head) to FOLLOW(symbol)
+                    follow_sets[symbol].merge(head_follow) if symbol.nullable?
+                    last = false
+                  else
+                    symbol_seq = body.slice(i+1, last_index - i)
+                    trailer_first = sequence_first(symbol_seq)
+                    contains_epsilon = trailer_first.include? epsilon
+                    trailer_first.delete(epsilon) if contains_epsilon
+                    follow_sets[symbol].merge(trailer_first)
+                    follow_sets[symbol].merge(head_follow) if contains_epsilon
+                  end
+                  changed = true if follow_sets[symbol].size > size_before
+                end
+              end
+            else
+              body = prod.body.members
+              next if body.empty?
+
+              head = prod.head
+              head_follow = follow_sets[head]
+              trailer = Set.new
+              last = true
+              last_index = body.size - 1
+              last_index.downto(0) do |i|
+                symbol = body[i]
+                next if symbol.terminal?
+
+                follow_symbol = follow_sets[symbol]
+                size_before = follow_symbol.size
+                if last
+                  # Rule: if last non-terminal member (symbol) is nullable
+                  # then add FOLLOW(head) to FOLLOW(symbol)
+                  follow_sets[symbol].merge(head_follow) if symbol.nullable?
+                  last = false
+                else
+                  symbol_seq = body.slice(i+1, last_index - i)
+                  trailer_first = sequence_first(symbol_seq)
+                  contains_epsilon = trailer_first.include? epsilon
+                  trailer_first.delete(epsilon) if contains_epsilon
+                  follow_sets[symbol].merge(trailer_first)
+                  follow_sets[symbol].merge(head_follow) if contains_epsilon
+                end
+                changed = true if follow_sets[symbol].size > size_before
+              end
+            end
+          end
+        end until !changed
+      end
+
+      def initialize_follow_sets
+        grammar.symbols.each do |symb|
+          next if symb.terminal?
+
+          follow_sets[symb] = Set.new
+        end
+
+        # Initialize FOLLOW(start symbol) with end marker
+        follow_sets[grammar.start_symbol].add(endmarker)
+      end
+    end # class
+  end # module
+end # module
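GrmAnalyzer ties the pieces together: it extends every rule with the matching mix-in (ChoiceItems or ProductionItems), collects all dotted items, and computes the FIRST and FOLLOW sets by fixed-point iteration. The sketch below mirrors grm_analyzer_spec.rb further down; the grammar DSL (BaseGrmBuilder, declare_terminals, rule) already shipped in 0.0.8, and the expected FIRST/FOLLOW contents are worked out by hand for this small grammar.

require 'set'
require_relative 'lib/dendroid/grm_dsl/base_grm_builder'
require_relative 'lib/dendroid/grm_analysis/grm_analyzer'

builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
  declare_terminals('PLUS', 'STAR', 'INTEGER')

  rule('p' => 's')
  rule('s' => ['s PLUS m', 'm'])
  rule('m' => ['m STAR t', 't'])
  rule('t' => 'INTEGER')
end
grammar = builder.grammar

analyzer = Dendroid::GrmAnalysis::GrmAnalyzer.new(grammar)

# Every dot position of every rule, e.g. "s => . s PLUS m", "s => s . PLUS m", ...
analyzer.items.map(&:to_s)

s_symbol = grammar.name2symbol['s']
analyzer.first_sets[s_symbol]   # { INTEGER }  - every derivation of s starts with an INTEGER
analyzer.follow_sets[s_symbol]  # { PLUS, $$ } - s may be followed by PLUS or the end marker
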
data/lib/dendroid/grm_analysis/production_items.rb
ADDED
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+require_relative 'dotted_item'
+
+module Dendroid
+  module GrmAnalysis
+    # Mix-in module for extending the Dendroid::Syntax::Production class
+    # with dotted items manipulation methods and an attribute named `items`.
+    module ProductionItems
+      # Build the dotted items for this production and assign them
+      # to the `items` attributes
+      # @return [Array<GrmAnalysis::DottedItem>]
+      def build_items
+        @items = if empty?
+          [DottedItem.new(self, 0)]
+        else
+          (0..body.size).reduce([]) do | result,pos|
+            result << GrmAnalysis::DottedItem.new(self, pos)
+          end
+        end
+      end
+
+      # Read accessor for the `items` attribute.
+      # Return the dotted items for this production
+      # @return [Array<GrmAnalysis::DottedItem>]
+      def items
+        @items
+      end
+
+      # Return the predicted item (i.e. the dotted item with the dot at start)
+      # for this production.
+      # @return [Array<GrmAnalysis::DottedItem>]
+      def predicted_items
+        [@items.first]
+      end
+
+      # Return the reduce item (i.e. the dotted item with the dot at end)
+      # for this production.
+      # @return [Array<GrmAnalysis::DottedItem>]
+      def reduce_items
+        [@items.last]
+      end
+
+      # Return the next item given the provided item.
+      # In other words, advance the dot by one position.
+      # @param anItem [GrmAnalysis::DottedItem]
+      # @return [GrmAnalysis::DottedItem|NilClass]
+      def next_item(anItem)
+        return nil if anItem == @items.last
+        @items[anItem.position + 1]
+      end
+    end # module
+  end # module
+end # module
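ProductionItems is the counterpart of ChoiceItems for plain single-sequence productions. A sketch mirroring production_items_spec.rb further down (same assumptions as the earlier sketches):

require_relative 'lib/dendroid/syntax/terminal'
require_relative 'lib/dendroid/syntax/non_terminal'
require_relative 'lib/dendroid/syntax/symbol_seq'
require_relative 'lib/dendroid/syntax/production'
require_relative 'lib/dendroid/grm_analysis/production_items'

num  = Dendroid::Syntax::Terminal.new('NUMBER')
plus = Dendroid::Syntax::Terminal.new('PLUS')
expr = Dendroid::Syntax::NonTerminal.new('expression')
rhs  = Dendroid::Syntax::SymbolSeq.new([num, plus, num])

prod = Dendroid::Syntax::Production.new(expr, rhs)
prod.extend(Dendroid::GrmAnalysis::ProductionItems)
prod.build_items

prod.items.size                  # 4: dot positions 0..3 for a three-symbol body
prod.predicted_items             # [prod.items.first]
prod.reduce_items                # [prod.items.last]
prod.next_item(prod.items.last)  # nil
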
data/lib/dendroid/lexical/literal.rb
ADDED
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+require_relative 'token'
+
+module Dendroid
+  module Lexical
+    # A literal (value) is a token that represents a data value in the parsed
+    # language. For instance, in Ruby data values such as strings, numbers,
+    # regular expression,... can appear directly in the source code as text.
+    # These are examples of literal values. One responsibility of a tokenizer/lexer is
+    # to convert the text representation into a corresponding value in a
+    # convenient format for the interpreter/compiler.
+    class Literal < Token
+      # @return [Object] The value expressed in one of the target datatype.
+      attr_reader :value
+
+      # Constructor.
+      # @param original [String] the piece of text from input
+      # @param pos [Dendroid::Lexical::TokenPosition] line, column position of token
+      # @param symbol [Dendroid::Syntax::Terminal, String]
+      # @param aValue [Object] value of the token in internal representation
+      def initialize(original, pos, symbol, aValue)
+        super(original, pos, symbol)
+        @value = aValue
+      end
+    end # class
+  end # module
+end # module
data/lib/dendroid/lexical/token.rb
ADDED
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Dendroid
+  # This module contains the core classes needed for lexical analysis.
+  # The lexical analysis (tokenization) aims to transform the input stream of characters
+  # into a sequence of tokens.
+  module Lexical
+    # A (lexical) token is an object created by a tokenizer (lexer)
+    # and passed to the parser. Such token object is created when a lexer
+    # detects that a sequence of characters(a lexeme) from the input stream
+    # is an instance of a terminal grammar symbol.
+    # Say, that in a particular language, the lexeme 'foo' is an occurrence
+    # of the terminal symbol IDENTIFIER. Then the lexer will return a Token
+    # object that states the fact that 'foo' is indeed an IDENTIFIER. Basically,
+    # a Token is a pair (lexeme, terminal): it asserts that a given piece of text
+    # is an instance of given terminal symbol.
+    class Token
+      # The sequence of character(s) from the input stream that is an occurrence
+      # of the related terminal symbol.
+      # @return [String] Input substring that is an instance of the terminal.
+      attr_reader :source
+
+      # @return [TokenPosition] The position -in "editor" coordinates- of the text in the source file.
+      attr_reader :position
+
+      # @return [String] The name of terminal symbol matching the text.
+      attr :terminal
+
+      # Constructor.
+      # @param original [String] the piece of text from input
+      # @param pos [Dendroid::Lexical::TokenPosition] position of the token in source file
+      # @param symbol [Dendroid::Syntax::Terminal, String]
+      #   The terminal symbol corresponding to the matching text.
+      def initialize(original, pos, symbol)
+        @source = original.dup
+        @position = pos
+        @terminal = symbol
+      end
+
+      # @return [String] The text representation of the token position
+      def pos_to_s
+        position.to_s
+      end
+    end # class
+  end # module
+end # module
data/lib/dendroid/lexical/token_position.rb
ADDED
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module Dendroid
+  module Lexical
+    class TokenPosition
+      attr_reader :lineno
+      attr_reader :column
+
+      def initialize(line, col)
+        @lineno = line
+        @column = col
+      end
+
+      def to_s
+        "#{lineno}:#{column}"
+      end
+    end # class
+  end # module
+end # module
+
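The three Lexical classes are small value objects; the BaseTokenizer below is their normal producer. A quick sketch using the same constructor arguments as the lexical spec files further down:

require_relative 'lib/dendroid/lexical/token_position'
require_relative 'lib/dendroid/lexical/token'
require_relative 'lib/dendroid/lexical/literal'

pos = Dendroid::Lexical::TokenPosition.new(2, 5)
pos.to_s          # "2:5"

keyword = Dendroid::Lexical::Token.new('else', pos, 'ELSE')
keyword.source    # "else"
keyword.pos_to_s  # "2:5"

number = Dendroid::Lexical::Literal.new('42', pos, :INTEGER, 42)
number.value      # 42, the lexeme '42' converted to its internal representation
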
data/lib/dendroid/utils/base_tokenizer.rb
ADDED
@@ -0,0 +1,176 @@
+# frozen_string_literal: true
+
+require 'strscan'
+require_relative '../lexical/token_position'
+require_relative '../lexical/literal'
+
+module Dendroid
+  module Utils
+    class BaseTokenizer
+      attr_reader :scanner
+      attr_reader :lineno
+      attr_reader :line_start
+      attr_reader :actions
+
+      def initialize(&aBlock)
+        @scanner = StringScanner.new('')
+        @actions = { skip: [], scan_verbatim: [], scan_value: [] }
+        defaults
+
+        if block_given?
+          instance_exec(&aBlock)
+          # grammar_complete!
+        end
+      end
+
+      def input=(source)
+        scanner.string = source
+        reset
+      end
+
+      def reset
+        @lineno = 1
+        @line_start = 0
+      end
+
+      # action, pattern, terminal?, conversion?
+      # action = skip, skip_nl, scan
+      def skip_nl(pattern)
+        actions[:skip_nl] = pattern
+      end
+
+      def skip_ws(pattern)
+        actions[:skip_ws] = pattern
+      end
+
+      def skip(pattern)
+        if actions[:skip].empty?
+          actions[:skip] = pattern
+        else
+          new_pattern = actions[:skip].union(pattern)
+          actions[:skip] = new_pattern
+        end
+      end
+
+      def scan_verbatim(pattern)
+        patt = normalize_pattern(pattern)
+        if actions[:scan_verbatim].empty?
+          actions[:scan_verbatim] = patt
+        else
+          new_pattern = actions[:scan_verbatim].union(patt)
+          actions[:scan_verbatim] = new_pattern
+        end
+      end
+
+      def scan_value(pattern, terminal, convertion)
+        patt = normalize_pattern(pattern)
+        tuple = [patt, terminal, convertion]
+        if actions[:scan_value].empty?
+          actions[:scan_value] = [tuple]
+        else
+          actions[:scan_verbatim] << tuple
+        end
+      end
+
+      def map_verbatim2terminal(mapping)
+        @verbatim2terminal = mapping
+      end
+
+      def next_token
+        token = nil
+
+        # Loop until end of input reached or token found
+        until scanner.eos?
+          if scanner.skip(actions[:skip_nl])
+            next_line_scanned
+            next
+          end
+
+          next if scanner.skip(actions[:skip_ws]) # Skip whitespaces
+
+          if (text = scanner.scan(actions[:scan_verbatim]))
+            token = verbatim_scanned(text)
+            break
+          end
+
+          tuple = actions[:scan_value].find do |(pattern, terminal, conversion)|
+            scanner.check(pattern)
+          end
+          if tuple
+            (pattern, terminal, conversion) = tuple
+            text = scanner.scan(pattern)
+            token = value_scanned(text, terminal, conversion)
+            break
+          end
+
+          # Unknown token
+          col = scanner.pos - line_start + 1
+          erroneous = scanner.peek(1).nil? ? '' : scanner.scan(/./)
+          raise Exception, "Error: [line #{lineno}:#{col}]: Unexpected character #{erroneous}."
+        end
+
+        token
+      end
+
+      protected
+
+      def defaults
+        # Defaults
+        skip_nl /(?:\r\n)|\r|\n/ # Skip newlines
+        skip_ws /[ \t\f]+/ # Skip blanks
+      end
+
+      private
+
+      def normalize_pattern(pattern)
+        case pattern
+        when String
+          Regexp.new(Regexp.escape(pattern))
+        when Array
+          regexes = pattern.map { |patt| normalize_pattern(patt) }
+          Regexp.union(regexes)
+        else
+          pattern
+        end
+      end
+
+      def next_line_scanned
+        @lineno += 1
+        @line_start = scanner.pos
+      end
+
+      def verbatim_scanned(text)
+        symbol_name = @verbatim2terminal[text]
+        begin
+          lex_length = text ? text.size : 0
+          col = scanner.pos - lex_length - @line_start + 1
+          pos = Lexical::TokenPosition.new(@lineno, col)
+          token = Lexical::Token.new(text, pos, symbol_name)
+        rescue Exception => e
+          puts "Failing with '#{symbol_name}' and '#{text}'"
+          raise e
+        end
+
+        token
+      end
+
+      def value_scanned(aText, aSymbolName, convertion)
+        value = convertion.call(aText)
+        lex_length = aText ? aText.size : 0
+        col = scanner.pos - lex_length - @line_start + 1
+        build_literal(aSymbolName, value, aText, col)
+      end
+
+      def build_literal(aSymbolName, aValue, aText, aPosition)
+        pos = if aPosition.kind_of?(Integer)
+          col = aPosition
+          Lexical::TokenPosition.new(@lineno, col)
+        else
+          aPosition
+        end
+
+        Lexical::Literal.new(aText.dup, pos, aSymbolName, aValue)
+      end
+    end # class
+  end # module
+end # module
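BaseTokenizer is configured through its block DSL (scan_verbatim, scan_value, map_verbatim2terminal, plus the default skip rules) and hands back Token/Literal objects one at a time via next_token, returning nil once the input is exhausted. The sketch below uses the same configuration as base_tokenizer_spec.rb further down.

require_relative 'lib/dendroid/utils/base_tokenizer'

tokenizer = Dendroid::Utils::BaseTokenizer.new do
  scan_verbatim(['+', '*'])
  scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
  map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
end

tokenizer.input = '2 + 3 * 4'

while (token = tokenizer.next_token)
  # Literals respond to #value; verbatim tokens only carry their source text.
  value = token.respond_to?(:value) ? token.value : token.source
  puts "#{token.pos_to_s} #{token.terminal} #{value.inspect}"
end
# Prints: 1:1 INTEGER 2, then 1:3 PLUS "+", 1:5 INTEGER 3, 1:7 STAR "*", 1:9 INTEGER 4
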
data/spec/dendroid/grm_analysis/alternative_item_spec.rb
ADDED
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+require_relative '..\..\spec_helper'
+require_relative '..\..\..\lib\dendroid\syntax\terminal'
+require_relative '..\..\..\lib\dendroid\syntax\non_terminal'
+require_relative '..\..\..\lib\dendroid\syntax\symbol_seq'
+require_relative '..\..\..\lib\dendroid\syntax\production'
+require_relative '..\..\..\lib\dendroid\grm_analysis\alternative_item'
+
+describe Dendroid::GrmAnalysis::DottedItem do
+  # TODO
+end # describe
data/spec/dendroid/grm_analysis/choice_items_spec.rb
ADDED
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+require_relative '..\..\spec_helper'
+require_relative '..\..\..\lib\dendroid\syntax\terminal'
+require_relative '..\..\..\lib\dendroid\syntax\non_terminal'
+require_relative '..\..\..\lib\dendroid\syntax\symbol_seq'
+require_relative '..\..\..\lib\dendroid\syntax\choice'
+# require_relative '..\..\..\lib\dendroid\grm_analysis\alternative_item'
+require_relative '..\..\..\lib\dendroid\grm_analysis\choice_items'
+
+describe Dendroid::GrmAnalysis::ChoiceItems do
+  let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
+  let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
+  let(:star_symb) { Dendroid::Syntax::Terminal.new('STAR') }
+  let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
+  let(:alt1) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
+  let(:alt2) { Dendroid::Syntax::SymbolSeq.new([num_symb, star_symb, num_symb]) }
+  let(:alt3) { Dendroid::Syntax::SymbolSeq.new([]) }
+  subject do
+    choice = Dendroid::Syntax::Choice.new(expr_symb, [alt1, alt2, alt3])
+    choice.extend(Dendroid::GrmAnalysis::ChoiceItems)
+    choice.build_items
+    choice
+  end
+
+  context 'Methods from mix-in' do
+    it 'builds items for given choice' do
+      expect(subject.items.size).to eq(subject.alternatives.size)
+      subject.items.each_with_index do |itemz, index|
+        expect(itemz.size).to eq(subject.alternatives[index].size + 1)
+      end
+      arr_items = subject.items[1]
+      arr_items.each_with_index do |item, pos|
+        expect(item.rule).to eq(subject)
+        expect(item.position).to eq(pos)
+        expect(item.alt_index).to eq(1)
+      end
+      sole_item = subject.items[2].first # empty alternative...
+      expect(sole_item.rule).to eq(subject)
+      expect(sole_item.position).to eq(0)
+      expect(sole_item.alt_index).to eq(2)
+    end
+
+    it 'returns the first (predicted) items of the choice' do
+      expect(subject.predicted_items.size).to eq(subject.alternatives.size)
+      expectations = [
+        subject.items[0].first,
+        subject.items[1].first,
+        subject.items[2].first
+      ]
+      expect(subject.predicted_items).to eq(expectations)
+    end
+
+    it 'returns the last (reduce) items of the choice' do
+      expect(subject.reduce_items.size).to eq(subject.alternatives.size)
+      expectations = [
+        subject.items[0].last,
+        subject.items[1].last,
+        subject.items[2].last
+      ]
+      expect(subject.reduce_items).to eq(expectations)
+    end
+
+    it 'returns the consecutive item to a given one' do
+      arr_items = subject.items[1]
+      (0..arr_items.size - 1).each do |pos|
+        curr_item = arr_items[pos]
+        next_one = subject.next_item(curr_item)
+        expect(next_one).to eq(arr_items[pos + 1])
+      end
+      expect(subject.next_item(arr_items.last)).to be_nil
+    end
+  end # context
+end # describe
data/spec/dendroid/grm_analysis/grm_analyzer_spec.rb
ADDED
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'
+require_relative '../../../lib/dendroid/grm_analysis/grm_analyzer'
+
+module SampleGrammars
+  def grammar_l1
+    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Wikipedia entry on Earley parsing
+      declare_terminals('PLUS', 'STAR', 'INTEGER')
+
+      rule('p' => 's')
+      rule('s' => ['s PLUS m', 'm'])
+      rule('m' => ['m STAR t', 't'])
+      rule('t' => 'INTEGER')
+    end
+
+    builder.grammar
+  end
+
+  def tokenizer_l1
+    Utils::BaseTokenizer.new do
+      map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
+
+      scan_verbatim(['+', '*'])
+      scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
+    end
+  end
+
+  def grammar_l2
+    builder = GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Loup Vaillant's example
+      # https://loup-vaillant.fr/tutorials/earley-parsing/recogniser
+      declare_terminals('PLUS', 'MINUS', 'STAR', 'SLASH')
+      declare_terminals('LPAREN', 'RPAREN', 'NUMBER')
+
+      rule('p' => 'sum')
+      rule('sum' => ['sum PLUS product', 'sum MINUS product', 'product'])
+      rule('product' => ['product STAR factor', 'product SLASH factor', 'factor'])
+      rule('factor' => ['LPAREN sum RPAREN', 'NUMBER'])
+    end
+
+    builder.grammar
+  end
+
+  def tokenizer_l2
+    Utils::BaseTokenizer.new do
+      map_verbatim2terminal({
+        '+' => :PLUS,
+        '-' => :MINUS,
+        '*' => :STAR,
+        '/' => :SLASH,
+        '(' => :LPAREN,
+        ')' => :RPAREN })
+
+      scan_verbatim(['+', '-', '*', '/', '(', ')'])
+      scan_value(/\d+/, :NUMBER, ->(txt) { txt.to_i })
+    end
+  end
+
+  def grammar_l3
+    builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
+      # Grammar inspired from Andrew Appel's example
+      # Modern Compiler Implementation in Java
+      declare_terminals('a', 'c', 'd')
+
+      rule('Z' => ['d', 'X Y Z'])
+      rule('Y' => ['', 'c'])
+      rule('X' => ['Y', 'a'])
+    end
+
+    builder.grammar
+  end
+end # module
+
+describe Dendroid::GrmAnalysis::GrmAnalyzer do
+  include SampleGrammars
+  let(:grammar) { grammar_l1 }
+
+  subject { described_class.new(grammar) }
+
+  context 'Initialization:' do
+    it 'is initialized with a grammar' do
+      expect { described_class.new(grammar) }.not_to raise_error
+    end
+
+    it 'knows its related grammar' do
+      expect(subject.grammar).to eq(grammar)
+    end
+
+    it 'knows the dotted items' do
+      item_count = subject.grammar.rules.reduce(0) do |count, prod|
+        count += prod.items.flatten.size
+      end
+      expect(subject.items.size).to eq(item_count)
+      expected_items = [
+        'p => . s',
+        'p => s .',
+        's => . s PLUS m',
+        's => s . PLUS m',
+        's => s PLUS . m',
+        's => s PLUS m .',
+        's => . m',
+        's => m .',
+        'm => . m STAR t',
+        'm => m . STAR t',
+        'm => m STAR . t',
+        'm => m STAR t .',
+        'm => . t',
+        'm => t .',
+        't => . INTEGER',
+        't => INTEGER .'
+      ]
+      expect(subject.items.map(&:to_s)).to eq(expected_items)
+    end
+
+    it 'knows the item that follows a given dotted item' do
+      first_item = subject.items.find { |itm| itm.to_s == 'm => . m STAR t' }
+      second = subject.next_item(first_item)
+      expect(second.to_s).to eq('m => m . STAR t')
+      third = subject.next_item(second)
+      expect(third.to_s).to eq('m => m STAR . t')
+      fourth = subject.next_item(third)
+      expect(fourth.to_s).to eq('m => m STAR t .')
+      expect(subject.next_item(fourth)).to be_nil
+    end
+  end # context
+
+  context 'Provided services:' do
+    subject { described_class.new(grammar_l3) }
+    it 'constructs the FIRST sets of grammar symbols' do
+      expectations = {
+        'a' => ['a'],
+        'c' => ['c'],
+        'd' => ['d'],
+        'X' => ['a', 'c'], # Add epsilon
+        'Y' => ['c'], # Add epsilon
+        'Z' => ['a', 'c', 'd']
+      }
+      expectations.each_pair do |sym_name, first_names|
+        symb = subject.grammar.name2symbol[sym_name]
+        expected_first = first_names.map { |name| subject.grammar.name2symbol[name] }
+        expected_first << subject.epsilon if sym_name =~ /[XY]/
+        expect(subject.first_sets[symb]).to eq(Set.new(expected_first))
+      end
+    end
+
+    it 'constructs the FOLLOW sets for non-terminal symbols' do
+      expectations = {
+        'Z' => [], # Add $$
+        'Y' => ['a', 'c', 'd'],
+        'X' => ['a', 'c', 'd']
+      }
+      subject.send(:build_follow_sets)
+      expectations.each_pair do |sym_name, follow_names|
+        symb = subject.grammar.name2symbol[sym_name]
+        expected_follow = follow_names.map { |name| subject.grammar.name2symbol[name] }
+        expected_follow << subject.endmarker if sym_name == 'Z'
+        expect(subject.follow_sets[symb]).to eq(Set.new(expected_follow))
+      end
+    end
+  end # context
+end # describe
data/spec/dendroid/grm_analysis/production_items_spec.rb
ADDED
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/syntax/terminal'
+require_relative '../../../lib/dendroid/syntax/non_terminal'
+require_relative '../../../lib/dendroid/syntax/symbol_seq'
+require_relative '../../../lib/dendroid/syntax/production'
+require_relative '../../../lib/dendroid/grm_analysis/production_items'
+
+describe Dendroid::GrmAnalysis::ProductionItems do
+  let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
+  let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
+  let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
+  let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
+  let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
+  let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
+  let(:empty_prod) do
+    e = Dendroid::Syntax::Production.new(expr_symb, empty_body)
+    e.extend(Dendroid::GrmAnalysis::ProductionItems)
+    e.build_items
+    e
+  end
+
+  subject do
+    prod.extend(Dendroid::GrmAnalysis::ProductionItems)
+    prod.build_items
+    prod
+  end
+
+  context 'Methods from mix-in' do
+    it 'builds items for given non-empty production' do
+      expect(subject.items.size).to eq(subject.body.size + 1)
+      subject.items.each_with_index do |item, index|
+        expect(item.rule).to eq(subject)
+        expect(item.position).to eq(index)
+      end
+    end
+
+    it 'builds the item for given empty production' do
+      expect(empty_prod.items.size).to eq(1)
+      expect(empty_prod.items[0].rule).to eq(empty_prod)
+      expect(empty_prod.items[0].position).to eq(0)
+    end
+
+    it 'returns the first (predicted) item of the production' do
+      expect(subject.predicted_items).to eq([subject.items.first])
+      expect(empty_prod.predicted_items).to eq([empty_prod.items.first])
+    end
+
+    it 'returns the last (reduce) item of the production' do
+      expect(subject.reduce_items).to eq([subject.items.last])
+      expect(empty_prod.reduce_items).to eq([empty_prod.items.first])
+    end
+
+    # rubocop: disable Style/EachForSimpleLoop
+    it 'returns the consecutive item to a given one' do
+      (0..2).each do |pos|
+        curr_item = subject.items[pos]
+        next_one = subject.next_item(curr_item)
+        expect(next_one).to eq(subject.items[pos + 1])
+      end
+      expect(subject.next_item(subject.items[-1])).to be_nil
+
+      expect(empty_prod.next_item(empty_prod.items[-1])).to be_nil
+    end
+    # rubocop: enable Style/EachForSimpleLoop
+  end # context
+end # describe
data/spec/dendroid/lexical/literal_spec.rb
ADDED
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+require_relative '..\..\spec_helper'
+require_relative '..\..\..\lib\dendroid\lexical\token_position'
+require_relative '..\..\..\lib\dendroid\lexical\literal'
+
+describe Dendroid::Lexical::Literal do
+  let(:ex_source) { '42' }
+  let(:ex_pos) { Dendroid::Lexical::TokenPosition.new(2, 5) }
+  let(:ex_terminal) { :INTEGER }
+  let(:ex_value) { 42 }
+  subject { described_class.new(ex_source, ex_pos, ex_terminal, ex_value) }
+
+  context 'Initialization:' do
+    it 'is initialized with a text, position, symbol name and value' do
+      expect { described_class.new(ex_source, ex_pos, ex_terminal, ex_value) }.not_to raise_error
+    end
+
+    it 'knows its value' do
+      expect(subject.value).to eq(ex_value)
+    end
+  end # context
+end # describe
data/spec/dendroid/lexical/token_position_spec.rb
ADDED
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/lexical/token_position'
+
+describe Dendroid::Lexical::TokenPosition do
+  let(:ex_lineno) { 5 }
+  let(:ex_column) { 7 }
+
+  subject { described_class.new(ex_lineno, ex_column) }
+
+  context 'Initialization:' do
+    it 'is initialized with a line number and a column position' do
+      expect { described_class.new(ex_lineno, ex_column) }.not_to raise_error
+    end
+
+    it 'knows its line number' do
+      expect(subject.lineno).to eq(ex_lineno)
+    end
+
+    it 'knows its column number' do
+      expect(subject.column).to eq(ex_column)
+    end
+  end # context
+
+  context 'Provided services:' do
+    it 'renders a String representation of itself' do
+      expect(subject.to_s).to eq("#{ex_lineno}:#{ex_column}")
+    end
+  end # context
+end # describe
data/spec/dendroid/lexical/token_spec.rb
ADDED
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/lexical/token_position'
+require_relative '../../../lib/dendroid/lexical/token'
+
+describe Dendroid::Lexical::Token do
+  let(:ex_source) { 'else' }
+  let(:ex_pos) { Dendroid::Lexical::TokenPosition.new(2, 5) }
+  let(:ex_terminal) { 'ELSE' }
+  subject { described_class.new(ex_source, ex_pos, ex_terminal) }
+
+  context 'Initialization:' do
+    it 'is initialized with a text, position and symbol name' do
+      expect { described_class.new(ex_source, ex_pos, ex_terminal) }.not_to raise_error
+    end
+
+    it 'knows its source text' do
+      expect(subject.source).to eq(ex_source)
+    end
+
+    it 'knows its position' do
+      expect(subject.position).to eq(ex_pos)
+      expect(subject.pos_to_s).to eq('2:5')
+    end
+
+    it 'knows the terminal name' do
+      expect(subject.terminal).to eq(ex_terminal)
+    end
+  end # context
+end # describe
data/spec/dendroid/utils/base_tokenizer_spec.rb
ADDED
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+require_relative '../../spec_helper'
+require_relative '../../../lib/dendroid/utils/base_tokenizer'
+
+describe Dendroid::Utils::BaseTokenizer do
+  # Implements a dotted item: expression => NUMBER . PLUS NUMBER
+  subject { described_class.new }
+
+  context 'Initialization:' do
+    it 'is initialized with an optional block' do
+      expect {described_class.new }.not_to raise_error
+    end
+
+    it 'has a scanner at start' do
+      expect(subject.scanner).to be_kind_of(StringScanner)
+    end
+
+    it 'initializes actions to defaults' do
+      expect(subject.actions).to be_member(:skip_nl)
+      expect(subject.actions).to be_member(:skip_ws)
+    end
+  end # context
+
+  context 'Tokenizing:' do
+    subject do
+      described_class.new {
+        scan_verbatim(['+', '*'])
+        scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
+        map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
+      }
+    end
+
+    it 'generates a sequence of tokens from a simple input' do
+      subject.input = '2 + 3 * 4'
+
+      expectations = [
+        ['1:1', '2', :INTEGER, 2],
+        ['1:3', '+', :PLUS, nil],
+        ['1:5', '3', :INTEGER, 3],
+        ['1:7', '*', :STAR, nil],
+        ['1:9', '4', :INTEGER, 4]
+      ]
+      expectations.each do |tuple|
+        tok = subject.next_token
+        [:pos_to_s, :source, :terminal, :value].each_with_index do |message, index|
+          expect(tok.send(message)).to eq(tuple[index]) unless tuple[index].nil?
+        end
+      end
+
+      # No more token... 'next_token' method returns nil
+      expect(subject.next_token).to be_nil
+    end
+  end # context
+end # describe
data/version.txt
CHANGED
@@ -1 +1 @@
-0.0.8
+0.0.9
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dendroid
 version: !ruby/object:Gem::Version
-  version: 0.0.8
+  version: 0.0.9
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-
+date: 2023-11-01 00:00:00.000000000 Z
 dependencies: []
 description: WIP. A Ruby implementation of a Earley parser
 email: famished.tiger@yahoo.com
@@ -24,8 +24,15 @@ files:
 - bin/dendroid
 - dendroid.gemspec
 - lib/dendroid.rb
+- lib/dendroid/grm_analysis/alternative_item.rb
+- lib/dendroid/grm_analysis/choice_items.rb
 - lib/dendroid/grm_analysis/dotted_item.rb
+- lib/dendroid/grm_analysis/grm_analyzer.rb
+- lib/dendroid/grm_analysis/production_items.rb
 - lib/dendroid/grm_dsl/base_grm_builder.rb
+- lib/dendroid/lexical/literal.rb
+- lib/dendroid/lexical/token.rb
+- lib/dendroid/lexical/token_position.rb
 - lib/dendroid/syntax/choice.rb
 - lib/dendroid/syntax/grammar.rb
 - lib/dendroid/syntax/grm_symbol.rb
@@ -34,8 +41,16 @@ files:
 - lib/dendroid/syntax/rule.rb
 - lib/dendroid/syntax/symbol_seq.rb
 - lib/dendroid/syntax/terminal.rb
+- lib/dendroid/utils/base_tokenizer.rb
+- spec/dendroid/grm_analysis/alternative_item_spec.rb
+- spec/dendroid/grm_analysis/choice_items_spec.rb
 - spec/dendroid/grm_analysis/dotted_item_spec.rb
+- spec/dendroid/grm_analysis/grm_analyzer_spec.rb
+- spec/dendroid/grm_analysis/production_items_spec.rb
 - spec/dendroid/grm_dsl/base_grm_builder_spec.rb
+- spec/dendroid/lexical/literal_spec.rb
+- spec/dendroid/lexical/token_position_spec.rb
+- spec/dendroid/lexical/token_spec.rb
 - spec/dendroid/syntax/choice_spec.rb
 - spec/dendroid/syntax/grammar_spec.rb
 - spec/dendroid/syntax/grm_symbol_spec.rb
@@ -44,6 +59,7 @@ files:
 - spec/dendroid/syntax/rule_spec.rb
 - spec/dendroid/syntax/symbol_seq_spec.rb
 - spec/dendroid/syntax/terminal_spec.rb
+- spec/dendroid/utils/base_tokenizer_spec.rb
 - spec/spec_helper.rb
 - version.txt
 homepage: https://github.com/famished-tiger/Dendroid
@@ -68,5 +84,5 @@ requirements: []
 rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
-summary:
+summary: WIP. A Ruby implementation of a Earley parser
 test_files: []