rley 0.8.03 → 0.8.05
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +28 -8
- data/CHANGELOG.md +10 -0
- data/examples/data_formats/JSON/README.md +34 -0
- data/examples/data_formats/JSON/sample01.json +3 -0
- data/examples/data_formats/JSON/sample01.svg +36 -0
- data/examples/data_formats/JSON/sample02.json +6 -0
- data/examples/data_formats/JSON/sample02.svg +128 -0
- data/examples/data_formats/JSON/sample03.json +88 -0
- data/examples/general/calc_iter1/README.md +26 -0
- data/examples/general/calc_iter2/README.md +55 -0
- data/examples/general/general_examples.md +37 -0
- data/examples/tokenizer/README.md +46 -0
- data/examples/tokenizer/loxxy_raw_scanner.rex +98 -0
- data/examples/tokenizer/loxxy_raw_scanner.rex.rb +256 -0
- data/examples/tokenizer/loxxy_tokenizer.rb +94 -0
- data/examples/tokenizer/run_tokenizer.rb +29 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/lexical/literal.rb +29 -0
- data/lib/rley/lexical/token.rb +7 -4
- data/lib/rley/syntax/base_grammar_builder.rb +0 -2
- data/lib/rley.rb +1 -1
- data/spec/rley/lexical/literal_spec.rb +33 -0
- data/spec/rley/lexical/token_spec.rb +15 -4
- data/spec/rley/notation/grammar_builder_spec.rb +2 -2
- data/spec/rley/parser/dangling_else_spec.rb +5 -7
- data/spec/rley/parser/gfg_chart_spec.rb +0 -1
- data/spec/rley/parser/gfg_earley_parser_spec.rb +131 -134
- data/spec/rley/parser/gfg_parsing_spec.rb +1 -2
- data/spec/rley/syntax/base_grammar_builder_spec.rb +7 -7
- data/spec/rley/syntax/grammar_spec.rb +6 -9
- metadata +19 -9
- data/lib/rley/parser/parse_tracer.rb +0 -103
- data/lib/rley/syntax/literal.rb +0 -20
- data/lib/rley/syntax/verbatim_symbol.rb +0 -27
- data/spec/rley/syntax/literal_spec.rb +0 -31
- data/spec/rley/syntax/verbatim_symbol_spec.rb +0 -38
@@ -0,0 +1,98 @@
|
|
1
|
+
# rubocop: disable Style/MutableConstant
|
2
|
+
# rubocop: disable Layout/SpaceBeforeSemicolon
|
3
|
+
# rubocop: disable Style/Alias
|
4
|
+
# rubocop: disable Style/AndOr
|
5
|
+
# rubocop: disable Style/MultilineIfModifier
|
6
|
+
# rubocop: disable Style/StringLiterals
|
7
|
+
# rubocop: disable Style/MethodDefParentheses
|
8
|
+
# rubocop: disable Security/Open
|
9
|
+
# rubocop: disable Style/TrailingCommaInArrayLiteral
|
10
|
+
# rubocop: disable Layout/EmptyLinesAroundMethodBody
|
11
|
+
# rubocop: disable Style/WhileUntilDo
|
12
|
+
# rubocop: disable Style/MultilineWhenThen
|
13
|
+
# rubocop: disable Layout/ExtraSpacing
|
14
|
+
# rubocop: disable Layout/SpaceInsideRangeLiteral
|
15
|
+
# rubocop: disable Style/CaseEquality
|
16
|
+
# rubocop: disable Style/EmptyCaseCondition
|
17
|
+
# rubocop: disable Style/SymbolArray
|
18
|
+
# rubocop: disable Lint/DuplicateBranch
|
19
|
+
# rubocop: disable Layout/EmptyLineBetweenDefs
|
20
|
+
# rubocop: disable Layout/IndentationConsistency
|
21
|
+
|
22
|
+
class LoxxyRawScanner
|
23
|
+
option
|
24
|
+
lineno
|
25
|
+
column
|
26
|
+
|
27
|
+
macro
|
28
|
+
DIGIT /\d/
|
29
|
+
ALPHA /[a-zA-Z_]/
|
30
|
+
|
31
|
+
rule
|
32
|
+
# Delimiters, punctuators, operators
|
33
|
+
/[ \t]+/
|
34
|
+
/\/\/[^\r\n]*/
|
35
|
+
/\r|\n/ newline
|
36
|
+
/[!=<>]=?/ { [:SPECIAL, text] }
|
37
|
+
/[(){},;.\-+\/*]/ { [:SPECIAL, text] }
|
38
|
+
|
39
|
+
# Literals & identifiers
|
40
|
+
/#{DIGIT}+(\.#{DIGIT}+)?/ { [:NUMBER, text] }
|
41
|
+
/nil/ { [:NIL, text] }
|
42
|
+
/false/ { [:FALSE, text] }
|
43
|
+
/true/ { [:TRUE, text] }
|
44
|
+
/#{ALPHA}(#{ALPHA}|#{DIGIT})*/ { [:IDENTIFIER, text] }
|
45
|
+
/""/ { [:STRING, '""'] }
|
46
|
+
/"/ :IN_STRING
|
47
|
+
|
48
|
+
:IN_STRING /[^"]+/ { [:STRING, "\"#{text}\""] }
|
49
|
+
:IN_STRING /"/ nil
|
50
|
+
|
51
|
+
inner
|
52
|
+
|
53
|
+
def do_parse
|
54
|
+
tokens = []
|
55
|
+
while (tok = next_token) do
|
56
|
+
(type, lexeme) = tok
|
57
|
+
if type == :state
|
58
|
+
self.state = lexeme
|
59
|
+
next
|
60
|
+
else
|
61
|
+
tokens << [type, lexeme, lineno, column]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
tokens
|
66
|
+
end
|
67
|
+
|
68
|
+
# Rule action for a line terminator matched by /\r|\n/.
#
# A lone LF needs no bookkeeping here: the generated `next_token` already
# increments `lineno` and resets `start_of_current_line_pos` when it peeks
# a "\n". This method therefore only has to handle CR and CR LF.
#
# @param txt [String] the matched terminator, "\r" or "\n"
# @return [nil] always nil, so that no token is emitted for a line break
def newline(txt)
  # Double quotes are required: '\r' is a backslash followed by 'r' and
  # could never equal the scanned carriage return character.
  if txt == "\r"
    ss.skip(/\n/) # CR LF sequence: swallow the LF so it isn't counted twice

    self.lineno += 1
    # The scanner is already past the terminator, so the next line starts
    # exactly at the current position (no +1 as in the peek-based case).
    self.start_of_current_line_pos = ss.pos
  end

  nil
end
|
78
|
+
end
|
79
|
+
# rubocop: enable Style/MutableConstant
|
80
|
+
# rubocop: enable Layout/SpaceBeforeSemicolon
|
81
|
+
# rubocop: enable Style/Alias
|
82
|
+
# rubocop: enable Style/AndOr
|
83
|
+
# rubocop: enable Style/MultilineIfModifier
|
84
|
+
# rubocop: enable Style/StringLiterals
|
85
|
+
# rubocop: enable Style/MethodDefParentheses
|
86
|
+
# rubocop: enable Security/Open
|
87
|
+
# rubocop: enable Style/TrailingCommaInArrayLiteral
|
88
|
+
# rubocop: enable Layout/EmptyLinesAroundMethodBody
|
89
|
+
# rubocop: enable Style/WhileUntilDo
|
90
|
+
# rubocop: enable Style/MultilineWhenThen
|
91
|
+
# rubocop: enable Layout/ExtraSpacing
|
92
|
+
# rubocop: enable Layout/SpaceInsideRangeLiteral
|
93
|
+
# rubocop: enable Style/CaseEquality
|
94
|
+
# rubocop: enable Style/EmptyCaseCondition
|
95
|
+
# rubocop: enable Style/SymbolArray
|
96
|
+
# rubocop: enable Lint/DuplicateBranch
|
97
|
+
# rubocop: enable Layout/EmptyLineBetweenDefs
|
98
|
+
# rubocop: enable Layout/IndentationConsistency
|
@@ -0,0 +1,256 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# encoding: UTF-8
|
4
|
+
#--
|
5
|
+
# This file is automatically generated. Do not modify it.
|
6
|
+
# Generated by: oedipus_lex version 2.5.3.
|
7
|
+
# Source: loxxy_raw_scanner.rex
|
8
|
+
#++
|
9
|
+
|
10
|
+
# rubocop: disable Style/MutableConstant
|
11
|
+
# rubocop: disable Layout/SpaceBeforeSemicolon
|
12
|
+
# rubocop: disable Style/Alias
|
13
|
+
# rubocop: disable Style/AndOr
|
14
|
+
# rubocop: disable Style/MultilineIfModifier
|
15
|
+
# rubocop: disable Style/StringLiterals
|
16
|
+
# rubocop: disable Style/MethodDefParentheses
|
17
|
+
# rubocop: disable Security/Open
|
18
|
+
# rubocop: disable Style/TrailingCommaInArrayLiteral
|
19
|
+
# rubocop: disable Layout/EmptyLinesAroundMethodBody
|
20
|
+
# rubocop: disable Style/WhileUntilDo
|
21
|
+
# rubocop: disable Style/MultilineWhenThen
|
22
|
+
# rubocop: disable Layout/ExtraSpacing
|
23
|
+
# rubocop: disable Layout/SpaceInsideRangeLiteral
|
24
|
+
# rubocop: disable Style/CaseEquality
|
25
|
+
# rubocop: disable Style/EmptyCaseCondition
|
26
|
+
# rubocop: disable Style/SymbolArray
|
27
|
+
# rubocop: disable Lint/DuplicateBranch
|
28
|
+
# rubocop: disable Layout/EmptyLineBetweenDefs
|
29
|
+
# rubocop: disable Layout/IndentationConsistency
|
30
|
+
|
31
|
+
|
32
|
+
##
|
33
|
+
# The generated lexer LoxxyRawScanner
|
34
|
+
|
35
|
+
class LoxxyRawScanner
|
36
|
+
require 'strscan'
|
37
|
+
|
38
|
+
# :stopdoc:
|
39
|
+
DIGIT = /\d/
|
40
|
+
ALPHA = /[a-zA-Z_]/
|
41
|
+
# :startdoc:
|
42
|
+
# :stopdoc:
|
43
|
+
class LexerError < StandardError ; end
|
44
|
+
class ScanError < LexerError ; end
|
45
|
+
# :startdoc:
|
46
|
+
|
47
|
+
##
|
48
|
+
# The current line number.
|
49
|
+
|
50
|
+
attr_accessor :lineno
|
51
|
+
##
|
52
|
+
# The file name / path
|
53
|
+
|
54
|
+
attr_accessor :filename
|
55
|
+
|
56
|
+
##
|
57
|
+
# The StringScanner for this lexer.
|
58
|
+
|
59
|
+
attr_accessor :ss
|
60
|
+
|
61
|
+
##
|
62
|
+
# The current lexical state.
|
63
|
+
|
64
|
+
attr_accessor :state
|
65
|
+
|
66
|
+
alias :match :ss
|
67
|
+
|
68
|
+
##
|
69
|
+
# The match groups for the current scan.
|
70
|
+
|
71
|
+
def matches
|
72
|
+
m = (1..9).map { |i| ss[i] }
|
73
|
+
m.pop until m[-1] or m.empty?
|
74
|
+
m
|
75
|
+
end
|
76
|
+
|
77
|
+
##
|
78
|
+
# Yields on the current action.
|
79
|
+
|
80
|
+
def action
|
81
|
+
yield
|
82
|
+
end
|
83
|
+
|
84
|
+
##
|
85
|
+
# The previous position. Only available if the :column option is on.
|
86
|
+
|
87
|
+
attr_accessor :old_pos
|
88
|
+
|
89
|
+
##
|
90
|
+
# The position of the start of the current line. Only available if the
|
91
|
+
# :column option is on.
|
92
|
+
|
93
|
+
attr_accessor :start_of_current_line_pos
|
94
|
+
|
95
|
+
##
|
96
|
+
# The current column, starting at 0. Only available if the
|
97
|
+
# :column option is on.
|
98
|
+
def column
|
99
|
+
old_pos - start_of_current_line_pos
|
100
|
+
end
|
101
|
+
|
102
|
+
|
103
|
+
##
|
104
|
+
# The current scanner class. Must be overridden in subclasses.
|
105
|
+
|
106
|
+
def scanner_class
|
107
|
+
StringScanner
|
108
|
+
end unless instance_methods(false).map(&:to_s).include?("scanner_class")
|
109
|
+
|
110
|
+
##
|
111
|
+
# Parse the given string.
|
112
|
+
|
113
|
+
def parse str
|
114
|
+
self.ss = scanner_class.new str
|
115
|
+
self.lineno = 1
|
116
|
+
self.start_of_current_line_pos = 0
|
117
|
+
self.state ||= nil
|
118
|
+
|
119
|
+
do_parse
|
120
|
+
end
|
121
|
+
|
122
|
+
##
|
123
|
+
# Read in and parse the file at +path+.
|
124
|
+
|
125
|
+
def parse_file path
|
126
|
+
self.filename = path
|
127
|
+
open path do |f|
|
128
|
+
parse f.read
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
##
|
133
|
+
# The current location in the parse.
|
134
|
+
|
135
|
+
def location
|
136
|
+
[
|
137
|
+
(filename || "<input>"),
|
138
|
+
lineno,
|
139
|
+
column,
|
140
|
+
].compact.join(":")
|
141
|
+
end
|
142
|
+
|
143
|
+
##
|
144
|
+
# Lex the next token.
|
145
|
+
|
146
|
+
def next_token
|
147
|
+
|
148
|
+
token = nil
|
149
|
+
|
150
|
+
until ss.eos? or token do
|
151
|
+
if ss.peek(1) == "\n"
|
152
|
+
self.lineno += 1
|
153
|
+
# line starts 1 position after the newline
|
154
|
+
self.start_of_current_line_pos = ss.pos + 1
|
155
|
+
end
|
156
|
+
self.old_pos = ss.pos
|
157
|
+
token =
|
158
|
+
case state
|
159
|
+
when nil then
|
160
|
+
case
|
161
|
+
when ss.skip(/[ \t]+/) then
|
162
|
+
# do nothing
|
163
|
+
when ss.skip(/\/\/[^\r\n]*/) then
|
164
|
+
# do nothing
|
165
|
+
when text = ss.scan(/\r|\n/) then
|
166
|
+
newline text
|
167
|
+
when text = ss.scan(/[!=<>]=?/) then
|
168
|
+
action { [:SPECIAL, text] }
|
169
|
+
when text = ss.scan(/[(){},;.\-+\/*]/) then
|
170
|
+
action { [:SPECIAL, text] }
|
171
|
+
when text = ss.scan(/#{DIGIT}+(\.#{DIGIT}+)?/) then
|
172
|
+
action { [:NUMBER, text] }
|
173
|
+
when text = ss.scan(/nil/) then
|
174
|
+
action { [:NIL, text] }
|
175
|
+
when text = ss.scan(/false/) then
|
176
|
+
action { [:FALSE, text] }
|
177
|
+
when text = ss.scan(/true/) then
|
178
|
+
action { [:TRUE, text] }
|
179
|
+
when text = ss.scan(/#{ALPHA}(#{ALPHA}|#{DIGIT})*/) then
|
180
|
+
action { [:IDENTIFIER, text] }
|
181
|
+
when ss.skip(/""/) then
|
182
|
+
action { [:STRING, '""'] }
|
183
|
+
when ss.skip(/"/) then
|
184
|
+
[:state, :IN_STRING]
|
185
|
+
else
|
186
|
+
text = ss.string[ss.pos .. -1]
|
187
|
+
raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
|
188
|
+
end
|
189
|
+
when :IN_STRING then
|
190
|
+
case
|
191
|
+
when text = ss.scan(/[^"]+/) then
|
192
|
+
action { [:STRING, "\"#{text}\""] }
|
193
|
+
when ss.skip(/"/) then
|
194
|
+
[:state, nil]
|
195
|
+
else
|
196
|
+
text = ss.string[ss.pos .. -1]
|
197
|
+
raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
|
198
|
+
end
|
199
|
+
else
|
200
|
+
raise ScanError, "undefined state at #{location}: '#{state}'"
|
201
|
+
end # token = case state
|
202
|
+
|
203
|
+
next unless token # allow functions to trigger redo w/ nil
|
204
|
+
end # while
|
205
|
+
|
206
|
+
raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
|
207
|
+
token.nil? || (Array === token && token.size >= 2)
|
208
|
+
|
209
|
+
# auto-switch state
|
210
|
+
self.state = token.last if token && token.first == :state
|
211
|
+
|
212
|
+
token
|
213
|
+
end # def next_token
|
214
|
+
def do_parse
|
215
|
+
tokens = []
|
216
|
+
while (tok = next_token) do
|
217
|
+
(type, lexeme) = tok
|
218
|
+
if type == :state
|
219
|
+
self.state = lexeme
|
220
|
+
next
|
221
|
+
else
|
222
|
+
tokens << [type, lexeme, lineno, column]
|
223
|
+
end
|
224
|
+
end
|
225
|
+
tokens
|
226
|
+
end
|
227
|
+
# Rule action for a line terminator matched by /\r|\n/.
#
# A lone LF needs no bookkeeping here: `next_token` already increments
# `lineno` and resets `start_of_current_line_pos` when it peeks a "\n".
# This method therefore only has to handle CR and CR LF.
#
# @param txt [String] the matched terminator, "\r" or "\n"
# @return [nil] always nil, so that `next_token` keeps scanning
def newline(txt)
  # Double quotes are required: '\r' is a backslash followed by 'r' and
  # could never equal the scanned carriage return character.
  if txt == "\r"
    ss.skip(/\n/) # CR LF sequence: swallow the LF so it isn't counted twice
    self.lineno += 1
    # The scanner is already past the terminator, so the next line starts
    # exactly at the current position (no +1 as in the peek-based case).
    self.start_of_current_line_pos = ss.pos
  end
  nil
end
|
235
|
+
end # class
|
236
|
+
|
237
|
+
# rubocop: enable Style/MutableConstant
|
238
|
+
# rubocop: enable Layout/SpaceBeforeSemicolon
|
239
|
+
# rubocop: enable Style/Alias
|
240
|
+
# rubocop: enable Style/AndOr
|
241
|
+
# rubocop: enable Style/MultilineIfModifier
|
242
|
+
# rubocop: enable Style/StringLiterals
|
243
|
+
# rubocop: enable Style/MethodDefParentheses
|
244
|
+
# rubocop: enable Security/Open
|
245
|
+
# rubocop: enable Style/TrailingCommaInArrayLiteral
|
246
|
+
# rubocop: enable Layout/EmptyLinesAroundMethodBody
|
247
|
+
# rubocop: enable Style/WhileUntilDo
|
248
|
+
# rubocop: enable Style/MultilineWhenThen
|
249
|
+
# rubocop: enable Layout/ExtraSpacing
|
250
|
+
# rubocop: enable Layout/SpaceInsideRangeLiteral
|
251
|
+
# rubocop: enable Style/CaseEquality
|
252
|
+
# rubocop: enable Style/EmptyCaseCondition
|
253
|
+
# rubocop: enable Style/SymbolArray
|
254
|
+
# rubocop: enable Lint/DuplicateBranch
|
255
|
+
# rubocop: enable Layout/EmptyLineBetweenDefs
|
256
|
+
# rubocop: enable Layout/IndentationConsistency
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rley'
|
4
|
+
require_relative 'loxxy_raw_scanner.rex'
|
5
|
+
|
6
|
+
class LoxxyTokenizer
|
7
|
+
# @return [LoxxyRawScanner] Scanner generated by the `oedipus_lex` gem.
|
8
|
+
attr_reader :scanner
|
9
|
+
|
10
|
+
# @return [String] Input text to tokenize
|
11
|
+
attr_reader :input
|
12
|
+
|
13
|
+
Keyword2name = begin
|
14
|
+
lookup = %w[
|
15
|
+
and class else false fun for if nil or
|
16
|
+
print return super this true var while
|
17
|
+
].map { |x| [x, x.upcase] }.to_h
|
18
|
+
lookup.default = 'IDENTIFIER'
|
19
|
+
lookup.freeze
|
20
|
+
end
|
21
|
+
|
22
|
+
Special2name = {
|
23
|
+
'(' => 'LEFT_PAREN',
|
24
|
+
')' => 'RIGHT_PAREN',
|
25
|
+
'{' => 'LEFT_BRACE',
|
26
|
+
'}' => 'RIGHT_BRACE',
|
27
|
+
',' => 'COMMA',
|
28
|
+
'.' => 'DOT',
|
29
|
+
'-' => 'MINUS',
|
30
|
+
'+' => 'PLUS',
|
31
|
+
';' => 'SEMICOLON',
|
32
|
+
'/' => 'SLASH',
|
33
|
+
'*' => 'STAR',
|
34
|
+
'!' => 'BANG',
|
35
|
+
'!=' => 'BANG_EQUAL',
|
36
|
+
'=' => 'EQUAL',
|
37
|
+
'==' => 'EQUAL_EQUAL',
|
38
|
+
'>' => 'GREATER',
|
39
|
+
'>=' => 'GREATER_EQUAL',
|
40
|
+
'<' => 'LESS',
|
41
|
+
'<=' => 'LESS_EQUAL'
|
42
|
+
}.freeze
|
43
|
+
|
44
|
+
def initialize(source = nil)
|
45
|
+
@scanner = LoxxyRawScanner.new
|
46
|
+
start_with(source)
|
47
|
+
end
|
48
|
+
|
49
|
+
def start_with(source)
|
50
|
+
@input = source
|
51
|
+
end
|
52
|
+
|
53
|
+
# Tokenize the current input.
#
# The raw scanner reports 0-based columns; they are shifted to 1-based
# positions before conversion. A final 'EOF' token is always appended,
# placed right after the last token, or at line 1, column 1 when the
# input contains no token at all.
#
# @return [Array<Rley::Lexical::Token>] the converted tokens followed by EOF
def tokens
  cooked = scanner.parse(input).map do |(raw_type, raw_text, line, col)|
    convert(raw_type, raw_text, Rley::Lexical::Position.new(line, col + 1))
  end

  forelast = cooked.last
  eof_pos =
    if forelast
      last_col = forelast.position.column + forelast.lexeme.length
      Rley::Lexical::Position.new(forelast.position.line, last_col)
    else
      # No token was produced (empty, blank or comment-only source):
      # there is no last token to derive the EOF position from.
      Rley::Lexical::Position.new(1, 1)
    end

  cooked << Rley::Lexical::Token.new(nil, 'EOF', eof_pos)
  cooked
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
# Turn one raw scanner token into a Rley token object.
#
# @param token_kind [Symbol] raw category reported by the scanner
#   (:SPECIAL, :FALSE, :NUMBER, :NIL, :STRING, :TRUE or :IDENTIFIER)
# @param token_text [String] the scanned text
# @param pos [Rley::Lexical::Position] position of the token; its column
#   is decremented in place for non-empty string literals
# @return [Rley::Lexical::Token] a Token or Literal with its position set
# @raise [LoxxyRawScanner::ScanError] when token_kind is not a known category
def convert(token_kind, token_text, pos)
  result =
    case token_kind
    when :SPECIAL
      Rley::Lexical::Token.new(token_text, Special2name[token_text])
    when :FALSE
      Rley::Lexical::Literal.new(false, token_text, 'FALSE')
    when :NUMBER
      # A fractional part makes it a Float, otherwise an Integer.
      num_val = token_text.match?(/\.\d+$/) ? token_text.to_f : token_text.to_i
      Rley::Lexical::Literal.new(num_val, token_text, 'NUMBER')
    when :NIL
      Rley::Lexical::Literal.new(nil, token_text, 'NIL')
    when :STRING
      str_val = token_text[1..-2] # strip the surrounding double quotes
      # For a non-empty string the scanner records the column of the text
      # that follows the opening quote; step back onto the quote itself.
      pos.column = pos.column - 1 unless str_val.empty?
      Rley::Lexical::Literal.new(str_val, token_text, 'STRING')
    when :TRUE
      Rley::Lexical::Literal.new(true, token_text, 'TRUE')
    when :IDENTIFIER
      # Keyword2name falls back to 'IDENTIFIER' for non-keywords.
      Rley::Lexical::Token.new(token_text, Keyword2name[token_text])
    else
      # ScanError lives in the scanner's namespace and the column comes
      # from pos; neither is resolvable as a bare name in this class.
      raise LoxxyRawScanner::ScanError,
            "Error: [line #{pos.line}:#{pos.column}]: Unexpected token #{token_text}"
    end

  result.position = pos
  result
end
|
94
|
+
end # class
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require_relative 'loxxy_tokenizer'
|
5
|
+
|
6
|
+
lox_source = <<LOX_END
|
7
|
+
class Base {
|
8
|
+
foo() {
|
9
|
+
print "Base.foo()";
|
10
|
+
}
|
11
|
+
}
|
12
|
+
|
13
|
+
class Derived < Base {
|
14
|
+
foo() {
|
15
|
+
print "Derived.foo()";
|
16
|
+
super.foo();
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
Derived().foo();
|
21
|
+
// expect: Derived.foo()
|
22
|
+
// expect: Base.foo()
|
23
|
+
LOX_END
|
24
|
+
|
25
|
+
loxxy_tokenizer = LoxxyTokenizer.new
|
26
|
+
loxxy_tokenizer.start_with(lox_source)
|
27
|
+
tokens = loxxy_tokenizer.tokens
|
28
|
+
File::open('tokens.yaml', 'w') { |f| YAML.dump(tokens, f) }
|
29
|
+
puts 'Done: tokenizer results saved in YAML.'
|
data/lib/rley/constants.rb
CHANGED
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'token'
|
4
|
+
|
5
|
+
module Rley # This module is used as a namespace
|
6
|
+
module Lexical # This module is used as a namespace
|
7
|
+
# A literal (value) is a token that represents a data value in the parsed
|
8
|
+
# language. For instance, in Ruby data values such as strings, numbers,
|
9
|
+
# regular expression,... can appear directly in the source code. These are
|
10
|
+
# examples of literal values. One responsibility of a tokenizer/lexer is
|
11
|
+
# to convert the text representation into a corresponding value in a
|
12
|
+
# convenient format for the interpreter/compiler.
|
13
|
+
class Literal < Token
|
14
|
+
# @return [Object] The value expressed in one of the target datatype.
|
15
|
+
attr_reader(:value)
|
16
|
+
|
17
|
+
# Constructor.
|
18
|
+
# @param aValue [Object] value of the token in internal representation
|
19
|
+
# @param theLexeme [String] the lexeme (= piece of text from input)
|
20
|
+
# @param aTerminal [Syntax::Terminal, String]
|
21
|
+
# @param aPosition [Rley::Lexical::Position] line, column position of the token
|
22
|
+
def initialize(aValue, theLexeme, aTerminal, aPosition = nil)
|
23
|
+
super(theLexeme, aTerminal, aPosition)
|
24
|
+
@value = aValue
|
25
|
+
end
|
26
|
+
end # class
|
27
|
+
end # module
|
28
|
+
end # module
|
29
|
+
# End of file
|
data/lib/rley/lexical/token.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Rley # This module is used as a namespace
|
4
|
-
|
4
|
+
# This module hosts classes that a Rley parser expects
|
5
|
+
# as return values from a tokenizer / lexer.
|
6
|
+
module Lexical
|
5
7
|
# A Position is the location of a lexeme within a source file.
|
6
8
|
Position = Struct.new(:line, :column) do
|
7
9
|
def to_s
|
@@ -28,14 +30,15 @@ module Rley # This module is used as a namespace
|
|
28
30
|
# @return [String] The name of terminal symbol matching the lexeme.
|
29
31
|
attr_reader(:terminal)
|
30
32
|
|
31
|
-
# @return [Position] The position of the lexeme in the source file.
|
32
|
-
|
33
|
+
# @return [Position] The position -in "editor" coordinates- of the lexeme in the source file.
|
34
|
+
attr_accessor(:position)
|
33
35
|
|
34
36
|
# Constructor.
|
35
37
|
# @param theLexeme [String] the lexeme (= piece of text from input)
|
36
38
|
# @param aTerminal [Syntax::Terminal, String]
|
37
39
|
# The terminal symbol corresponding to the lexeme.
|
38
|
-
|
40
|
+
# @param aPosition [Rley::Lexical::Position] position of the token in source file
|
41
|
+
def initialize(theLexeme, aTerminal, aPosition = nil)
|
39
42
|
raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
|
40
43
|
|
41
44
|
@lexeme = theLexeme
|
data/lib/rley.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
require_relative './rley/constants'
|
8
8
|
require_relative './rley/interface'
|
9
|
-
require_relative './rley/lexical/
|
9
|
+
require_relative './rley/lexical/literal'
|
10
10
|
require_relative './rley/parser/gfg_earley_parser'
|
11
11
|
require_relative './rley/parse_rep/ast_base_builder'
|
12
12
|
require_relative './rley/parse_tree_visitor'
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../../spec_helper'
|
4
|
+
|
5
|
+
require_relative '../../../lib/rley/syntax/terminal'
|
6
|
+
|
7
|
+
# Load the class under test
|
8
|
+
require_relative '../../../lib/rley/lexical/literal'
|
9
|
+
|
10
|
+
module Rley # Open this namespace to avoid module qualifier prefixes
|
11
|
+
module Lexical # Open this namespace to avoid module qualifier prefixes
|
12
|
+
describe Literal do
|
13
|
+
let(:lexeme) { '12.34' }
|
14
|
+
let(:a_terminal) { Syntax::Terminal.new('NUMBER') }
|
15
|
+
let(:a_pos) { Position.new(3, 4) }
|
16
|
+
|
17
|
+
context 'Initialization:' do
|
18
|
+
# Default instantiation rule
|
19
|
+
subject { Literal.new(lexeme.to_f, lexeme, a_terminal, a_pos) }
|
20
|
+
|
21
|
+
it 'should be created with a value, lexeme, terminal and position' do
|
22
|
+
expect { Literal.new(lexeme.to_f, lexeme, a_terminal, a_pos) }.not_to raise_error
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'should know its value' do
|
26
|
+
expect(subject.value).to eq(lexeme.to_f)
|
27
|
+
end
|
28
|
+
end # context
|
29
|
+
end # describe
|
30
|
+
end # module
|
31
|
+
end # module
|
32
|
+
|
33
|
+
# End of file
|
@@ -13,12 +13,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
13
13
|
let(:lexeme) { '"some text"' }
|
14
14
|
let(:a_terminal) { Syntax::Terminal.new('if') }
|
15
15
|
let(:a_pos) { Position.new(3, 4) }
|
16
|
+
# Default instantiation rule
|
17
|
+
subject { Token.new(lexeme, a_terminal, a_pos) }
|
16
18
|
|
17
19
|
context 'Initialization:' do
|
18
|
-
|
19
|
-
|
20
|
+
it 'could be created with a lexeme and a terminal ' do
|
21
|
+
expect { Token.new(lexeme, a_terminal) }.not_to raise_error
|
22
|
+
end
|
20
23
|
|
21
|
-
it '
|
24
|
+
it 'could be created with a lexeme, a terminal and position' do
|
22
25
|
expect { Token.new(lexeme, a_terminal, a_pos) }.not_to raise_error
|
23
26
|
end
|
24
27
|
|
@@ -30,7 +33,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
30
33
|
expect(subject.terminal).to eq(a_terminal)
|
31
34
|
end
|
32
35
|
|
33
|
-
it 'should know its
|
36
|
+
it 'should know its position' do
|
37
|
+
new_pos = Position.new(5, 7)
|
38
|
+
subject.position = new_pos
|
39
|
+
expect(subject.position).to eq(new_pos)
|
40
|
+
end
|
41
|
+
end # context
|
42
|
+
|
43
|
+
context 'Initialization:' do
|
44
|
+
it 'should accept a new position' do
|
34
45
|
expect(subject.position).to eq(a_pos)
|
35
46
|
end
|
36
47
|
end # context
|
@@ -55,8 +55,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
55
55
|
|
56
56
|
it 'should accept already built terminals' do
|
57
57
|
a = Syntax::Terminal.new('a')
|
58
|
-
b = Syntax::
|
59
|
-
c = Syntax::
|
58
|
+
b = Syntax::Terminal.new('b')
|
59
|
+
c = Syntax::Terminal.new('c')
|
60
60
|
|
61
61
|
subject.add_terminals(a, b, c)
|
62
62
|
expect(subject.symbols.size).to eq(3)
|