loxxy 0.4.07 → 0.4.08
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/lib/loxxy/front_end/scanner.rb +83 -78
- data/lib/loxxy/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e7e447625d999f4b95129d7feff1586c6759cb929a8faaa82e3ecc94105d0268
|
4
|
+
data.tar.gz: 00a4801454ffd09b2d4e810fd15d0f9b40fea13f46e6d4eed7eba4cab8d35ebe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a59f746cb7b8086ff3e1252c3fc0846229cb6927dcc4d62ad261c5e0c2fec4b1a75176cab6788cedb5c57a2fe9e7bec94d92d7dc63a2c455609a18761f78043
|
7
|
+
data.tar.gz: 66f72710a3f422e4431c88c44b56419f727711c1fbf339491f1fcb2f8f18edb0f4a21ce49a66ee4325ccf3b2576b0b94490d74f96c922a9d340d476e43660881
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,10 @@
|
|
1
|
-
## [0.4.07] - 2021-11-21
|
1
|
+
## [0.4.08] - 2022-04-09
|
2
|
+
- Refactoring of the `Scanner` class.
|
3
|
+
|
4
|
+
### Changed
|
5
|
+
- `Frontend::Scanner` class: major code refactoring.
|
6
|
+
|
7
|
+
## [0.4.07] - 2021-11-21
|
2
8
|
- Minor fixes; dependency towards Rubies 3+ allowed...
|
3
9
|
|
4
10
|
### Fixed
|
@@ -20,6 +20,15 @@ module Loxxy
|
|
20
20
|
# Delimiters: e.g. parentheses '(', ')'
|
21
21
|
# Separators: e.g. comma
|
22
22
|
class Scanner
|
23
|
+
PATT_BLOCK_COMMENT_BEGIN = /\/\*/.freeze
|
24
|
+
PATT_BLOCK_COMMENT_END = /\*\//.freeze
|
25
|
+
PATT_COMPARISON = /[!=><]=?/.freeze
|
26
|
+
PATT_IDENTIFIER = /[a-zA-Z_][a-zA-Z_0-9]*/.freeze
|
27
|
+
PATT_LINE_COMMENT = /\/\/[^\r\n]*/.freeze
|
28
|
+
PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
|
29
|
+
PATT_NUMBER = /\d+(?:\.\d+)?/.freeze
|
30
|
+
PATT_WHITESPACE = /[ \t\f]+/.freeze
|
31
|
+
|
23
32
|
# @return [StringScanner] Low-level input scanner
|
24
33
|
attr_reader(:scanner)
|
25
34
|
|
@@ -31,7 +40,7 @@ module Loxxy
|
|
31
40
|
|
32
41
|
# One or two special character tokens.
|
33
42
|
# These are enumerated in section 4.2.1 Token type
|
34
|
-
|
43
|
+
Lexeme2name = {
|
35
44
|
'(' => 'LEFT_PAREN',
|
36
45
|
')' => 'RIGHT_PAREN',
|
37
46
|
'{' => 'LEFT_BRACE',
|
@@ -77,16 +86,16 @@ module Loxxy
|
|
77
86
|
# Constructor. Initialize a tokenizer for Lox input.
|
78
87
|
# @param source [String] Lox text to tokenize.
|
79
88
|
def initialize(source = nil)
|
80
|
-
|
81
|
-
|
89
|
+
reset
|
90
|
+
input = source || ''
|
91
|
+
@scanner = StringScanner.new(input)
|
82
92
|
end
|
83
93
|
|
84
94
|
# Reset the tokenizer and make the given text, the current input.
|
85
95
|
# @param source [String] Lox text to tokenize.
|
86
96
|
def start_with(source)
|
97
|
+
reset
|
87
98
|
@scanner.string = source
|
88
|
-
@lineno = 1
|
89
|
-
@line_start = 0
|
90
99
|
end
|
91
100
|
|
92
101
|
# Scan the source and return an array of tokens.
|
@@ -99,39 +108,79 @@ module Loxxy
|
|
99
108
|
end
|
100
109
|
tok_sequence << build_token('EOF', nil)
|
101
110
|
|
102
|
-
|
111
|
+
tok_sequence
|
103
112
|
end
|
104
113
|
|
105
114
|
private
|
106
115
|
|
107
|
-
def
|
108
|
-
|
109
|
-
|
110
|
-
|
116
|
+
def reset
|
117
|
+
@state = :default
|
118
|
+
@lineno = 1
|
119
|
+
@line_start = 0
|
120
|
+
end
|
111
121
|
|
122
|
+
def _next_token
|
123
|
+
nesting_level = 0
|
112
124
|
token = nil
|
113
125
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
elsif scanner.scan(/"/) # Start of string detected...
|
121
|
-
token = build_string_token
|
122
|
-
elsif (lexeme = scanner.scan(/\d+(?:\.\d+)?/))
|
123
|
-
token = build_token('NUMBER', lexeme)
|
124
|
-
elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*/))
|
125
|
-
keyw = @@keywords[lexeme]
|
126
|
-
tok_type = keyw ? keyw.upcase : 'IDENTIFIER'
|
127
|
-
token = build_token(tok_type, lexeme)
|
128
|
-
else # Unknown token
|
129
|
-
col = scanner.pos - @line_start + 1
|
130
|
-
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
131
|
-
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
132
|
-
end
|
126
|
+
# Loop until end of input reached or token found
|
127
|
+
until token || scanner.eos?
|
128
|
+
if scanner.skip(PATT_NEWLINE)
|
129
|
+
next_line_scanned
|
130
|
+
next
|
131
|
+
end
|
133
132
|
|
134
|
-
|
133
|
+
case @state
|
134
|
+
when :default
|
135
|
+
next if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
|
136
|
+
|
137
|
+
curr_ch = scanner.peek(1)
|
138
|
+
|
139
|
+
token = if scanner.skip(PATT_LINE_COMMENT)
|
140
|
+
next
|
141
|
+
elsif scanner.skip(PATT_BLOCK_COMMENT_BEGIN)
|
142
|
+
@state = :in_block_comment
|
143
|
+
nesting_level = 1
|
144
|
+
next
|
145
|
+
elsif '(){},.;+-/*'.include? curr_ch
|
146
|
+
# Single delimiter or separator character
|
147
|
+
build_token(Lexeme2name[curr_ch], scanner.getch)
|
148
|
+
elsif (lexeme = scanner.scan(PATT_COMPARISON))
|
149
|
+
# One or two special character tokens
|
150
|
+
build_token(Lexeme2name[lexeme], lexeme)
|
151
|
+
elsif scanner.scan(/"/) # Start of string detected...
|
152
|
+
build_string_token
|
153
|
+
elsif (lexeme = scanner.scan(PATT_NUMBER))
|
154
|
+
build_token('NUMBER', lexeme)
|
155
|
+
elsif (lexeme = scanner.scan(PATT_IDENTIFIER))
|
156
|
+
keyw = @@keywords[lexeme]
|
157
|
+
tok_type = keyw ? keyw.upcase : 'IDENTIFIER'
|
158
|
+
build_token(tok_type, lexeme)
|
159
|
+
else # Unknown token
|
160
|
+
col = scanner.pos - @line_start + 1
|
161
|
+
_erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
|
162
|
+
raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
|
163
|
+
end
|
164
|
+
|
165
|
+
when :in_block_comment
|
166
|
+
comment_part = scanner.scan_until(/(?:\/\*)|(?:\*\/)|(?:(?:\r\n)|\r|\n)/)
|
167
|
+
unterminated_comment unless comment_part
|
168
|
+
|
169
|
+
case scanner.matched
|
170
|
+
when PATT_NEWLINE
|
171
|
+
next_line_scanned
|
172
|
+
when PATT_BLOCK_COMMENT_END
|
173
|
+
nesting_level -= 1
|
174
|
+
@state = :default if nesting_level.zero?
|
175
|
+
when PATT_BLOCK_COMMENT_BEGIN
|
176
|
+
nesting_level += 1
|
177
|
+
end
|
178
|
+
next
|
179
|
+
end # case
|
180
|
+
end # until
|
181
|
+
|
182
|
+
unterminated_comment unless nesting_level.zero?
|
183
|
+
token
|
135
184
|
end
|
136
185
|
|
137
186
|
def build_token(aSymbolName, aLexeme)
|
@@ -214,56 +263,12 @@ module Loxxy
|
|
214
263
|
Rley::Lexical::Literal.new(lox_string, lexeme, 'STRING', pos)
|
215
264
|
end
|
216
265
|
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
loop do
|
221
|
-
ws_found = scanner.skip(/[ \t\f]+/) ? true : false
|
222
|
-
nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
|
223
|
-
if nl_found
|
224
|
-
ws_found = true
|
225
|
-
next_line
|
226
|
-
end
|
227
|
-
cmt_found = false
|
228
|
-
if scanner.scan(/\/(\/|\*)/)
|
229
|
-
cmt_found = true
|
230
|
-
case scanner.matched
|
231
|
-
when '//'
|
232
|
-
scanner.skip(/[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
|
233
|
-
next_line
|
234
|
-
when '/*'
|
235
|
-
skip_block_comment
|
236
|
-
next
|
237
|
-
end
|
238
|
-
end
|
239
|
-
break unless ws_found || cmt_found
|
240
|
-
end
|
241
|
-
|
242
|
-
scanner.pos
|
243
|
-
end
|
244
|
-
|
245
|
-
def skip_block_comment
|
246
|
-
nesting_level = 1
|
247
|
-
loop do
|
248
|
-
comment_part = scanner.scan_until(/(?:\/\*)|(?:\*\/)|(?:(?:\r\n)|\r|\n)/)
|
249
|
-
unless comment_part
|
250
|
-
msg = "Unterminated '/* ... */' block comment on line #{lineno}"
|
251
|
-
raise ScanError, msg
|
252
|
-
end
|
253
|
-
|
254
|
-
case scanner.matched
|
255
|
-
when /(?:(?:\r\n)|\r|\n)/
|
256
|
-
next_line
|
257
|
-
when '*/'
|
258
|
-
nesting_level -= 1
|
259
|
-
break if nesting_level.zero?
|
260
|
-
when '/*'
|
261
|
-
nesting_level += 1
|
262
|
-
end
|
263
|
-
end
|
266
|
+
def unterminated_comment
|
267
|
+
msg = "Unterminated '/* ... */' block comment on line #{lineno}"
|
268
|
+
raise ScanError, msg
|
264
269
|
end
|
265
270
|
|
266
|
-
def next_line
|
271
|
+
def next_line_scanned
|
267
272
|
@lineno += 1
|
268
273
|
@line_start = scanner.pos
|
269
274
|
end
|
data/lib/loxxy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loxxy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.07
|
4
|
+
version: 0.4.08
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|