rbscmlex 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/lib/rbscmlex/lexer.rb +141 -53
- data/lib/rbscmlex/token.rb +0 -2
- data/lib/rbscmlex/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2860b1b9f6d206dcfacf04fba88a5c64e9670c97d42423d5fac78615c3609d3a
|
4
|
+
data.tar.gz: c2ab02c9febc928efa76fa47aea965c27a53099b56ff55e180304988f9eea062
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a008f3c549e69bfa839d6788449704afb5b2491084eacde00ce1809769d4c65fffc4f22b0843c5b7522908891146f4ad4f227ef1ae67f1f07d0a33f97bbba3a
|
7
|
+
data.tar.gz: 68e14c34c40ea2b4715a20d9cac1f56c6860eeb6d9d3c8e2d31b6addf66fb529217e88b272ef1ccc252cb52645fb41dc3d77a8c6d0eb5af902b936af7b851e87
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
|
|
7
7
|
## [Unreleased]
|
8
8
|
- (nothing to record here)
|
9
9
|
|
10
|
+
## [0.1.3] - 2021-05-15
|
11
|
+
### Added
|
12
|
+
- Add `Lexer#skip_token(offset)`
|
13
|
+
|
14
|
+
### Changed
|
15
|
+
- Modify `Lexer#next_token` to accept an argument to specify the
|
16
|
+
offset from the current read position.
|
17
|
+
|
18
|
+
### Fixed
|
19
|
+
- Fix issue #4: Some "peculiar identifiers" are regarded as illegal.
|
20
|
+
|
10
21
|
## [0.1.2] - 2021-05-07
|
11
22
|
### Added
|
12
23
|
- Add a mechanism to initialize a Parser instance from an array of
|
data/lib/rbscmlex/lexer.rb
CHANGED
@@ -25,43 +25,6 @@ module Rbscmlex
|
|
25
25
|
|
26
26
|
end
|
27
27
|
|
28
|
-
# :stopdoc:
|
29
|
-
|
30
|
-
BOOLEAN = /\A#(f(alse)?|t(rue)?)\Z/
|
31
|
-
STRING = /\A\"[^\"]*\"\Z/
|
32
|
-
|
33
|
-
# idents
|
34
|
-
EXTENDED_CHARS = "!\\$%&\\*\\+\\-\\./:<=>\\?@\\^_~"
|
35
|
-
IDENT_PAT = "[a-zA-Z_][a-zA-Z0-9#{EXTENDED_CHARS}]*"
|
36
|
-
IDENTIFIER = Regexp.new("\\A#{IDENT_PAT}\\Z")
|
37
|
-
|
38
|
-
# operators
|
39
|
-
ARITHMETIC_OPS = /\A[+\-*\/%]\Z/
|
40
|
-
COMPARISON_OPS = /\A([<>]=?|=)\Z/
|
41
|
-
|
42
|
-
# numbers
|
43
|
-
REAL_PAT = "(([1-9][0-9]*)|0)(\.[0-9]+)?"
|
44
|
-
RAT_PAT = "#{REAL_PAT}\\/#{REAL_PAT}"
|
45
|
-
C_REAL_PAT = "(#{REAL_PAT}|#{RAT_PAT})"
|
46
|
-
C_IMAG_PAT = "#{C_REAL_PAT}"
|
47
|
-
COMP_PAT = "#{C_REAL_PAT}(\\+|\\-)#{C_IMAG_PAT}i"
|
48
|
-
|
49
|
-
REAL_NUM = Regexp.new("\\A[+-]?#{REAL_PAT}\\Z")
|
50
|
-
RATIONAL = Regexp.new("\\A[+-]?#{RAT_PAT}\\Z")
|
51
|
-
COMPLEX = Regexp.new("\\A[+-]?#{COMP_PAT}\\Z")
|
52
|
-
PURE_IMAG = Regexp.new("\\A[+-](#{C_IMAG_PAT})?i\\Z")
|
53
|
-
|
54
|
-
# char
|
55
|
-
SINGLE_CHAR_PAT = "."
|
56
|
-
SPACE_PAT = "space"
|
57
|
-
NEWLINE_PAT = "newline"
|
58
|
-
|
59
|
-
CHAR_PREFIX = "\#\\\\"
|
60
|
-
CHAR_PAT = "(#{SINGLE_CHAR_PAT}|#{SPACE_PAT}|#{NEWLINE_PAT})"
|
61
|
-
CHAR = Regexp.new("\\A#{CHAR_PREFIX}#{CHAR_PAT}\\Z")
|
62
|
-
|
63
|
-
# :startdoc:
|
64
|
-
|
65
28
|
include Enumerable
|
66
29
|
|
67
30
|
def initialize(obj, form: TOKEN_DEFAULT_FORM)
|
@@ -86,7 +49,8 @@ module Rbscmlex
|
|
86
49
|
end
|
87
50
|
|
88
51
|
def [](index)
|
89
|
-
|
52
|
+
token = @tokens[index]
|
53
|
+
token and convert(token)
|
90
54
|
end
|
91
55
|
|
92
56
|
def each(&blk)
|
@@ -112,16 +76,25 @@ module Rbscmlex
|
|
112
76
|
self[@current_pos]
|
113
77
|
end
|
114
78
|
|
115
|
-
def next_token
|
116
|
-
check_pos
|
117
|
-
|
118
|
-
@next_pos += 1
|
79
|
+
def next_token(offset = 0)
|
80
|
+
check_pos(offset)
|
81
|
+
skip_token(offset)
|
119
82
|
self[@current_pos]
|
120
83
|
end
|
121
84
|
|
122
|
-
def peek_token(
|
123
|
-
|
124
|
-
|
85
|
+
def peek_token(offset = 0)
|
86
|
+
# Since `peek_token` does not modify the position to read, raise
|
87
|
+
# StopIteration only if the next position truly exceeds the
|
88
|
+
# bound.
|
89
|
+
check_pos(0)
|
90
|
+
self[@next_pos + offset]
|
91
|
+
end
|
92
|
+
|
93
|
+
def skip_token(offset = 0)
|
94
|
+
check_pos(offset)
|
95
|
+
@current_pos = @next_pos + offset
|
96
|
+
@next_pos += (1 + offset)
|
97
|
+
nil
|
125
98
|
end
|
126
99
|
|
127
100
|
def rewind
|
@@ -192,11 +165,39 @@ module Rbscmlex
|
|
192
165
|
converter ? token.map(&converter) : tokens
|
193
166
|
end
|
194
167
|
|
195
|
-
def check_pos
|
196
|
-
raise StopIteration if @next_pos >= size
|
168
|
+
def check_pos(offset = 0)
|
169
|
+
raise StopIteration if (@next_pos + offset) >= size
|
197
170
|
end
|
198
171
|
|
199
|
-
|
172
|
+
# :stopdoc:

# Replacement table handed to String#gsub by tokenize: parentheses
# and the quote mark are padded with spaces.  "(" only gets a
# trailing space -- presumably so that "#(" stays in one piece;
# confirm against tokenize.
S2R_MAP = { "(" => "( ", ")" => " ) ", "'" => " ' " }

# Whole-literal patterns used to classify a literal in tokenize.
BOOLEAN = /\A#(f(alse)?|t(rue)?)\Z/
STRING = /\A\"[^\"]*\"\Z/

# numbers
#
# The *_PAT values are regexp source fragments held in double-quoted
# strings, so every regexp backslash must itself be escaped.  A lone
# "\." would be collapsed by Ruby to ".", which as a regexp matches
# any character and would let literals such as "1x5" pass as numbers.
REAL_PAT = "(([1-9][0-9]*)|0)(\\.[0-9]+)?"
RAT_PAT = "#{REAL_PAT}\\/#{REAL_PAT}"
C_REAL_PAT = "(#{REAL_PAT}|#{RAT_PAT})"
C_IMAG_PAT = "#{C_REAL_PAT}"
COMP_PAT = "#{C_REAL_PAT}(\\+|\\-)#{C_IMAG_PAT}i"

REAL_NUM = Regexp.new("\\A[+-]?#{REAL_PAT}\\Z")
RATIONAL = Regexp.new("\\A[+-]?#{RAT_PAT}\\Z")
COMPLEX = Regexp.new("\\A[+-]?#{COMP_PAT}\\Z")
PURE_IMAG = Regexp.new("\\A[+-](#{C_IMAG_PAT})?i\\Z")

# char
SINGLE_CHAR_PAT = "."
SPACE_PAT = "space"
NEWLINE_PAT = "newline"

# Regexp source for the two-character prefix "#\" of a character
# literal.
CHAR_PREFIX = "\#\\\\"
CHAR_PAT = "(#{SINGLE_CHAR_PAT}|#{SPACE_PAT}|#{NEWLINE_PAT})"
CHAR = Regexp.new("\\A#{CHAR_PREFIX}#{CHAR_PAT}\\Z")

# :startdoc:
|
200
201
|
|
201
202
|
def tokenize(src)
|
202
203
|
cooked = src.gsub(/[()']/, S2R_MAP)
|
@@ -213,24 +214,111 @@ module Rbscmlex
|
|
213
214
|
Rbscmlex.new_token(:quotation, literal)
|
214
215
|
when "#("
|
215
216
|
Rbscmlex.new_token(:vec_lparen, literal)
|
217
|
+
when "|" # not supported yet
|
218
|
+
Rbscmlex.new_token(:illegal, literal)
|
216
219
|
when BOOLEAN
|
217
220
|
Rbscmlex.new_token(:boolean, literal)
|
218
|
-
when IDENTIFIER
|
219
|
-
Rbscmlex.new_token(:identifier, literal)
|
220
221
|
when CHAR
|
221
222
|
Rbscmlex.new_token(:character, literal)
|
222
223
|
when STRING
|
223
224
|
Rbscmlex.new_token(:string, literal)
|
224
|
-
when ARITHMETIC_OPS, COMPARISON_OPS
|
225
|
-
Rbscmlex.new_token(:op_proc, literal)
|
226
225
|
when REAL_NUM, RATIONAL, COMPLEX, PURE_IMAG
|
227
226
|
Rbscmlex.new_token(:number, literal)
|
228
227
|
else
|
229
|
-
|
228
|
+
if Identifier.identifier?(literal)
|
229
|
+
Rbscmlex.new_token(:identifier, literal)
|
230
|
+
else
|
231
|
+
Rbscmlex.new_token(:illegal, literal)
|
232
|
+
end
|
230
233
|
end
|
231
234
|
}
|
232
235
|
end
|
233
236
|
|
237
|
+
# Holds predicates that check whether a literal is valid as an
# identifier defined in R7RS.
#
# Call identifier? as follows:
#
#   Identifier.identifier?(literal)
#
# It returns true if the literal is valid as an identifier.

module Identifier

  # Character-class fragments; each one is meant to be placed between
  # "[" and "]" when a Regexp is built from it.
  DIGIT = "0-9"
  LETTER = "a-zA-Z"
  SPECIAL_INITIAL = "!\\$%&\\*/:<=>\\?\\^_~"
  INITIAL = "#{LETTER}#{SPECIAL_INITIAL}"
  EXPLICIT_SIGN = "\\+\\-"
  SPECIAL_SUBSEQUENT = "#{EXPLICIT_SIGN}\\.@"
  SUBSEQUENT = "#{INITIAL}#{DIGIT}#{SPECIAL_SUBSEQUENT}"

  REGEXP_INITIAL = Regexp.new("[#{INITIAL}]")
  REGEXP_EXPLICIT_SIGN = Regexp.new("[#{EXPLICIT_SIGN}]")

  # Unanchored: matches as soon as the string contains one subsequent
  # character somewhere.  Kept unchanged for existing users of the
  # constant; the predicates below use the anchored form instead.
  REGEXP_SUBSEQUENT = Regexp.new("[#{SUBSEQUENT}]+")

  # Anchored: matches only when every character is a subsequent
  # character.
  REGEXP_ALL_SUBSEQUENT = Regexp.new("\\A[#{SUBSEQUENT}]+\\z")

  # Returns true if +literal+ is an ordinary identifier (an initial
  # character followed by subsequent characters) or a peculiar
  # identifier (one starting with "+", "-" or ".").  Returns false
  # otherwise, including for an empty literal.
  def self.identifier?(literal)
    size = literal.size
    case literal[0]
    when REGEXP_INITIAL
      size == 1 || subsequent?(literal[1..-1])
    when REGEXP_EXPLICIT_SIGN
      return true if size == 1
      return dot_identifier?(literal[1..-1]) if literal[1] == "."
      return false unless sign_subsequent?(literal[1])
      size == 2 || subsequent?(literal[2..-1])
    when "."
      dot_identifier?(literal)
    else
      false
    end
  end

  # Returns true if +sub_literal+ is non-empty and consists solely of
  # subsequent characters.  The match must cover the whole string;
  # otherwise a single valid character anywhere in the tail would
  # make literals such as "a#b" pass.
  def self.subsequent?(sub_literal)
    REGEXP_ALL_SUBSEQUENT.match?(sub_literal)
  end

  # Returns true if +sub_literal+ is exactly one character that may
  # follow an explicit sign: an initial character, another sign, or
  # "@".
  def self.sign_subsequent?(sub_literal)
    return false if sub_literal.size != 1
    case sub_literal
    when REGEXP_INITIAL, REGEXP_EXPLICIT_SIGN, "@"
      true
    else
      false
    end
  end

  # Returns true if +sub_literal+ starts with "." and the rest forms
  # a valid continuation: a dot-subsequent character followed by any
  # number of subsequent characters.
  #
  # NOTE(review): a lone "." is accepted here, although the R7RS
  # grammar requires a dot-subsequent character after the dot.  This
  # looks deliberate (the lexer has no dedicated dot token in view),
  # so the behavior is preserved -- confirm.
  def self.dot_identifier?(sub_literal)
    return false if sub_literal[0] != "."
    return true if sub_literal.size == 1
    return false unless dot_subsequent?(sub_literal[1])
    sub_literal.size == 2 || subsequent?(sub_literal[2..-1])
  end

  # Returns true if +sub_literal+ is a single character that may
  # follow a leading dot: another "." or any sign-subsequent
  # character.
  def self.dot_subsequent?(sub_literal)
    return true if sub_literal == "."
    sign_subsequent?(sub_literal)
  end

end
|
321
|
+
|
234
322
|
# :startdoc:
|
235
323
|
|
236
324
|
end
|
data/lib/rbscmlex/token.rb
CHANGED
data/lib/rbscmlex/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbscmlex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mnbi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-05-
|
11
|
+
date: 2021-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A simple lexical analyzer for Scheme
|
14
14
|
email:
|