rbscmlex 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec8d6513252c0196840bfc388ab2cb429d5d1cb579e5fb23d034e6509ab24bf5
4
- data.tar.gz: fd952eecb26646994dcfe9786fa5678b68f5ab04191009079f47acedaba1af3a
3
+ metadata.gz: 2860b1b9f6d206dcfacf04fba88a5c64e9670c97d42423d5fac78615c3609d3a
4
+ data.tar.gz: c2ab02c9febc928efa76fa47aea965c27a53099b56ff55e180304988f9eea062
5
5
  SHA512:
6
- metadata.gz: 6344300170f133448eb6f6b1646ffa0e617a900c3df87b00f9a48cd6c008da73e8b9161cac4afb4d752a37c3981cd7ad1fa324c48044d484c26cd836bcfe5cc9
7
- data.tar.gz: ecf7e267663faa64023c1f72ed82fac38b799d2fd370d7a6725e512780618ebbd70f121bf8031abb3738f9827dbd5e7c855ad700055b18dbbec2b79356d3f16b
6
+ metadata.gz: 8a008f3c549e69bfa839d6788449704afb5b2491084eacde00ce1809769d4c65fffc4f22b0843c5b7522908891146f4ad4f227ef1ae67f1f07d0a33f97bbba3a
7
+ data.tar.gz: 68e14c34c40ea2b4715a20d9cac1f56c6860eeb6d9d3c8e2d31b6addf66fb529217e88b272ef1ccc252cb52645fb41dc3d77a8c6d0eb5af902b936af7b851e87
data/CHANGELOG.md CHANGED
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
7
7
  ## [Unreleased]
8
8
  - (nothing to record here)
9
9
 
10
+ ## [0.1.3] - 2021-05-15
11
+ ### Added
12
+ - Add `Lexer#skip_token(offset)`
13
+
14
+ ### Changed
15
+ - Modify `Lexer#next_token` to accept an argument to specify the
16
+ offset from the read position.
17
+
18
+ ### Fixed
19
+ - Fix issue #4: Some "peculiar identifiers" are regarded as illegal.
20
+
10
21
  ## [0.1.2] - 2021-05-07
11
22
  ### Added
12
23
  - Add a mechanism to initialize a Parser instance from an array of
@@ -25,43 +25,6 @@ module Rbscmlex
25
25
 
26
26
  end
27
27
 
28
- # :stopdoc:
29
-
30
- BOOLEAN = /\A#(f(alse)?|t(rue)?)\Z/
31
- STRING = /\A\"[^\"]*\"\Z/
32
-
33
- # idents
34
- EXTENDED_CHARS = "!\\$%&\\*\\+\\-\\./:<=>\\?@\\^_~"
35
- IDENT_PAT = "[a-zA-Z_][a-zA-Z0-9#{EXTENDED_CHARS}]*"
36
- IDENTIFIER = Regexp.new("\\A#{IDENT_PAT}\\Z")
37
-
38
- # operators
39
- ARITHMETIC_OPS = /\A[+\-*\/%]\Z/
40
- COMPARISON_OPS = /\A([<>]=?|=)\Z/
41
-
42
- # numbers
43
- REAL_PAT = "(([1-9][0-9]*)|0)(\.[0-9]+)?"
44
- RAT_PAT = "#{REAL_PAT}\\/#{REAL_PAT}"
45
- C_REAL_PAT = "(#{REAL_PAT}|#{RAT_PAT})"
46
- C_IMAG_PAT = "#{C_REAL_PAT}"
47
- COMP_PAT = "#{C_REAL_PAT}(\\+|\\-)#{C_IMAG_PAT}i"
48
-
49
- REAL_NUM = Regexp.new("\\A[+-]?#{REAL_PAT}\\Z")
50
- RATIONAL = Regexp.new("\\A[+-]?#{RAT_PAT}\\Z")
51
- COMPLEX = Regexp.new("\\A[+-]?#{COMP_PAT}\\Z")
52
- PURE_IMAG = Regexp.new("\\A[+-](#{C_IMAG_PAT})?i\\Z")
53
-
54
- # char
55
- SINGLE_CHAR_PAT = "."
56
- SPACE_PAT = "space"
57
- NEWLINE_PAT = "newline"
58
-
59
- CHAR_PREFIX = "\#\\\\"
60
- CHAR_PAT = "(#{SINGLE_CHAR_PAT}|#{SPACE_PAT}|#{NEWLINE_PAT})"
61
- CHAR = Regexp.new("\\A#{CHAR_PREFIX}#{CHAR_PAT}\\Z")
62
-
63
- # :startdoc:
64
-
65
28
  include Enumerable
66
29
 
67
30
  def initialize(obj, form: TOKEN_DEFAULT_FORM)
@@ -86,7 +49,8 @@ module Rbscmlex
86
49
  end
87
50
 
88
51
  def [](index)
89
- convert(@tokens[index])
52
+ token = @tokens[index]
53
+ token and convert(token)
90
54
  end
91
55
 
92
56
  def each(&blk)
@@ -112,16 +76,25 @@ module Rbscmlex
112
76
  self[@current_pos]
113
77
  end
114
78
 
115
- def next_token
116
- check_pos
117
- @current_pos = @next_pos
118
- @next_pos += 1
79
+ def next_token(offset = 0)
80
+ check_pos(offset)
81
+ skip_token(offset)
119
82
  self[@current_pos]
120
83
  end
121
84
 
122
- def peek_token(num = 0)
123
- check_pos
124
- self[@next_pos + num]
85
+ def peek_token(offset = 0)
86
+ # Since `peek_token` does not modify the position to read, raise
87
+ # StopIteration only if the next position truly exceeds the
88
+ # bound.
89
+ check_pos(0)
90
+ self[@next_pos + offset]
91
+ end
92
+
93
+ def skip_token(offset = 0)
94
+ check_pos(offset)
95
+ @current_pos = @next_pos + offset
96
+ @next_pos += (1 + offset)
97
+ nil
125
98
  end
126
99
 
127
100
  def rewind
@@ -192,11 +165,39 @@ module Rbscmlex
192
165
  converter ? token.map(&converter) : tokens
193
166
  end
194
167
 
195
- def check_pos
196
- raise StopIteration if @next_pos >= size
168
+ def check_pos(offset = 0)
169
+ raise StopIteration if (@next_pos + offset) >= size
197
170
  end
198
171
 
199
- S2R_MAP = { "(" => "( ", ")" => " ) ", "'" => " ' " } # :nodoc:
172
+ # :stopdoc:
173
+
174
+ S2R_MAP = { "(" => "( ", ")" => " ) ", "'" => " ' " }
175
+
176
+ BOOLEAN = /\A#(f(alse)?|t(rue)?)\Z/
177
+ STRING = /\A\"[^\"]*\"\Z/
178
+
179
+ # numbers
180
+ REAL_PAT = "(([1-9][0-9]*)|0)(\.[0-9]+)?"
181
+ RAT_PAT = "#{REAL_PAT}\\/#{REAL_PAT}"
182
+ C_REAL_PAT = "(#{REAL_PAT}|#{RAT_PAT})"
183
+ C_IMAG_PAT = "#{C_REAL_PAT}"
184
+ COMP_PAT = "#{C_REAL_PAT}(\\+|\\-)#{C_IMAG_PAT}i"
185
+
186
+ REAL_NUM = Regexp.new("\\A[+-]?#{REAL_PAT}\\Z")
187
+ RATIONAL = Regexp.new("\\A[+-]?#{RAT_PAT}\\Z")
188
+ COMPLEX = Regexp.new("\\A[+-]?#{COMP_PAT}\\Z")
189
+ PURE_IMAG = Regexp.new("\\A[+-](#{C_IMAG_PAT})?i\\Z")
190
+
191
+ # char
192
+ SINGLE_CHAR_PAT = "."
193
+ SPACE_PAT = "space"
194
+ NEWLINE_PAT = "newline"
195
+
196
+ CHAR_PREFIX = "\#\\\\"
197
+ CHAR_PAT = "(#{SINGLE_CHAR_PAT}|#{SPACE_PAT}|#{NEWLINE_PAT})"
198
+ CHAR = Regexp.new("\\A#{CHAR_PREFIX}#{CHAR_PAT}\\Z")
199
+
200
+ # :startdoc:
200
201
 
201
202
  def tokenize(src)
202
203
  cooked = src.gsub(/[()']/, S2R_MAP)
@@ -213,24 +214,111 @@ module Rbscmlex
213
214
  Rbscmlex.new_token(:quotation, literal)
214
215
  when "#("
215
216
  Rbscmlex.new_token(:vec_lparen, literal)
217
+ when "|" # not supported yet
218
+ Rbscmlex.new_token(:illegal, literal)
216
219
  when BOOLEAN
217
220
  Rbscmlex.new_token(:boolean, literal)
218
- when IDENTIFIER
219
- Rbscmlex.new_token(:identifier, literal)
220
221
  when CHAR
221
222
  Rbscmlex.new_token(:character, literal)
222
223
  when STRING
223
224
  Rbscmlex.new_token(:string, literal)
224
- when ARITHMETIC_OPS, COMPARISON_OPS
225
- Rbscmlex.new_token(:op_proc, literal)
226
225
  when REAL_NUM, RATIONAL, COMPLEX, PURE_IMAG
227
226
  Rbscmlex.new_token(:number, literal)
228
227
  else
229
- Rbscmlex.new_token(:illegal, literal)
228
+ if Identifier.identifier?(literal)
229
+ Rbscmlex.new_token(:identifier, literal)
230
+ else
231
+ Rbscmlex.new_token(:illegal, literal)
232
+ end
230
233
  end
231
234
  }
232
235
  end
233
236
 
237
+ # Holds functions to check whether a literal is valid as an identifier
238
+ # defined in R7RS.
239
+ #
240
+ # Call identifier? function as follows:
241
+ #
242
+ # Identifier.identifier?(literal)
243
+ #
244
+ # It returns true if the literal is valid as an identifier.
245
+
246
+ module Identifier
247
+
248
+ DIGIT = "0-9"
249
+ LETTER = "a-zA-Z"
250
+ SPECIAL_INITIAL = "!\\$%&\\*/:<=>\\?\\^_~"
251
+ INITIAL = "#{LETTER}#{SPECIAL_INITIAL}"
252
+ EXPLICIT_SIGN = "\\+\\-"
253
+ SPECIAL_SUBSEQUENT = "#{EXPLICIT_SIGN}\\.@"
254
+ SUBSEQUENT = "#{INITIAL}#{DIGIT}#{SPECIAL_SUBSEQUENT}"
255
+
256
+ REGEXP_INITIAL = Regexp.new("[#{INITIAL}]")
257
+ REGEXP_EXPLICIT_SIGN = Regexp.new("[#{EXPLICIT_SIGN}]")
258
+ REGEXP_SUBSEQUENT = Regexp.new("[#{SUBSEQUENT}]+")
259
+
260
+ def self.identifier?(literal)
261
+ size = literal.size
262
+ c = literal[0]
263
+ case c
264
+ when REGEXP_INITIAL
265
+ return true if size == 1
266
+ subsequent?(literal[1..-1])
267
+ when REGEXP_EXPLICIT_SIGN
268
+ return true if size == 1
269
+ if literal[1] == "."
270
+ dot_identifier?(literal[1..-1])
271
+ else
272
+ if sign_subsequent?(literal[1])
273
+ return true if size == 2
274
+ subsequent?(literal[2..-1])
275
+ else
276
+ false
277
+ end
278
+ end
279
+ when "."
280
+ dot_identifier?(literal)
281
+ else
282
+ false
283
+ end
284
+ end
285
+
286
+ def self.subsequent?(sub_literal)
287
+ REGEXP_SUBSEQUENT === sub_literal
288
+ end
289
+
290
+ def self.sign_subsequent?(sub_literal)
291
+ return false if sub_literal.size != 1
292
+ case sub_literal[0]
293
+ when REGEXP_INITIAL
294
+ true
295
+ when REGEXP_EXPLICIT_SIGN
296
+ true
297
+ when "@"
298
+ true
299
+ else
300
+ false
301
+ end
302
+ end
303
+
304
+ def self.dot_identifier?(sub_literal)
305
+ return false if sub_literal[0] != "."
306
+ return true if sub_literal.size == 1
307
+ if dot_subsequent?(sub_literal[1])
308
+ return true if sub_literal.size == 2
309
+ subsequent?(sub_literal[2..-1])
310
+ else
311
+ false
312
+ end
313
+ end
314
+
315
+ def self.dot_subsequent?(sub_literal)
316
+ return true if sub_literal == "."
317
+ sign_subsequent?(sub_literal)
318
+ end
319
+
320
+ end
321
+
234
322
  # :startdoc:
235
323
 
236
324
  end
@@ -27,8 +27,6 @@ module Rbscmlex
27
27
  :number, # `123`, `456.789`, `1/2`, `3+4i`
28
28
  :character, # `#\a`
29
29
  :string, # `"hoge"`
30
- # operators
31
- :op_proc, # `+`, `-`, ...
32
30
  # control
33
31
  :illegal,
34
32
  ]
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rbscmlex
4
- VERSION = "0.1.2"
5
- RELEASE = "2021-05-07"
4
+ VERSION = "0.1.3"
5
+ RELEASE = "2021-05-15"
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbscmlex
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - mnbi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-07 00:00:00.000000000 Z
11
+ date: 2021-05-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A simple lexical analyzer for Scheme
14
14
  email: