loxxy 0.4.07 → 0.4.08

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7cd3e438270c62a4863cddfd1b737182993ea205d84985e8862a25c74bb2e8c4
4
- data.tar.gz: f6bd428ebc505e238392cbb04e5fb1207eb34752f974ae8f81a23798e4ef51b5
3
+ metadata.gz: e7e447625d999f4b95129d7feff1586c6759cb929a8faaa82e3ecc94105d0268
4
+ data.tar.gz: 00a4801454ffd09b2d4e810fd15d0f9b40fea13f46e6d4eed7eba4cab8d35ebe
5
5
  SHA512:
6
- metadata.gz: 77ac1d09cb77b8afab98c21bb2f8544fd2961473efd1acaae3134a2aba0ed7138270aa86f384fe2ce1f76d865c8cd049dc3b8379b954c70992cc53b068244593
7
- data.tar.gz: 9fa0e0c3c3d96600bc4e8cd0447383a287be6ff483fb2a843056c87175239bc5fd319e0f11cfe8ab496652a773c91d91f8c05176275b07873f08ffad0de7a1f4
6
+ metadata.gz: 4a59f746cb7b8086ff3e1252c3fc0846229cb6927dcc4d62ad261c5e0c2fec4b1a75176cab6788cedb5c57a2fe9e7bec94d92d7dc63a2c455609a18761f78043
7
+ data.tar.gz: 66f72710a3f422e4431c88c44b56419f727711c1fbf339491f1fcb2f8f18edb0f4a21ce49a66ee4325ccf3b2576b0b94490d74f96c922a9d340d476e43660881
data/CHANGELOG.md CHANGED
@@ -1,4 +1,10 @@
1
- ## [0.4.07] - 2022-11-21
1
+ ## [0.4.08] - 2022-04-09
2
+ - Refactoring of the `Scanner` class.
3
+
4
+ ### Changed
5
+ - `Frontend::Scanner` class: major code refactoring.
6
+
7
+ ## [0.4.07] - 2021-11-21
2
8
  - Minor fixes; dependency towards Rubies 3+ allowed...
3
9
 
4
10
  ### Fixed
@@ -20,6 +20,15 @@ module Loxxy
20
20
  # Delimiters: e.g. parentheses '(', ')'
21
21
  # Separators: e.g. comma
22
22
  class Scanner
23
+ PATT_BLOCK_COMMENT_BEGIN = /\/\*/.freeze
24
+ PATT_BLOCK_COMMENT_END = /\*\//.freeze
25
+ PATT_COMPARISON = /[!=><]=?/.freeze
26
+ PATT_IDENTIFIER = /[a-zA-Z_][a-zA-Z_0-9]*/.freeze
27
+ PATT_LINE_COMMENT = /\/\/[^\r\n]*/.freeze
28
+ PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
29
+ PATT_NUMBER = /\d+(?:\.\d+)?/.freeze
30
+ PATT_WHITESPACE = /[ \t\f]+/.freeze
31
+
23
32
  # @return [StringScanner] Low-level input scanner
24
33
  attr_reader(:scanner)
25
34
 
@@ -31,7 +40,7 @@ module Loxxy
31
40
 
32
41
  # One or two special character tokens.
33
42
  # These are enumerated in section 4.2.1 Token type
34
- @@lexeme2name = {
43
+ Lexeme2name = {
35
44
  '(' => 'LEFT_PAREN',
36
45
  ')' => 'RIGHT_PAREN',
37
46
  '{' => 'LEFT_BRACE',
@@ -77,16 +86,16 @@ module Loxxy
77
86
  # Constructor. Initialize a tokenizer for Lox input.
78
87
  # @param source [String] Lox text to tokenize.
79
88
  def initialize(source = nil)
80
- @scanner = StringScanner.new('')
81
- start_with(source) if source
89
+ reset
90
+ input = source || ''
91
+ @scanner = StringScanner.new(input)
82
92
  end
83
93
 
84
94
  # Reset the tokenizer and make the given text the current input.
85
95
  # @param source [String] Lox text to tokenize.
86
96
  def start_with(source)
97
+ reset
87
98
  @scanner.string = source
88
- @lineno = 1
89
- @line_start = 0
90
99
  end
91
100
 
92
101
  # Scan the source and return an array of tokens.
@@ -99,39 +108,79 @@ module Loxxy
99
108
  end
100
109
  tok_sequence << build_token('EOF', nil)
101
110
 
102
- return tok_sequence
111
+ tok_sequence
103
112
  end
104
113
 
105
114
  private
106
115
 
107
- def _next_token
108
- skip_intertoken_spaces
109
- curr_ch = scanner.peek(1)
110
- return nil if curr_ch.nil? || curr_ch.empty?
116
+ def reset
117
+ @state = :default
118
+ @lineno = 1
119
+ @line_start = 0
120
+ end
111
121
 
122
+ def _next_token
123
+ nesting_level = 0
112
124
  token = nil
113
125
 
114
- if '(){},.;+-/*'.include? curr_ch
115
- # Single delimiter or separator character
116
- token = build_token(@@lexeme2name[curr_ch], scanner.getch)
117
- elsif (lexeme = scanner.scan(/[!=><]=?/))
118
- # One or two special character tokens
119
- token = build_token(@@lexeme2name[lexeme], lexeme)
120
- elsif scanner.scan(/"/) # Start of string detected...
121
- token = build_string_token
122
- elsif (lexeme = scanner.scan(/\d+(?:\.\d+)?/))
123
- token = build_token('NUMBER', lexeme)
124
- elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*/))
125
- keyw = @@keywords[lexeme]
126
- tok_type = keyw ? keyw.upcase : 'IDENTIFIER'
127
- token = build_token(tok_type, lexeme)
128
- else # Unknown token
129
- col = scanner.pos - @line_start + 1
130
- _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
131
- raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
132
- end
126
+ # Loop until end of input reached or token found
127
+ until token || scanner.eos?
128
+ if scanner.skip(PATT_NEWLINE)
129
+ next_line_scanned
130
+ next
131
+ end
133
132
 
134
- return token
133
+ case @state
134
+ when :default
135
+ next if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
136
+
137
+ curr_ch = scanner.peek(1)
138
+
139
+ token = if scanner.skip(PATT_LINE_COMMENT)
140
+ next
141
+ elsif scanner.skip(PATT_BLOCK_COMMENT_BEGIN)
142
+ @state = :in_block_comment
143
+ nesting_level = 1
144
+ next
145
+ elsif '(){},.;+-/*'.include? curr_ch
146
+ # Single delimiter or separator character
147
+ build_token(Lexeme2name[curr_ch], scanner.getch)
148
+ elsif (lexeme = scanner.scan(PATT_COMPARISON))
149
+ # One or two special character tokens
150
+ build_token(Lexeme2name[lexeme], lexeme)
151
+ elsif scanner.scan(/"/) # Start of string detected...
152
+ build_string_token
153
+ elsif (lexeme = scanner.scan(PATT_NUMBER))
154
+ build_token('NUMBER', lexeme)
155
+ elsif (lexeme = scanner.scan(PATT_IDENTIFIER))
156
+ keyw = @@keywords[lexeme]
157
+ tok_type = keyw ? keyw.upcase : 'IDENTIFIER'
158
+ build_token(tok_type, lexeme)
159
+ else # Unknown token
160
+ col = scanner.pos - @line_start + 1
161
+ _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
162
+ raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
163
+ end
164
+
165
+ when :in_block_comment
166
+ comment_part = scanner.scan_until(/(?:\/\*)|(?:\*\/)|(?:(?:\r\n)|\r|\n)/)
167
+ unterminated_comment unless comment_part
168
+
169
+ case scanner.matched
170
+ when PATT_NEWLINE
171
+ next_line_scanned
172
+ when PATT_BLOCK_COMMENT_END
173
+ nesting_level -= 1
174
+ @state = :default if nesting_level.zero?
175
+ when PATT_BLOCK_COMMENT_BEGIN
176
+ nesting_level += 1
177
+ end
178
+ next
179
+ end # case
180
+ end # until
181
+
182
+ unterminated_comment unless nesting_level.zero?
183
+ token
135
184
  end
136
185
 
137
186
  def build_token(aSymbolName, aLexeme)
@@ -214,56 +263,12 @@ module Loxxy
214
263
  Rley::Lexical::Literal.new(lox_string, lexeme, 'STRING', pos)
215
264
  end
216
265
 
217
- # Skip non-significant whitespaces and comments.
218
- # Advance the scanner until something significant is found.
219
- def skip_intertoken_spaces
220
- loop do
221
- ws_found = scanner.skip(/[ \t\f]+/) ? true : false
222
- nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
223
- if nl_found
224
- ws_found = true
225
- next_line
226
- end
227
- cmt_found = false
228
- if scanner.scan(/\/(\/|\*)/)
229
- cmt_found = true
230
- case scanner.matched
231
- when '//'
232
- scanner.skip(/[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
233
- next_line
234
- when '/*'
235
- skip_block_comment
236
- next
237
- end
238
- end
239
- break unless ws_found || cmt_found
240
- end
241
-
242
- scanner.pos
243
- end
244
-
245
- def skip_block_comment
246
- nesting_level = 1
247
- loop do
248
- comment_part = scanner.scan_until(/(?:\/\*)|(?:\*\/)|(?:(?:\r\n)|\r|\n)/)
249
- unless comment_part
250
- msg = "Unterminated '/* ... */' block comment on line #{lineno}"
251
- raise ScanError, msg
252
- end
253
-
254
- case scanner.matched
255
- when /(?:(?:\r\n)|\r|\n)/
256
- next_line
257
- when '*/'
258
- nesting_level -= 1
259
- break if nesting_level.zero?
260
- when '/*'
261
- nesting_level += 1
262
- end
263
- end
266
+ def unterminated_comment
267
+ msg = "Unterminated '/* ... */' block comment on line #{lineno}"
268
+ raise ScanError, msg
264
269
  end
265
270
 
266
- def next_line
271
+ def next_line_scanned
267
272
  @lineno += 1
268
273
  @line_start = scanner.pos
269
274
  end
data/lib/loxxy/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Loxxy
4
- VERSION = '0.4.07'
4
+ VERSION = '0.4.08'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loxxy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.07
4
+ version: 0.4.08
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-21 00:00:00.000000000 Z
11
+ date: 2022-04-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rley