loxxy 0.4.07 → 0.4.08

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7cd3e438270c62a4863cddfd1b737182993ea205d84985e8862a25c74bb2e8c4
4
- data.tar.gz: f6bd428ebc505e238392cbb04e5fb1207eb34752f974ae8f81a23798e4ef51b5
3
+ metadata.gz: e7e447625d999f4b95129d7feff1586c6759cb929a8faaa82e3ecc94105d0268
4
+ data.tar.gz: 00a4801454ffd09b2d4e810fd15d0f9b40fea13f46e6d4eed7eba4cab8d35ebe
5
5
  SHA512:
6
- metadata.gz: 77ac1d09cb77b8afab98c21bb2f8544fd2961473efd1acaae3134a2aba0ed7138270aa86f384fe2ce1f76d865c8cd049dc3b8379b954c70992cc53b068244593
7
- data.tar.gz: 9fa0e0c3c3d96600bc4e8cd0447383a287be6ff483fb2a843056c87175239bc5fd319e0f11cfe8ab496652a773c91d91f8c05176275b07873f08ffad0de7a1f4
6
+ metadata.gz: 4a59f746cb7b8086ff3e1252c3fc0846229cb6927dcc4d62ad261c5e0c2fec4b1a75176cab6788cedb5c57a2fe9e7bec94d92d7dc63a2c455609a18761f78043
7
+ data.tar.gz: 66f72710a3f422e4431c88c44b56419f727711c1fbf339491f1fcb2f8f18edb0f4a21ce49a66ee4325ccf3b2576b0b94490d74f96c922a9d340d476e43660881
data/CHANGELOG.md CHANGED
@@ -1,4 +1,10 @@
1
- ## [0.4.07] - 2022-11-21
1
+ ## [0.4.08] - 2022-04-09
2
+ - Refactoring of the `Scanner` class.
3
+
4
+ ### Changed
5
+ - `Frontend::Scanner` class: major code refactoring.
6
+
7
+ ## [0.4.07] - 2021-11-21
2
8
  - Minor fixes; dependency towards Rubies 3+ allowed...
3
9
 
4
10
  ### Fixed
@@ -20,6 +20,15 @@ module Loxxy
20
20
  # Delimiters: e.g. parentheses '(', ')'
21
21
  # Separators: e.g. comma
22
22
  class Scanner
23
+ PATT_BLOCK_COMMENT_BEGIN = /\/\*/.freeze
24
+ PATT_BLOCK_COMMENT_END = /\*\//.freeze
25
+ PATT_COMPARISON = /[!=><]=?/.freeze
26
+ PATT_IDENTIFIER = /[a-zA-Z_][a-zA-Z_0-9]*/.freeze
27
+ PATT_LINE_COMMENT = /\/\/[^\r\n]*/.freeze
28
+ PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
29
+ PATT_NUMBER = /\d+(?:\.\d+)?/.freeze
30
+ PATT_WHITESPACE = /[ \t\f]+/.freeze
31
+
23
32
  # @return [StringScanner] Low-level input scanner
24
33
  attr_reader(:scanner)
25
34
 
@@ -31,7 +40,7 @@ module Loxxy
31
40
 
32
41
  # One or two special character tokens.
33
42
  # These are enumerated in section 4.2.1 Token type
34
- @@lexeme2name = {
43
+ Lexeme2name = {
35
44
  '(' => 'LEFT_PAREN',
36
45
  ')' => 'RIGHT_PAREN',
37
46
  '{' => 'LEFT_BRACE',
@@ -77,16 +86,16 @@ module Loxxy
77
86
  # Constructor. Initialize a tokenizer for Lox input.
78
87
  # @param source [String] Lox text to tokenize.
79
88
  def initialize(source = nil)
80
- @scanner = StringScanner.new('')
81
- start_with(source) if source
89
+ reset
90
+ input = source || ''
91
+ @scanner = StringScanner.new(input)
82
92
  end
83
93
 
84
94
  # Reset the tokenizer and make the given text, the current input.
85
95
  # @param source [String] Lox text to tokenize.
86
96
  def start_with(source)
97
+ reset
87
98
  @scanner.string = source
88
- @lineno = 1
89
- @line_start = 0
90
99
  end
91
100
 
92
101
  # Scan the source and return an array of tokens.
@@ -99,39 +108,79 @@ module Loxxy
99
108
  end
100
109
  tok_sequence << build_token('EOF', nil)
101
110
 
102
- return tok_sequence
111
+ tok_sequence
103
112
  end
104
113
 
105
114
  private
106
115
 
107
- def _next_token
108
- skip_intertoken_spaces
109
- curr_ch = scanner.peek(1)
110
- return nil if curr_ch.nil? || curr_ch.empty?
116
+ def reset
117
+ @state = :default
118
+ @lineno = 1
119
+ @line_start = 0
120
+ end
111
121
 
122
+ def _next_token
123
+ nesting_level = 0
112
124
  token = nil
113
125
 
114
- if '(){},.;+-/*'.include? curr_ch
115
- # Single delimiter or separator character
116
- token = build_token(@@lexeme2name[curr_ch], scanner.getch)
117
- elsif (lexeme = scanner.scan(/[!=><]=?/))
118
- # One or two special character tokens
119
- token = build_token(@@lexeme2name[lexeme], lexeme)
120
- elsif scanner.scan(/"/) # Start of string detected...
121
- token = build_string_token
122
- elsif (lexeme = scanner.scan(/\d+(?:\.\d+)?/))
123
- token = build_token('NUMBER', lexeme)
124
- elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*/))
125
- keyw = @@keywords[lexeme]
126
- tok_type = keyw ? keyw.upcase : 'IDENTIFIER'
127
- token = build_token(tok_type, lexeme)
128
- else # Unknown token
129
- col = scanner.pos - @line_start + 1
130
- _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
131
- raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
132
- end
126
+ # Loop until end of input reached or token found
127
+ until token || scanner.eos?
128
+ if scanner.skip(PATT_NEWLINE)
129
+ next_line_scanned
130
+ next
131
+ end
133
132
 
134
- return token
133
+ case @state
134
+ when :default
135
+ next if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
136
+
137
+ curr_ch = scanner.peek(1)
138
+
139
+ token = if scanner.skip(PATT_LINE_COMMENT)
140
+ next
141
+ elsif scanner.skip(PATT_BLOCK_COMMENT_BEGIN)
142
+ @state = :in_block_comment
143
+ nesting_level = 1
144
+ next
145
+ elsif '(){},.;+-/*'.include? curr_ch
146
+ # Single delimiter or separator character
147
+ build_token(Lexeme2name[curr_ch], scanner.getch)
148
+ elsif (lexeme = scanner.scan(PATT_COMPARISON))
149
+ # One or two special character tokens
150
+ build_token(Lexeme2name[lexeme], lexeme)
151
+ elsif scanner.scan(/"/) # Start of string detected...
152
+ build_string_token
153
+ elsif (lexeme = scanner.scan(PATT_NUMBER))
154
+ build_token('NUMBER', lexeme)
155
+ elsif (lexeme = scanner.scan(PATT_IDENTIFIER))
156
+ keyw = @@keywords[lexeme]
157
+ tok_type = keyw ? keyw.upcase : 'IDENTIFIER'
158
+ build_token(tok_type, lexeme)
159
+ else # Unknown token
160
+ col = scanner.pos - @line_start + 1
161
+ _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
162
+ raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
163
+ end
164
+
165
+ when :in_block_comment
166
+ comment_part = scanner.scan_until(/(?:\/\*)|(?:\*\/)|(?:(?:\r\n)|\r|\n)/)
167
+ unterminated_comment unless comment_part
168
+
169
+ case scanner.matched
170
+ when PATT_NEWLINE
171
+ next_line_scanned
172
+ when PATT_BLOCK_COMMENT_END
173
+ nesting_level -= 1
174
+ @state = :default if nesting_level.zero?
175
+ when PATT_BLOCK_COMMENT_BEGIN
176
+ nesting_level += 1
177
+ end
178
+ next
179
+ end # case
180
+ end # until
181
+
182
+ unterminated_comment unless nesting_level.zero?
183
+ token
135
184
  end
136
185
 
137
186
  def build_token(aSymbolName, aLexeme)
@@ -214,56 +263,12 @@ module Loxxy
214
263
  Rley::Lexical::Literal.new(lox_string, lexeme, 'STRING', pos)
215
264
  end
216
265
 
217
- # Skip non-significant whitespaces and comments.
218
- # Advance the scanner until something significant is found.
219
- def skip_intertoken_spaces
220
- loop do
221
- ws_found = scanner.skip(/[ \t\f]+/) ? true : false
222
- nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
223
- if nl_found
224
- ws_found = true
225
- next_line
226
- end
227
- cmt_found = false
228
- if scanner.scan(/\/(\/|\*)/)
229
- cmt_found = true
230
- case scanner.matched
231
- when '//'
232
- scanner.skip(/[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
233
- next_line
234
- when '/*'
235
- skip_block_comment
236
- next
237
- end
238
- end
239
- break unless ws_found || cmt_found
240
- end
241
-
242
- scanner.pos
243
- end
244
-
245
- def skip_block_comment
246
- nesting_level = 1
247
- loop do
248
- comment_part = scanner.scan_until(/(?:\/\*)|(?:\*\/)|(?:(?:\r\n)|\r|\n)/)
249
- unless comment_part
250
- msg = "Unterminated '/* ... */' block comment on line #{lineno}"
251
- raise ScanError, msg
252
- end
253
-
254
- case scanner.matched
255
- when /(?:(?:\r\n)|\r|\n)/
256
- next_line
257
- when '*/'
258
- nesting_level -= 1
259
- break if nesting_level.zero?
260
- when '/*'
261
- nesting_level += 1
262
- end
263
- end
266
+ def unterminated_comment
267
+ msg = "Unterminated '/* ... */' block comment on line #{lineno}"
268
+ raise ScanError, msg
264
269
  end
265
270
 
266
- def next_line
271
+ def next_line_scanned
267
272
  @lineno += 1
268
273
  @line_start = scanner.pos
269
274
  end
data/lib/loxxy/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Loxxy
4
- VERSION = '0.4.07'
4
+ VERSION = '0.4.08'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loxxy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.07
4
+ version: 0.4.08
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-21 00:00:00.000000000 Z
11
+ date: 2022-04-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rley