ether-code 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,7 @@ const TokenType = {
23
23
 
24
24
  COLON: 'COLON',
25
25
  DOUBLE_COLON: 'DOUBLE_COLON',
26
+ SEMICOLON: 'SEMICOLON',
26
27
  EQUALS: 'EQUALS',
27
28
  DOUBLE_EQUALS: 'DOUBLE_EQUALS',
28
29
  NOT_EQUALS: 'NOT_EQUALS',
@@ -31,7 +32,6 @@ const TokenType = {
31
32
  SPREAD: 'SPREAD',
32
33
  ARROW: 'ARROW',
33
34
  FAT_ARROW: 'FAT_ARROW',
34
- SEMICOLON: 'SEMICOLON',
35
35
 
36
36
  PLUS: 'PLUS',
37
37
  MINUS: 'MINUS',
@@ -100,20 +100,17 @@ class EtherLexer {
100
100
  this.indentStack = [0]
101
101
  this.currentIndent = 0
102
102
  this.atLineStart = true
103
- this.options = options
104
- this.lastTokenType = null
105
103
 
106
- this.compoundKeywordsCache = null
107
- }
108
-
109
- isAtEnd() {
110
- return this.pos >= this.source.length
104
+ this.options = {
105
+ trackComments: options.trackComments || false,
106
+ allowUnicode: options.allowUnicode !== false,
107
+ tabSize: options.tabSize || 2
108
+ }
111
109
  }
112
110
 
113
111
  peek(offset = 0) {
114
- const pos = this.pos + offset
115
- if (pos >= this.source.length) return '\0'
116
- return this.source[pos]
112
+ const idx = this.pos + offset
113
+ return idx < this.source.length ? this.source[idx] : null
117
114
  }
118
115
 
119
116
  advance() {
@@ -122,6 +119,7 @@ class EtherLexer {
122
119
  if (char === '\n') {
123
120
  this.line++
124
121
  this.column = 1
122
+ this.atLineStart = true
125
123
  } else {
126
124
  this.column++
127
125
  }
@@ -129,21 +127,65 @@ class EtherLexer {
129
127
  }
130
128
 
131
129
  match(expected) {
132
- if (this.isAtEnd()) return false
133
- if (this.source[this.pos] !== expected) return false
134
- this.advance()
135
- return true
130
+ if (this.peek() === expected) {
131
+ this.advance()
132
+ return true
133
+ }
134
+ return false
136
135
  }
137
136
 
138
- skipWhitespace() {
139
- while (!this.isAtEnd()) {
140
- const char = this.peek()
141
- if (char === ' ' || char === '\t' || char === '\r') {
142
- this.advance()
143
- } else {
144
- break
145
- }
137
+ matchSequence(seq) {
138
+ for (let i = 0; i < seq.length; i++) {
139
+ if (this.peek(i) !== seq[i]) return false
146
140
  }
141
+ for (let i = 0; i < seq.length; i++) {
142
+ this.advance()
143
+ }
144
+ return true
145
+ }
146
+
147
+ isAtEnd() {
148
+ return this.pos >= this.source.length
149
+ }
150
+
151
+ isDigit(char) {
152
+ if (!char) return false
153
+ return char >= '0' && char <= '9'
154
+ }
155
+
156
+ isHexDigit(char) {
157
+ if (!char) return false
158
+ return this.isDigit(char) || (char >= 'a' && char <= 'f') || (char >= 'A' && char <= 'F')
159
+ }
160
+
161
+ isAlpha(char) {
162
+ if (!char) return false
163
+ const code = char.charCodeAt(0)
164
+ if ((code >= 65 && code <= 90) || (code >= 97 && code <= 122)) return true
165
+ if (char === '_') return true
166
+ if (this.options.allowUnicode) {
167
+ if (code >= 0x00C0 && code <= 0x024F) return true
168
+ if (code >= 0x0400 && code <= 0x04FF) return true
169
+ if (code >= 0x4E00 && code <= 0x9FFF) return true
170
+ if (code >= 0x3040 && code <= 0x30FF) return true
171
+ if (code >= 0x0600 && code <= 0x06FF) return true
172
+ if (code >= 0x0370 && code <= 0x03FF) return true
173
+ }
174
+ return false
175
+ }
176
+
177
+ isAlphaNumeric(char) {
178
+ return this.isAlpha(char) || this.isDigit(char)
179
+ }
180
+
181
+ isWhitespace(char) {
182
+ if (!char) return false
183
+ return char === ' ' || char === '\t' || char === '\r'
184
+ }
185
+
186
+ addToken(type, value, startColumn = null) {
187
+ const col = startColumn !== null ? startColumn : this.column
188
+ this.tokens.push(new Token(type, value, this.line, col, this.currentIndent))
147
189
  }
148
190
 
149
191
  tokenize() {
@@ -153,7 +195,7 @@ class EtherLexer {
153
195
 
154
196
  while (this.indentStack.length > 1) {
155
197
  this.indentStack.pop()
156
- this.tokens.push(new Token(TokenType.DEDENT, '', this.line, this.column, this.currentIndent))
198
+ this.tokens.push(new Token(TokenType.DEDENT, '', this.line, this.column, 0))
157
199
  }
158
200
 
159
201
  this.tokens.push(new Token(TokenType.EOF, '', this.line, this.column, 0))
@@ -163,37 +205,44 @@ class EtherLexer {
163
205
  scanToken() {
164
206
  if (this.atLineStart) {
165
207
  this.handleIndentation()
166
- this.atLineStart = false
208
+ if (this.isAtEnd()) return
167
209
  }
168
210
 
169
- this.skipWhitespace()
170
-
171
- if (this.isAtEnd()) return
172
-
173
211
  const char = this.peek()
174
212
 
175
213
  if (char === '\n') {
176
- this.handleNewline()
214
+ this.advance()
215
+ this.addToken(TokenType.NEWLINE, '\n')
177
216
  return
178
217
  }
179
218
 
180
- if (char === '/' && this.peek(1) === '/') {
181
- this.scanLineComment()
219
+ if (this.isWhitespace(char)) {
220
+ this.advance()
182
221
  return
183
222
  }
184
223
 
185
- if (char === '/' && this.peek(1) === '*') {
186
- this.scanBlockComment()
224
+ if (char === '#') {
225
+ const next = this.peek(1)
226
+ if (next && (this.isHexDigit(next) || this.isAlpha(next))) {
227
+ this.scanHashToken()
228
+ return
229
+ }
230
+ this.scanComment()
187
231
  return
188
232
  }
189
233
 
190
- if (char === '#') {
191
- this.scanLineComment()
234
+ if (char === '/' && this.peek(1) === '/') {
235
+ this.scanComment()
236
+ return
237
+ }
238
+
239
+ if (char === '/' && this.peek(1) === '*') {
240
+ this.scanBlockComment()
192
241
  return
193
242
  }
194
243
 
195
244
  if (char === '"' || char === "'") {
196
- this.scanString()
245
+ this.scanString(char)
197
246
  return
198
247
  }
199
248
 
@@ -202,43 +251,24 @@ class EtherLexer {
202
251
  return
203
252
  }
204
253
 
205
- if (this.isDigit(char) || (char === '.' && this.isDigit(this.peek(1)))) {
254
+ if (this.isDigit(char) || (char === '-' && this.isDigit(this.peek(1)))) {
206
255
  this.scanNumber()
207
256
  return
208
257
  }
209
258
 
210
- if (this.isAlpha(char) || char === '_' || char === '$') {
259
+ if (this.isAlpha(char)) {
211
260
  this.scanIdentifier()
212
261
  return
213
262
  }
214
263
 
215
- if (this.isOperator(char)) {
216
- this.scanOperator()
217
- return
218
- }
219
-
220
- this.advance()
221
- this.tokens.push(new Token(TokenType.ERROR, `Caractère inattendu: ${char}`, this.line, this.column - 1, this.currentIndent))
222
- }
223
-
224
- handleNewline() {
225
- const startLine = this.line
226
- const startCol = this.column
227
- this.advance()
228
-
229
- while (!this.isAtEnd() && this.peek() === '\n') {
230
- this.advance()
231
- }
232
-
233
- this.tokens.push(new Token(TokenType.NEWLINE, '\n', startLine, startCol, this.currentIndent))
234
- this.atLineStart = true
264
+ this.scanOperator()
235
265
  }
236
266
 
237
267
  handleIndentation() {
238
268
  let indent = 0
239
269
  while (!this.isAtEnd() && (this.peek() === ' ' || this.peek() === '\t')) {
240
270
  if (this.peek() === '\t') {
241
- indent += 4
271
+ indent += this.options.tabSize
242
272
  } else {
243
273
  indent++
244
274
  }
@@ -246,238 +276,374 @@ class EtherLexer {
246
276
  }
247
277
 
248
278
  if (this.isAtEnd() || this.peek() === '\n') {
279
+ this.atLineStart = false
249
280
  return
250
281
  }
282
+
283
+ if (this.peek() === '#') {
284
+ const next = this.peek(1)
285
+ if (!next || (!this.isAlpha(next) && !this.isHexDigit(next))) {
286
+ this.atLineStart = false
287
+ return
288
+ }
289
+ }
251
290
 
252
291
  if (this.peek() === '/' && this.peek(1) === '/') {
292
+ this.atLineStart = false
253
293
  return
254
294
  }
255
295
 
256
- if (this.peek() === '#') {
257
- return
258
- }
296
+ this.currentIndent = indent
297
+ this.atLineStart = false
259
298
 
260
299
  const currentLevel = this.indentStack[this.indentStack.length - 1]
261
300
 
262
301
  if (indent > currentLevel) {
263
302
  this.indentStack.push(indent)
264
- this.currentIndent = indent
265
303
  this.tokens.push(new Token(TokenType.INDENT, indent, this.line, 1, indent))
266
304
  } else if (indent < currentLevel) {
267
- while (this.indentStack.length > 1 && indent < this.indentStack[this.indentStack.length - 1]) {
305
+ while (this.indentStack.length > 1 && this.indentStack[this.indentStack.length - 1] > indent) {
268
306
  this.indentStack.pop()
269
- this.tokens.push(new Token(TokenType.DEDENT, '', this.line, 1, this.indentStack[this.indentStack.length - 1]))
307
+ this.tokens.push(new Token(TokenType.DEDENT, '', this.line, 1, indent))
308
+ }
309
+ if (this.indentStack[this.indentStack.length - 1] !== indent) {
310
+ this.tokens.push(new Token(TokenType.ERROR, 'Indentation incohérente', this.line, 1, indent))
270
311
  }
271
- this.currentIndent = this.indentStack[this.indentStack.length - 1]
272
312
  }
273
313
  }
274
314
 
275
- scanLineComment() {
315
+ scanComment() {
276
316
  const startLine = this.line
277
317
  const startCol = this.column
278
-
318
+ let comment = ''
319
+
279
320
  if (this.peek() === '#') {
280
321
  this.advance()
281
322
  } else {
282
323
  this.advance()
283
324
  this.advance()
284
325
  }
326
+
327
+ while (!this.isAtEnd() && this.peek() !== '\n') {
328
+ comment += this.advance()
329
+ }
330
+
331
+ if (this.options.trackComments) {
332
+ this.tokens.push(new Token(TokenType.COMMENT, comment.trim(), startLine, startCol, this.currentIndent))
333
+ }
334
+ }
335
+
336
+ scanHashToken() {
337
+ const startLine = this.line
338
+ const startCol = this.column
339
+
340
+ this.advance()
285
341
 
286
342
  let value = ''
287
- while (!this.isAtEnd() && this.peek() !== '\n') {
288
- value += this.advance()
343
+ let isHexColor = true
344
+ let charCount = 0
345
+
346
+ while (!this.isAtEnd()) {
347
+ const char = this.peek()
348
+ if (this.isHexDigit(char)) {
349
+ value += this.advance()
350
+ charCount++
351
+ } else if (this.isAlpha(char) && !this.isHexDigit(char)) {
352
+ isHexColor = false
353
+ value += this.advance()
354
+ } else if (char === '-' || char === '_') {
355
+ isHexColor = false
356
+ value += this.advance()
357
+ } else {
358
+ break
359
+ }
289
360
  }
290
361
 
291
- this.tokens.push(new Token(TokenType.COMMENT, value.trim(), startLine, startCol, this.currentIndent))
362
+ if (isHexColor && (charCount === 3 || charCount === 4 || charCount === 6 || charCount === 8)) {
363
+ this.tokens.push(new Token(TokenType.HEX, '#' + value, startLine, startCol, this.currentIndent))
364
+ } else {
365
+ this.tokens.push(new Token(TokenType.HASH, '#', startLine, startCol, this.currentIndent))
366
+ if (value) {
367
+ this.tokens.push(new Token(TokenType.IDENTIFIER, value, startLine, startCol + 1, this.currentIndent))
368
+ }
369
+ }
292
370
  }
293
371
 
294
372
  scanBlockComment() {
295
373
  const startLine = this.line
296
374
  const startCol = this.column
297
-
375
+ let comment = ''
376
+
298
377
  this.advance()
299
378
  this.advance()
300
-
301
- let value = ''
379
+
302
380
  while (!this.isAtEnd()) {
303
381
  if (this.peek() === '*' && this.peek(1) === '/') {
304
382
  this.advance()
305
383
  this.advance()
306
384
  break
307
385
  }
308
- value += this.advance()
386
+ comment += this.advance()
387
+ }
388
+
389
+ if (this.options.trackComments) {
390
+ this.tokens.push(new Token(TokenType.BLOCK_COMMENT, comment.trim(), startLine, startCol, this.currentIndent))
309
391
  }
310
-
311
- this.tokens.push(new Token(TokenType.BLOCK_COMMENT, value.trim(), startLine, startCol, this.currentIndent))
312
392
  }
313
393
 
314
- scanString() {
394
+ scanString(quote) {
315
395
  const startLine = this.line
316
396
  const startCol = this.column
317
- const quote = this.advance()
318
-
397
+ let value = ''
398
+ let isBlock = false
399
+
400
+ this.advance()
401
+
319
402
  if (this.peek() === quote && this.peek(1) === quote) {
320
403
  this.advance()
321
404
  this.advance()
322
- return this.scanBlockString(quote, startLine, startCol)
405
+ isBlock = true
323
406
  }
324
-
325
- let value = ''
326
- let escaped = false
327
-
407
+
328
408
  while (!this.isAtEnd()) {
329
- const char = this.peek()
330
-
331
- if (escaped) {
332
- switch (char) {
409
+ if (isBlock) {
410
+ if (this.peek() === quote && this.peek(1) === quote && this.peek(2) === quote) {
411
+ this.advance()
412
+ this.advance()
413
+ this.advance()
414
+ break
415
+ }
416
+ } else {
417
+ if (this.peek() === quote) {
418
+ this.advance()
419
+ break
420
+ }
421
+ if (this.peek() === '\n') {
422
+ this.tokens.push(new Token(TokenType.ERROR, 'Chaîne non terminée', startLine, startCol, this.currentIndent))
423
+ return
424
+ }
425
+ }
426
+
427
+ if (this.peek() === '\\' && !isBlock) {
428
+ this.advance()
429
+ const escaped = this.advance()
430
+ switch (escaped) {
333
431
  case 'n': value += '\n'; break
334
432
  case 't': value += '\t'; break
335
433
  case 'r': value += '\r'; break
336
434
  case '\\': value += '\\'; break
337
435
  case "'": value += "'"; break
338
436
  case '"': value += '"'; break
339
- default: value += char
437
+ case '0': value += '\0'; break
438
+ case 'x':
439
+ let hex = ''
440
+ for (let i = 0; i < 2 && this.isHexDigit(this.peek()); i++) {
441
+ hex += this.advance()
442
+ }
443
+ value += String.fromCharCode(parseInt(hex, 16))
444
+ break
445
+ case 'u':
446
+ let unicode = ''
447
+ if (this.peek() === '{') {
448
+ this.advance()
449
+ while (this.isHexDigit(this.peek())) {
450
+ unicode += this.advance()
451
+ }
452
+ if (this.peek() === '}') this.advance()
453
+ } else {
454
+ for (let i = 0; i < 4 && this.isHexDigit(this.peek()); i++) {
455
+ unicode += this.advance()
456
+ }
457
+ }
458
+ value += String.fromCodePoint(parseInt(unicode, 16))
459
+ break
460
+ default:
461
+ value += escaped
340
462
  }
341
- escaped = false
342
- this.advance()
343
- } else if (char === '\\') {
344
- escaped = true
345
- this.advance()
346
- } else if (char === quote) {
347
- this.advance()
348
- break
349
- } else if (char === '\n') {
350
- break
351
463
  } else {
352
464
  value += this.advance()
353
465
  }
354
466
  }
355
-
356
- this.tokens.push(new Token(TokenType.STRING, value, startLine, startCol, this.currentIndent))
357
- }
358
467
 
359
- scanBlockString(quote, startLine, startCol) {
360
- let value = ''
361
-
362
- while (!this.isAtEnd()) {
363
- if (this.peek() === quote && this.peek(1) === quote && this.peek(2) === quote) {
364
- this.advance()
365
- this.advance()
366
- this.advance()
367
- break
368
- }
369
- value += this.advance()
370
- }
371
-
372
- this.tokens.push(new Token(TokenType.BLOCK_STRING, value, startLine, startCol, this.currentIndent))
468
+ this.tokens.push(new Token(
469
+ isBlock ? TokenType.BLOCK_STRING : TokenType.STRING,
470
+ value,
471
+ startLine,
472
+ startCol,
473
+ this.currentIndent
474
+ ))
373
475
  }
374
476
 
375
477
  scanTemplateString() {
376
478
  const startLine = this.line
377
479
  const startCol = this.column
378
- this.advance()
379
-
380
480
  let value = ''
381
-
481
+
482
+ this.advance()
483
+
382
484
  while (!this.isAtEnd() && this.peek() !== '`') {
383
- if (this.peek() === '\\' && this.peek(1) === '`') {
485
+ if (this.peek() === '\\') {
384
486
  this.advance()
385
487
  value += this.advance()
386
488
  } else {
387
489
  value += this.advance()
388
490
  }
389
491
  }
390
-
391
- if (!this.isAtEnd()) {
492
+
493
+ if (this.peek() === '`') {
392
494
  this.advance()
393
495
  }
394
-
395
- this.tokens.push(new Token(TokenType.STRING, value, startLine, startCol, this.currentIndent))
496
+
497
+ this.tokens.push(new Token(TokenType.BLOCK_STRING, value, startLine, startCol, this.currentIndent))
396
498
  }
397
499
 
398
500
  scanNumber() {
399
501
  const startLine = this.line
400
502
  const startCol = this.column
401
503
  let value = ''
402
- let tokenType = TokenType.INTEGER
403
-
504
+ let type = TokenType.INTEGER
505
+
506
+ if (this.peek() === '-') {
507
+ value += this.advance()
508
+ }
509
+
404
510
  if (this.peek() === '0' && (this.peek(1) === 'x' || this.peek(1) === 'X')) {
405
511
  value += this.advance()
406
512
  value += this.advance()
407
- while (!this.isAtEnd() && this.isHexDigit(this.peek())) {
513
+ while (this.isHexDigit(this.peek())) {
408
514
  value += this.advance()
409
515
  }
410
516
  this.tokens.push(new Token(TokenType.HEX, value, startLine, startCol, this.currentIndent))
411
517
  return
412
518
  }
413
-
414
- while (!this.isAtEnd() && this.isDigit(this.peek())) {
415
- value += this.advance()
519
+
520
+ if (this.peek() === '0' && (this.peek(1) === 'b' || this.peek(1) === 'B')) {
521
+ this.advance()
522
+ this.advance()
523
+ let binValue = ''
524
+ while (this.peek() === '0' || this.peek() === '1') {
525
+ binValue += this.advance()
526
+ }
527
+ this.tokens.push(new Token(TokenType.INTEGER, parseInt(binValue, 2), startLine, startCol, this.currentIndent))
528
+ return
416
529
  }
417
-
418
- if (this.peek() === '.' && this.isDigit(this.peek(1))) {
419
- tokenType = TokenType.FLOAT
420
- value += this.advance()
421
- while (!this.isAtEnd() && this.isDigit(this.peek())) {
530
+
531
+ if (this.peek() === '0' && (this.peek(1) === 'o' || this.peek(1) === 'O')) {
532
+ this.advance()
533
+ this.advance()
534
+ let octValue = ''
535
+ while (this.peek() >= '0' && this.peek() <= '7') {
536
+ octValue += this.advance()
537
+ }
538
+ this.tokens.push(new Token(TokenType.INTEGER, parseInt(octValue, 8), startLine, startCol, this.currentIndent))
539
+ return
540
+ }
541
+
542
+ while (this.isDigit(this.peek()) || this.peek() === '_') {
543
+ if (this.peek() !== '_') {
422
544
  value += this.advance()
545
+ } else {
546
+ this.advance()
423
547
  }
424
548
  }
425
-
426
- if (this.peek() === 'e' || this.peek() === 'E') {
427
- tokenType = TokenType.FLOAT
549
+
550
+ if (this.peek() === '.' && this.isDigit(this.peek(1))) {
551
+ type = TokenType.FLOAT
428
552
  value += this.advance()
429
- if (this.peek() === '+' || this.peek() === '-') {
430
- value += this.advance()
553
+ while (this.isDigit(this.peek()) || this.peek() === '_') {
554
+ if (this.peek() !== '_') {
555
+ value += this.advance()
556
+ } else {
557
+ this.advance()
558
+ }
431
559
  }
432
- while (!this.isAtEnd() && this.isDigit(this.peek())) {
560
+ }
561
+
562
+ if (this.peek() === 'e' || this.peek() === 'E') {
563
+ const nextChar = this.peek(1)
564
+ if (this.isDigit(nextChar) || nextChar === '+' || nextChar === '-') {
565
+ type = TokenType.FLOAT
433
566
  value += this.advance()
567
+ if (this.peek() === '+' || this.peek() === '-') {
568
+ value += this.advance()
569
+ }
570
+ while (this.isDigit(this.peek())) {
571
+ value += this.advance()
572
+ }
434
573
  }
435
574
  }
575
+
576
+ if (type === TokenType.FLOAT) {
577
+ let numValue = String(parseFloat(value))
578
+ const unit = this.scanCSSUnit()
579
+ if (unit) numValue += unit
580
+ this.tokens.push(new Token(type, numValue, startLine, startCol, this.currentIndent))
581
+ } else {
582
+ let numValue = String(parseInt(value, 10))
583
+ const unit = this.scanCSSUnit()
584
+ if (unit) numValue += unit
585
+ this.tokens.push(new Token(type, numValue, startLine, startCol, this.currentIndent))
586
+ }
587
+ }
588
+
589
+ scanCSSUnit() {
590
+ const units = ['px', 'em', 'rem', 'vh', 'vw', 'vmin', 'vmax', '%', 'fr', 's', 'ms', 'deg', 'rad', 'turn', 'ch', 'ex', 'cm', 'mm', 'in', 'pt', 'pc', 'cqw', 'cqh']
436
591
 
437
- const numValue = tokenType === TokenType.INTEGER ? parseInt(value, 10) : parseFloat(value)
438
- this.tokens.push(new Token(tokenType, numValue, startLine, startCol, this.currentIndent))
592
+ for (const unit of units) {
593
+ let matches = true
594
+ for (let i = 0; i < unit.length; i++) {
595
+ const char = this.peek(i)
596
+ if (!char || char.toLowerCase() !== unit[i].toLowerCase()) {
597
+ matches = false
598
+ break
599
+ }
600
+ }
601
+ if (matches) {
602
+ const afterUnit = this.peek(unit.length)
603
+ if (!afterUnit || !this.isAlpha(afterUnit)) {
604
+ let result = ''
605
+ for (let i = 0; i < unit.length; i++) {
606
+ result += this.advance()
607
+ }
608
+ return result
609
+ }
610
+ }
611
+ }
612
+ return ''
439
613
  }
440
614
 
441
615
  scanIdentifier() {
442
616
  const startLine = this.line
443
617
  const startCol = this.column
444
618
  let value = ''
445
-
446
- if (this.peek() === '$') {
447
- value += this.advance()
448
- }
449
-
450
- while (!this.isAtEnd() && (this.isAlphaNumeric(this.peek()) || this.peek() === '_' || this.peek() === '-' && this.isAlpha(this.peek(1)))) {
619
+
620
+ while (this.isAlphaNumeric(this.peek()) || this.peek() === '-' || this.peek() === '_') {
621
+ if (this.peek() === '-' && this.peek(1) === '>') {
622
+ break
623
+ }
451
624
  value += this.advance()
452
625
  }
453
626
 
454
627
  let keepLooking = true
455
- while (keepLooking && !this.isAtEnd()) {
628
+ while (keepLooking && this.peek() === ' ') {
456
629
  const savedPos = this.pos
457
630
  const savedLine = this.line
458
631
  const savedCol = this.column
459
632
 
460
- if (this.peek() === ' ') {
461
- let spaces = ''
462
- while (this.peek() === ' ') {
463
- spaces += this.advance()
633
+ this.advance()
634
+
635
+ let nextWord = ''
636
+ while (this.isAlphaNumeric(this.peek()) || this.peek() === '-' || this.peek() === '_') {
637
+ if (this.peek() === '-' && this.peek(1) === '>') {
638
+ break
464
639
  }
465
-
466
- if (this.isAlpha(this.peek()) || this.peek() === "'" || this.peek() === '-') {
467
- let nextWord = ''
468
- while (!this.isAtEnd() && (this.isAlphaNumeric(this.peek()) || this.peek() === '_' || this.peek() === "'" || this.peek() === '-' && this.isAlpha(this.peek(1)))) {
469
- nextWord += this.advance()
470
- }
471
-
472
- const compound = value + ' ' + nextWord
473
- if (this.isCompoundKeyword(compound) || this.couldBeCompoundStart(compound)) {
474
- value = compound
475
- } else {
476
- this.pos = savedPos
477
- this.line = savedLine
478
- this.column = savedCol
479
- keepLooking = false
480
- }
640
+ nextWord += this.advance()
641
+ }
642
+
643
+ if (nextWord) {
644
+ const combined = value + ' ' + nextWord
645
+ if (this.isCompoundKeyword(combined) || this.couldBeCompoundStart(combined)) {
646
+ value = combined
481
647
  } else {
482
648
  this.pos = savedPos
483
649
  this.line = savedLine
@@ -504,28 +670,42 @@ class EtherLexer {
504
670
 
505
671
  couldBeCompoundStart(partial) {
506
672
  const normalized = this.normalizeAccents(partial)
507
- const compounds = this.getCompoundKeywords()
673
+ const compounds = [
674
+ 'retour a la ligne', 'a la soumission', 'a la reinitialisation',
675
+ 'inferieur ou egal', 'superieur ou egal',
676
+ 'gauche a droite', 'droite a gauche',
677
+ 'mettre a jour', 'case a cocher',
678
+ 'pour chaque', 'sinon si', 'tant que',
679
+ 'espace de noms', 'inclure une fois', 'requerir une fois',
680
+ 'valeur entier', 'valeur entiere', 'valeur flottant', 'valeur chaine', 'valeur booleen',
681
+ 'obtenir type', 'est entier', 'est chaine', 'est tableau', 'est flottant',
682
+ 'est booleen', 'est null', 'est nul', 'est objet', 'est numerique',
683
+ 'est callable', 'est ressource', 'est scalaire', 'est vide',
684
+ 'sous chaine', 'couper espaces', 'encoder json', 'decoder json',
685
+ 'fichier existe', 'lire fichier', 'ecrire fichier',
686
+ 'somme tableau', 'filtrer tableau', 'mapper tableau',
687
+ 'fusionner tableaux', 'reduire tableau', 'inverser tableau',
688
+ 'compter elements', 'tableau cles', 'tableau valeurs',
689
+ 'hacher mot', 'verifier mot',
690
+ 'hacher mot de', 'verifier mot de',
691
+ 'hacher mot de passe', 'verifier mot de passe',
692
+ 'demarrer session', 'detruire session',
693
+ 'strict types'
694
+ ]
508
695
  return compounds.some(c => c.startsWith(normalized + ' '))
509
696
  }
510
697
 
698
+
511
699
  normalizeAccents(str) {
512
- const accentsMap = {
513
- 'à': 'a', 'â': 'a', 'ä': 'a', 'á': 'a',
514
- 'è': 'e', 'ê': 'e', 'ë': 'e', 'é': 'e',
515
- 'ì': 'i', 'î': 'i', 'ï': 'i', 'í': 'i',
516
- 'ò': 'o', 'ô': 'o', 'ö': 'o', 'ó': 'o',
517
- 'ù': 'u', 'û': 'u', 'ü': 'u', 'ú': 'u',
518
- 'ç': 'c', 'ñ': 'n'
519
- }
520
- return str.toLowerCase().split('').map(c => accentsMap[c] || c).join('')
700
+ if (typeof str !== 'string') return String(str)
701
+ return str.toLowerCase()
702
+ .normalize('NFD')
703
+ .replace(/[\u0300-\u036f]/g, '')
521
704
  }
522
705
 
523
- getCompoundKeywords() {
524
- if (this.compoundKeywordsCache) {
525
- return this.compoundKeywordsCache
526
- }
527
-
528
- this.compoundKeywordsCache = [
706
+ isCompoundKeyword(phrase) {
707
+ const normalized = this.normalizeAccents(phrase)
708
+ const compounds = [
529
709
  'en ligne', 'hors ligne', 'au survol', 'au clic', 'au focus',
530
710
  'au double clic', 'au changement', 'a la soumission',
531
711
  'retour a la ligne', 'lecture seule', 'zone texte',
@@ -566,172 +746,23 @@ class EtherLexer {
566
746
  'espacement lettres', 'espacement mots',
567
747
  'espace blanc', 'debordement texte',
568
748
  'flou fond', 'filtre fond',
569
-
570
- 'obtenir type', 'definir type',
571
- 'est tableau', 'est booleen', 'est entier', 'est chaine',
572
- 'est nul', 'est flottant', 'est numerique', 'est objet',
573
- 'est ressource', 'est scalaire', 'est iterable', 'est comptable',
574
- 'est appelable', 'est vide',
575
- 'valeur entier', 'valeur flottant', 'valeur chaine', 'valeur booleen',
576
- 'afficher formate', 'formater chaine',
577
- 'afficher variable', 'exporter variable',
578
- 'imprimer lisible', 'deboguer zval',
749
+ 'pour chaque', 'sinon si', 'tant que',
750
+ 'inclure une fois', 'requerir une fois',
751
+ 'valeur entier', 'valeur entiere', 'valeur flottant', 'valeur chaine', 'valeur booleen',
752
+ 'obtenir type', 'est entier', 'est chaine', 'est tableau', 'est flottant',
753
+ 'est booleen', 'est null', 'est nul', 'est objet', 'est numerique',
754
+ 'est callable', 'est ressource', 'est scalaire', 'est vide',
755
+ 'sous chaine', 'couper espaces', 'encoder json', 'decoder json',
756
+ 'fichier existe', 'lire fichier', 'ecrire fichier',
579
757
  'somme tableau', 'filtrer tableau', 'mapper tableau',
580
- 'fusionner tableaux', 'reduire tableau',
581
- 'trier tableau', 'inverser tableau', 'melanger tableau',
582
- 'premier tableau', 'dernier tableau',
583
- 'cles tableau', 'valeurs tableau',
584
- 'rechercher tableau', 'existe cle', 'existe tableau',
585
- 'couper espaces', 'couper gauche', 'couper droite',
586
- 'en majuscules', 'en minuscules',
587
- 'premiere majuscule', 'majuscules mots',
588
- 'longueur chaine', 'sous chaine',
589
- 'position chaine', 'remplacer chaine',
590
- 'exploser chaine', 'imploser tableau',
591
- 'lire fichier', 'ecrire fichier',
592
- 'fichier existe', 'supprimer fichier',
593
- 'creer dossier', 'lire dossier', 'supprimer dossier',
594
- 'encoder json', 'decoder json',
595
- 'encoder base64', 'decoder base64',
596
- 'encoder url', 'decoder url',
597
- 'hacher md5', 'hacher sha1', 'hacher sha256',
758
+ 'fusionner tableaux', 'reduire tableau', 'inverser tableau',
759
+ 'compter elements', 'tableau cles', 'tableau valeurs',
760
+ 'hacher mot', 'verifier mot',
598
761
  'hacher mot de passe', 'verifier mot de passe',
599
- 'mot de passe',
600
762
  'demarrer session', 'detruire session',
601
- 'sinon si',
602
- 'nouvelle instance', 'nouveau datetime',
603
- 'classe abstraite', 'classe finale',
604
- 'methode abstraite', 'methode finale',
605
- 'propriete privee', 'propriete protegee', 'propriete publique',
606
- 'lecture seule',
607
-
608
- 'creer base de donnees', 'creer table', 'creer vue',
609
- 'creer index', 'creer index unique',
610
- 'creer schema', 'creer sequence', 'creer type', 'creer domaine',
611
- 'creer fonction', 'creer procedure', 'creer declencheur',
612
- 'creer ou remplacer', 'creer role',
613
- 'si n\'existe pas', 'si nexiste pas',
614
- 'modifier table', 'modifier colonne', 'modifier base de donnees',
615
- 'modifier index', 'modifier sequence', 'modifier vue',
616
- 'ajouter colonne', 'supprimer colonne',
617
- 'renommer en', 'renommer colonne', 'modifier type',
618
- 'definir defaut', 'supprimer defaut',
619
- 'supprimer base de donnees', 'supprimer table', 'supprimer vue',
620
- 'supprimer index', 'supprimer schema', 'supprimer sequence',
621
- 'supprimer fonction', 'supprimer procedure', 'supprimer declencheur',
622
- 'supprimer role', 'supprimer contrainte',
623
- 'cle primaire', 'cle etrangere', 'non nul', 'par defaut',
624
- 'auto increment', 'unique contrainte',
625
- 'verifier contrainte', 'references table',
626
- 'sur suppression', 'sur mise a jour',
627
- 'cascade suppression', 'cascade mise a jour',
628
- 'definir null', 'sans action',
629
- 'inserer dans', 'inserer valeurs',
630
- 'mettre a jour', 'definir valeur',
631
- 'supprimer de', 'supprimer ou',
632
- 'selectionner depuis', 'selectionner tout',
633
- 'selectionner distinct', 'selectionner comme',
634
- 'depuis table', 'ou condition', 'et condition',
635
- 'grouper par', 'ordonner par', 'limiter a',
636
- 'decaler de', 'avoir condition',
637
- 'jointure gauche', 'jointure droite',
638
- 'jointure interne', 'jointure externe',
639
- 'jointure croisee', 'jointure naturelle',
640
- 'jointure complete',
641
- 'union tout', 'sauf tout', 'intersection tout',
642
- 'compter tout', 'somme de', 'moyenne de',
643
- 'maximum de', 'minimum de',
644
- 'comme motif', 'similaire a', 'correspond a regex',
645
- 'correspond regex insensible',
646
- 'ne correspond pas regex', 'ne correspond pas regex insensible',
647
- 'est null', 'n\'est pas null', 'nest pas null',
648
- 'est distinct de', 'n\'est pas distinct de', 'nest pas distinct de',
649
- 'entre et', 'pas entre',
650
- 'tout de', 'quelconque de', 'aucun de',
651
- 'dans liste', 'pas dans liste',
652
- 'existe sous requete', 'n\'existe pas sous requete',
653
- 'debut transaction', 'valider transaction', 'annuler transaction',
654
- 'point de sauvegarde', 'revenir a', 'liberer sauvegarde',
655
- 'definir transaction', 'lecture seule', 'lecture ecriture',
656
- 'niveau isolation', 'lecture non validee',
657
- 'lecture validee', 'lecture repetable', 'serialisable',
658
- 'verrouiller table', 'deverrouiller tables',
659
- 'verrouiller ligne', 'pour mise a jour',
660
- 'pour partage', 'sauter verrouille', 'sans attendre',
661
- 'accorder privileges', 'revoquer privileges',
662
- 'accorder tout', 'revoquer tout',
663
- 'avec option grant', 'option admin',
664
- 'creer utilisateur', 'supprimer utilisateur',
665
- 'modifier utilisateur', 'definir mot de passe',
666
- 'expliquer analyser', 'expliquer verbose',
667
- 'commentaire sur', 'commentaire ligne', 'commentaire bloc',
668
- 'tableau contient', 'contenu dans tableau',
669
- 'chevauchement tableau', 'longueur tableau',
670
- 'ajouter tableau', 'prefixer tableau',
671
- 'concatener tableaux', 'supprimer tableau',
672
- 'remplacer tableau', 'position tableau', 'positions tableau',
673
- 'vers chaine tableau', 'depuis chaine tableau',
674
- 'denombrer tableau',
675
- 'remplacer regex', 'correspondance regex', 'correspondances regex',
676
- 'diviser regex', 'tableau regex',
677
- 'extraire json', 'extraire texte json',
678
- 'contient json', 'existe json',
679
- 'type json', 'tableau json', 'objet json',
680
- 'cle json', 'valeur json',
681
- 'ensemble json', 'supprimer cle json',
682
- 'definir valeur json', 'inserer json',
683
- 'concatener json', 'chaque json',
684
- 'elements json', 'cles json',
685
- 'vers json', 'depuis json',
686
- 'maintenant timestamp', 'date courante', 'heure courante',
687
- 'timestamp courant', 'extraire date',
688
- 'partie date', 'tronquer date',
689
- 'age entre', 'intervalle temps',
690
- 'debut mois', 'fin mois',
691
- 'ajouter intervalle', 'soustraire intervalle',
692
- 'fuseau horaire', 'convertir fuseau',
693
- 'generer serie', 'generer dates',
694
- 'valeur suivante', 'valeur courante',
695
- 'definir valeur sequence', 'redemarrer sequence',
696
- 'fenetre sur', 'partitionner par',
697
- 'lignes precedentes', 'lignes suivantes',
698
- 'premiere valeur', 'derniere valeur',
699
- 'nieme valeur', 'rang dense', 'numero ligne',
700
- 'pourcentage rang', 'distribution cumulee',
701
- 'ntile groupe', 'valeur precedente', 'valeur suivante fenetre',
702
- 'somme cumulative', 'moyenne mobile',
703
- 'table temporaire', 'table non journalisee',
704
- 'sur conflit', 'ne rien faire', 'mettre a jour conflit',
705
- 'retourner tout', 'retourner insere',
706
- 'avec requete', 'requete recursive',
707
- 'table virtuelle', 'table materialisee',
708
- 'rafraichir vue', 'rafraichir concurrent',
709
- 'activer declencheur', 'desactiver declencheur',
710
- 'avant insertion', 'apres insertion',
711
- 'avant mise a jour', 'apres mise a jour',
712
- 'avant suppression', 'apres suppression',
713
- 'pour chaque ligne', 'pour chaque instruction',
714
- 'quand condition', 'executer procedure',
715
- 'declarer variable', 'definir variable',
716
- 'debut bloc', 'fin bloc',
717
- 'si condition', 'sinon si condition', 'sinon condition',
718
- 'boucle tant que', 'boucle pour', 'sortir boucle',
719
- 'continuer boucle', 'retourner valeur',
720
- 'lever exception', 'capturer exception',
721
- 'bloc exception', 'quand autres',
722
- 'afficher message', 'afficher avertissement', 'afficher erreur',
723
- 'curseur pour', 'ouvrir curseur', 'fermer curseur',
724
- 'recuperer suivant', 'recuperer precedent',
725
- 'recuperer premier', 'recuperer dernier',
726
- 'ligne trouvee', 'ligne non trouvee'
763
+ 'strict types'
727
764
  ]
728
-
729
- return this.compoundKeywordsCache
730
- }
731
-
732
- isCompoundKeyword(phrase) {
733
- const normalized = this.normalizeAccents(phrase)
734
- return this.getCompoundKeywords().includes(normalized)
765
+ return compounds.includes(normalized)
735
766
  }
736
767
 
737
768
  scanOperator() {
@@ -803,11 +834,19 @@ class EtherLexer {
803
834
 
804
835
  case '-':
805
836
  if (this.match('-')) {
806
- this.tokens.push(new Token(TokenType.MINUS, '--', startLine, startCol, this.currentIndent))
807
- } else if (this.match('=')) {
808
- this.tokens.push(new Token(TokenType.MINUS, '-=', startLine, startCol, this.currentIndent))
837
+ if (this.isAlpha(this.peek()) || this.peek() === '-') {
838
+ let varName = '--'
839
+ while (this.isAlphaNumeric(this.peek()) || this.peek() === '-' || this.peek() === '_') {
840
+ varName += this.advance()
841
+ }
842
+ this.tokens.push(new Token(TokenType.IDENTIFIER, varName, startLine, startCol, this.currentIndent))
843
+ } else {
844
+ this.tokens.push(new Token(TokenType.MINUS, '--', startLine, startCol, this.currentIndent))
845
+ }
809
846
  } else if (this.match('>')) {
810
847
  this.tokens.push(new Token(TokenType.ARROW, '->', startLine, startCol, this.currentIndent))
848
+ } else if (this.match('=')) {
849
+ this.tokens.push(new Token(TokenType.MINUS, '-=', startLine, startCol, this.currentIndent))
811
850
  } else {
812
851
  this.tokens.push(new Token(TokenType.MINUS, '-', startLine, startCol, this.currentIndent))
813
852
  }
@@ -828,10 +867,10 @@ class EtherLexer {
828
867
  break
829
868
 
830
869
  case '/':
831
- if (this.match('/')) {
832
- this.tokens.push(new Token(TokenType.DOUBLE_SLASH, '//', startLine, startCol, this.currentIndent))
833
- } else if (this.match('=')) {
870
+ if (this.match('=')) {
834
871
  this.tokens.push(new Token(TokenType.SLASH, '/=', startLine, startCol, this.currentIndent))
872
+ } else if (this.match('/')) {
873
+ this.tokens.push(new Token(TokenType.DOUBLE_SLASH, '//', startLine, startCol, this.currentIndent))
835
874
  } else {
836
875
  this.tokens.push(new Token(TokenType.SLASH, '/', startLine, startCol, this.currentIndent))
837
876
  }
@@ -853,13 +892,7 @@ class EtherLexer {
853
892
  this.tokens.push(new Token(TokenType.LTE, '<=', startLine, startCol, this.currentIndent))
854
893
  }
855
894
  } else if (this.match('<')) {
856
- if (this.match('=')) {
857
- this.tokens.push(new Token(TokenType.LT, '<<=', startLine, startCol, this.currentIndent))
858
- } else {
859
- this.tokens.push(new Token(TokenType.LT, '<<', startLine, startCol, this.currentIndent))
860
- }
861
- } else if (this.match('>')) {
862
- this.tokens.push(new Token(TokenType.NOT_EQUALS, '<>', startLine, startCol, this.currentIndent))
895
+ this.tokens.push(new Token(TokenType.LT, '<<', startLine, startCol, this.currentIndent))
863
896
  } else {
864
897
  this.tokens.push(new Token(TokenType.LT, '<', startLine, startCol, this.currentIndent))
865
898
  }
@@ -869,8 +902,8 @@ class EtherLexer {
869
902
  if (this.match('=')) {
870
903
  this.tokens.push(new Token(TokenType.GTE, '>=', startLine, startCol, this.currentIndent))
871
904
  } else if (this.match('>')) {
872
- if (this.match('=')) {
873
- this.tokens.push(new Token(TokenType.GT, '>>=', startLine, startCol, this.currentIndent))
905
+ if (this.match('>')) {
906
+ this.tokens.push(new Token(TokenType.GT, '>>>', startLine, startCol, this.currentIndent))
874
907
  } else {
875
908
  this.tokens.push(new Token(TokenType.GT, '>>', startLine, startCol, this.currentIndent))
876
909
  }
@@ -894,6 +927,8 @@ class EtherLexer {
894
927
  this.tokens.push(new Token(TokenType.DOUBLE_PIPE, '||', startLine, startCol, this.currentIndent))
895
928
  } else if (this.match('=')) {
896
929
  this.tokens.push(new Token(TokenType.PIPE, '|=', startLine, startCol, this.currentIndent))
930
+ } else if (this.match('>')) {
931
+ this.tokens.push(new Token(TokenType.PIPE, '|>', startLine, startCol, this.currentIndent))
897
932
  } else {
898
933
  this.tokens.push(new Token(TokenType.PIPE, '|', startLine, startCol, this.currentIndent))
899
934
  }
@@ -920,8 +955,6 @@ class EtherLexer {
920
955
  }
921
956
  } else if (this.match('.')) {
922
957
  this.tokens.push(new Token(TokenType.QUESTION, '?.', startLine, startCol, this.currentIndent))
923
- } else if (this.match('-') && this.match('>')) {
924
- this.tokens.push(new Token(TokenType.ARROW, '?->', startLine, startCol, this.currentIndent))
925
958
  } else {
926
959
  this.tokens.push(new Token(TokenType.QUESTION, '?', startLine, startCol, this.currentIndent))
927
960
  }
@@ -968,34 +1001,10 @@ class EtherLexer {
968
1001
  break
969
1002
 
970
1003
  default:
971
- this.tokens.push(new Token(TokenType.ERROR, `Caractère inattendu: ${char}`, startLine, startCol, this.currentIndent))
1004
+ this.tokens.push(new Token(TokenType.ERROR, `Caractère inattendu: ${char}`, startLine, startCol, this.currentIndent))
972
1005
  }
973
1006
  }
974
1007
 
975
- isDigit(char) {
976
- return char >= '0' && char <= '9'
977
- }
978
-
979
- isHexDigit(char) {
980
- return this.isDigit(char) || (char >= 'a' && char <= 'f') || (char >= 'A' && char <= 'F')
981
- }
982
-
983
- isAlpha(char) {
984
- return (char >= 'a' && char <= 'z') ||
985
- (char >= 'A' && char <= 'Z') ||
986
- char === '_' ||
987
- char === '$' ||
988
- char.charCodeAt(0) > 127
989
- }
990
-
991
- isAlphaNumeric(char) {
992
- return this.isAlpha(char) || this.isDigit(char)
993
- }
994
-
995
- isOperator(char) {
996
- return ':=!,.<>+-*/%&|^~?@#\\()[]{};\n\r\t '.indexOf(char) !== -1
997
- }
998
-
999
1008
  static tokenize(source, options = {}) {
1000
1009
  const lexer = new EtherLexer(source, options)
1001
1010
  return lexer.tokenize()