code-ruby 2.0.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/VERSION +1 -1
  4. data/lib/code/format.rb +4 -2
  5. data/lib/code/parser.rb +1113 -6
  6. data/lib/code-ruby.rb +0 -1
  7. data/spec/code/node/call_spec.rb +1 -1
  8. data/spec/code/parser/boolean_spec.rb +1 -1
  9. data/spec/code/parser/chained_call_spec.rb +1 -1
  10. data/spec/code/parser/dictionary_spec.rb +1 -1
  11. data/spec/code/parser/function_spec.rb +1 -1
  12. data/spec/code/parser/group_spec.rb +1 -1
  13. data/spec/code/parser/if_modifier_spec.rb +1 -1
  14. data/spec/code/parser/list_spec.rb +1 -1
  15. data/spec/code/parser/number_spec.rb +1 -1
  16. data/spec/code/parser/string_spec.rb +1 -1
  17. data/spec/code_spec.rb +1 -1
  18. metadata +1 -42
  19. data/lib/code/parser/addition.rb +0 -23
  20. data/lib/code/parser/and_operator.rb +0 -19
  21. data/lib/code/parser/bitwise_and.rb +0 -23
  22. data/lib/code/parser/bitwise_or.rb +0 -23
  23. data/lib/code/parser/boolean.rb +0 -23
  24. data/lib/code/parser/call.rb +0 -165
  25. data/lib/code/parser/chained_call.rb +0 -31
  26. data/lib/code/parser/class.rb +0 -15
  27. data/lib/code/parser/code.rb +0 -27
  28. data/lib/code/parser/dictionary.rb +0 -76
  29. data/lib/code/parser/equal.rb +0 -67
  30. data/lib/code/parser/equality.rb +0 -42
  31. data/lib/code/parser/function.rb +0 -135
  32. data/lib/code/parser/greater.rb +0 -32
  33. data/lib/code/parser/group.rb +0 -58
  34. data/lib/code/parser/if.rb +0 -101
  35. data/lib/code/parser/if_modifier.rb +0 -39
  36. data/lib/code/parser/label_name.rb +0 -14
  37. data/lib/code/parser/left_operation.rb +0 -44
  38. data/lib/code/parser/list.rb +0 -47
  39. data/lib/code/parser/multiplication.rb +0 -39
  40. data/lib/code/parser/name.rb +0 -188
  41. data/lib/code/parser/negation.rb +0 -32
  42. data/lib/code/parser/not_keyword.rb +0 -29
  43. data/lib/code/parser/nothing.rb +0 -19
  44. data/lib/code/parser/number.rb +0 -156
  45. data/lib/code/parser/or_keyword.rb +0 -27
  46. data/lib/code/parser/or_operator.rb +0 -19
  47. data/lib/code/parser/power.rb +0 -19
  48. data/lib/code/parser/range.rb +0 -19
  49. data/lib/code/parser/rescue.rb +0 -19
  50. data/lib/code/parser/right_operation.rb +0 -44
  51. data/lib/code/parser/shift.rb +0 -23
  52. data/lib/code/parser/splat.rb +0 -33
  53. data/lib/code/parser/square_bracket.rb +0 -44
  54. data/lib/code/parser/statement.rb +0 -11
  55. data/lib/code/parser/string.rb +0 -85
  56. data/lib/code/parser/ternary.rb +0 -45
  57. data/lib/code/parser/unary_minus.rb +0 -33
  58. data/lib/code/parser/while.rb +0 -81
  59. data/lib/code/parser/whitespace.rb +0 -51
data/lib/code/parser.rb CHANGED
@@ -2,11 +2,125 @@
2
2
 
3
3
  class Code
4
4
  class Parser
5
- class Error < Error
5
+ class Error < ::Code::Error
6
6
  end
7
7
 
8
# Empty class with no behaviour of its own.
# NOTE(review): presumably kept so that references to Parser::Language
# elsewhere in the gem keep resolving — confirm against callers.
class Language
end
10
+
11
+ Token = Data.define(:type, :value, :position, :newline_before, :space_before)
12
+
13
# Words the lexer tags as :keyword tokens instead of :identifier tokens.
KEYWORDS = %w[
  and begin do else elsif elsunless end false if
  loop not nothing or rescue true unless until while
].freeze
33
+
34
# Operators made of more than one character, ordered longest first so the
# lexer's first prefix match is always the longest one ("&&=" before "&&").
#
# "&=" and "~=" are listed because the infix precedence table and the infix
# handlers know them; without an entry here they would be lexed as two
# separate single-character operators and fail to parse.
MULTI_CHAR_OPERATORS = %w[
  &.
  &&
  &&=
  &=
  **
  *=
  +=
  -=
  ..
  ...
  /=
  ::
  <<=
  <<
  <=>
  <=
  ===
  ==
  =~
  ~=
  >=
  >>=
  >>
  ||=
  ||
  |=
  !==
  !=
  !~
  %=
  ^=
  =>
].sort_by(&:length).reverse.freeze
66
+
67
# Binding powers for every infix operator, as [left, right] pairs.
# A right power lower than the left one makes the operator right-associative
# (assignments, modifiers, "**"); a higher one makes it left-associative.
INFIX_PRECEDENCE = [
  [%w[if unless while until], 10, 9],
  [%w[or], 20, 21],
  [%w[= += -= *= /= %= <<= >>= &= |= ^= ||= &&=], 30, 29],
  [%w[rescue], 35, 34],
  [%w[?], 40, 39],
  [%w[.. ...], 50, 51],
  [%w[||], 60, 61],
  [%w[and &&], 70, 71],
  [%w[== === != !== <=> =~ ~= !~], 80, 81],
  [%w[>= <= > <], 90, 91],
  [%w[|], 100, 101],
  [%w[^], 105, 106],
  [%w[&], 110, 111],
  [%w[<< >>], 120, 121],
  [%w[+ -], 130, 131],
  [%w[* / % × ÷], 140, 141],
  [%w[**], 160, 159]
].each_with_object({}) do |(operators, left_bp, right_bp), table|
  operators.each { |operator| table[operator] = [left_bp, right_bp] }
end.freeze
119
+
8
120
# Builds a parser for +input+.
# The input is coerced with #to_s, tokenized immediately, and the token
# cursor is positioned on the first token.
def initialize(input)
  @index = 0
  @input = input.to_s
  @tokens = lex(@input)
end
11
125
 
12
126
  def self.parse(...)
@@ -14,13 +128,1006 @@ class Code
14
128
  end
15
129
 
16
130
# Parses the whole token stream and wraps the resulting list of raw
# statements in a Node::Code.
def parse
  statements = parse_code
  Node::Code.new(statements)
end
21
133
 
22
134
  private
23
135
 
24
- attr_reader :input
136
+ attr_reader :input, :tokens
137
+
138
# Parses a sequence of statements separated by newlines.
#
# Stops at end of input, or when the current token is one of
# +stop_keywords+ (keyword tokens only) or has a value in +stop_values+.
# The stopping token is left unconsumed. Returns the array of statements.
def parse_code(stop_keywords: [], stop_values: [])
  skip_newlines
  collected = []

  while !eof? && !stop?(stop_keywords, stop_values)
    collected << parse_expression
    consume_newlines
  end

  collected
end
151
+
152
# Precedence-climbing expression parser.
#
# Consumes one prefix expression, then keeps extending it with postfix
# constructs and infix operators whose left binding power is at least
# +min_bp+. Returns the raw expression.
def parse_expression(min_bp = 0)
  left = nud(advance)

  loop do
    lookahead = current

    case lookahead.type
    when :eof
      break
    when :newline
      # A line break ends the expression unless the next line starts with
      # something that can only continue it (operator, "?", or a chain).
      break unless continuation_after_newline?(next_significant_token)

      skip_newlines
      next
    end

    if postfix_start?(lookahead)
      break if lookahead.newline_before && !newline_postfix_continuation?(lookahead)
      # "a [1]" is not an index on a; only "a[1]" is.
      break if lookahead.value == "[" && lookahead.space_before
      break if call_like_postfix?(lookahead) && !callable_expression?(left)

      left = led_postfix(left)
      next
    end

    operator = infix_operator(lookahead)
    break if operator.nil?

    left_bp, right_bp = INFIX_PRECEDENCE.fetch(operator)
    break if left_bp < min_bp

    advance
    left = led_infix(left, operator, right_bp)
  end

  left
end
188
+
189
# Parses the prefix ("null denotation") form introduced by +token+, which
# has already been consumed. Raises a parse error for token types that
# cannot start an expression.
def nud(token)
  value = token.value

  case token.type
  when :number then value
  when :identifier then { call: { name: value } }
  when :symbol then { string: [{ text: value }] }
  when :string then { string: value.map { |part| string_part_to_raw(part) } }
  when :keyword then nud_keyword(token)
  when :operator then nud_operator(token)
  when :punctuation then nud_punctuation(token)
  else raise_parse_error("unexpected token #{value.inspect}", token)
  end
end
209
+
210
# Parses an expression that starts with the (already consumed) keyword
# +token+. Keywords without a dedicated prefix form fall back to a plain
# call with the keyword as its name.
def nud_keyword(token)
  keyword = token.value

  case keyword
  when "true", "false" then { boolean: keyword }
  when "nothing" then { nothing: keyword }
  when "if", "unless" then parse_if_expression(keyword)
  when "while", "until", "loop" then parse_while_expression(keyword)
  when "not" then { not: { operator: keyword, right: parse_expression(25) } }
  when "begin"
    statements = parse_code(stop_keywords: ["end"])
    # The closing "end" is consumed when present but not required.
    advance if match?(:keyword, "end")
    { group: statements }
  else
    { call: { name: keyword } }
  end
end
230
+
231
# Parses a prefix operator expression; +token+ is the consumed operator.
# Raises a parse error for operators that have no prefix form.
def nud_operator(token)
  symbol = token.value

  case symbol
  when "&" then { splat: { operator: symbol, right: parse_expression(5) } }
  when "!", "~", "+" then wrap_prefixed_expression(:negation, symbol, parse_expression(145))
  when "-" then wrap_prefixed_expression(:unary_minus, symbol, parse_expression(159))
  else raise_parse_error("unexpected operator #{symbol.inspect}", token)
  end
end
243
+
244
# Parses an expression opened by the consumed punctuation +token+:
# "(" group or function, "[" list, "{" dictionary, ":" symbol literal.
# Any other punctuation raises a parse error.
def nud_punctuation(token)
  case token.value
  when "(" then parse_group_or_function
  when "[" then parse_list
  when "{" then parse_dictionary
  when ":" then parse_symbol_literal
  else raise_parse_error("unexpected punctuation #{token.value.inspect}", token)
  end
end
258
+
259
# Applies the postfix construct at the current token to +left+:
# a chained access (".", "::", "&."), an index "[...]", call arguments
# "(...)", or a block ("{ ... }" or "do ... end").
def led_postfix(left)
  case current.value
  when ".", "::", "&."
    operator = advance.value
    append_left_operation(left, operator, parse_expression(151))
  when "["
    advance
    { square_bracket: { left: left, statements: parse_square_bracket_statements } }
  when "("
    attach_call_arguments(left, parse_call_arguments)
  when "{"
    attach_call_block(left, parse_block("{"))
  else
    attach_call_block(left, parse_block("do"))
  end
end

# Parses the comma-separated statements of an index expression, up to and
# including the closing "]". The opening "[" must already be consumed.
def parse_square_bracket_statements
  statements = []
  skip_newlines

  unless match?(:punctuation, "]")
    loop do
      statements << parse_expression
      skip_newlines
      break unless match?(:punctuation, ",")

      advance
      skip_newlines
    end
  end

  expect(:punctuation, "]")
  statements
end
290
+
291
# Builds the node for infix +operator+ (already consumed) applied to +left+.
# +right_bp+ is the minimum binding power used for the right-hand side.
#
# Assignments, modifier keywords and "**" produce a :right_operation,
# "?" produces a :ternary, everything else is folded into a :left_operation.
def led_infix(left, operator, right_bp)
  # Every infix form allows the right-hand side to start on a later line.
  skip_newlines

  case operator
  when "?"
    middle = parse_expression
    right = nil
    if match?(:punctuation, ":")
      advance
      skip_newlines
      right = parse_expression(right_bp)
    end
    { ternary: { left: left, middle: middle, right: right } }
  when "=", "+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "|=",
       "^=", "||=", "&&=",
       "if", "unless", "while", "until", "rescue",
       "**"
    { right_operation: { left: left, operator: operator, right: parse_expression(right_bp) } }
  else
    append_left_operation(left, operator, parse_expression(right_bp))
  end
end
321
+
322
# Parses an if/unless expression whose opening keyword (+operator+) has
# already been consumed, including any elsif/elsunless/else branches.
# The closing "end" is consumed when present.
def parse_if_expression(operator)
  branch_stops = %w[elsif elsunless else end]

  # Parses "<keyword> <condition> <body>" starting at the current keyword.
  conditional_branch = lambda do
    branch_operator = advance.value
    skip_newlines
    branch_statement = parse_expression
    branch_body = parse_body(branch_stops)
    { operator: branch_operator, statement: branch_statement, body: branch_body }
  end

  skip_newlines
  statement = parse_expression
  body = parse_body(branch_stops)
  elses = []

  loop do
    skip_newlines

    if match?(:keyword, "elsif") || match?(:keyword, "elsunless")
      elses << conditional_branch.call
    else
      if match?(:keyword, "else")
        advance
        skip_newlines

        # "else if" / "else unless" is read as a conditional branch; either
        # way an "else" is the last branch that gets parsed.
        final_branch =
          if match?(:keyword, "if") || match?(:keyword, "unless")
            conditional_branch.call
          else
            { operator: "else", body: parse_body(%w[end]) }
          end
        elses << final_branch
      end

      break
    end
  end

  advance if match?(:keyword, "end")

  {
    if: {
      first_operator: operator,
      first_statement: statement,
      first_body: body,
      elses: elses
    }
  }
end
371
+
372
# Parses a while/until/loop expression whose keyword (+operator+) has been
# consumed. "loop" has no condition, so its :statement key is omitted.
# The closing "end" is consumed when present.
def parse_while_expression(operator)
  skip_newlines

  condition = operator == "loop" ? nil : parse_expression
  body = parse_body(%w[end])
  advance if match?(:keyword, "end")

  { while: { operator: operator, statement: condition, body: body }.compact }
end
387
+
388
# Parses what follows a consumed "(": a function literal
# "(params) => body" when a parameter list followed by "=>" can be read,
# otherwise a parenthesised group of statements.
def parse_group_or_function
  start = @index
  parameters = try_parse_parameters(")")

  if parameters.nil? || !match?(:operator, "=>")
    # Not a function: rewind and re-read the contents as a group.
    @index = start
    statements = parse_code(stop_values: [")"])
    expect(:punctuation, ")")
    return { group: statements }
  end

  advance
  { function: { parameters: parameters, body: parse_lambda_body } }
end
403
+
404
# Parses the body after a function's "=>": a "{ ... }" block, a
# "do"/"begin" ... "end" block (the "end" is optional), or a single
# expression. Always returns an array of statements.
def parse_lambda_body
  return parse_delimited_code("{", "}") if match?(:punctuation, "{")
  return [parse_expression] unless match?(:keyword, "do") || match?(:keyword, "begin")

  advance
  statements = parse_code(stop_keywords: ["end"])
  advance if match?(:keyword, "end")
  statements
end
414
+
415
# Parses a list literal after its consumed "[". Each element is a code
# sequence; a trailing comma is allowed; the closing "]" is required.
def parse_list
  skip_newlines
  elements = []

  until match?(:punctuation, "]")
    elements << parse_code(stop_values: [",", "]"])
    break unless match?(:punctuation, ",")

    advance
    skip_newlines
  end

  expect(:punctuation, "]")
  { list: elements }
end
433
+
434
# Parses a dictionary literal after its consumed "{". A trailing comma is
# allowed and the closing "}" is consumed when present (it is not required).
# The result key is spelled :dictionnary, as in the original code.
def parse_dictionary
  skip_newlines
  entries = []

  until match?(:punctuation, "}")
    entries << parse_dictionary_entry
    skip_newlines
    break unless match?(:punctuation, ",")

    advance
    skip_newlines
  end

  advance if match?(:punctuation, "}")
  { dictionnary: entries }
end
453
+
454
# Parses one dictionary entry. Three shapes are recognised:
#   name: code          -> { name_code: ... }
#   statement: code     -> { statement_code: ... }   (also "statement => code")
#   statement           -> { code: [statement] }
# The :code key is dropped when nothing follows the separator.
def parse_dictionary_entry
  if label_name_start?(current) && next_token_value == ":"
    name = advance.value
    advance
    code = parse_optional_code([",", "}"])
    return { name_code: { name: name, code: code }.compact }
  end

  statement = parse_expression
  separated = match?(:punctuation, ":") || match?(:operator, "=>")
  return { code: wrap_code(statement) } unless separated

  advance
  code = parse_optional_code([",", "}"])
  { statement_code: { statement: statement, code: code }.compact }
end
476
+
477
# Parses the name after a consumed ":" into a one-part string literal.
# Raises a parse error when the next token is not an identifier or keyword.
def parse_symbol_literal
  raise_parse_error("expected symbol name", current) unless label_name_start?(current)

  { string: [{ text: advance.value }] }
end
485
+
486
# Parses the body of a control structure.
# "{ ... }" is read as a delimited block; after an explicit "do"/"begin"
# the body runs until "end"; otherwise it runs until one of +stop_keywords+.
# The terminating keyword is left for the caller.
def parse_body(stop_keywords)
  if match?(:punctuation, "{")
    parse_delimited_code("{", "}")
  else
    explicit_opener = match?(:keyword, "do") || match?(:keyword, "begin")
    advance if explicit_opener
    parse_code(stop_keywords: explicit_opener ? ["end"] : stop_keywords)
  end
end
496
+
497
# Parses statements enclosed by the punctuation pair +opening+/+closing+.
# Both delimiters are required; returns the statements between them.
def parse_delimited_code(opening, closing)
  expect(:punctuation, opening)
  statements = parse_code(stop_values: [closing])
  expect(:punctuation, closing)
  statements
end
501
+
502
# Parses a parenthesised, comma-separated argument list, including both
# parentheses. A trailing comma is allowed. Returns the argument hashes.
def parse_call_arguments
  expect(:punctuation, "(")
  skip_newlines
  arguments = []

  until match?(:punctuation, ")")
    arguments << parse_argument
    skip_newlines
    break unless match?(:punctuation, ",")

    advance
    skip_newlines
  end

  expect(:punctuation, ")")
  arguments
end
522
+
523
# Parses one call argument: either "name: code" (keyword argument) or
# plain code. The value runs until "," or ")".
def parse_argument
  argument = {}

  if label_name_start?(current) && next_token_value == ":"
    argument[:name] = advance.value
    advance # skip the ":"
  end

  argument[:value] = parse_code(stop_values: [",", ")"])
  argument
end
532
+
533
# Parses a call block. +kind+ "{" selects the "{ |params| ... }" form;
# anything else selects "do |params| ... end". Both delimiters are required.
# Returns { parameters:, body: } with parameters defaulting to [].
def parse_block(kind)
  braces = kind == "{"

  braces ? expect(:punctuation, "{") : expect(:keyword, "do")
  parameters = parse_pipe_parameters
  body = braces ? parse_code(stop_values: ["}"]) : parse_code(stop_keywords: ["end"])
  braces ? expect(:punctuation, "}") : expect(:keyword, "end")

  { parameters: parameters || [], body: body }
end
548
+
549
# Parses block parameters written between pipes ("|a, b|").
# Returns nil when the current token is not "|"; otherwise the parameter
# list. The closing "|" is required.
def parse_pipe_parameters
  return unless match?(:operator, "|")

  advance
  skip_newlines
  parameters = []

  loop do
    break if match?(:operator, "|")

    parameters << parse_parameter
    skip_newlines
    break unless match?(:punctuation, ",")

    advance
    skip_newlines
  end

  expect(:operator, "|")
  parameters
end
568
+
569
# Speculatively parses a parameter list terminated by the punctuation
# +closing+. Returns the parameters on success, or nil when the tokens do
# not form a parameter list (end of input or a parser Error). The caller is
# responsible for rewinding the cursor after a nil result.
def try_parse_parameters(closing)
  skip_newlines
  parameters = []

  loop do
    break if match?(:punctuation, closing)
    return nil if eof?

    parameters << parse_parameter
    skip_newlines
    break unless match?(:punctuation, ",")

    advance
    skip_newlines
  end

  expect(:punctuation, closing)
  parameters
rescue Error
  nil
end
590
+
591
# Parses one function or block parameter.
#
# "name: default" yields a keyword parameter (any prefix read before it is
# not recorded). Otherwise an optional prefix (*, **, &, ., .., ...) sets
# the matching flag, followed by an optional name and "= default".
# Keys without a value are omitted from the result.
def parse_parameter
  prefix = parse_parameter_prefix

  if label_name_start?(current) && next_token_value == ":"
    name = advance.value
    advance
    default = parse_optional_code([",", ")", "|"])
    return { name: name, keyword: ":", default: default }.compact
  end

  parameter = {}
  parameter[:name] = advance.value if current.type == :identifier || keyword_name?(current)

  flag = {
    "*" => :regular_splat,
    "**" => :keyword_splat,
    "&" => :block,
    "." => :spread,
    ".." => :spread,
    "..." => :spread
  }[prefix]
  parameter[flag] = true if flag

  if match?(:operator, "=")
    advance
    parameter[:default] = parse_code(stop_values: [",", ")", "|"])
  end

  parameter
end
617
+
618
# Consumes and returns a parameter prefix operator (*, **, &, .., ..., .)
# when the current token is one; otherwise returns nil without consuming.
def parse_parameter_prefix
  token = current
  advance.value if token.type == :operator && %w[* ** & .. ... .].include?(token.value)
end
623
+
624
# Parses code up to one of +stop_values+, or returns nil when, after
# skipping newlines, the current token's value is already a stop value.
def parse_optional_code(stop_values)
  skip_newlines
  stop_values.include?(current.value) ? nil : parse_code(stop_values: stop_values)
end
630
+
631
# Returns a copy of +left+ whose terminal call carries +arguments+.
def attach_call_arguments(left, arguments)
  update_terminal_call(left) { |call| call[:arguments] = arguments }
end
636
+
637
# Returns a copy of +left+ whose terminal call carries +block+.
def attach_call_block(left, block)
  update_terminal_call(left) { |call| call[:block] = block }
end
642
+
643
# Yields a copy of the call that a trailing "(...)" or block applies to and
# returns a copy of +left+ with that call replaced by the modified copy.
#
# For a plain call that is the call itself; for a :left_operation it is the
# statement of the last link, or the first operand when there are no links.
# Raises Error when +left+ is neither (KeyError if the target is not a call).
def update_terminal_call(left)
  if left.key?(:call)
    updated = left[:call].deep_dup
    yield(updated)
    return { call: updated }
  end

  raise Error, "call arguments require a callable expression" unless left.key?(:left_operation)

  copy = left.deep_dup
  operation = copy[:left_operation]
  tail = operation[:others].last
  holder, slot = tail ? [tail, :statement] : [operation, :first]

  updated = holder[slot].fetch(:call).deep_dup
  yield(updated)
  holder[slot] = { call: updated }

  copy
end
667
+
668
# Combines +left+ and +statement+ with +operator+ into a :left_operation.
#
# When +left+ is already a :left_operation whose last link belongs to the
# same operator group, the new operation is nested into that last link's
# statement (on a copy); otherwise a fresh :left_operation wraps +left+.
def append_left_operation(left, operator, statement)
  extendable = left.key?(:left_operation) &&
    same_left_operation_group?(left[:left_operation], operator)

  unless extendable
    return {
      left_operation: {
        first: left,
        others: [{ operator: operator, statement: statement }]
      }
    }
  end

  copy = left.deep_dup
  tail = copy[:left_operation][:others].last
  tail[:statement] = append_left_operation(tail[:statement], operator, statement)
  copy
end
684
+
685
# True when the last link of +left_operation+ uses an operator from the
# same group as +operator+. False when there are no links.
def same_left_operation_group?(left_operation, operator)
  tail = (left_operation[:others] || []).last
  return false if tail.nil?

  operator_group(tail[:operator]) == operator_group(operator)
end
691
+
692
# Operator => group lookup used by #operator_group.
OPERATOR_GROUPS = {
  chain: %w[. :: &.],
  keyword_logic: %w[or and],
  or_operator: %w[||],
  and_operator: %w[&&],
  equality: %w[== === != !== <=> =~ ~= !~],
  comparison: %w[> < >= <=],
  bitwise_or: %w[| ^],
  bitwise_and: %w[&],
  shift: %w[<< >>],
  addition: %w[+ -],
  multiplication: %w[* / % × ÷],
  range: %w[.. ...]
}.each_with_object({}) do |(group, operators), lookup|
  operators.each { |operator| lookup[operator] = group }
end.freeze

# Returns the group symbol that +operator+ belongs to, or the operator
# itself when it belongs to no group.
def operator_group(operator)
  OPERATOR_GROUPS.fetch(operator, operator)
end
710
+
711
# Builds the node for prefix +operator+ applied to +right+ under key +type+.
#
# When +right+ is a :left_operation the prefix is applied to its first
# operand only, and the remaining links are kept on the outside.
def wrap_prefixed_expression(type, operator, right)
  unless right.is_a?(Hash) && right.key?(:left_operation)
    return { type => { operator: operator, right: right } }
  end

  operation = right[:left_operation].deep_dup
  prefixed_first = { type => { operator: operator, right: operation[:first] } }

  { left_operation: { first: prefixed_first, others: operation[:others] } }
end
723
+
724
# Returns +statement+ unchanged when it is already an Array, otherwise
# wraps it in a one-element Array.
def wrap_code(statement)
  return statement if statement.is_a?(Array)

  [statement]
end
727
+
728
# Converts a lexed string part into its raw form: text parts become
# { text: ... }; any other part has its value parsed as code and becomes
# { code: <raw> }.
def string_part_to_raw(part)
  return { text: part[:value] } if part[:type] == :text

  { code: self.class.parse(part[:value]).to_raw }
end
735
+
736
# True when the current token ends the enclosing code sequence: a keyword
# token listed in +stop_keywords+, or any token whose value is listed in
# +stop_values+.
def stop?(stop_keywords, stop_values)
  token = current
  return true if token.type == :keyword && stop_keywords.include?(token.value)

  stop_values.include?(token.value)
end
740
+
741
# True when +token+ can begin a postfix construct: "(", "[", "{",
# a chain operator (".", "::", "&.") or the "do" keyword.
def postfix_start?(token)
  case token.type
  when :punctuation then ["(", "[", "{"].include?(token.value)
  when :operator then [".", "::", "&."].include?(token.value)
  when :keyword then token.value == "do"
  else false
  end
end
746
+
747
# True when +token+ starts a postfix that only makes sense on a call:
# an argument list "(", a brace block "{", or a "do" block.
def call_like_postfix?(token)
  case token.type
  when :punctuation then ["(", "{"].include?(token.value)
  when :keyword then token.value == "do"
  else false
  end
end
751
+
752
# Returns the infix operator text that +token+ represents, or nil when the
# token is not an infix operator. Operators and keywords qualify when they
# have an entry in INFIX_PRECEDENCE; the only punctuation that qualifies
# is "?".
def infix_operator(token)
  case token.type
  when :operator, :keyword
    token.value if INFIX_PRECEDENCE.key?(token.value)
  when :punctuation
    token.value if token.value == "?"
  end
end
757
+
758
# True when +token+ may serve as a label or symbol name, i.e. it is an
# identifier or a keyword.
def label_name_start?(token)
  %i[identifier keyword].include?(token.type)
end
761
+
762
# True when +token+ is a keyword that is allowed as a parameter name,
# i.e. a keyword token whose value is not in the reserved list below.
# NOTE(review): the list appears to contain every entry of KEYWORDS, so this
# looks like it is always false for tokens the lexer produces — confirm.
def keyword_name?(token)
  return false unless token.type == :keyword

  reserved = %w[if unless while until loop not rescue or and do begin else elsif elsunless end true false nothing]
  !reserved.include?(token.value)
end
765
+
766
# True when arguments or a block can be attached to +expression+: it is a
# call, or a :left_operation whose terminal operand (the last link's
# statement, or the first operand when there are no links) is a call.
def callable_expression?(expression)
  return true if expression.key?(:call)
  return false unless expression.key?(:left_operation)

  operation = expression[:left_operation]
  last_link = operation[:others]&.last
  candidate = last_link ? last_link[:statement] : operation[:first]

  candidate.is_a?(Hash) && candidate.key?(:call)
end
774
+
775
# Returns the token under the cursor. Raises IndexError when the cursor
# has moved past the last token.
def current = tokens.fetch(@index)
778
+
779
# Returns the value of the token right after the current one, or nil when
# there is no such token.
def next_token_value
  following = tokens[@index + 1]
  following&.value
end
782
+
783
# Returns the current token and moves the cursor to the next one.
def advance
  current.tap { @index += 1 }
end
788
+
789
# True when the current token has the given +type+ and, if +value+ is
# given, that exact value. Does not consume the token.
def match?(type, value = nil)
  token = current
  return false unless token.type == type

  value.nil? || token.value == value
end
793
+
794
# Consumes and returns the current token when it has the given +type+
# (and +value+, if given); otherwise raises a parse error naming what was
# expected.
def expect(type, value = nil)
  token = current
  matched = token.type == type && (value.nil? || token.value == value)
  raise_parse_error("expected #{(value || type).inspect}", token) unless matched

  advance
end
801
+
802
# True when the cursor is on the end-of-input token.
def eof? = current.type == :eof
805
+
806
# Moves the cursor past any run of newline tokens.
def skip_newlines
  advance until current.type != :newline
end
809
+
810
# Returns the first token at or after the cursor that is not a newline,
# without moving the cursor. Falls back to a synthetic :eof token when the
# scan runs off the end of the token list.
def next_significant_token
  lookahead = @index
  lookahead += 1 while tokens[lookahead]&.type == :newline

  tokens.fetch(lookahead) do
    Token.new(type: :eof, value: nil, position: input.length, newline_before: false, space_before: false)
  end
end
815
+
816
# True when +token+, found at the start of a following line, continues the
# expression from the previous line: an infix operator, a chain operator
# (".", "::", "&.") or the ternary "?". Keywords never continue a line.
def continuation_after_newline?(token)
  case token.type
  when :operator
    INFIX_PRECEDENCE.key?(token.value) || [".", "::", "&."].include?(token.value)
  when :punctuation
    token.value == "?"
  else
    false
  end
end
823
+
824
# True when +token+ is a chain operator (".", "::", "&."), the only postfix
# that may follow a line break.
def newline_postfix_continuation?(token)
  return false unless token.type == :operator

  [".", "::", "&."].include?(token.value)
end
827
+
828
# Consumes the newline tokens that separate statements; same behaviour as
# #skip_newlines.
def consume_newlines = skip_newlines
831
+
832
# Raises Error with +message+ followed by the position of +token+
# (the current token by default).
def raise_parse_error(message, token = current)
  location = token.position
  raise Error, "#{message} at #{location}"
end
835
+
836
# Splits +source+ into an array of Token values, always ending with an
# :eof token.
#
# Spaces, tabs and comments ("#", "//", "/* */") produce no tokens; they only
# set the space_before / newline_before flags of the next token. Each line
# break produces a :newline token. Raises Error on a character that cannot
# start any token.
def lex(source)
  tokens = []
  index = 0
  newline_before = false
  space_before = false

  # Appends a token carrying the pending whitespace flags, then clears them.
  emit = lambda do |type, value, position|
    tokens << Token.new(
      type: type,
      value: value,
      position: position,
      newline_before: newline_before,
      space_before: space_before
    )
    newline_before = false
    space_before = false
  end

  while index < source.length
    char = source[index]

    if char == " " || char == "\t"
      index += 1
      space_before = true
      next
    end

    if char == "\n" || char == "\r"
      # "\r\n" counts as a single line break.
      index += char == "\r" && source[index + 1] == "\n" ? 2 : 1

      tokens << Token.new(type: :newline, value: "\n", position: index - 1, newline_before: false, space_before: false)
      newline_before = true
      space_before = false
      next
    end

    # Line comments run up to (not including) the end of the line.
    if char == "#" || source[index, 2] == "//"
      index += 1 while index < source.length && !["\n", "\r"].include?(source[index])
      space_before = true unless newline_before
      next
    end

    # Block comments; an unterminated one runs to the end of the input.
    if source[index, 2] == "/*"
      index += 2
      while index < source.length && source[index, 2] != "*/"
        newline_before ||= ["\n", "\r"].include?(source[index])
        index += 1
      end
      index += 2 if source[index, 2] == "*/"
      space_before = true unless newline_before
      next
    end

    if (string_data = scan_string(source, index))
      emit.call(:string, string_data[:parts], index)
      index = string_data[:index]
      next
    end

    if (symbol_data = scan_symbol(source, index))
      emit.call(:symbol, symbol_data[:value], index)
      index = symbol_data[:index]
      next
    end

    if (number_data = scan_number(source, index))
      emit.call(:number, number_data[:raw], index)
      index = number_data[:index]
      next
    end

    # MULTI_CHAR_OPERATORS is ordered longest first, so the first hit is
    # the longest operator starting here.
    operator = MULTI_CHAR_OPERATORS.find { |candidate| source[index, candidate.length] == candidate }
    if operator
      emit.call(:operator, operator, index)
      index += operator.length
      next
    end

    if %w[( ) [ ] { } , ? :].include?(char)
      emit.call(:punctuation, char, index)
      index += 1
      next
    end

    if %w[. & | = ! ~ + - * / % < > ^ × ÷].include?(char)
      emit.call(:operator, char, index)
      index += 1
      next
    end

    identifier = scan_identifier(source, index)
    if identifier
      emit.call(KEYWORDS.include?(identifier) ? :keyword : :identifier, identifier, index)
      index += identifier.length
      next
    end

    raise Error, "unexpected character #{char.inspect} at #{index}"
  end

  emit.call(:eof, nil, source.length)
  tokens
end
955
+
956
# Scans a quoted string starting at +index+.
#
# Returns nil when the character at +index+ is not ' or ". Otherwise returns
# { parts:, index: } where parts is a list of { type: :text | :code, value: }
# and index is the position just after the string. "{...}" inside the string
# becomes a :code part. A backslash keeps the following character; the
# backslash itself is dropped only before the quote character or "{".
# An unterminated string ends at the end of the input.
def scan_string(source, index)
  quote = source[index]
  return unless quote == "'" || quote == '"'

  parts = []
  buffer = +""

  # Moves the pending text, if any, into parts and starts a fresh buffer.
  flush = lambda do
    parts << { type: :text, value: buffer } unless buffer.empty?
    buffer = +""
  end

  cursor = index + 1

  while cursor < source.length
    char = source[cursor]

    case char
    when "\\"
      cursor += 1
      break if cursor >= source.length

      escaped = source[cursor]
      buffer << "\\" unless escaped == quote || escaped == "{"
      buffer << escaped
      cursor += 1
    when quote
      flush.call
      return { parts: parts, index: cursor + 1 }
    when "{"
      flush.call
      code, cursor = extract_braced(source, cursor)
      parts << { type: :code, value: code }
    else
      buffer << char
      cursor += 1
    end
  end

  flush.call
  { parts: parts, index: cursor }
end
1001
+
1002
# Extracts the text between the "{" at +index+ and its matching "}",
# counting nested braces. Returns [text, position after the closing brace];
# when the brace is never closed, returns the rest of the input and its length.
def extract_braced(source, index)
  depth = 1
  cursor = index + 1
  body = +""

  while cursor < source.length
    char = source[cursor]
    depth += 1 if char == "{"
    depth -= 1 if char == "}"
    # Depth only reaches zero on the brace that closes the opening one.
    return [body, cursor + 1] if depth.zero?

    body << char
    cursor += 1
  end

  [body, cursor]
end
1028
+
1029
# Scans a number literal starting exactly at +index+.
#
# Recognises, in order: hexadecimal (0x), octal (0o), binary (0b), decimal
# with a fractional part, and base-10 integers; the last two may carry an
# "e"/"E" exponent. Underscores between digits are removed.
# Returns { raw:, index: } with the raw number hash and the position after
# the literal, or nil when no number starts at +index+.
#
# Matching uses "\G" with Regexp#match(source, index) so the pattern is
# anchored at +index+ without copying the rest of the source on every call.
def scan_number(source, index)
  prefixed = {
    base_16: /\G0[xX][0-9a-fA-F](?:_?[0-9a-fA-F])*/,
    base_8: /\G0[oO][0-7](?:_?[0-7])*/,
    base_2: /\G0[bB][01](?:_?[01])*/
  }

  prefixed.each do |base, pattern|
    match = pattern.match(source, index)
    next unless match

    # Drop the two-character prefix ("0x", "0o", "0b") and the separators.
    return { raw: { number: { base => match[0][2..].delete("_") } }, index: index + match[0].length }
  end

  if (match = /\G[0-9](?:_?[0-9])*\.[0-9](?:_?[0-9])*(?:[eE][0-9](?:_?[0-9])*(?:\.[0-9](?:_?[0-9])*)?)?/.match(source, index))
    decimal, exponent = match[0].split(/[eE]/, 2)
    raw = { decimal: decimal.delete("_") }
    raw[:exponent] = exponent_to_raw(exponent) if exponent
    return { raw: { number: { decimal: raw } }, index: index + match[0].length }
  end

  if (match = /\G[0-9](?:_?[0-9])*(?:[eE][0-9](?:_?[0-9])*(?:\.[0-9](?:_?[0-9])*)?)?/.match(source, index))
    whole, exponent = match[0].split(/[eE]/, 2)
    raw = { whole: whole.delete("_") }
    raw[:exponent] = exponent_to_raw(exponent) if exponent
    return { raw: { number: { base_10: raw } }, index: index + match[0].length }
  end

  nil
end
1059
+
1060
# Converts the exponent text of a number literal into a raw number hash:
# a decimal when it contains ".", a base-10 integer otherwise.
# Underscores are removed.
def exponent_to_raw(exponent)
  digits = exponent.delete("_")
  number = digits.include?(".") ? { decimal: { decimal: digits } } : { base_10: { whole: digits } }

  { number: number }
end
1066
+
1067
# Scans a ":name" symbol literal starting at +index+.
#
# Returns nil unless the ":" is a single colon in a position where a symbol
# may start and is directly followed by a name character. The name runs up
# to whitespace or a delimiter; a "!" or "?" is included and ends the name.
# Returns { value:, index: } with the name and the position after it.
def scan_symbol(source, index)
  return unless source[index] == ":" && source[index, 2] != "::"
  return unless symbol_start_context?(source, index)

  first = source[index + 1]
  return if first.nil? || first =~ /\s/ || %w[( ) [ ] { } ,].include?(first)

  name = +""
  cursor = index + 1

  while cursor < source.length
    char = source[cursor]
    break if char =~ /\s/ || "()[]{}.,:&|=~*/%<>^#".include?(char)

    name << char
    cursor += 1
    break if char == "!" || char == "?"
  end

  return if name.empty?

  { value: name, index: cursor }
end
1098
+
1099
# Truthy when a ":" at +index+ may start a symbol literal: it is the first
# character of the source, or the preceding character is whitespace or one
# of the opening/operator characters listed below.
def symbol_start_context?(source, index)
  return true if index.zero?

  preceding = source[index - 1]
  whitespace_at = preceding =~ /\s/
  return whitespace_at if whitespace_at

  "([{,=:+-!*/%<>?&|^\n\r".include?(preceding)
end
1105
+
1106
# Scans an identifier starting at +index+ and returns it, or nil when no
# identifier character is found there.
#
# The identifier ends at whitespace or a delimiter/operator character.
# A trailing "!" or "?" is part of the identifier unless it is the first
# character or is followed by "=" or "~" (so "a!=b" and "a!~b" still lex
# as operators).
def scan_identifier(source, index)
  name = +""
  cursor = index

  while (char = source[cursor])
    break if char =~ /\s/

    if char == "!" || char == "?"
      follower = source[cursor + 1]
      name << char unless name.empty? || follower == "=" || follower == "~"
      break
    end

    break if "()[]{}.,:&|=~+-*/%<>^#".include?(char)

    name << char
    cursor += 1
  end

  name.empty? ? nil : name
end
25
1132
  end
26
1133
  end