igniter_lang 0.1.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1736 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "json"
5
+
6
+ # =============================================================================
7
+ # Igniter-Lang Minimal Parser
8
+ # Implements PROP-014 / PROP-015 grammar kernel.
9
+ # Outputs ParsedProgram JSON for acceptance testing.
10
+ #
11
+ # Grammar (subset):
12
+ # SourceFile := ModuleDecl? ImportDecl* TopDecl*
13
+ # TopDecl := ContractDecl | TypeDecl | FunctionDecl | OLAPPointDecl
14
+ # | AssumptionsDecl
15
+ # | TraitDecl | ImplDecl | ContractShapeDecl
16
+ # ContractDecl := "contract" Name TypeParams? Implements? "{" BodyDecl* "}"
17
+ # BodyDecl := EscapeDecl | InputDecl | ReadDecl | ComputeDecl
18
+ # | SnapshotDecl | WindowDecl | OutputDecl
19
+ # FunctionDecl := "def" Name "(" Params? ")" "->" TypeRef "{" Body "}"
20
+ # TypeDecl := "type" Name "{" FieldDecl* "}"
21
+ # Expr := Literal | Ref | BinOp | Call | FieldAccess
22
+ # | IfExpr | BlockExpr | Lambda | ArrayLit | RecordLit
23
+ # =============================================================================
24
+
25
+ module IgniterLang
26
+ # ---------------------------------------------------------------------------
27
+ # Token types
28
+ # ---------------------------------------------------------------------------
29
# Every token type the lexer can emit. `at` is listed because the lexer
# produces an :at token for the "@" character.
TOKEN_TYPES = %i[
  keyword ident string_lit int_lit float_lit bool_lit nil_lit
  symbol_lit lbrace rbrace lparen rparen lbracket rbracket
  dot dot_dot comma colon double_colon dot_dot_dot arrow fat_arrow
  op assign pipe question bang at
  newline eof comment
].freeze

# One lexed token: its type, its value, and the 1-based line/column at which
# it starts.
Token = Struct.new(:type, :value, :line, :col)

# ---------------------------------------------------------------------------
# Lexer
# ---------------------------------------------------------------------------
# Reserved words. An identifier whose text appears here is lexed as :keyword,
# except `true`/`false` (lexed as :bool_lit) and `nil` (lexed as :nil_lit).
KEYWORDS = %w[
  module import contract contract_shape type def trait impl
  input output compute read snapshot window escape
  stream fold_stream
  assumptions assumption uses
  olap_point
  invariant predicate severity label message overridable_with
  from lifecycle using implements
  pipeline step scoped_by cardinality schema_version tenant_free
  if else let
  true false nil
  and or not
].freeze
55
+
56
# Turns Igniter-Lang source text into a flat array of Token structs.
#
# Whitespace and `--` line comments are skipped. A character that matches no
# lexical rule is consumed and dropped without producing a token. The token
# list always ends with a single :eof token.
class Lexer
  # Two-character lexemes; tried before the single-character table.
  DOUBLE_CHAR_TOKENS = {
    "->" => :arrow, "++" => :op, "==" => :op, "!=" => :op,
    "<=" => :op, ">=" => :op, "&&" => :op, "||" => :op,
    ".." => :dot_dot
  }.freeze

  # Single-character lexemes.
  SINGLE_CHAR_TOKENS = {
    "-" => :op, "+" => :op, "*" => :op, "/" => :op,
    "<" => :op, ">" => :op, "&" => :op,
    "=" => :assign, "!" => :bang, "|" => :pipe,
    "{" => :lbrace, "}" => :rbrace,
    "(" => :lparen, ")" => :rparen,
    "[" => :lbracket, "]" => :rbracket,
    "." => :dot, "," => :comma, "@" => :at
  }.freeze

  WHITESPACE  = /[ \t\r\n]/
  DIGIT       = /[0-9]/
  IDENT_START = /[a-zA-Z_]/
  IDENT_PART  = /[a-zA-Z0-9_]/
  PATH_PART   = /[a-zA-Z0-9_.]/
  UPPERCASE   = /[A-Z]/

  private_constant :DOUBLE_CHAR_TOKENS, :SINGLE_CHAR_TOKENS, :WHITESPACE,
                   :DIGIT, :IDENT_START, :IDENT_PART, :PATH_PART, :UPPERCASE

  # @param source [String] the program text to tokenize
  def initialize(source)
    @source = source
    @pos = 0
    @line = 1
    @col = 1
    @tokens = []
  end

  # Lexes the whole source.
  #
  # @return [Array<Token>] all tokens in source order, terminated by :eof
  def tokenize
    until @pos >= @source.length
      skip_whitespace_and_comments
      break if @pos >= @source.length

      tok = next_token
      @tokens << tok if tok && tok.type != :comment
    end
    @tokens << Token.new(:eof, nil, @line, @col)
    @tokens
  end

  private

  # Character at the cursor plus +offset+, or nil past the end of input.
  def peek(offset = 0)
    @source[@pos + offset]
  end

  # Consumes one character, keeping the line/column counters in step.
  def advance
    ch = @source[@pos]
    @pos += 1
    if ch == "\n"
      @line += 1
      @col = 1
    else
      @col += 1
    end
    ch
  end

  # Consumes characters while they match +pattern+ and returns them.
  def take_while(pattern)
    buf = +""
    buf << advance while @pos < @source.length && peek.match?(pattern)
    buf
  end

  # Skips any run of whitespace and `--` comments (a comment runs up to, but
  # not including, the next newline).
  def skip_whitespace_and_comments
    loop do
      advance while @pos < @source.length && peek.match?(WHITESPACE)
      break unless peek == "-" && peek(1) == "-"

      advance while @pos < @source.length && peek != "\n"
    end
  end

  # Lexes one token starting at the cursor. Returns nil when the character at
  # the cursor matches no rule (the character is still consumed).
  def next_token
    line = @line
    col = @col
    ch = peek

    return read_string(line, col) if ch == '"'
    return read_number(line, col) if ch.match?(DIGIT)
    return read_symbol_or_colon(line, col) if ch == ":"

    pair = @source[@pos, 2]
    if (type = DOUBLE_CHAR_TOKENS[pair])
      2.times { advance }
      return Token.new(type, pair.freeze, line, col)
    end

    if (type = SINGLE_CHAR_TOKENS[ch])
      advance
      return Token.new(type, ch.freeze, line, col)
    end

    return read_ident_or_keyword(line, col) if ch.match?(IDENT_START)

    advance # unrecognised character: consume it and emit nothing
    nil
  end

  # Reads a double-quoted string. There are no escape sequences: the literal
  # ends at the next '"'. An unterminated literal runs to the end of input.
  def read_string(line, col)
    advance # opening quote
    buf = +""
    buf << advance until @pos >= @source.length || peek == '"'
    # Only consume a closing quote that is really there, so an unterminated
    # literal does not push the cursor (and the EOF column) past the input.
    advance if peek == '"'
    Token.new(:string_lit, buf, line, col)
  end

  # Reads an integer, or a float when the digits are followed by "." and
  # another digit (so `1..5` lexes as int, dot_dot, int).
  def read_number(line, col)
    digits = take_while(DIGIT)
    return Token.new(:int_lit, digits.to_i, line, col) unless peek == "." && peek(1)&.match?(DIGIT)

    digits << advance # the decimal point
    digits << take_while(DIGIT)
    Token.new(:float_lit, digits.to_f, line, col)
  end

  # Reads `:name` as a symbol literal (value excludes the colon), or a bare
  # ":" as a :colon token when no identifier character follows immediately.
  def read_symbol_or_colon(line, col)
    advance # the ':'
    return Token.new(:colon, ":", line, col) unless peek&.match?(IDENT_START)

    Token.new(:symbol_lit, take_while(IDENT_PART), line, col)
  end

  # Reads an identifier, keyword, boolean or nil literal. A "." is absorbed
  # into the identifier only when the next character is an uppercase ASCII
  # letter (e.g. `Foo.Bar`); otherwise the identifier ends before the dot.
  def read_ident_or_keyword(line, col)
    buf = +""
    while @pos < @source.length && peek.match?(PATH_PART)
      break if peek == "." && !peek(1)&.match?(UPPERCASE)

      buf << advance
    end

    type =
      if buf == "nil" then :nil_lit
      elsif %w[true false].include?(buf) then :bool_lit
      elsif KEYWORDS.include?(buf) then :keyword
      else :ident
      end
    Token.new(type, buf, line, col)
  end
end
248
+
249
+ # ---------------------------------------------------------------------------
250
+ # Parser — recursive descent
251
+ # ---------------------------------------------------------------------------
252
# Raised when the parser meets a token it cannot accept. Carries the source
# position (line and column) of the offending token when one is known.
class ParseError < StandardError
  attr_reader :line, :col

  # @param msg [String] human-readable description
  # @param line [Integer, nil] line of the offending token
  # @param col [Integer, nil] column of the offending token
  def initialize(msg, line = nil, col = nil)
    @line, @col = line, col
    super(msg)
  end
end
260
+
261
+ class Parser
262
# Sets up a parser over +tokens+, positioned at the first token and with an
# empty list of recorded errors.
def initialize(tokens)
  @tokens, @pos, @errors = tokens, 0, []
end
267
+
268
# Parses the whole token stream into a "source_file" hash.
#
# Reads an optional module declaration, then any imports, then top-level
# declarations until :eof. Each declaration is filed under the collection
# matching its "kind"; an "assumptions" block contributes its individual
# assumptions. Errors recorded along the way end up under "parse_errors".
#
# @return [Hash] the parsed program
def parse
  program = { "kind" => "source_file", "module" => nil, "imports" => [],
              "traits" => [], "impls" => [], "contract_shapes" => [],
              "contracts" => [], "types" => [], "functions" => [],
              "pipelines" => [], "olap_points" => [], "assumptions" => [],
              "parse_errors" => [] }

  if peek_kw?("module")
    advance
    program["module"] = parse_module_path
  end

  while peek_kw?("import")
    advance
    program["imports"] << parse_import
  end

  # Declaration kind => key of the collection that receives it.
  collections = {
    "trait" => "traits", "impl" => "impls", "contract_shape" => "contract_shapes",
    "contract" => "contracts", "type" => "types", "function" => "functions",
    "pipeline" => "pipelines", "olap_point" => "olap_points"
  }

  until peek_type?(:eof)
    decl = parse_top_decl
    next if decl.nil?

    kind = decl.fetch("kind")
    if kind == "assumptions"
      program["assumptions"].concat(decl.fetch("assumptions", []))
    elsif (bucket = collections[kind])
      program[bucket] << decl
    end
  end

  program["parse_errors"] = @errors
  program
end
306
+
307
+ private
308
+
309
+ # ---- Token navigation --------------------------------------------------
310
+
311
# Token at the cursor plus +offset+ (nil when out of range); does not consume.
def peek(offset = 0)
  @tokens[@pos + offset]
end

# Token at the cursor.
def current
  @tokens[@pos]
end

# Returns the token at the cursor and moves the cursor one step forward.
def advance
  tok = @tokens[@pos]
  @pos += 1
  tok
end

# True when the token at the cursor has the given type.
def peek_type?(type)
  peek&.type == type
end

# True when the token at the cursor has the given value.
def peek_value?(val)
  peek&.value == val
end

# True when the token at the cursor is the keyword +kw+.
def peek_kw?(kw)
  tok = peek
  tok&.type == :keyword && tok.value == kw
end

# True when the token at the cursor is an identifier.
def peek_ident?
  peek_type?(:ident)
end

# True when the token at the cursor is the symbol literal +name+.
def peek_symbol?(name)
  tok = peek
  tok&.type == :symbol_lit && tok.value == name
end
320
+
321
# Consumes the next token and returns it, provided it has the given type.
#
# @raise [ParseError] when the consumed token has a different type
def expect_type!(type)
  tok = advance
  return tok if tok.type == type

  raise ParseError.new("Expected #{type}, got #{tok.type}(#{tok.value})", tok.line, tok.col)
end
326
+
327
# Consumes the next token and returns it, provided it is the keyword +kw+.
#
# Both the token type and its text are checked, so a string literal whose
# content happens to equal the keyword (e.g. "from") is rejected.
#
# @raise [ParseError] when the consumed token is not that keyword
def expect_kw!(kw)
  tok = advance
  return tok if tok.type == :keyword && tok.value == kw

  raise ParseError.new("Expected keyword '#{kw}', got #{tok.value}", tok.line, tok.col)
end
332
+
333
# Consumes the next token and returns it, provided its value equals +val+
# (the token type is not checked).
#
# @raise [ParseError] when the consumed token has a different value
def expect_value!(val)
  tok = advance
  return tok if tok.value == val

  raise ParseError.new("Expected '#{val}', got #{tok.value}", tok.line, tok.col)
end
338
+
339
# Consumes the next token and returns its value, provided its type is one of
# +types+. On a mismatch nothing is consumed.
#
# @param types [Array<Symbol>] acceptable token types
# @raise [ParseError] when the token at the cursor has another type
def name_token!(types = %i[ident keyword])
  tok = peek
  return advance.value if types.include?(tok.type)

  raise ParseError.new("Expected name, got #{tok.type}(#{tok.value})", tok.line, tok.col)
end
344
+
345
+ # ---- Module / Import ---------------------------------------------------
346
+
347
# Parses `Name(.Name)*` and returns the segments joined with ".".
def parse_module_path
  segments = [name_token!(%i[ident])]
  while peek_type?(:dot)
    advance
    segments << name_token!(%i[ident])
  end
  segments.join(".")
end
356
+
357
# Parses the path of an import (the `import` keyword is already consumed):
# a dotted module path, optionally ending in `.{ name, name, ... }`.
#
# @return [Hash] "module_path" (String) and "names" (Array of names, or nil
#   when no brace list was given)
def parse_import
  segments = [name_token!(%i[ident])]
  names = nil

  while peek_type?(:dot)
    following = peek(1)&.type
    if following == :lbrace
      2.times { advance } # the '.' and the '{'
      names = []
      until peek_type?(:rbrace)
        names << name_token!(%i[ident])
        advance if peek_type?(:comma)
      end
      expect_type!(:rbrace)
      break
    elsif following == :ident
      advance
      segments << name_token!(%i[ident])
    else
      break
    end
  end

  { "module_path" => segments.join("."), "names" => names }
end
380
+
381
+ # ---- Top-level declarations --------------------------------------------
382
+
383
# Words that may precede `contract` at top level to qualify the contract.
CONTRACT_MODIFIERS = %w[pure observed effect privileged irreversible].freeze

# Parses one top-level declaration, dispatching on the word at the cursor.
#
# Only bare words (keyword or identifier tokens) can introduce a declaration;
# any other token - including a string literal whose text equals a keyword -
# is recorded as an unexpected token and skipped.
#
# @return [Hash, nil] the declaration node, or nil when an error was recorded
def parse_top_decl
  tok = peek
  word = %i[keyword ident].include?(tok.type) ? tok.value : nil

  case word
  when "trait" then advance; parse_trait_decl
  when "impl" then advance; parse_impl_decl
  when "contract_shape" then advance; parse_contract_shape_decl
  when "contract" then advance; parse_contract_decl
  when *CONTRACT_MODIFIERS
    modifier = tok.value
    advance
    if peek.value == "contract"
      advance
      parse_contract_decl(modifier: modifier)
    else
      @errors << { "message" => "Expected 'contract' after modifier '#{modifier}'", "line" => tok.line }
      nil
    end
  when "type" then advance; parse_type_decl
  when "def" then advance; parse_function_decl
  when "pipeline" then advance; parse_pipeline_decl
  when "olap_point" then advance; parse_olap_point_decl
  when "assumptions" then advance; parse_assumptions_block
  else
    @errors << { "message" => "Unexpected token: #{tok.value}", "line" => tok.line }
    advance # skip the offending token so the caller's loop makes progress
    nil
  end
end
413
+
414
# Parses `{ assumption ... }` (the `assumptions` keyword is already consumed).
# Anything inside the braces that does not start with `assumption` is
# reported as OOF-P0 and skipped one token at a time.
#
# @return [Hash] an "assumptions" node holding the successfully parsed entries
def parse_assumptions_block
  expect_type!(:lbrace)
  collected = []

  until peek_type?(:rbrace) || peek_type?(:eof)
    tok = peek
    if tok.value != "assumption"
      add_parse_error(
        rule: "OOF-P0",
        message: "Expected 'assumption' declaration inside assumptions block",
        token: tok.value.to_s,
        line: tok.line,
        col: tok.col
      )
      advance
      next
    end

    advance
    decl = parse_assumption_decl(tok)
    collected << decl if decl
  end

  expect_type!(:rbrace)
  { "kind" => "assumptions", "assumptions" => collected }
end
437
+
438
# Parses `Name { field value, ... }` following an `assumption` keyword.
#
# @param assumption_tok [Token] the `assumption` token, used to position the
#   OOF-P28 error reported when the name is missing
# @return [Hash, nil] an "assumption_decl" node, or nil when no name follows
def parse_assumption_decl(assumption_tok)
  if peek_ident?
    name = name_token!(%i[ident])
    expect_type!(:lbrace)
    fields = {}
    loop do
      break if peek_type?(:rbrace) || peek_type?(:eof)

      field_tok = peek
      field = name_token!(%i[ident keyword])
      fields[field] = parse_assumption_field_value(field, field_tok)
      advance if peek_type?(:comma)
    end
    expect_type!(:rbrace)
    return { "kind" => "assumption_decl", "name" => name, "fields" => fields }
  end

  add_parse_error(
    rule: "OOF-P28",
    message: "assumption declaration requires a name",
    token: peek&.value.to_s,
    line: assumption_tok.line,
    col: assumption_tok.col
  )
  # Step over an anonymous body, if there is one, before giving up.
  skip_balanced_block if peek_type?(:lbrace)
  nil
end
463
+
464
# Parses the value of one assumption field, after an optional ":".
#
# Recognised fields: "kind" (symbol literal), "statement" / "source"
# (string literal or nil), "strength" (numeric literal). A malformed "kind"
# or an unknown field is reported as OOF-P0; no value token is consumed then.
#
# @param field [String] the field name just read
# @param field_tok [Token] the field-name token, used to position errors
# @return [Object, nil] the parsed value, or nil when an error was reported
def parse_assumption_field_value(field, field_tok)
  advance if peek_type?(:colon)

  if field == "strength"
    parse_assumption_strength(field_tok)
  elsif %w[statement source].include?(field)
    parse_optional_string_assumption_field(field, field_tok)
  elsif field == "kind" && peek_type?(:symbol_lit)
    advance.value
  else
    problem =
      if field == "kind"
        "assumption kind requires a symbol literal"
      else
        "Unknown assumption field: #{field}"
      end
    add_parse_error(rule: "OOF-P0", message: problem, token: field, line: field_tok.line, col: field_tok.col)
    nil
  end
end
483
+
484
# Parses a string-valued assumption field. Accepts a string literal (returned
# as its value) or a nil literal (consumed, returns nil); anything else is
# reported as OOF-P0 and left unconsumed.
def parse_optional_string_assumption_field(field, field_tok)
  if peek_type?(:string_lit)
    advance.value
  elsif peek_type?(:nil_lit)
    advance
    nil
  else
    add_parse_error(
      rule: "OOF-P0",
      message: "assumption #{field} requires a string literal",
      token: field,
      line: field_tok.line,
      col: field_tok.col
    )
    nil
  end
end
497
+
498
# Parses the numeric value of an assumption's "strength" field. Anything
# other than a float or integer literal is reported as OOF-P0 and left
# unconsumed.
#
# @return [Numeric, nil]
def parse_assumption_strength(field_tok)
  if %i[float_lit int_lit].any? { |type| peek_type?(type) }
    advance.value
  else
    add_parse_error(
      rule: "OOF-P0",
      message: "assumption strength requires a numeric literal",
      token: "strength",
      line: field_tok.line,
      col: field_tok.col
    )
    nil
  end
end
510
+
511
# Parses `Name[In, Out, Err] { step ... }` (the `pipeline` keyword is already
# consumed). Tokens inside the braces that do not start a step are recorded
# as errors and skipped; a pipeline without any step is reported as OOF-PG1.
#
# @return [Hash] a "pipeline" node
def parse_pipeline_decl
  name_tok = peek
  name = name_token!(%i[ident])

  expect_type!(:lbracket)
  type_args = []
  3.times do |index|
    expect_type!(:comma) unless index.zero?
    type_args << parse_type_ref
  end
  expect_type!(:rbracket)
  in_type, out_type, err_type = type_args

  expect_type!(:lbrace)
  steps = []
  loop do
    break if peek_type?(:rbrace) || peek_type?(:eof)

    unless peek_kw?("step")
      tok = peek
      @errors << { "message" => "Expected 'step', got #{tok.value}", "line" => tok.line }
      advance
      next
    end

    advance
    steps << parse_step_decl
  end

  if steps.empty?
    add_parse_error(
      rule: "OOF-PG1",
      message: "pipeline must contain at least one step",
      token: name,
      line: name_tok.line,
      col: name_tok.col
    )
  end
  expect_type!(:rbrace)

  { "kind" => "pipeline", "name" => name,
    "in_type" => in_type, "out_type" => out_type, "err_type" => err_type,
    "steps" => steps }
end
547
+
548
# Parses `Name { clause: value ... }` (the `olap_point` keyword is already
# consumed). Known clauses are dimensions, measure, granularity, source and
# indexed; an unknown clause is reported as OOF-P0 and skipped. Missing
# `dimensions` or `measure` is reported as OOF-P0 against the point's name.
#
# @return [Hash] an "olap_point" node
def parse_olap_point_decl
  name_tok = peek
  name = name_token!(%i[ident])
  expect_type!(:lbrace)

  node = {
    "kind" => "olap_point",
    "name" => name,
    "dimensions" => {},
    "measure" => nil,
    "granularity" => {},
    "source" => nil,
    "indexed" => []
  }

  until peek_type?(:rbrace) || peek_type?(:eof)
    clause_tok = peek
    clause = name_token!(%i[ident keyword])
    expect_type!(:colon)

    case clause
    when "dimensions" then node["dimensions"] = parse_olap_type_map
    when "measure" then node["measure"] = parse_type_ref
    when "granularity" then node["granularity"] = parse_olap_symbol_map
    when "source" then node["source"] = parse_olap_source_expr
    when "indexed" then node["indexed"] = parse_olap_symbol_list
    else
      add_parse_error(
        rule: "OOF-P0",
        message: "Unknown olap_point clause: #{clause}",
        token: clause,
        line: clause_tok.line,
        col: clause_tok.col
      )
      skip_until_olap_clause_boundary
    end
  end

  missing = []
  missing << "dimensions" if node["dimensions"].empty?
  missing << "measure" if node["measure"].nil?
  missing.each do |clause_name|
    add_parse_error(
      rule: "OOF-P0",
      message: "olap_point '#{name}' must declare #{clause_name}",
      token: name,
      line: name_tok.line,
      col: name_tok.col
    )
  end

  expect_type!(:rbrace)
  node
end
613
+
614
# Parses `{ name: TypeRef, ... }` into a hash of name => parsed type
# reference. Commas between entries are optional.
def parse_olap_type_map
  expect_type!(:lbrace)
  {}.tap do |dims|
    until peek_type?(:rbrace) || peek_type?(:eof)
      dim = name_token!(%i[ident keyword])
      expect_type!(:colon)
      dims[dim] = parse_type_ref
      advance if peek_type?(:comma)
    end
    expect_type!(:rbrace)
  end
end
626
+
627
# Parses `{ key: value, ... }` where each value is a symbol literal or a bare
# name, returning a hash of key => value. Commas between entries are optional.
def parse_olap_symbol_map
  expect_type!(:lbrace)
  {}.tap do |entries|
    until peek_type?(:rbrace) || peek_type?(:eof)
      key = name_token!(%i[ident keyword])
      expect_type!(:colon)
      entries[key] = parse_olap_symbol_value
      advance if peek_type?(:comma)
    end
    expect_type!(:rbrace)
  end
end
639
+
640
# Parses `[ value, ... ]` where each value is a symbol literal or a bare
# name, returning the values in order. Commas between entries are optional.
def parse_olap_symbol_list
  expect_type!(:lbracket)
  [].tap do |items|
    until peek_type?(:rbracket) || peek_type?(:eof)
      items << parse_olap_symbol_value
      advance if peek_type?(:comma)
    end
    expect_type!(:rbracket)
  end
end
650
+
651
# Reads one value: a symbol literal's name, or else a bare identifier/keyword.
def parse_olap_symbol_value
  peek_type?(:symbol_lit) ? advance.value : name_token!(%i[ident keyword])
end
658
+
659
# Collects the raw tokens of an olap_point `source:` expression without
# interpreting them. Collection stops at end of input, or - outside any
# brace/paren/bracket nesting - at a "}" or at the start of the next clause.
#
# @return [Hash, nil] a "raw_expr" node holding the token values as strings,
#   or nil when no token was collected
def parse_olap_source_expr
  values = []
  depth = 0

  until peek_type?(:eof)
    break if depth.zero? && (peek_type?(:rbrace) || olap_clause_boundary?(peek, peek(1)))

    tok = advance
    case tok.type
    when :lbrace, :lparen, :lbracket then depth += 1
    when :rbrace, :rparen, :rbracket then depth -= 1
    end
    values << tok.value.to_s
  end

  values.empty? ? nil : { "kind" => "raw_expr", "tokens" => values }
end
674
+
675
# Parses `name: QualifiedRef` (the `step` keyword is already consumed).
# A step without the `: ref` part is reported as OOF-PG2 and yields a node
# whose "ref" is nil.
#
# @return [Hash] a "step" node
def parse_step_decl
  name_tok = peek
  name = name_token!(%i[ident])

  if peek_type?(:colon)
    expect_type!(:colon)
    return { "kind" => "step", "name" => name, "ref" => parse_qualified_ref }
  end

  add_parse_error(
    rule: "OOF-PG2",
    message: "step must reference a contract",
    token: name,
    line: name_tok.line,
    col: name_tok.col
  )
  skip_optional_block_or_step_tail
  { "kind" => "step", "name" => name, "ref" => nil }
end
694
+
695
# Parses `Name [TypeParams] [implements ...] { body }` (the `contract`
# keyword, and any modifier before it, are already consumed).
#
# @param modifier [String, nil] contract modifier; recorded as "pure" when nil
# @return [Hash] a "contract" node; "implements" is present only when given
def parse_contract_decl(modifier: nil)
  name = name_token!(%i[ident])

  type_params = []
  type_params = parse_contract_type_params if peek_type?(:lbracket)

  implements = nil
  implements = parse_implements_clause if peek_kw?("implements")

  expect_type!(:lbrace)
  body = []
  body << parse_body_decl until peek_type?(:rbrace) || peek_type?(:eof)
  expect_type!(:rbrace)

  node = { "kind" => "contract", "name" => name, "modifier" => modifier || "pure", "type_params" => type_params }
  node["implements"] = implements if implements
  # Body declarations that failed to parse come back as nil; drop them.
  node["body"] = body.compact
  node
end
710
+
711
# Parses `Name [TypeParams] { def ... }` (the `trait` keyword is already
# consumed). Every entry in the body must start with `def`.
#
# @return [Hash] a "trait" node
def parse_trait_decl
  name = name_token!(%i[ident])
  type_params = []
  type_params = parse_simple_type_params if peek_type?(:lbracket)

  expect_type!(:lbrace)
  methods = []
  loop do
    break if peek_type?(:rbrace) || peek_type?(:eof)

    expect_kw!("def")
    methods << parse_trait_method
  end
  expect_type!(:rbrace)

  { "kind" => "trait", "name" => name, "type_params" => type_params, "methods" => methods }
end
723
+
724
# Parses a trait method signature `name Params -> TypeRef` (the `def`
# keyword is already consumed).
#
# @return [Hash] a "trait_method" node
def parse_trait_method
  signature = { "kind" => "trait_method", "name" => name_token!(%i[ident]) }
  signature["params"] = parse_params
  expect_type!(:arrow)
  signature["return_type"] = parse_type_ref
  signature
end
731
+
732
# Parses `TraitRef using QualifiedRef` (the `impl` keyword is already
# consumed).
#
# @return [Hash] an "impl" node
def parse_impl_decl
  trait_ref = parse_type_ref_node
  expect_kw!("using")
  target = { "kind" => "qualified_ref", "name" => parse_qualified_ref }
  { "kind" => "impl", "trait_ref" => trait_ref, "using" => target }
end
741
+
742
# Parses `Name [TypeParams] { input ... | output ... }` (the `contract_shape`
# keyword is already consumed). Any other token in the body is recorded as an
# error and skipped.
#
# @return [Hash] a "contract_shape" node
def parse_contract_shape_decl
  name = name_token!(%i[ident])
  type_params = []
  type_params = parse_simple_type_params if peek_type?(:lbracket)
  expect_type!(:lbrace)

  member_parsers = { "input" => :parse_input_decl, "output" => :parse_output_decl }
  body = []
  until peek_type?(:rbrace) || peek_type?(:eof)
    tok = peek
    parser = member_parsers[tok.value]
    if parser
      advance
      body << send(parser)
    else
      @errors << { "message" => "Unknown contract_shape declaration: #{tok.value}", "line" => tok.line }
      advance
    end
  end
  expect_type!(:rbrace)

  { "kind" => "contract_shape", "name" => name, "type_params" => type_params, "body" => body.compact }
end
760
+
761
# Leading word of a contract body declaration => method that parses the rest.
BODY_DECL_PARSERS = {
  "input" => :parse_input_decl,
  "output" => :parse_output_decl,
  "compute" => :parse_compute_decl,
  "read" => :parse_read_decl,
  "snapshot" => :parse_snapshot_decl,
  "window" => :parse_window_decl,
  "escape" => :parse_escape_decl,
  "stream" => :parse_stream_decl,
  "fold_stream" => :parse_fold_stream_decl,
  "invariant" => :parse_invariant_decl,
  "uses" => :parse_uses_decl
}.freeze

# Words that are not allowed to start a contract body declaration =>
# [error rule, error message, recovery method].
MISPLACED_BODY_DECLS = {
  "pipeline" => ["OOF-P2", "pipeline/step is not valid inside a contract body", :skip_invalid_declaration_block],
  "step" => ["OOF-P2", "pipeline/step is not valid inside a contract body", :skip_invalid_body_decl],
  "scoped_by" => ["OOF-PG3", "scoped_by is only valid on read declarations", :skip_invalid_body_decl],
  "tenant_free" => ["OOF-PG5", "tenant_free is only valid on read declarations", :skip_invalid_body_decl]
}.freeze

# Parses one declaration inside a contract body, chosen by the value of the
# token at the cursor. A misplaced pipeline/step/scoped_by/tenant_free is
# reported and handed to a recovery routine; any other unknown token is
# recorded as an error and skipped.
#
# @return [Hash, nil] the declaration node, or nil when nothing was produced
def parse_body_decl
  tok = peek
  word = tok.value

  if (parser = BODY_DECL_PARSERS[word])
    advance
    return send(parser)
  end

  misplaced = MISPLACED_BODY_DECLS[word]
  if misplaced
    rule, message, recovery = misplaced
    add_parse_error(
      rule: rule,
      message: message,
      token: tok.value,
      line: tok.line,
      col: tok.col
    )
    send(recovery)
    return nil
  end

  @errors << { "message" => "Unknown body declaration: #{tok.value}", "line" => tok.line }
  advance
  nil
end
820
+
821
# Parses `name: TypeRef` (the `input` keyword is already consumed).
#
# @return [Hash] an "input" node
def parse_input_decl
  name = name_token!(%i[ident])
  expect_type!(:colon)
  { "kind" => "input", "name" => name, "type_annotation" => parse_type_ref }
end
827
+
828
# Parses `name: TypeRef [lifecycle ...] [evidence [...]]` (the `output`
# keyword is already consumed).
#
# @return [Hash] an "output" node; "lifecycle" and "evidence" are present
#   only when given
def parse_output_decl
  name = name_token!(%i[ident])
  expect_type!(:colon)
  node = { "kind" => "output", "name" => name, "type_annotation" => parse_type_ref }

  if peek_kw?("lifecycle")
    advance
    lifecycle = parse_lifecycle
    node["lifecycle"] = lifecycle if lifecycle
  end

  node["evidence"] = parse_evidence_list if peek_value?("evidence")
  node
end
838
+
839
# Parses `assumptions NAME` (the `uses` keyword is already consumed). Any
# other form is reported as OOF-P0 and skipped up to the next body boundary.
#
# @return [Hash, nil] a "uses_assumptions" node, or nil after an error
def parse_uses_decl
  if peek_kw?("assumptions")
    advance
    return { "kind" => "uses_assumptions", "name" => name_token!(%i[ident]) }
  end

  tok = peek
  add_parse_error(
    rule: "OOF-P0",
    message: "uses declaration currently supports only 'uses assumptions NAME'",
    token: tok&.value.to_s,
    line: tok&.line || 0,
    col: tok&.col || 0
  )
  skip_until_body_boundary
  nil
end
856
+
857
# Parses `evidence [ name, ... ]` and returns the referenced names in order.
# Commas between names are optional.
def parse_evidence_list
  expect_value!("evidence")
  expect_type!(:lbracket)
  [].tap do |refs|
    until peek_type?(:rbracket) || peek_type?(:eof)
      refs << name_token!(%i[ident keyword])
      advance if peek_type?(:comma)
    end
    expect_type!(:rbracket)
  end
end
868
+
869
# Parses `name [: TypeRef] = Expr` (the `compute` keyword is already
# consumed).
#
# When the expression is a call to `fold_stream` and a stream bound
# annotation follows it, the node is emitted with kind "fold_stream" and
# carries the bound; otherwise it is a plain "compute" node.
#
# @return [Hash] a "compute" or "fold_stream" node
def parse_compute_decl
  name = name_token!(%i[ident])
  type_ref = peek_type?(:colon) ? (advance; parse_type_ref) : nil
  expect_type!(:assign)
  expr = parse_expr

  fold_call = expr.fetch("kind", nil) == "call" && expr.fetch("fn", nil) == "fold_stream"
  bound = fold_call ? parse_optional_stream_bound : nil

  node = { "kind" => (bound ? "fold_stream" : "compute"), "name" => name, "expr" => expr }
  node["type_annotation"] = type_ref if type_ref
  node["bound"] = bound if bound
  node
end
889
+
890
# Parses a read declaration (the `read` keyword is already consumed):
#
#   name: TypeRef from "source" [lifecycle ...] [scoped_by name]
#     [cardinality min..max] [schema_version "v"] [tenant_free]
#
# The optional clauses are recognised only in the order shown. Declaring both
# `scoped_by` and `tenant_free` is recorded as an error located at the line
# of the read's name.
#
# @return [Hash] a "read" node; optional keys are present only when given,
#   and "tenant_free" is always present as a boolean
def parse_read_decl
  name_tok = peek # kept so the mutual-exclusion error can report a real line
  name = name_token!(%i[ident])
  expect_type!(:colon)
  type_ref = parse_type_ref
  expect_kw!("from")
  from = expect_type!(:string_lit).value
  lifecycle = peek_kw?("lifecycle") ? (advance; parse_lifecycle) : nil
  scoped_by = peek_kw?("scoped_by") ? (advance; name_token!(%i[ident])) : nil
  cardinality = peek_kw?("cardinality") ? (advance; parse_cardinality_bound) : nil
  schema_ver = peek_kw?("schema_version") ? (advance; expect_type!(:string_lit).value) : nil
  tenant_free = peek_kw?("tenant_free") ? (advance; true) : false
  if tenant_free && scoped_by
    @errors << { "message" => "OOF-PG3: scoped_by and tenant_free are mutually exclusive on read '#{name}'",
                 "line" => name_tok.line }
  end
  node = { "kind" => "read", "name" => name, "type_annotation" => type_ref, "from" => from }
  node["lifecycle"] = lifecycle if lifecycle
  node["scoped_by"] = scoped_by if scoped_by
  node["cardinality"] = cardinality if cardinality
  node["schema_version"] = schema_ver if schema_ver
  node["tenant_free"] = tenant_free
  node
end
913
+
914
# Parses `min..max` with integer literals on both sides. A missing `..` is
# recorded as an error, after which parsing continues with the upper bound.
#
# @return [Hash] "min" and "max" integer values
def parse_cardinality_bound
  lower = expect_type!(:int_lit).value

  if peek_type?(:dot_dot)
    advance
  else
    found = peek
    @errors << { "message" => "Expected '..' in cardinality, got #{found&.value}", "line" => found&.line }
  end

  upper = expect_type!(:int_lit).value
  { "min" => lower, "max" => upper }
end
926
+
927
# Parses `name = Expr [lifecycle ...]` (the `snapshot` keyword is already
# consumed).
#
# @return [Hash] a "snapshot" node; "lifecycle" is present only when given
def parse_snapshot_decl
  name = name_token!(%i[ident])
  expect_type!(:assign)
  node = { "kind" => "snapshot", "name" => name, "expr" => parse_expr }

  if peek_kw?("lifecycle")
    advance
    lifecycle = parse_lifecycle
    node["lifecycle"] = lifecycle if lifecycle
  end
  node
end
936
+
937
# Parses `"label" { key[:] value, ... }` (the `window` keyword is already
# consumed). The ":" after a key and the "," between entries are optional.
#
# @return [Hash] a "window" node with its options hash
def parse_window_decl
  label = expect_type!(:string_lit).value
  expect_type!(:lbrace)

  options = {}
  loop do
    break if peek_type?(:rbrace) || peek_type?(:eof)

    key = name_token!(%i[ident keyword])
    advance if peek_type?(:colon)
    options[key] = parse_window_value
    advance if peek_type?(:comma)
  end
  expect_type!(:rbrace)

  { "kind" => "window", "label" => label, "options" => options }
end
951
+
952
# Reads one window option value: an integer, float or symbol literal, or
# else a bare identifier/keyword.
def parse_window_value
  literal = %i[int_lit float_lit symbol_lit].any? { |type| peek_type?(type) }
  literal ? advance.value : name_token!(%i[ident keyword])
end
963
+
964
# Parses the name following an `escape` keyword.
#
# @return [Hash] an "escape" node
def parse_escape_decl
  { "kind" => "escape", "name" => name_token!(%i[ident]) }
end
968
+
969
# PINV-3: parses an invariant declaration (the `invariant` keyword is already
# consumed):
#
#   invariant <name>
#     predicate: <compute_ref>
#     severity: :<error|warn|soft|metric>   (default: error)
#     label: "<string>"                     (optional)
#     message: "<string>"                   (optional)
#     overridable_with: :<symbol>           (optional; only on :warn)
#
# Attributes may come in any order and are read for as long as the next
# token is one of the attribute keywords. Reported problems:
#   OOF-IV2 - severity is not a symbol literal, or names an unknown level
#             (an unknown level falls back to "error")
#   OOF-IV1 - no predicate: attribute was given
#   OOF-I4  - overridable_with: combined with severity "error"
#
# @return [Hash] an "invariant" node including its source position
def parse_invariant_decl
  name_tok = peek
  name = name_token!(%i[ident])
  attrs = {
    "predicate" => nil,
    "severity" => "error",
    "label" => nil,
    "message" => nil,
    "overridable_with" => nil
  }

  while peek_type?(:keyword) && attrs.key?(peek.value)
    attr_tok = advance
    expect_type!(:colon)

    case attr_tok.value
    when "predicate"
      attrs["predicate"] = name_token!(%i[ident])
    when "severity"
      if peek_type?(:symbol_lit)
        level = advance.value
        unless %w[error warn soft metric].include?(level)
          add_parse_error(
            rule: "OOF-IV2",
            message: "Unknown severity '#{level}'; expected :error :warn :soft :metric",
            token: level,
            line: attr_tok.line,
            col: attr_tok.col
          )
          level = "error" # recover with the default level
        end
        attrs["severity"] = level
      else
        # The offending token is left in place; the severity seen so far stands.
        add_parse_error(
          rule: "OOF-IV2",
          message: "severity: requires a symbol literal (:error, :warn, :soft, :metric)",
          token: peek&.value.to_s,
          line: attr_tok.line,
          col: attr_tok.col
        )
      end
    when "label", "message"
      attrs[attr_tok.value] = peek_type?(:string_lit) ? advance.value : name_token!(%i[ident])
    when "overridable_with"
      attrs["overridable_with"] = peek_type?(:symbol_lit) ? advance.value : name_token!(%i[ident])
    end
  end

  # PINV-3: OOF-IV1 - missing predicate: field
  if attrs["predicate"].nil?
    add_parse_error(
      rule: "OOF-IV1",
      message: "invariant '#{name}' missing required predicate: field",
      token: name,
      line: name_tok.line,
      col: name_tok.col
    )
  end

  # PINV-3: OOF-I4 - overridable_with: on a severity: :error invariant
  if attrs["overridable_with"] && attrs["severity"] == "error"
    add_parse_error(
      rule: "OOF-I4",
      message: ":error invariants cannot be overridden — use :warn if override is intended",
      token: name,
      line: name_tok.line,
      col: name_tok.col
    )
  end

  {
    "kind" => "invariant",
    "name" => name,
    "predicate_ref" => attrs["predicate"],
    "severity" => attrs["severity"],
    "label" => attrs["label"],
    "message" => attrs["message"],
    "overridable_with" => attrs["overridable_with"],
    "source_span" => { "line" => name_tok.line, "col" => name_tok.col }
  }
end
1061
+
1062
# Parses `<name>: <Type>` for a stream declaration and returns its node.
# NOTE(review): the leading `stream` keyword is presumably consumed by the caller.
# The node is always tagged as an escape fragment with the stream_input capability.
def parse_stream_decl
  stream_name = name_token!(%i[ident])
  expect_type!(:colon)
  annotation = parse_type_ref

  node = { "kind" => "stream", "name" => stream_name }
  node["type_annotation"] = annotation
  node["fragment_class"] = "escape"
  node["escape_capability"] = "stream_input"
  node
end
1075
+
1076
# Parses `<name> = <expr> [@bound]` for a fold_stream declaration.
# The right-hand side is read by the general expression parser; the optional
# trailing annotation is read by parse_optional_stream_bound.
# When no bound is obtained an OOF-S1 error is recorded and the node carries
# no "bound" key.
def parse_fold_stream_decl
  first_tok = peek
  fold_name = name_token!(%i[ident])
  expect_type!(:assign)
  fold_expr = parse_expr
  bound = parse_optional_stream_bound

  node = { "kind" => "fold_stream", "name" => fold_name, "expr" => fold_expr }
  if bound
    node["bound"] = bound
  else
    # OOF-S1: a fold without a bound annotation is rejected as unbounded.
    add_parse_error(
      rule: "OOF-S1",
      message: "fold_stream '#{fold_name}' is unbounded — must declare @window_bounded or @count_bounded(n)",
      token: fold_name,
      line: first_tok.line,
      col: first_tok.col
    )
  end
  node
end
1100
+
1101
# Parses an optional `@window_bounded` / `@count_bounded(n)` annotation.
# Returns nil when no `@` token is next or when the annotation name is unknown
# (the latter also records OOF-S1). For count_bounded a non-integer argument
# records OOF-S5 and yields a bound whose "n" is nil.
def parse_optional_stream_bound
  return nil unless peek_type?(:at)

  marker = advance
  annotation = name_token!(%i[ident keyword])

  return { "kind" => "window_bounded" } if annotation == "window_bounded"

  unless annotation == "count_bounded"
    add_parse_error(
      rule: "OOF-S1",
      message: "Unknown bound annotation '@#{annotation}'; expected @window_bounded or @count_bounded(n)",
      token: annotation,
      line: marker.line,
      col: marker.col
    )
    return nil
  end

  expect_type!(:lparen)
  count_tok = peek
  count = nil
  if peek_type?(:int_lit)
    count = advance.value
  else
    # The offending token is reported but not consumed here.
    add_parse_error(
      rule: "OOF-S5",
      message: "@count_bounded requires a statically-known Integer literal",
      token: count_tok&.value.to_s,
      line: count_tok&.line || 0,
      col: count_tok&.col || 0
    )
  end
  expect_type!(:rparen)
  { "kind" => "count_bounded", "n" => count }
end
1138
+
1139
+ # ---- Type declarations -------------------------------------------------
1140
+
1141
# Parses `Name { field: Type[?] [,] ... }` and returns a "type" node.
# Each field records its name, type annotation and whether a `?` followed the
# type. Commas between fields are optional.
def parse_type_decl
  type_name = name_token!(%i[ident])
  expect_type!(:lbrace)

  field_nodes = []
  loop do
    break if peek_type?(:rbrace) || peek_type?(:eof)

    field_name = name_token!(%i[ident keyword])
    expect_type!(:colon)
    field_type = parse_type_ref

    is_optional = false
    if peek_type?(:question)
      advance
      is_optional = true
    end

    field_nodes << { "name" => field_name, "type_annotation" => field_type, "optional" => is_optional }
    advance if peek_type?(:comma)
  end

  expect_type!(:rbrace)
  { "kind" => "type", "name" => type_name, "fields" => field_nodes }
end
1156
+
1157
+ # ---- Function declarations ---------------------------------------------
1158
+
1159
# Parses `name(params) -> ReturnType { body }` and returns a "function" node.
# NOTE(review): the leading `def` keyword is presumably consumed by the caller.
def parse_function_decl
  node = { "kind" => "function" }
  node["name"] = name_token!(%i[ident])
  node["params"] = parse_params
  expect_type!(:arrow)
  node["return_type"] = parse_type_ref
  node["body"] = parse_block_body
  node
end
1168
+
1169
# Parses a parenthesised parameter list `(name: Type [,] ...)`.
# Returns an array of { "name", "type_annotation" } hashes; commas are optional.
def parse_params
  expect_type!(:lparen)

  collected = []
  loop do
    break if peek_type?(:rparen) || peek_type?(:eof)

    param_name = name_token!(%i[ident])
    expect_type!(:colon)
    param_type = parse_type_ref
    collected << { "name" => param_name, "type_annotation" => param_type }
    advance if peek_type?(:comma)
  end

  expect_type!(:rparen)
  collected
end
1182
+
1183
# Parses `{ stmt* expr? }`.
# `let` statements and non-final expressions are collected into "stmts"; an
# expression immediately followed by `}` becomes "return_expr" (nil otherwise).
def parse_block_body
  expect_type!(:lbrace)

  statements = []
  trailing = nil
  until peek_type?(:rbrace) || peek_type?(:eof)
    if peek_kw?("let")
      statements << parse_let_stmt
      next
    end

    candidate = parse_expr
    if peek_type?(:rbrace)
      # Last expression before the closing brace is the block's value.
      trailing = candidate
      break
    end
    statements << { "kind" => "expr_stmt", "expr" => candidate }
  end

  expect_type!(:rbrace)
  { "stmts" => statements, "return_expr" => trailing }
end
1200
+
1201
# Parses `let <name> = <expr>` and returns a "let" node.
def parse_let_stmt
  expect_kw!("let")
  binding_name = name_token!(%i[ident])
  expect_type!(:assign)
  { "kind" => "let", "name" => binding_name, "expr" => parse_expr }
end
1208
+
1209
+ # ---- TypeRef -----------------------------------------------------------
1210
+
1211
# Parses `[A, B, ...]` and returns the parameter names as an array of strings.
# Commas between names are optional.
def parse_simple_type_params
  expect_type!(:lbracket)

  names = []
  loop do
    break if peek_type?(:rbracket) || peek_type?(:eof)

    names << name_token!(%i[ident])
    advance if peek_type?(:comma)
  end

  expect_type!(:rbracket)
  names
end
1221
+
1222
# Parses `[T: Bound & Bound, U, ...]` for a contract header.
# Returns an array of { "name", "bounds" } hashes; "bounds" is empty when the
# parameter has no `:` clause.
def parse_contract_type_params
  expect_type!(:lbracket)

  entries = []
  loop do
    break if peek_type?(:rbracket) || peek_type?(:eof)

    param_name = name_token!(%i[ident])
    bounds = []
    if peek_type?(:colon)
      advance
      bounds = parse_type_param_bounds(param_name)
    end
    entries << { "name" => param_name, "bounds" => bounds }
    advance if peek_type?(:comma)
  end

  expect_type!(:rbracket)
  entries
end
1234
+
1235
# Parses one or more trait bounds separated by `&`.
# Each bound is a type-ref node; a bound written without explicit type
# arguments defaults its arguments to the parameter being bounded.
def parse_type_param_bounds(param_name)
  bounds = [{ "trait_ref" => parse_type_ref_node(default_type_args: [param_name]) }]
  while peek_value?("&")
    advance
    bounds << { "trait_ref" => parse_type_ref_node(default_type_args: [param_name]) }
  end
  bounds
end
1246
+
1247
# Parses `implements <TypeRef>` and returns the referenced type-ref node.
def parse_implements_clause
  expect_kw!("implements")
  implemented = parse_type_ref_node
  implemented
end
1251
+
1252
# Parses `Name` or `Name[Arg, ...]` into { "name", "type_args" }.
# When no bracket list follows, `default_type_args` is used as the argument
# list if it contains any truthy entry; otherwise the list is empty.
def parse_type_ref_node(default_type_args: [])
  ref_name = name_token!(%i[ident keyword])

  unless peek_type?(:lbracket)
    fallback = default_type_args.any? ? default_type_args : []
    return { "name" => ref_name, "type_args" => fallback }
  end

  advance
  explicit_args = []
  loop do
    break if peek_type?(:rbracket) || peek_type?(:eof)

    explicit_args << parse_type_ref
    advance if peek_type?(:comma)
  end
  expect_type!(:rbracket)

  { "name" => ref_name, "type_args" => explicit_args }
end
1267
+
1268
# Parses a dotted name such as `a.b.c` and returns it as a single string.
def parse_qualified_ref
  segments = [name_token!(%i[ident keyword])]
  loop do
    break unless peek_type?(:dot)

    advance
    segments << name_token!(%i[ident keyword])
  end
  segments.join(".")
end
1276
+
1277
# Parses a type reference.
#
# Return shapes:
# - bare name               -> the name as a String
# - bare `Decimal`          -> OOF-DM3 error recorded, "Unknown" type_ref node
# - `Decimal[<int literal>]` -> type_ref node whose params hold the scale value
# - `Name[...]`             -> type_ref node with parsed params
def parse_type_ref
  head_tok = peek
  type_name = name_token!(%i[ident keyword])

  unless peek_type?(:lbracket)
    return type_name unless type_name == "Decimal"

    add_parse_error(
      rule: "OOF-DM3",
      message: "Decimal type requires scale parameter: Decimal[N]",
      token: type_name,
      line: head_tok.line,
      col: head_tok.col
    )
    return { "kind" => "type_ref", "name" => "Unknown", "original" => "Decimal", "params" => [] }
  end

  advance
  if type_name == "Decimal" && peek_type?(:int_lit)
    # Decimal[N]: the scale is stored directly rather than as a nested type_ref.
    scale = advance.value
    expect_type!(:rbracket)
    return { "kind" => "type_ref", "name" => "Decimal", "params" => [scale] }
  end

  args = []
  until peek_type?(:rbracket) || peek_type?(:eof)
    args << parse_type_ref_param(type_name, args.length)
    advance if peek_type?(:comma)
  end
  expect_type!(:rbracket)

  { "kind" => "type_ref", "name" => type_name, "params" => args }
end
1309
+
1310
# Parses one parameter inside `Parent[...]`.
# The second parameter (index 1) of `OLAPPoint`, when it opens with `{`, is
# read as a dims record; every other parameter is a normalized type ref.
def parse_type_ref_param(parent_name, index)
  dims_slot = parent_name == "OLAPPoint" && index == 1 && peek_type?(:lbrace)
  return normalize_type_param(parse_type_ref) unless dims_slot

  { "kind" => "dims_record", "dims" => parse_olap_type_map }
end
1317
+
1318
# Wraps a bare type-name String in a structured type_ref node with no params.
# Anything that is not a String (already-structured nodes, nil) is returned
# unchanged. Used when collecting params of a generic type such as History[T].
def normalize_type_param(ref)
  return ref unless ref.is_a?(String)

  { "kind" => "type_ref", "name" => ref, "params" => [] }
end
1324
+
1325
# Appends a structured diagnostic to @errors.
# Keys are written in the order rule, severity, message, token, line, col;
# severity defaults to "error". Returns the @errors array.
def add_parse_error(rule:, message:, token:, line:, col:, severity: "error")
  entry = {
    "rule" => rule,
    "severity" => severity,
    "message" => message,
    "token" => token,
    "line" => line,
    "col" => col
  }
  @errors.push(entry)
end
1335
+
1336
# Recovery helper: skips a `{ ... }` block if one is next, otherwise skips
# forward to the next body boundary.
def skip_optional_block_or_step_tail
  if peek_type?(:lbrace)
    skip_balanced_block
  else
    skip_until_body_boundary
  end
  nil
end
1344
+
1345
# Recovery helper: drops the current (invalid) token, then skips either the
# block that follows it or everything up to the next body boundary.
def skip_invalid_body_decl
  advance
  # Same tail handling as an optional block / step tail.
  skip_optional_block_or_step_tail
end
1354
+
1355
# Recovery helper: drops the current token, scans forward to the first brace
# (or EOF), and skips the balanced block if an opening brace was found.
def skip_invalid_declaration_block
  advance
  advance until peek_type?(:eof) || peek_type?(:rbrace) || peek_type?(:lbrace)
  skip_balanced_block if peek_type?(:lbrace)
end
1362
+
1363
# Consumes tokens from an opening `{` through its matching `}`.
# Does nothing unless the next token is `{`; stops early when EOF is next.
def skip_balanced_block
  return unless peek_type?(:lbrace)

  nesting = 0
  loop do
    case advance.type
    when :lbrace then nesting += 1
    when :rbrace then nesting -= 1
    end
    break if nesting <= 0 || peek_type?(:eof)
  end
end
1374
+
1375
# Recovery helper: advances until EOF, a closing brace, or a keyword that
# starts a body declaration is the next token.
def skip_until_body_boundary
  advance until peek_type?(:eof) || peek_type?(:rbrace) || body_boundary_token?(peek)
end
1380
+
1381
# Recovery helper: advances until EOF, a closing brace, or the start of an
# OLAP clause (`<clause-name> :`) is next.
def skip_until_olap_clause_boundary
  loop do
    break if peek_type?(:eof) || peek_type?(:rbrace)
    break if olap_clause_boundary?(peek, peek(1))

    advance
  end
end
1386
+
1387
# True when `tok` is a keyword token that begins a contract body declaration.
# Returns false for nil and for non-keyword tokens.
def body_boundary_token?(tok)
  return false unless tok&.type == :keyword

  starters = %w[input output compute read snapshot window escape stream fold_stream invariant uses pipeline step scoped_by tenant_free]
  starters.include?(tok.value)
end
1391
+
1392
# True when `tok` names an OLAP clause (dimensions, measure, granularity,
# source, indexed) as an ident or keyword and `next_tok` is a colon.
# A falsy `tok` is passed straight through as the result.
def olap_clause_boundary?(tok, next_tok)
  return tok unless tok

  return false unless %i[ident keyword].include?(tok.type)
  return false unless %w[dimensions measure granularity source indexed].include?(tok.value)

  next_tok&.type == :colon
end
1397
+
1398
# Consumes the next token and returns its value.
# NOTE(review): callers are expected to have checked that it is a symbol
# literal; no check is made here.
def parse_lifecycle
  advance.value
end
1402
+
1403
# Returns the value of a symbol literal if one is next; otherwise reads an
# identifier or keyword name.
def parse_lifecycle_or_symbol
  return advance.value if peek_type?(:symbol_lit)

  name_token!(%i[ident keyword])
end
1410
+
1411
+ # ---- Expressions -------------------------------------------------------
1412
+
1413
# Entry point of the expression parser: parses a full expression starting at
# the lowest binary-operator precedence.
def parse_expr
  lowest_precedence = 0
  parse_binary_or(lowest_precedence)
end
1416
+
1417
# Precedence-climbing parser for left-associative binary operators.
#
# Parses a unary operand, then keeps folding `<op> <rhs>` pairs into
# "binary_op" nodes while the next token is an operator whose precedence is
# at least `min_prec`. The right-hand side is parsed at `prec + 1`, which
# makes operators of equal precedence associate to the left.
#
# A string literal is never treated as an operator, even when its text
# happens to equal an operator spelling (e.g. the literal "+"): operators are
# looked up by token value, so without this guard such a literal following a
# complete expression would be consumed as a binary operator.
def parse_binary_or(min_prec)
  left = parse_unary

  loop do
    candidate = peek
    break if candidate.nil? || candidate.type == :string_lit

    prec = binary_prec(candidate.value)
    break if prec.nil? || prec < min_prec

    op_tok = advance
    right = parse_binary_or(prec + 1)
    left = { "kind" => "binary_op", "op" => op_tok.value, "left" => left, "right" => right }
  end

  left
end
1432
+
1433
# Binary operator precedence table; a larger number binds tighter.
BINARY_OPS = {
  # logical
  "||" => 1,
  "&&" => 2,
  # comparison
  "==" => 3,
  "!=" => 3,
  "<" => 3,
  ">" => 3,
  "<=" => 3,
  ">=" => 3,
  # concatenation
  "++" => 4,
  # additive
  "+" => 5,
  "-" => 5,
  # multiplicative
  "*" => 6,
  "/" => 6
}.freeze

# Returns the precedence of `op`, or nil when `op` is not a binary operator.
def binary_prec(op)
  BINARY_OPS[op]
end
1444
+
1445
# Parses an optional chain of prefix `!` operators followed by a postfix
# expression.
#
# The operand of `!` is parsed by recursing into parse_unary, so stacked
# negations such as `!!x` nest into unary_op nodes. Parsing the operand with
# parse_postfix instead would hand the second `!` to the primary-expression
# parser, which does not accept it.
#
# Returns the postfix expression unchanged when no `!` is present.
def parse_unary
  return parse_postfix unless peek_type?(:bang)

  op = advance.value
  { "kind" => "unary_op", "op" => op, "operand" => parse_unary }
end
1453
+
1454
# Parses a primary expression followed by any number of postfix forms:
# - `.field`      -> field_access
# - `[index]`     -> index_access (index is a slice record when `name:` is next)
# - `(args)`      -> call, only when the expression so far is a plain ref
def parse_postfix
  node = parse_primary

  loop do
    if peek_type?(:dot)
      advance
      member = name_token!(%i[ident keyword])
      node = { "kind" => "field_access", "object" => node, "field" => member }
    elsif peek_type?(:lbracket)
      advance
      subscript = index_slice_ahead? ? parse_index_slice_record : parse_expr
      expect_type!(:rbracket)
      node = { "kind" => "index_access", "object" => node, "index" => subscript }
    elsif peek_type?(:lparen) && node["kind"] == "ref"
      # Function call: name(args); commas between arguments are optional.
      callee = node["name"]
      advance
      call_args = []
      loop do
        break if peek_type?(:rparen) || peek_type?(:eof)

        call_args << parse_call_arg
        advance if peek_type?(:comma)
      end
      expect_type!(:rparen)
      node = { "kind" => "call", "fn" => callee, "args" => call_args }
    else
      break
    end
  end

  node
end
1485
+
1486
# True when the next two tokens look like `name :`, i.e. the bracket content
# starts a keyed slice record rather than a plain index expression.
def index_slice_ahead?
  return false unless %i[ident keyword].include?(peek&.type)

  peek(1)&.type == :colon
end
1489
+
1490
# Parses `key: expr [,] ...` up to (not including) the closing `]` and
# returns a slice_record node. A repeated key keeps the last value.
def parse_index_slice_record
  entries = {}
  loop do
    break if peek_type?(:rbracket) || peek_type?(:eof)

    slot = name_token!(%i[ident keyword])
    expect_type!(:colon)
    entries[slot] = parse_expr
    advance if peek_type?(:comma)
  end
  { "kind" => "slice_record", "fields" => entries }
end
1500
+
1501
# Parses one call argument. Lambdas are recognised in two spellings:
# `(params) -> body` (detected by lookahead past the matching paren) and
# `name -> body`; anything else is an ordinary expression.
def parse_call_arg
  return parse_lambda if peek_type?(:lparen) && lambda_ahead?
  return parse_lambda if peek_type?(:ident) && peek(1)&.type == :arrow

  parse_expr
end
1511
+
1512
# Lookahead: scans from the current position to the `)` that brings the paren
# nesting back to zero and reports whether the token right after it is an
# arrow. Returns false if EOF or the end of the token list is reached first.
# The parser position is left untouched.
def lambda_ahead?
  nesting = 0
  cursor = @pos

  while cursor < @tokens.length
    kind = @tokens[cursor].type
    break if kind == :eof

    nesting += 1 if kind == :lparen
    if kind == :rparen
      nesting -= 1
      return @tokens[cursor + 1]&.type == :arrow if nesting.zero?
    end
    cursor += 1
  end

  false
end
1534
+
1535
# Parses `(a, b) -> body` or `a -> body` into a lambda node.
# The body is a brace block when `{` follows the arrow, otherwise a single
# expression. Parameter names are stored as plain strings.
def parse_lambda
  param_names = []

  if peek_type?(:lparen)
    advance
    loop do
      break if peek_type?(:rparen) || peek_type?(:eof)

      param_names << name_token!(%i[ident])
      advance if peek_type?(:comma)
    end
    expect_type!(:rparen)
  elsif peek_type?(:ident)
    param_names << advance.value
  end

  expect_type!(:arrow)
  lambda_body = peek_type?(:lbrace) ? parse_lambda_block : parse_expr
  { "kind" => "lambda", "params" => param_names, "body" => lambda_body }
end
1552
+
1553
# Parses a `{ stmt* expr? }` lambda body and returns it as a "block" node.
# The statement/return-expression rules are exactly those of parse_block_body,
# so that parser is reused and its result is tagged with the block kind.
def parse_lambda_block
  { "kind" => "block" }.merge(parse_block_body)
end
1570
+
1571
# Parses a primary expression: literal, reference, `if`, array literal,
# record literal, or parenthesised expression.
# Any other token is consumed, reported in @errors, and returned as an
# "error" node so parsing can continue.
def parse_primary
  tok = peek

  # Bracketed forms consume their own opening token.
  return parse_array_literal if tok.type == :lbracket
  return parse_record_or_block if tok.type == :lbrace

  # Every remaining form starts by consuming the token just inspected.
  advance

  case tok.type
  when :keyword
    case tok.value
    when "if" then parse_if_expr
    when "true", "false" then { "kind" => "literal", "value" => tok.value == "true", "type_tag" => "Bool" }
    when "nil" then { "kind" => "literal", "value" => nil, "type_tag" => "Nil" }
    else { "kind" => "ref", "name" => tok.value }
    end
  when :ident
    { "kind" => "ref", "name" => tok.value }
  when :int_lit
    { "kind" => "literal", "value" => tok.value, "type_tag" => "Integer" }
  when :float_lit
    { "kind" => "literal", "value" => tok.value, "type_tag" => "Float" }
  when :string_lit
    { "kind" => "literal", "value" => tok.value, "type_tag" => "String" }
  when :symbol_lit
    { "kind" => "symbol", "value" => tok.value }
  when :bool_lit
    { "kind" => "literal", "value" => tok.value == "true", "type_tag" => "Bool" }
  when :lparen
    inner = parse_expr
    expect_type!(:rparen)
    inner
  else
    @errors << { "message" => "Unexpected token in expression: #{tok.type}(#{tok.value})", "line" => tok.line }
    { "kind" => "error", "token" => tok.value }
  end
end
1611
+
1612
# Parses `<cond> { ... } [else { ... }]` (the `if` keyword is already
# consumed by parse_primary). "else" is nil when no else branch is present.
def parse_if_expr
  condition = parse_expr
  consequent = parse_block_body
  alternative =
    if peek_kw?("else")
      advance
      parse_block_body
    end

  { "kind" => "if_expr", "cond" => condition, "then" => consequent, "else" => alternative }
end
1622
+
1623
# Parses `[expr [,] ...]` into an array_literal node; commas are optional.
def parse_array_literal
  expect_type!(:lbracket)

  elements = []
  loop do
    break if peek_type?(:rbracket) || peek_type?(:eof)

    elements << parse_expr
    advance if peek_type?(:comma)
  end

  expect_type!(:rbracket)
  { "kind" => "array_literal", "items" => elements }
end
1633
+
1634
# Parses `{ key: value [,] ... }` into a record_literal node.
# Despite the name, only the record form is handled here. A repeated key
# keeps the last value.
def parse_record_or_block
  expect_type!(:lbrace)

  entries = {}
  loop do
    break if peek_type?(:rbrace) || peek_type?(:eof)

    field_name = name_token!(%i[ident keyword])
    expect_type!(:colon)
    entries[field_name] = parse_expr
    advance if peek_type?(:comma)
  end

  expect_type!(:rbrace)
  { "kind" => "record_literal", "fields" => entries }
end
1648
+ end
1649
+
1650
+ # ---------------------------------------------------------------------------
1651
+ # ParsedProgram builder (public API)
1652
+ # ---------------------------------------------------------------------------
1653
# Public result object of a parse: wraps the AST hash, the sha256 of the
# source text, and the parse errors found in the AST, and serializes them as
# a "parsed_program" document.
class ParsedProgram
  # AST sections copied verbatim (nil when the AST lacks the key).
  PLAIN_SECTIONS = %w[module imports traits impls contract_shapes contracts types functions].freeze
  # AST sections that default to an empty list when the AST lacks the key.
  LIST_SECTIONS = %w[pipelines olap_points assumptions].freeze

  attr_reader :ast, :source_hash, :errors

  # Lexes and parses `source`, returning a ParsedProgram.
  #
  # @param source [String] program text
  # @param source_path [String] label recorded in the output document
  # @return [ParsedProgram]
  def self.parse(source, source_path: "<stdin>")
    tokens = Lexer.new(source).tokenize
    ast = Parser.new(tokens).parse
    new(ast: ast, source: source, source_path: source_path)
  end

  # @param ast [Hash] parser output; "parse_errors" is read from it if present
  # @param source [String] program text, used only to compute the hash
  # @param source_path [String] label recorded in the output document
  def initialize(ast:, source:, source_path:)
    require "digest" # loaded lazily; only this constructor needs it
    @ast = ast
    @source_path = source_path
    @source_hash = "sha256:#{Digest::SHA256.hexdigest(source)}"
    @errors = ast.fetch("parse_errors", [])
  end

  # @return [Boolean] true when no parse errors were recorded
  def valid?
    @errors.empty?
  end

  # Serializes the program as JSON.
  #
  # Called directly, keyword options are forwarded to JSON.generate. The JSON
  # library calls `to_json(state)` with a positional generator state on objects
  # nested inside another structure (e.g. `JSON.generate([program])`);
  # positional arguments are therefore accepted and forwarded to Hash#to_json.
  #
  # @return [String]
  def to_json(*args, **opts)
    return JSON.generate(to_h, **opts) if args.empty?

    to_h.to_json(*args, **opts)
  end

  # @return [Hash] the "parsed_program" document with string keys
  def to_h
    doc = {
      "kind" => "parsed_program",
      "source_path" => @source_path,
      "source_hash" => @source_hash,
      "grammar_version" => grammar_version
    }
    PLAIN_SECTIONS.each { |section| doc[section] = @ast[section] }
    LIST_SECTIONS.each { |section| doc[section] = @ast.fetch(section, []) }
    doc["parse_errors"] = @errors
    doc
  end

  # Derives the grammar version tag from which features the AST uses.
  # Checks run in priority order; the first match wins:
  # assumptions, OLAP points, Decimal types, pipelines / scoped_by,
  # polymorphism (traits, impls, contract shapes, type params), else "0.1.0".
  #
  # @return [String]
  def grammar_version
    return "assumptions-v0" if @ast.fetch("assumptions", []).any? ||
                               contract_body_any? { |node| node["kind"] == "uses_assumptions" }
    return "olap-point-v0" if @ast.fetch("olap_points", []).any?

    return "decimal-v0" if contract_body_any? { |node| decimal_type_ref?(node["type_annotation"]) } ||
                           @ast.fetch("types", []).any? { |type| decimal_type_ref?(type["alias"]) }

    return "spark-pipeline-v0" if @ast.fetch("pipelines", []).any? ||
                                  contract_body_any? { |node| node["scoped_by"] }

    return "polymorphic-v0" if @ast.fetch("traits", []).any? ||
                               @ast.fetch("impls", []).any? ||
                               @ast.fetch("contract_shapes", []).any? ||
                               @ast.fetch("contracts", []).any? { |contract| contract.fetch("type_params", []).any? }

    "0.1.0"
  end

  private

  # True when any Hash node in any contract body satisfies the given block.
  def contract_body_any?
    @ast.fetch("contracts", []).any? do |contract|
      contract.fetch("body", []).any? { |node| node.is_a?(Hash) && yield(node) }
    end
  end

  # True when `node` is a structured type_ref whose name is Decimal.
  def decimal_type_ref?(node)
    node.is_a?(Hash) && node["kind"] == "type_ref" && node["name"] == "Decimal"
  end
end
1736
+ end