odin-foundation 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/lib/odin/diff/differ.rb +115 -0
  3. data/lib/odin/diff/patcher.rb +64 -0
  4. data/lib/odin/export.rb +330 -0
  5. data/lib/odin/parsing/parser.rb +1193 -0
  6. data/lib/odin/parsing/token.rb +26 -0
  7. data/lib/odin/parsing/token_type.rb +40 -0
  8. data/lib/odin/parsing/tokenizer.rb +825 -0
  9. data/lib/odin/parsing/value_parser.rb +322 -0
  10. data/lib/odin/resolver/import_resolver.rb +137 -0
  11. data/lib/odin/serialization/canonicalize.rb +112 -0
  12. data/lib/odin/serialization/stringify.rb +582 -0
  13. data/lib/odin/transform/format_exporters.rb +819 -0
  14. data/lib/odin/transform/source_parsers.rb +385 -0
  15. data/lib/odin/transform/transform_engine.rb +2837 -0
  16. data/lib/odin/transform/transform_parser.rb +979 -0
  17. data/lib/odin/transform/transform_types.rb +278 -0
  18. data/lib/odin/transform/verb_context.rb +87 -0
  19. data/lib/odin/transform/verbs/aggregation_verbs.rb +106 -0
  20. data/lib/odin/transform/verbs/collection_verbs.rb +640 -0
  21. data/lib/odin/transform/verbs/datetime_verbs.rb +602 -0
  22. data/lib/odin/transform/verbs/financial_verbs.rb +356 -0
  23. data/lib/odin/transform/verbs/geo_verbs.rb +125 -0
  24. data/lib/odin/transform/verbs/numeric_verbs.rb +434 -0
  25. data/lib/odin/transform/verbs/object_verbs.rb +123 -0
  26. data/lib/odin/types/array_item.rb +42 -0
  27. data/lib/odin/types/diff.rb +89 -0
  28. data/lib/odin/types/directive.rb +28 -0
  29. data/lib/odin/types/document.rb +92 -0
  30. data/lib/odin/types/document_builder.rb +67 -0
  31. data/lib/odin/types/dyn_value.rb +270 -0
  32. data/lib/odin/types/errors.rb +149 -0
  33. data/lib/odin/types/modifiers.rb +45 -0
  34. data/lib/odin/types/ordered_map.rb +79 -0
  35. data/lib/odin/types/schema.rb +262 -0
  36. data/lib/odin/types/value_type.rb +28 -0
  37. data/lib/odin/types/values.rb +618 -0
  38. data/lib/odin/types.rb +12 -0
  39. data/lib/odin/utils/format_utils.rb +186 -0
  40. data/lib/odin/utils/path_utils.rb +25 -0
  41. data/lib/odin/utils/security_limits.rb +17 -0
  42. data/lib/odin/validation/format_validators.rb +238 -0
  43. data/lib/odin/validation/redos_protection.rb +102 -0
  44. data/lib/odin/validation/schema_parser.rb +813 -0
  45. data/lib/odin/validation/schema_serializer.rb +262 -0
  46. data/lib/odin/validation/validator.rb +1061 -0
  47. data/lib/odin/version.rb +5 -0
  48. data/lib/odin.rb +90 -0
  49. metadata +160 -0
@@ -0,0 +1,1193 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Odin
4
+ module Parsing
5
+ class OdinParser
6
+ MAX_NESTING_DEPTH = Utils::SecurityLimits::MAX_DEPTH
7
+ MAX_ARRAY_INDEX = Utils::SecurityLimits::MAX_ARRAY_INDEX
8
+
9
# Parses ODIN source text.
#
# A String in a non-UTF-8 encoding is transcoded to UTF-8 first; the text is
# then tokenized and the token list handed to build_document.
#
# @param text the source to parse
# @param options forwarded to build_document unchanged
# @return whatever build_document returns
def parse(text, options = nil)
  needs_transcoding = text.is_a?(String) && text.encoding != Encoding::UTF_8
  text = text.encode("UTF-8") if needs_transcoding

  token_stream = Tokenizer.new(text).tokenize
  build_document(token_stream, text, options)
end
14
+
15
+ private
16
+
17
# Resets all parser state, walks the token list once dispatching on each
# token's type, then validates array contiguity and builds the result.
#
# @param tokens the token list to consume
# @param source the original text (stored in @source)
# @param options accepted for interface compatibility; not read here
# @return the value of finalize_documents
def build_document(tokens, source, options)
  @tokens = tokens
  @source = source
  @pos = 0

  # Header / path state
  @context = ""
  @previous_context = ""
  @metadata_mode = false
  @assigned_paths = {}
  @array_indices = {}

  # Tabular state
  @tabular_mode = false
  @tabular_primitive = false
  @tabular_columns = []
  @tabular_array_path = ""
  @tabular_row_index = 0

  # Document chaining
  @documents = []
  @current_builder = Types::OdinDocumentBuilder.new
  @current_metadata = {}
  @current_modifiers = {}
  @current_comments = {}

  # Directives
  @directives = []

  while @pos < @tokens.length
    token = @tokens[@pos]

    case token.type
    when TokenType::EOF
      break
    when TokenType::NEWLINE
      @pos += 1
      # Blank line after {$} metadata exits metadata mode (Java parity)
      if @metadata_mode && @context.empty?
        following = @tokens[@pos]
        @metadata_mode = false if following && following.type == TokenType::NEWLINE
      end
    when TokenType::COMMENT
      @pos += 1
    when TokenType::HEADER_OPEN
      exit_tabular_mode!
      parse_header
    when TokenType::REFERENCE
      name = token.value
      if %w[import schema if].include?(name)
        # @import, @schema and @if arrive as REFERENCE tokens
        parse_at_directive_from_ref(token)
      elsif name.empty? || !name.match?(/\A[a-zA-Z]/)
        # Bare @ or a non-alphabetic name at line start is always an error;
        # only the message depends on whether an '=' follows.
        message = if peek_token&.type == TokenType::EQUALS
                    "@ cannot be used as a path on the left side of assignment"
                  else
                    "Invalid directive: @#{name}"
                  end
        raise Errors::ParseError.new(
          Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
          token.line, token.column,
          message
        )
      elsif @tabular_mode
        parse_tabular_row
      else
        @pos += 1
      end
    when TokenType::PATH
      if token.value.start_with?("---")
        handle_doc_separator
      elsif token.value.start_with?("@")
        # @directive at document level
        parse_at_directive(token)
      else
        exit_tabular_mode!
        parse_assignment
      end
    when TokenType::PIPE
      # Pipe-based tabular (not used in current golden tests, but handle)
      skip_to_newline
    when TokenType::ERROR
      handle_error_token(token)
    else
      if @tabular_mode
        # In tabular mode, data rows start with a value token
        parse_tabular_row
      elsif token.type == TokenType::MODIFIER && token.value == "-" && peek_is_doc_separator?
        # --- separator arriving as standalone tokens
        handle_doc_separator
      else
        @pos += 1
      end
    end
  end

  validate_array_contiguity!
  finalize_documents
end
142
+
143
# Returns the token at the current position without consuming it
# (nil when the position is past the end of the token list).
def current_token
  @tokens[@pos]
end
146
+
147
# Looks ahead +offset+ tokens from the current position without consuming.
#
# @param offset [Integer] distance from @pos (default 1)
# @return the token at that position, or nil when it lies past the end
def peek_token(offset = 1)
  target = @pos + offset
  return nil unless target < @tokens.length

  @tokens[target]
end
151
+
152
# Consumes one token: returns the token at the current position and moves
# the position forward by one.
def advance
  @tokens[@pos].tap { @pos += 1 }
end
157
+
158
# Consumes the current token if it has the given type.
#
# @param type the token type that must come next
# @return the consumed token
# @raise [Errors::ParseError] UNEXPECTED_CHARACTER when the current token is
#   missing or of another type (line/column fall back to 0 when missing)
def expect(type)
  found = current_token
  return advance unless found.nil? || found.type != type

  raise Errors::ParseError.new(
    Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
    found&.line || 0, found&.column || 0,
    "Expected #{type}, got #{found&.type}"
  )
end
171
+
172
# Moves the position past any run of consecutive NEWLINE tokens.
def skip_newlines
  @pos += 1 while @pos < @tokens.length && @tokens[@pos].type == TokenType::NEWLINE
end
177
+
178
# Discards the rest of the current line: advances to the next NEWLINE or EOF
# token, then steps over the NEWLINE (an EOF token is left in place).
def skip_to_newline
  until @pos >= @tokens.length
    kind = @tokens[@pos].type
    break if kind == TokenType::NEWLINE || kind == TokenType::EOF

    @pos += 1
  end

  @pos += 1 if @pos < @tokens.length && @tokens[@pos].type == TokenType::NEWLINE
end
185
+
186
+ # --- Header Parsing ---
187
+
188
# Parses a "{...}" header starting at the HEADER_OPEN token.
#
# "{}" resets the context to the root. Otherwise a PATH token followed by
# HEADER_CLOSE is required; the stripped path is validated and then resolved
# into the new context.
#
# @raise [Errors::ParseError] INVALID_HEADER_SYNTAX when the header content
#   is not a PATH token or the closing brace is missing
def parse_header
  opener = advance # consume HEADER_OPEN

  # Empty header {} - reset to root
  if current_token&.type == TokenType::HEADER_CLOSE
    advance
    @context = ""
    @previous_context = ""
    @metadata_mode = false
    return
  end

  unless current_token&.type == TokenType::PATH
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_HEADER_SYNTAX,
      opener.line, opener.column,
      "Invalid header"
    )
  end

  path_token = advance
  header_path = path_token.value.strip

  unless current_token&.type == TokenType::HEADER_CLOSE
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_HEADER_SYNTAX,
      opener.line, opener.column,
      "Missing closing brace"
    )
  end
  advance # consume HEADER_CLOSE

  validate_header_path!(header_path, path_token)
  resolve_header_path(header_path, path_token)
end
233
+
234
# Turns a header path into parser state (@context, @previous_context,
# @metadata_mode, and tabular state via setup_tabular).
#
# Forms handled, in order: "$" (metadata root), "$key" / "$.key" (metadata
# sub-path), "path[] : cols" (tabular), ".rel" (relative to the previous
# absolute header), and a plain absolute path.
#
# @param raw_path [String] stripped header text
# @param token the header's PATH token, used for error positions
def resolve_header_path(raw_path, token)
  if raw_path == "$"
    @context = ""
    @metadata_mode = true
    return
  end

  if raw_path.start_with?("$")
    # Drop the "$" and one optional "." that follows it.
    @metadata_mode = true
    @context = raw_path.delete_prefix("$").delete_prefix(".")
    return
  end

  @metadata_mode = false

  tabular = raw_path.match(/\A(.+)\[\]\s*:\s*(.+)\z/)
  if tabular
    setup_tabular(tabular[1], tabular[2], token)
    return
  end

  if raw_path.start_with?(".")
    # Relative header: appended to the last absolute header, which itself
    # is left unchanged.
    relative = raw_path[1..]
    @context = @previous_context.empty? ? relative : "#{@previous_context}.#{relative}"
  else
    @context = raw_path
    @previous_context = raw_path
  end

  validate_depth!(@context, token)
  validate_path_indices!(@context, token)
end
280
+
281
# Enters tabular mode for a "path[] : columns" header.
#
# A path starting with "." is resolved against the previous absolute header;
# any other path is used as-is and becomes the new previous context. A column
# spec of "~" selects primitive rows (one value per row); otherwise the
# comma-separated column names are resolved via resolve_tabular_columns.
#
# @param array_path [String] the text before "[]"
# @param columns_str [String] the text after ":"
# @param token the header token (not read here)
def setup_tabular(array_path, columns_str, token)
  relative = array_path.start_with?(".")

  target = if !relative
             array_path
           elsif @previous_context.empty?
             array_path[1..]
           else
             "#{@previous_context}#{array_path}"
           end
  @previous_context = target unless relative

  @tabular_mode = true
  @tabular_array_path = target
  @tabular_row_index = 0

  spec = columns_str.strip
  @tabular_primitive = (spec == "~")
  @tabular_columns = @tabular_primitive ? [] : resolve_tabular_columns(spec.split(",").map(&:strip))

  @context = target
end
312
+
313
# Expands relative column names in a tabular header.
#
# A column starting with "." is prefixed with the parent of the most recent
# non-relative column (everything before its last ".segment"); a non-relative
# column without a dot clears that prefix.
#
# @param raw_cols [Array<String>] column names as written
# @return [Array<String>] resolved column names, same order
def resolve_tabular_columns(raw_cols)
  prefix = ""

  raw_cols.map do |col|
    if col.start_with?(".")
      "#{prefix}#{col}"
    else
      prefix = col.include?(".") ? col.sub(/\.[^.]+\z/, "") : ""
      col
    end
  end
end
334
+
335
# Leaves tabular mode, clearing the primitive flag and column list.
# Does nothing when tabular mode is not active.
def exit_tabular_mode!
  if @tabular_mode
    @tabular_mode = @tabular_primitive = false
    @tabular_columns = []
  end
end
341
+
342
+ # --- Assignment Parsing ---
343
+
344
# Parses one "path = [modifiers] value [:directives] [; comment]" line,
# starting at its PATH token, and records the result.
#
# In metadata mode the value is stored in @current_metadata; otherwise it is
# handed to the document builder and its path marked as assigned.
#
# @raise [Errors::ParseError] UNEXPECTED_CHARACTER when '=' is missing,
#   DUPLICATE_PATH_ASSIGNMENT when the resolved path was already assigned,
#   plus anything raised by the value/depth/index helpers it calls
def parse_assignment
  path_token = advance # consume PATH

  # Surface tokenizer errors sitting between the path and '='
  check_for_error_before_equals!

  unless current_token&.type == TokenType::EQUALS
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      path_token.line, path_token.column,
      "Expected '=' after path"
    )
  end
  advance # consume EQUALS

  mods = parse_modifiers
  value = parse_value(path_token)
  directives = parse_trailing_directives

  comment = nil
  comment = advance.value if current_token&.type == TokenType::COMMENT

  value = value.with_modifiers(mods) if mods.any?
  value = value.with_directives(directives) unless directives.empty?

  # Resolve against the current header, then normalize leading zeros in
  # array indices: [007] -> [7]
  full_path = resolve_path(path_token.value)
                .gsub(/\[(\d+)\]/) { "[#{Regexp.last_match(1).to_i}]" }

  validate_depth!(full_path, path_token)
  track_array_index(full_path, path_token)

  if @metadata_mode
    if @current_metadata.key?(full_path)
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::DUPLICATE_PATH_ASSIGNMENT,
        path_token.line, path_token.column,
        "Duplicate metadata key: #{full_path}"
      )
    end
    @current_metadata[full_path] = value
  else
    if @assigned_paths.key?(full_path)
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::DUPLICATE_PATH_ASSIGNMENT,
        path_token.line, path_token.column,
        "Duplicate path: #{full_path}"
      )
    end
    has_mods = mods.any?
    @assigned_paths[full_path] = true
    @current_builder.set(full_path, value, modifiers: has_mods ? mods : nil, comment: comment)
    @current_modifiers[full_path] = mods if has_mods
  end
end
420
+
421
# Raises if the current token is a tokenizer ERROR token; otherwise returns
# without consuming anything.
#
# @raise [Errors::ParseError] UNEXPECTED_CHARACTER, with the token's own text
#   as the message ("@#" is reported as "@# is invalid")
def check_for_error_before_equals!
  bad = current_token
  return unless bad&.type == TokenType::ERROR

  message = bad.value == "@#" ? "@# is invalid" : bad.value
  raise Errors::ParseError.new(
    Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
    bad.line, bad.column,
    message
  )
end
441
+
442
# Qualifies an assignment path with the current header context.
#
# @param raw_path [String] the path as written on the assignment line
# @return [String] "context.raw_path", or raw_path itself at the root
def resolve_path(raw_path)
  return raw_path if @context.empty?

  "#{@context}.#{raw_path}"
end
449
+
450
# Consumes a run of MODIFIER tokens and folds them into one modifiers value.
# "!" marks required, "*" confidential, "-" deprecated; any other modifier
# text is consumed and ignored.
#
# @return [Types::OdinModifiers] a new instance when at least one flag was
#   seen, otherwise the shared Types::OdinModifiers::NONE
def parse_modifiers
  flags = { required: false, confidential: false, deprecated: false }

  while current_token&.type == TokenType::MODIFIER
    case current_token.value
    when "!" then flags[:required] = true
    when "*" then flags[:confidential] = true
    when "-" then flags[:deprecated] = true
    end
    advance
  end

  return Types::OdinModifiers::NONE unless flags.value?(true)

  Types::OdinModifiers.new(**flags)
end
470
+
471
# Consumes the current token as a value and converts it via ValueParser.
#
# @param context_token token whose position is reported when no value follows
# @param allow_bare [Boolean] when false, an unquoted (bare) string is an error
# @return the value produced by ValueParser.parse_value
# @raise [Errors::ParseError] UNEXPECTED_CHARACTER when the line ends before a
#   value, BARE_STRING_NOT_ALLOWED for a bare string, or whatever
#   handle_error_token raises for an ERROR token
def parse_value(context_token, allow_bare: false)
  candidate = current_token

  if candidate.nil? || candidate.type == TokenType::NEWLINE || candidate.type == TokenType::EOF
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      context_token.line, context_token.column,
      "Expected value"
    )
  end

  handle_error_token(candidate) if candidate.type == TokenType::ERROR

  # Bare (unquoted) strings raise P002 unless in verb arg context
  unless allow_bare
    if candidate.type == TokenType::STRING && candidate.raw == "bare"
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::BARE_STRING_NOT_ALLOWED,
        candidate.line, candidate.column,
        "Strings must be quoted"
      )
    end
  end

  advance
  ValueParser.parse_value(candidate)
end
498
+
499
# Consumes the DIRECTIVE tokens that follow a value. Each directive may be
# followed by one STRING token, which becomes its value.
#
# @return [Array<Types::OdinDirective>] directives in source order (may be empty)
def parse_trailing_directives
  collected = []

  while current_token&.type == TokenType::DIRECTIVE
    name = advance.value
    argument = current_token&.type == TokenType::STRING ? advance.value : nil
    collected << Types::OdinDirective.new(name, argument)
  end

  collected
end
516
+
517
+ # --- Tabular Row Parsing ---
518
+
519
# Parses one tabular data row, choosing the primitive or object row parser
# according to the active tabular header.
def parse_tabular_row
  @tabular_primitive ? parse_tabular_primitive_row : parse_tabular_object_row
end
526
+
527
# Parses a row of a primitive tabular array (one value per row) and stores
# it at "<array path>[<row index>]", then moves to the next line.
#
# @raise [Errors::ParseError] DUPLICATE_PATH_ASSIGNMENT when that element
#   path was already assigned
def parse_tabular_primitive_row
  first = current_token
  if first.nil? || first.type == TokenType::NEWLINE || first.type == TokenType::EOF
    return skip_to_newline
  end

  value = parse_tabular_cell_value
  element_path = "#{@tabular_array_path}[#{@tabular_row_index}]"

  track_array_index(element_path, first)

  if @assigned_paths.key?(element_path)
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::DUPLICATE_PATH_ASSIGNMENT,
      first.line, first.column,
      "Duplicate path: #{element_path}"
    )
  end

  @assigned_paths[element_path] = true
  @current_builder.set(element_path, value)

  @tabular_row_index += 1
  skip_to_newline
end
551
+
552
# Parses one row of an object tabular array. Cells are matched to the header
# columns in order and stored at "<array path>[<row>].<column>"; a cell with
# no value token (consecutive or trailing comma) is left unassigned.
# Afterwards the row index is bumped and the rest of the line discarded.
def parse_tabular_object_row
  row_start = current_token
  row = @tabular_row_index

  @tabular_columns.each do |column|
    cell = current_token
    break if cell.nil?

    kind = cell.type
    break if kind == TokenType::NEWLINE || kind == TokenType::EOF || kind == TokenType::HEADER_OPEN

    if is_value_token?(cell)
      value = parse_tabular_cell_value
      cell_path = "#{@tabular_array_path}[#{row}].#{column}"

      track_array_index(cell_path, row_start)

      @assigned_paths[cell_path] = true
      @current_builder.set(cell_path, value)
    end
    # otherwise the cell is absent: nothing to store for this column

    # A "," separator arrives as a PATH token; a comment or line end
    # finishes the row early.
    following = current_token
    if following&.type == TokenType::PATH && following.value == ","
      advance
    elsif following&.type == TokenType::COMMENT ||
          following&.type == TokenType::NEWLINE ||
          following&.type == TokenType::EOF
      break
    end
  end

  @tabular_row_index += 1
  skip_to_newline
end
593
+
594
# Tells whether a token can start a tabular cell value.
#
# @param t the token to classify
# @return [Boolean] true for value-typed tokens (including MODIFIER, which
#   may prefix a value) and for PATH tokens spelling a bare true/false
def is_value_token?(t)
  kind = t.type

  # Bare booleans in tabular context arrive as PATH tokens
  return t.value == "true" || t.value == "false" if kind == TokenType::PATH

  [
    TokenType::STRING, TokenType::NUMBER, TokenType::INTEGER,
    TokenType::CURRENCY, TokenType::PERCENT, TokenType::BOOLEAN,
    TokenType::NULL, TokenType::REFERENCE, TokenType::BINARY,
    TokenType::DATE, TokenType::TIMESTAMP, TokenType::TIME,
    TokenType::DURATION, TokenType::VERB, TokenType::MODIFIER
  ].include?(kind)
end
609
+
610
# Parses one tabular cell: optional modifiers followed by a value token.
#
# A PATH token spelling true/false is accepted as a bare boolean; any other
# token is converted by ValueParser. If, after the modifiers, the line or the
# input ends (NEWLINE, EOF, or no token), the cell is Types::NULL and nothing
# more is consumed. EOF is handled like NEWLINE so the EOF token is never
# consumed or passed to ValueParser.
#
# @return the cell value, with modifiers applied when any were present
def parse_tabular_cell_value
  return Types::NULL if current_token.nil?

  mods = parse_modifiers

  cell = current_token
  if cell.nil? || cell.type == TokenType::NEWLINE || cell.type == TokenType::EOF
    return Types::NULL
  end

  advance
  value = if cell.type == TokenType::PATH && (cell.value == "true" || cell.value == "false")
            cell.value == "true" ? Types::TRUE_VAL : Types::FALSE_VAL
          else
            ValueParser.parse_value(cell)
          end

  value = value.with_modifiers(mods) if mods.any?
  value
end
633
+
634
+ # --- At-Directive Parsing (@import, @schema, @if) ---
635
+
636
# Handles a document-level directive that was tokenized as a PATH token whose
# text starts with "@". Consumes the token, then dispatches on its text.
#
# @param token the PATH token ("@import", "@schema" or "@if")
# @raise [Errors::ParseError] UNEXPECTED_CHARACTER for any other "@..." text
def parse_at_directive(token)
  directive = token.value
  advance # consume the PATH token

  if directive == "@import"
    parse_import_directive(token)
  elsif directive == "@schema"
    parse_schema_directive(token)
  elsif directive == "@if"
    parse_if_directive(token)
  else
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      token.line, token.column,
      "Invalid directive: #{directive}"
    )
  end
end
655
+
656
# Handles @import / @schema / @if when the tokenizer produced a REFERENCE
# token (whose value is the name without the "@"). Consumes the token, then
# dispatches on the name.
#
# @param token the REFERENCE token
# @raise [Errors::ParseError] UNEXPECTED_CHARACTER for any other name
def parse_at_directive_from_ref(token)
  name = token.value # "import", "schema", "if"
  advance # consume the REFERENCE token

  if name == "import"
    parse_import_directive_from_tokens(token)
  elsif name == "schema"
    parse_schema_directive(token)
  elsif name == "if"
    parse_if_directive(token)
  else
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      token.line, token.column,
      "Invalid directive: @#{name}"
    )
  end
end
676
+
677
# Parses the remainder of an @import line that followed a REFERENCE token.
#
# The tokenizer may split the path (e.g. "./other.odin") across several
# tokens, so every token value up to the end of the line is concatenated
# without separators. A token equal to "as" (not in first position) splits
# the line into path and alias. Appends the directive to @directives.
#
# @param token the directive token, used for error positions
# @raise [Errors::ParseError] INVALID_DIRECTIVE when the path or the alias
#   after "as" is missing
def parse_import_directive_from_tokens(token)
  pieces = []
  while (upcoming = current_token)
    kind = upcoming.type
    break if kind == TokenType::NEWLINE || kind == TokenType::EOF || kind == TokenType::COMMENT

    pieces << advance.value.to_s
  end

  # Skip trailing comment
  advance if current_token&.type == TokenType::COMMENT

  if pieces.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_DIRECTIVE,
      token.line, token.column,
      "Import directive requires a path"
    )
  end

  as_at = pieces.each_index.find { |i| i.positive? && pieces[i] == "as" }

  if as_at
    alias_name = pieces[(as_at + 1)..].join("").strip
    if alias_name.empty?
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_DIRECTIVE,
        token.line, token.column,
        "Import alias requires identifier"
      )
    end
    import_path = pieces[0...as_at].join("").strip
  else
    alias_name = nil
    import_path = pieces.join("").strip
  end

  @directives << { type: "import", path: import_path, alias: alias_name }
end
734
+
735
# Parses the remainder of an "@import" line that started with a PATH token.
#
# Token values up to the end of the line are collected (an EQUALS token
# contributes "="). The first "as" entry splits path from alias: the parts
# before it are joined with spaces as the path, and the single part after it
# is the alias. Appends the directive to @directives.
#
# @param token the directive token, used for error positions
# @raise [Errors::ParseError] INVALID_DIRECTIVE when nothing follows the
#   directive or nothing follows "as"
def parse_import_directive(token)
  segments = []
  while (upcoming = current_token)
    kind = upcoming.type
    break if kind == TokenType::NEWLINE || kind == TokenType::EOF || kind == TokenType::COMMENT

    advance
    segments << if kind == TokenType::PATH || kind == TokenType::STRING
                  upcoming.value
                elsif kind == TokenType::EQUALS
                  "="
                else
                  upcoming.value.to_s
                end
  end

  # Skip trailing comment
  advance if current_token&.type == TokenType::COMMENT

  if segments.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_DIRECTIVE,
      token.line, token.column,
      "Import directive requires a path"
    )
  end

  alias_name = nil
  as_at = segments.index("as")

  if as_at.nil?
    import_path = segments.join(" ")
  else
    import_path = segments[0...as_at].join(" ")
    alias_name = segments.fetch(as_at + 1) do
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_DIRECTIVE,
        token.line, token.column,
        "Invalid import alias syntax"
      )
    end
  end

  @directives << { type: "import", path: import_path, alias: alias_name }
end
785
+
786
# Parses the remainder of an @schema line: all token values up to the end of
# the line are concatenated (no separator) into the schema URL, and the
# directive is appended to @directives. A trailing comment is consumed.
#
# @param token the directive token, used for error positions
# @raise [Errors::ParseError] INVALID_DIRECTIVE when no URL follows
def parse_schema_directive(token)
  url_pieces = []
  while (upcoming = current_token)
    kind = upcoming.type
    break if kind == TokenType::NEWLINE || kind == TokenType::EOF || kind == TokenType::COMMENT

    url_pieces << advance.value.to_s
  end
  advance if current_token&.type == TokenType::COMMENT

  if url_pieces.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_DIRECTIVE,
      token.line, token.column,
      "Schema directive requires a URL"
    )
  end

  @directives << { type: "schema", url: url_pieces.join("") }
end
804
+
805
# Parses the remainder of an @if line: all token values up to the end of the
# line are joined with single spaces into the condition text, and the
# directive is appended to @directives. A trailing comment is consumed.
#
# @param token the directive token, used for error positions
# @raise [Errors::ParseError] INVALID_DIRECTIVE when no condition follows
def parse_if_directive(token)
  words = []
  while (upcoming = current_token)
    kind = upcoming.type
    break if kind == TokenType::NEWLINE || kind == TokenType::EOF || kind == TokenType::COMMENT

    words << advance.value.to_s
  end
  advance if current_token&.type == TokenType::COMMENT

  if words.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_DIRECTIVE,
      token.line, token.column,
      "If directive requires a condition"
    )
  end

  # Reconstruct condition with proper spacing
  @directives << { type: "if", condition: words.join(" ") }
end
826
+
827
+ # --- Document Chaining ---
828
+
829
# Handles a "---" document separator: discards the rest of the separator
# line, snapshots the document parsed so far, and resets all per-document
# state so the next document starts clean.
#
# Tabular state is reset as well; otherwise a tabular header that was still
# active before the separator would keep routing value rows of the next
# document into the previous document's array path and row counter.
def handle_doc_separator
  # Skip --- tokens
  skip_to_newline

  # Finalize current document
  finalize_current_document

  # Reset header / path state
  @context = ""
  @previous_context = ""
  @metadata_mode = false
  @assigned_paths = {}
  @array_indices = {}

  # Reset tabular state (same initial values build_document uses)
  @tabular_mode = false
  @tabular_primitive = false
  @tabular_columns = []
  @tabular_array_path = ""
  @tabular_row_index = 0

  # Fresh accumulators for the next document
  @current_builder = Types::OdinDocumentBuilder.new
  @current_metadata = {}
  @current_modifiers = {}
  @current_comments = {}
  @directives = []
end
848
+
849
# Placeholder for detecting a "---" separator that arrives as standalone
# tokens. The tokenizer already delivers "---" as a PATH token, so this
# always answers false.
#
# @return [Boolean] always false
def peek_is_doc_separator?
  false
end
854
+
855
# Validates array contiguity for the document parsed so far and appends a
# snapshot of it (metadata, assignments, modifiers, comments, directives)
# to @documents.
#
# Comments are captured here so that chained documents keep them; they are
# read from the builder the same way build_single_document reads them.
def finalize_current_document
  validate_array_contiguity!

  @documents << {
    metadata: @current_metadata.dup,
    assignments: @current_builder.instance_variable_get(:@assignments).dup,
    modifiers: @current_modifiers.dup,
    comments: @current_builder.instance_variable_get(:@comments).dup,
    directives: @directives.dup
  }
end

# Produces the parse result: a plain OdinDocument when no "---" separator
# was seen, otherwise a ParseResult wrapping every chained document.
def finalize_documents
  return build_single_document if @documents.empty?

  # We have chained documents, finalize the last one
  finalize_current_document
  build_chained_result
end

# Builds an OdinDocument from the state accumulated for the only document.
def build_single_document
  Types::OdinDocument.new(
    assignments: @current_builder.instance_variable_get(:@assignments),
    metadata: @current_metadata,
    modifiers: @current_modifiers,
    comments: @current_builder.instance_variable_get(:@comments)
  )
end

# Builds the result for chained documents: the first document is the primary
# one, and every snapshot (including the first) is also rebuilt into the
# chained list. Each document carries the comments captured in its snapshot.
#
# @return [ParseResult]
def build_chained_result
  rebuild = lambda do |snapshot|
    Types::OdinDocument.new(
      assignments: snapshot[:assignments],
      metadata: snapshot[:metadata],
      modifiers: snapshot[:modifiers],
      comments: snapshot[:comments] || {}
    )
  end

  # The primary is built separately so it stays a distinct object from
  # the first entry of the chained list.
  primary = rebuild.call(@documents[0])
  chained = @documents.map { |snapshot| rebuild.call(snapshot) }

  ParseResult.new(primary, chained, @documents)
end
916
+
917
+ # --- Validation ---
918
+
919
# Checks every "[...]" group in a header path.
#
# Each opening bracket must have its own closing bracket, and the content
# must be empty or whitespace (tabular "path[] : cols") or all digits.
# The closing bracket is captured per match, so an unclosed bracket is
# reported even when an identical, properly closed index appears elsewhere
# in the path (e.g. "a[1].b[1").
#
# @param raw_path [String] stripped header text
# @param token token used for the error position
# @raise [Errors::ParseError] INVALID_ARRAY_INDEX for an unclosed bracket or
#   non-numeric index
def validate_header_path!(raw_path, token)
  return unless raw_path.include?("[")

  raw_path.scan(/\[([^\]]*)(\]?)/) do |content, closer|
    if closer.empty?
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_ARRAY_INDEX,
        token.line, token.column,
        "Invalid array index in header"
      )
    end

    # Empty / whitespace content is the tabular form; digits are an index.
    next if content.strip.empty? || content.match?(/\A\d+\z/)

    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_ARRAY_INDEX,
      token.line, token.column,
      "Invalid array index: #{content}"
    )
  end
end
946
+
947
# Rejects paths nested deeper than MAX_NESTING_DEPTH.
#
# @param path [String] fully resolved path
# @param token token used for the error position
# @raise [Errors::ParseError] MAXIMUM_DEPTH_EXCEEDED
def validate_depth!(path, token)
  depth = path_depth(path)
  return unless depth > MAX_NESTING_DEPTH

  raise Errors::ParseError.new(
    Errors::ParseErrorCode::MAXIMUM_DEPTH_EXCEEDED,
    token.line, token.column,
    "Path depth #{depth} exceeds maximum #{MAX_NESTING_DEPTH}"
  )
end
957
+
958
# Computes the nesting depth of a path: one for the first segment plus one
# for every "." or "[" character.
#
# @param path [String]
# @return [Integer]
def path_depth(path)
  1 + path.count(".[")
end
965
+
966
# Checks the array indices written in a path: every "[digits]" index must not
# exceed MAX_ARRAY_INDEX, and no index may start with "-".
#
# @param path [String]
# @param token token used for the error position
# @raise [Errors::ParseError] ARRAY_INDEX_OUT_OF_RANGE or INVALID_ARRAY_INDEX
def validate_path_indices!(path, token)
  too_large = path.scan(/\[(\d+)\]/)
                  .map { |(digits)| digits.to_i }
                  .find { |index| index > MAX_ARRAY_INDEX }

  if too_large
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::ARRAY_INDEX_OUT_OF_RANGE,
      token.line, token.column,
      "Array index #{too_large} exceeds maximum"
    )
  end

  # Check for negative index
  if path.match?(/\[-/)
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_ARRAY_INDEX,
      token.line, token.column,
      "Negative array index"
    )
  end
end
987
+
988
# Validates every array index in a resolved path and records the path's
# first index for the later contiguity check.
#
# Each index must be non-negative and at most MAX_ARRAY_INDEX, and the
# running sum of the indices must not exceed MAX_ARRAY_INDEX either. The
# first "[n]" in the path is then remembered in @array_indices under the text
# that precedes it (each index once).
#
# @param full_path [String]
# @param token token used for the error position
# @raise [Errors::ParseError] INVALID_ARRAY_INDEX or ARRAY_INDEX_OUT_OF_RANGE
def track_array_index(full_path, token)
  running_total = 0

  full_path.scan(/\[(\-?\d+)\]/) do |(digits)|
    index = digits.to_i

    if index.negative?
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_ARRAY_INDEX,
        token.line, token.column,
        "Negative array index: #{index}"
      )
    end

    if index > MAX_ARRAY_INDEX
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::ARRAY_INDEX_OUT_OF_RANGE,
        token.line, token.column,
        "Array index #{index} out of range"
      )
    end

    running_total += index
    next unless running_total > MAX_ARRAY_INDEX

    raise Errors::ParseError.new(
      Errors::ParseErrorCode::ARRAY_INDEX_OUT_OF_RANGE,
      token.line, token.column,
      "Cumulative array index #{running_total} out of range"
    )
  end

  # Track first array index for contiguity check
  leading = full_path.match(/\A([^\[]*)\[(\d+)\]/)
  return unless leading

  seen = (@array_indices[leading[1]] ||= [])
  first_index = leading[2].to_i
  seen << first_index unless seen.include?(first_index)
end
1029
+
1030
# Ensures the indices recorded for every array form the sequence 0, 1, 2, ...
# with no gaps. Errors are reported at position 0:0.
#
# @raise [Errors::ParseError] NON_CONTIGUOUS_ARRAY_INDICES when an array does
#   not start at 0 or skips an index
def validate_array_contiguity!
  @array_indices.each do |path, indices|
    next if indices.empty?

    ordered = indices.sort

    unless ordered.first == 0
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::NON_CONTIGUOUS_ARRAY_INDICES,
        0, 0,
        "Array '#{path}' does not start at index 0"
      )
    end

    # Position of the first entry that differs from its expected index
    gap_at = ordered.each_index.find { |i| ordered[i] != i }
    next unless gap_at

    raise Errors::ParseError.new(
      Errors::ParseErrorCode::NON_CONTIGUOUS_ARRAY_INDICES,
      0, 0,
      "Non-contiguous array indices for '#{path}': expected #{gap_at}, got #{ordered[gap_at]}"
    )
  end
end
1052
+
1053
+ # --- Error Handling ---
1054
+
1055
# Converts a tokenizer ERROR token into a ParseError. The error code is
# chosen from the start of the token's message; the message itself is passed
# through ("@#" is reported as "@# is invalid").
#
# The "Unterminated header" pattern is tested before the generic
# "Unterminated" pattern; in the reverse order the generic pattern matches
# first and header errors would be reported as UNTERMINATED_STRING.
#
# @param token the ERROR token
# @raise [Errors::ParseError] always
def handle_error_token(token)
  message = token.value

  code =
    case message
    when "@#"
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER
    when /\AUnterminated header/
      Errors::ParseErrorCode::INVALID_HEADER_SYNTAX
    when /\AUnterminated/
      Errors::ParseErrorCode::UNTERMINATED_STRING
    when /\AInvalid escape/, /\AInvalid unicode/
      Errors::ParseErrorCode::INVALID_ESCAPE_SEQUENCE
    when /\AInvalid boolean/, /\AInvalid numeric/
      Errors::ParseErrorCode::INVALID_TYPE_PREFIX
    when /\AEmpty directive/
      Errors::ParseErrorCode::INVALID_DIRECTIVE
    else
      # Includes "Empty verb ..." and any unrecognized tokenizer message
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER
    end

  text = message == "@#" ? "@# is invalid" : message
  raise Errors::ParseError.new(code, token.line, token.column, text)
end
1121
+ end
1122
+
1123
# Result wrapper for chained documents.
#
# Wraps the primary (first) document together with the full list of chained
# documents and their raw snapshots. Query methods are answered by the
# primary document.
class ParseResult
  attr_reader :metadata, :assignments, :modifiers, :chained_documents, :raw_documents

  # @param primary_doc the first document; must respond to the document
  #   query methods delegated below
  # @param chained_docs every document of the chain, in order
  # @param raw_docs the raw per-document snapshots
  def initialize(primary_doc, chained_docs, raw_docs)
    @primary = primary_doc
    @chained_documents = chained_docs
    @raw_documents = raw_docs

    # Cached views of the primary document
    @assignments = primary_doc.assignments
    @metadata = primary_doc.metadata
    @modifiers = primary_doc.all_modifiers
  end

  # All documents of the chain (same list as #chained_documents).
  def documents
    @chained_documents
  end

  # --- Value lookup on the primary document ---

  def get(path)
    @primary.get(path)
  end

  def [](path)
    get(path)
  end

  def include?(path)
    @primary.include?(path)
  end

  def metadata_value(key)
    @metadata[key]
  end

  # --- Size and enumeration of the primary document ---

  def size
    @primary.size
  end

  def empty?
    @primary.empty?
  end

  def paths
    @primary.paths
  end

  def each_assignment(&block)
    @primary.each_assignment(&block)
  end

  def each_metadata(&block)
    @primary.each_metadata(&block)
  end

  # --- Modifiers and comments of the primary document ---

  def modifiers_for(path)
    @primary.modifiers_for(path)
  end

  def all_modifiers
    @primary.all_modifiers
  end

  def comment_for(path)
    @primary.comment_for(path)
  end

  def all_comments
    @primary.all_comments
  end
end
1192
+ end
1193
+ end