odin-foundation 1.0.4 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/lib/odin/export.rb +1 -1
  3. data/lib/odin/forms/accessibility.rb +95 -0
  4. data/lib/odin/forms/css.rb +42 -0
  5. data/lib/odin/forms/parser.rb +719 -0
  6. data/lib/odin/forms/renderer.rb +534 -0
  7. data/lib/odin/forms/types.rb +102 -0
  8. data/lib/odin/forms/units.rb +41 -0
  9. data/lib/odin/forms.rb +55 -0
  10. data/lib/odin/parsing/parser.rb +25 -1
  11. data/lib/odin/parsing/tokenizer.rb +38 -20
  12. data/lib/odin/parsing/value_parser.rb +65 -7
  13. data/lib/odin/resolver/import_resolver.rb +40 -12
  14. data/lib/odin/resolver/type_registry.rb +54 -0
  15. data/lib/odin/transform/format_exporters.rb +88 -48
  16. data/lib/odin/transform/source_parsers.rb +2 -2
  17. data/lib/odin/transform/transform_engine.rb +1388 -246
  18. data/lib/odin/transform/transform_expr.rb +222 -0
  19. data/lib/odin/transform/transform_parser.rb +377 -19
  20. data/lib/odin/transform/transform_types.rb +23 -7
  21. data/lib/odin/transform/verb_context.rb +19 -1
  22. data/lib/odin/transform/verbs/aggregation_verbs.rb +2 -1
  23. data/lib/odin/transform/verbs/collection_verbs.rb +164 -89
  24. data/lib/odin/transform/verbs/datetime_verbs.rb +86 -15
  25. data/lib/odin/transform/verbs/extra_verbs.rb +613 -0
  26. data/lib/odin/transform/verbs/financial_verbs.rb +116 -27
  27. data/lib/odin/transform/verbs/geo_verbs.rb +7 -0
  28. data/lib/odin/transform/verbs/numeric_verbs.rb +85 -64
  29. data/lib/odin/transform/verbs/object_verbs.rb +31 -26
  30. data/lib/odin/types/errors.rb +9 -1
  31. data/lib/odin/types/schema.rb +20 -3
  32. data/lib/odin/utils/format_utils.rb +31 -15
  33. data/lib/odin/validation/format_validators.rb +7 -9
  34. data/lib/odin/validation/invariant_evaluator.rb +410 -0
  35. data/lib/odin/validation/schema_definition_validator.rb +357 -0
  36. data/lib/odin/validation/schema_parser.rb +234 -21
  37. data/lib/odin/validation/validator.rb +281 -123
  38. data/lib/odin/version.rb +1 -1
  39. data/lib/odin.rb +100 -4
  40. metadata +14 -2
@@ -8,6 +8,7 @@ module Odin
8
8
 
9
9
  class TransformError < StandardError
10
10
  attr_reader :code
11
+ attr_accessor :segment, :field
11
12
 
12
13
  def initialize(message, code: "E001")
13
14
  @code = code
@@ -15,6 +16,33 @@ module Odin
15
16
  end
16
17
  end
17
18
 
19
+ # A warning carrying a stable transform error code. Collected alongside
20
+ # string warnings; consumers that inspect codes read `.code`.
21
+ class TransformWarning
22
+ attr_reader :code, :message
23
+ attr_accessor :segment, :field
24
+
25
+ def initialize(message, code:)
26
+ @message = message
27
+ @code = code
28
+ end
29
+
30
+ def to_s
31
+ @message
32
+ end
33
+ end
34
+
35
+ # Raised during expression evaluation to carry a coded TransformError up to
36
+ # the mapping handler, which preserves the code under fail/warn.
37
+ class CodedTransformError < StandardError
38
+ attr_reader :transform_error
39
+
40
+ def initialize(transform_error)
41
+ @transform_error = transform_error
42
+ super(transform_error.message)
43
+ end
44
+ end
45
+
18
46
  # ── Transform Error Codes ──
19
47
  # T001-T010 are reserved for core transform errors.
20
48
  # T011+ are for implementation-specific errors.
@@ -30,6 +58,26 @@ module Odin
30
58
  T009_LOOP_SOURCE_NOT_ARRAY = "T009"
31
59
  T010_POSITION_OVERFLOW = "T010"
32
60
  T011_INCOMPATIBLE_CONVERSION = "T011"
61
+ T012_DANGLING_BRANCH = "T012"
62
+ T014_NESTED_INTERPOLATION = "T014"
63
+ end
64
+
65
+ # Create a T014 Nested Interpolation error.
66
+ def self.nested_interpolation_error(expr, segment = nil)
67
+ err = TransformError.new(
68
+ "Nested interpolation is not allowed: ${#{expr}}",
69
+ code: ErrorCodes::T014_NESTED_INTERPOLATION
70
+ )
71
+ err.segment = segment if segment
72
+ err
73
+ end
74
+
75
+ # Create a T012 Dangling Branch error (elif/else with no preceding if).
76
+ def self.dangling_branch_error(directive)
77
+ TransformError.new(
78
+ "'#{directive}' segment has no preceding 'if'",
79
+ code: ErrorCodes::T012_DANGLING_BRANCH
80
+ )
33
81
  end
34
82
 
35
83
  # Create a T011 Incompatible Conversion error.
@@ -42,13 +90,56 @@ module Odin
42
90
  )
43
91
  end
44
92
 
93
+ # Create a T001 Unknown Verb error.
94
+ def self.unknown_verb_error(verb_name)
95
+ TransformError.new("Unknown verb: #{verb_name}", code: ErrorCodes::T001_UNKNOWN_VERB)
96
+ end
97
+
98
+ # Create a T003 Lookup Table Not Found error.
99
+ def self.lookup_table_not_found_error(table_name)
100
+ TransformError.new("Lookup table not found: #{table_name}", code: ErrorCodes::T003_LOOKUP_TABLE_NOT_FOUND)
101
+ end
102
+
103
+ # Create a T004 Lookup Key Not Found error.
104
+ def self.lookup_key_not_found_error(table_name, key)
105
+ TransformError.new("Lookup key '#{key}' not found in table '#{table_name}'", code: ErrorCodes::T004_LOOKUP_KEY_NOT_FOUND)
106
+ end
107
+
108
+ # Create a T005 Source Path Not Found error.
109
+ def self.source_path_not_found_error(path)
110
+ TransformError.new("Source path not found: #{path}", code: ErrorCodes::T005_SOURCE_PATH_NOT_FOUND)
111
+ end
112
+
113
+ # Create a T006 Invalid Output Format error.
114
+ def self.invalid_output_format_error(format)
115
+ TransformError.new("Invalid or unsupported output format: #{format}", code: ErrorCodes::T006_INVALID_OUTPUT_FORMAT)
116
+ end
117
+
118
+ # Create a T008 Accumulator Overflow error.
119
+ def self.accumulator_overflow_error(name, value)
120
+ TransformError.new("Accumulator '#{name}' overflow with value #{value}", code: ErrorCodes::T008_ACCUMULATOR_OVERFLOW)
121
+ end
122
+
123
+ # Create a T009 Loop Source Not Array error.
124
+ def self.loop_source_not_array_error(path)
125
+ TransformError.new("Loop source path '#{path}' does not resolve to an array", code: ErrorCodes::T009_LOOP_SOURCE_NOT_ARRAY)
126
+ end
127
+
128
+ # The required-source-missing code: a present-but-null :required field.
129
+ SOURCE_MISSING = "SOURCE_MISSING"
130
+
45
131
  attr_reader :verb_registry
46
132
 
47
133
  def initialize
48
134
  @verb_registry = build_verb_registry
49
135
  end
50
136
 
51
- def execute(transform_def, source_data)
137
+ def execute(transform_def, source_data, import_resolver: nil)
138
+ # Merge imported tables, constants, accumulators, and segments.
139
+ if import_resolver && transform_def.imports && !transform_def.imports.empty?
140
+ resolve_imports(transform_def, import_resolver)
141
+ end
142
+
52
143
  # Check for multi-record mode (discriminator dispatch)
53
144
  disc_config = transform_def.discriminator_config
54
145
  if disc_config
@@ -71,9 +162,7 @@ module Odin
71
162
  passes = transform_def.passes
72
163
  if passes.empty?
73
164
  # Single implicit pass
74
- transform_def.segments.each do |segment|
75
- process_segment(segment, source, context, output)
76
- end
165
+ process_segment_list(transform_def.segments, source, context, output)
77
166
  else
78
167
  # Multi-pass: explicit passes first, then pass-0 (implicit)
79
168
  all_passes = passes.include?(0) ? passes : passes + [0]
@@ -84,12 +173,8 @@ module Odin
84
173
  end
85
174
  first_pass = false
86
175
 
87
- transform_def.segments.each do |segment|
88
- seg_pass = segment.pass || 0
89
- next unless seg_pass == pass_num
90
-
91
- process_segment(segment, source, context, output)
92
- end
176
+ pass_segments = transform_def.segments.select { |s| (s.pass || 0) == pass_num }
177
+ process_segment_list(pass_segments, source, context, output)
93
178
  end
94
179
  end
95
180
 
@@ -107,7 +192,7 @@ module Odin
107
192
  # 7. Convert output to plain Ruby for result (DynValues -> native Ruby)
108
193
  plain_output = deep_to_ruby(output)
109
194
 
110
- TransformResult.new(output: plain_output, formatted: formatted, output_dv: output_dv, errors: context.errors)
195
+ TransformResult.new(output: plain_output, formatted: formatted, output_dv: output_dv, errors: context.errors, warnings: context.warnings)
111
196
  end
112
197
 
113
198
  # ── Multi-Record Execution (discriminator-based routing) ──
@@ -203,7 +288,7 @@ module Odin
203
288
  # Convert output to plain Ruby
204
289
  plain_output = deep_to_ruby(output)
205
290
 
206
- TransformResult.new(output: plain_output, formatted: formatted, output_dv: output_dv, errors: context.errors)
291
+ TransformResult.new(output: plain_output, formatted: formatted, output_dv: output_dv, errors: context.errors, warnings: context.warnings)
207
292
  end
208
293
 
209
294
  private def parse_discriminator_config(config)
@@ -275,17 +360,47 @@ module Odin
275
360
  # Public for unit testing verbs directly
276
361
  def invoke_verb(name, args, context)
277
362
  verb_fn = @verb_registry[name]
278
- raise TransformError.new("Unknown verb: %#{name}") unless verb_fn
363
+ raise CodedTransformError.new(self.class.unknown_verb_error(name)) unless verb_fn
279
364
 
280
365
  verb_fn.call(args, context)
281
366
  end
282
367
 
368
+ # Verbs whose leading arguments must be numeric; checked under strictTypes.
369
+ NUMERIC_ARG_VERBS = %w[
370
+ sqrt abs round floor ceil negate sign trunc ln log log10 log2 exp pow
371
+ add subtract multiply divide mod between formatNumber formatInteger
372
+ formatCurrency toRadians toDegrees
373
+ ].freeze
374
+
375
+ NUMERIC_DYN_TYPES = %i[integer float float_raw currency currency_raw percent null].freeze
376
+
377
+ # T002: under strictTypes, a numeric verb argument that is not a numeric
378
+ # (or null) value fails the field.
379
+ def check_verb_arg_types!(verb_name, args)
380
+ return unless NUMERIC_ARG_VERBS.include?(verb_name)
381
+
382
+ args.each_with_index do |arg, i|
383
+ next if arg.nil?
384
+ next if NUMERIC_DYN_TYPES.include?(arg.type)
385
+
386
+ err = TransformError.new(
387
+ "Verb '#{verb_name}' arg #{i + 1}: expected number, got #{arg.type}",
388
+ code: ErrorCodes::T002_INVALID_VERB_ARGS
389
+ )
390
+ raise CodedTransformError.new(err)
391
+ end
392
+ end
393
+
283
394
  # ── Expression Evaluation ──
284
395
 
285
396
  def evaluate(expr, context)
286
397
  case expr
287
398
  when LiteralExpr
288
- expr.value
399
+ val = expr.value
400
+ if val.is_a?(Types::DynValue) && val.string? && val.value.include?("${")
401
+ return interpolate_string(val.value, context)
402
+ end
403
+ val
289
404
  when CopyExpr
290
405
  val = resolve_path(expr.source_path, context)
291
406
  # Apply CopyExpr-level extraction directives only for compatible source formats
@@ -308,15 +423,71 @@ module Odin
308
423
 
309
424
  private
310
425
 
311
- # Emulate JavaScript's signed 32-bit right shift (>>).
312
- # Ruby integers are arbitrary precision and always do logical (unsigned) shift,
313
- # but JS >> sign-extends from bit 31.
426
+ # Merge imported lookup tables, constants, accumulators, and named segments
427
+ # into this transform. Local declarations win over imported ones; imported
428
+ # segments are appended so their mappings remain referenceable.
429
+ def resolve_imports(transform_def, resolver)
430
+ seen = {}
431
+ transform_def.imports.each do |path|
432
+ next if seen[path]
433
+
434
+ seen[path] = true
435
+ text = resolver.call(path)
436
+ next if text.nil?
437
+
438
+ imported = TransformParser.new.parse(text)
439
+
440
+ imported.tables.each do |name, table|
441
+ transform_def.tables[name] = table unless transform_def.tables.key?(name)
442
+ end
443
+ imported.constants.each do |name, value|
444
+ transform_def.constants[name] = value unless transform_def.constants.key?(name)
445
+ end
446
+ imported.accumulators.each do |name, acc_def|
447
+ transform_def.accumulators[name] = acc_def unless transform_def.accumulators.key?(name)
448
+ end
449
+
450
+ existing_names = transform_def.segments.map(&:name)
451
+ imported.segments.each do |seg|
452
+ next if seg.name.to_s.empty? || existing_names.include?(seg.name)
453
+
454
+ transform_def.segments << seg
455
+ end
456
+ end
457
+ end
458
+
459
+ # Signed 32-bit right shift (>>) with sign-extension from bit 31.
460
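+ # Ruby integers are arbitrary precision, so the value is first wrapped into the signed 32-bit range; Ruby's >> is then an arithmetic (sign-preserving) shift, and the result is masked to its low byte.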
314
461
  def js_signed_rshift(val, shift)
315
462
  val = val & 0xFFFFFFFF
316
463
  val -= 0x100000000 if val >= 0x80000000
317
464
  (val >> shift) & 0xFF
318
465
  end
319
466
 
467
+ # Extract a [key, value] pair from a %toObject entry in either
468
+ # pair-array ([k, v]) or {key, value} / {k, v} object form.
469
+ def to_object_pair(item)
470
+ return nil unless item.is_a?(Types::DynValue)
471
+
472
+ if item.array?
473
+ items = item.value || []
474
+ return nil if items.length < 2
475
+ return [items[0].to_string, items[1]]
476
+ end
477
+
478
+ if item.object?
479
+ entries = item.value || {}
480
+ if entries.key?("key") && entries.key?("value")
481
+ return [entries["key"].to_string, entries["value"]]
482
+ end
483
+ if entries.key?("k") && entries.key?("v")
484
+ return [entries["k"].to_string, entries["v"]]
485
+ end
486
+ end
487
+
488
+ nil
489
+ end
490
+
320
491
  def deep_to_ruby(obj)
321
492
  case obj
322
493
  when Types::DynValue
@@ -417,7 +588,8 @@ module Odin
417
588
  when Types::OdinBoolean then Types::DynValue.of_bool(val.value)
418
589
  when Types::OdinNull then Types::DynValue.of_null
419
590
  when Types::OdinCurrency
420
- Types::DynValue.of_currency(val.value, val.respond_to?(:decimal_places) ? val.decimal_places : 2)
591
+ Types::DynValue.of_currency(val.value, val.respond_to?(:decimal_places) ? val.decimal_places : 2,
592
+ val.respond_to?(:currency_code) ? val.currency_code : nil)
421
593
  when Types::OdinReference then Types::DynValue.of_reference(val.path)
422
594
  when Types::OdinBinary then Types::DynValue.of_binary(val.data)
423
595
  else Types::DynValue.of_null
@@ -430,6 +602,13 @@ module Odin
430
602
  context = VerbContext.new
431
603
  context.source = source
432
604
  context.source_format = transform_def.source_format || ""
605
+ ov = transform_def.header.target_options["onValidation"]
606
+ context.on_validation = ov if ov && !ov.empty?
607
+ om = transform_def.header.target_options["onMissing"]
608
+ context.on_missing = om if om && !om.empty?
609
+ oe = transform_def.header.target_options["onError"]
610
+ context.on_error = oe if oe && !oe.empty?
611
+ context.strict_types = transform_def.header.strict_types
433
612
 
434
613
  # Initialize constants
435
614
  transform_def.constants.each do |key, val|
@@ -449,6 +628,34 @@ module Odin
449
628
  context
450
629
  end
451
630
 
631
+ # Report a missing lookup key (T004) honoring the on_missing policy.
632
+ # Defaults to silent (nothing recorded); appends an error under "fail" or a warning under "warn". Never raises.
633
+ def report_lookup_miss(context, table_name, key)
634
+ case context.on_missing
635
+ when "fail"
636
+ context.errors << self.class.lookup_key_not_found_error(table_name, key)
637
+ when "warn"
638
+ context.warnings << TransformWarning.new(
639
+ "Lookup key '#{key}' not found in table '#{table_name}'",
640
+ code: ErrorCodes::T004_LOOKUP_KEY_NOT_FOUND
641
+ )
642
+ end
643
+ end
644
+
645
+ # Report a missing lookup table (T003) honoring the on_missing policy.
646
+ # Distinct from a missing key (T004): the referenced table was never declared.
647
+ def report_table_not_found(context, table_name)
648
+ case context.on_missing
649
+ when "fail"
650
+ context.errors << self.class.lookup_table_not_found_error(table_name)
651
+ when "warn"
652
+ context.warnings << TransformWarning.new(
653
+ "Lookup table not found: #{table_name}",
654
+ code: ErrorCodes::T003_LOOKUP_TABLE_NOT_FOUND
655
+ )
656
+ end
657
+ end
658
+
452
659
  def reset_non_persist_accumulators(context, accumulator_defs)
453
660
  accumulator_defs.each do |key, acc_def|
454
661
  next if acc_def.persist
@@ -459,6 +666,52 @@ module Odin
459
666
 
460
667
  # ── Segment Processing ──
461
668
 
669
+ # Process a list of segments, honoring if/elif/else conditional chains.
670
+ # A chain is a run of consecutive segments: one `if`, then any `elif`, then
671
+ # an optional `else`. Only the first branch whose condition holds is emitted.
672
+ def process_segment_list(segments, source, context, output)
673
+ # :none = no active chain; :pending = chain open, none taken; :taken = a branch taken
674
+ branch = :none
675
+
676
+ segments.each do |segment|
677
+ if segment.if_condition
678
+ taken = evaluate_condition(segment.if_condition, source, context)
679
+ branch = taken ? :taken : :pending
680
+ process_segment(segment, source, context, output) if taken
681
+ elsif segment.elif_condition
682
+ if branch == :none
683
+ context.errors << self.class.dangling_branch_error("elif")
684
+ next
685
+ end
686
+ next if branch == :taken
687
+
688
+ taken = evaluate_condition(segment.elif_condition, source, context)
689
+ branch = taken ? :taken : :pending
690
+ process_segment(segment, source, context, output) if taken
691
+ elsif segment.is_else
692
+ if branch == :none
693
+ context.errors << self.class.dangling_branch_error("else")
694
+ next
695
+ end
696
+ process_segment(segment, source, context, output) if branch == :pending
697
+ branch = :none
698
+ else
699
+ branch = :none
700
+ process_segment(segment, source, context, output)
701
+ end
702
+ end
703
+ end
704
+
705
+ # A segment whose name begins with "_" is a computation-only sink:
706
+ # it runs for side effects (accumulators, verbs) and is never emitted.
707
+ def sink_segment?(segment)
708
+ name = segment.name.to_s
709
+ return false if name.empty?
710
+
711
+ last = name.split(".").last || name
712
+ last.start_with?("_")
713
+ end
714
+
462
715
  def process_segment(segment, source, context, output, modifier_prefix: "")
463
716
  # Check _when condition
464
717
  if segment.when_condition
@@ -483,6 +736,12 @@ module Odin
483
736
  seg_name = segment.name
484
737
  full_prefix = modifier_prefix.empty? ? seg_name : "#{modifier_prefix}.#{seg_name}"
485
738
 
739
+ # Literal block: emit interpolated text lines instead of field mappings.
740
+ if segment.is_literal
741
+ process_literal_segment(segment, source, context, output)
742
+ return
743
+ end
744
+
486
745
  # Handle _each (loop over array)
487
746
  if segment.each_source
488
747
  process_loop_segment(segment, source, context, output, modifier_prefix: full_prefix)
@@ -500,6 +759,9 @@ module Odin
500
759
  process_segment(child, source, context, segment_result, modifier_prefix: full_prefix)
501
760
  end
502
761
 
762
+ # Sink section: side effects only, nothing emitted.
763
+ return if sink_segment?(segment)
764
+
503
765
  # Merge segment result into output
504
766
  if segment_result.any?
505
767
  if segment.is_array && segment.array_index
@@ -528,84 +790,325 @@ module Odin
528
790
  end
529
791
  end
530
792
 
531
- def process_loop_segment(segment, source, context, output, modifier_prefix: "")
532
- # Resolve the array to iterate
533
- each_path = segment.each_source
534
- items = resolve_path_from_string(each_path, source, context)
793
+ # Render a :literal segment to interpolated text lines. Under a :loop the
794
+ # block renders once per item; lines are emitted verbatim by the formatter.
795
+ def process_literal_segment(segment, source, context, output)
796
+ template = segment.literal_body.to_s
797
+ lines = []
798
+ render = lambda do |ctx|
799
+ render_literal(template, ctx, segment.path).split("\n", -1).each { |l| lines << l }
800
+ end
535
801
 
536
- # If the resolved value is null (path not found), skip iteration
537
- # matching TypeScript which checks Array.isArray(items)
538
- if items.is_a?(Types::DynValue) && items.null?
539
- return
802
+ if (segment.loops && !segment.loops.empty?) || segment.each_source
803
+ loops = segment_loops(segment)
804
+ dummy = []
805
+ begin
806
+ iterate_loops(loops, 0, segment, source, context, dummy, on_item: render)
807
+ rescue CodedTransformError => e
808
+ coded = e.transform_error
809
+ coded.segment = segment.name
810
+ case context.on_error
811
+ when "warn"
812
+ context.warnings << TransformWarning.new(coded.message, code: coded.code).tap { |w| w.segment = segment.name }
813
+ when "skip"
814
+ # drop silently
815
+ else
816
+ context.errors << coded
817
+ end
818
+ end
819
+ else
820
+ render.call(context)
540
821
  end
541
822
 
542
- # If the resolved value is not an array, wrap single non-null values
543
- if items.is_a?(Types::DynValue) && !items.array?
544
- items = Types::DynValue.of_array([items])
823
+ set_output_path(output, segment.name, { "__literalLines" => lines })
824
+ end
825
+
826
+ def render_literal(template, context, segment_path)
827
+ interpolate_literal_block(template, context)
828
+ rescue TransformError => e
829
+ if e.code == ErrorCodes::T014_NESTED_INTERPOLATION
830
+ e.segment ||= segment_path
831
+ context.errors << e
832
+ ""
833
+ else
834
+ raise
545
835
  end
836
+ end
546
837
 
547
- return unless items.is_a?(Types::DynValue) && items.array?
838
+ # Interpolate ${...} in a literal block body. Escapes: \${ -> ${, \$ -> $,
839
+ # \\ -> \. A ${...} whose expression contains another ${ is a T014 error.
840
+ def interpolate_literal_block(template, context)
841
+ out = +""
842
+ i = 0
843
+ len = template.length
844
+ while i < len
845
+ ch = template[i]
846
+ if ch == "\\"
847
+ nxt = template[i + 1]
848
+ if nxt == "$" && template[i + 2] == "{"
849
+ out << "${"; i += 3; next
850
+ elsif nxt == "\\"
851
+ out << "\\"; i += 2; next
852
+ elsif nxt == "$"
853
+ out << "$"; i += 2; next
854
+ else
855
+ out << "\\"; i += 1; next
856
+ end
857
+ end
548
858
 
549
- # Check if this is a scalar array loop (only has _ = expr mappings)
550
- has_underscore_only = segment.field_mappings.all? { |m| m.target_field == "_" } &&
551
- segment.field_mappings.any? && segment.children.empty?
859
+ if ch == "$" && template[i + 1] == "{"
860
+ close = template.index("}", i + 2)
861
+ if close.nil?
862
+ out << template[i..]
863
+ break
864
+ end
865
+ expr = template[(i + 2)...close]
866
+ raise TransformEngine.nested_interpolation_error(expr) if expr.include?("${")
867
+ out << evaluate_interpolation_expr(expr.strip, context)
868
+ i = close + 1
869
+ next
870
+ end
871
+
872
+ out << ch
873
+ i += 1
874
+ end
875
+ out
876
+ end
877
+
878
+ def evaluate_interpolation_expr(expr, context)
879
+ if expr.start_with?("%")
880
+ parser = TransformParser.new
881
+ parsed, = parser.send(:parse_expr_from_tokens, parser.send(:tokenize_expression, expr))
882
+ parsed ? dynvalue_string(evaluate(parsed, context)) : "${#{expr}}"
883
+ elsif expr.start_with?("@")
884
+ dynvalue_string(resolve_path(expr[1..], context))
885
+ else
886
+ "${#{expr}}"
887
+ end
888
+ end
552
889
 
890
+ def process_loop_segment(segment, source, context, output, modifier_prefix: "")
891
+ loops = segment_loops(segment)
553
892
  results = []
893
+ begin
894
+ iterate_loops(loops, 0, segment, source, context, results, modifier_prefix: modifier_prefix)
895
+ rescue CodedTransformError => e
896
+ coded = e.transform_error
897
+ coded.segment = segment.name
898
+ case context.on_error
899
+ when "warn"
900
+ context.warnings << TransformWarning.new(coded.message, code: coded.code).tap { |w| w.segment = segment.name }
901
+ when "skip"
902
+ # drop silently
903
+ else
904
+ context.errors << coded
905
+ end
906
+ return
907
+ end
908
+
909
+ return if sink_segment?(segment)
910
+
911
+ seg_name = segment.name
912
+ # Always set the array in output, even if empty
913
+ set_output_path(output, seg_name, results)
914
+ end
915
+
916
+ # Normalize a segment's loop directives to a list of {source:, alias:} specs.
917
+ def segment_loops(segment)
918
+ if segment.loops && !segment.loops.empty?
919
+ segment.loops
920
+ else
921
+ [{ source: segment.each_source, alias: nil }]
922
+ end
923
+ end
924
+
925
+ # Drive one or more :loop directives as a nested cross-product. Each level
926
+ # binds its alias and current item, then recurses; the innermost level emits
927
+ # one element per item. An absent/null source at any level yields no rows; a present non-array source raises a T009 error.
928
+ def iterate_loops(loops, depth, segment, source, context, results, modifier_prefix: "", on_item: nil)
554
929
  loop_ctx = context.dup_for_loop
555
930
  raise TransformError.new("Maximum loop nesting depth exceeded") if loop_ctx.loop_depth > VerbContext::MAX_LOOP_DEPTH
556
931
 
932
+ spec = loops[depth]
933
+ is_outermost = depth.zero?
934
+ is_innermost = depth == loops.length - 1
935
+
936
+ items = resolve_loop_items(spec[:source], is_outermost, source, context)
937
+ unless items.is_a?(Types::DynValue) && items.array?
938
+ # A present non-array scalar is a T009 error; an absent/null source
939
+ # yields zero rows silently.
940
+ if items.is_a?(Types::DynValue) && !items.null?
941
+ raise CodedTransformError.new(self.class.loop_source_not_array_error(spec[:source].to_s))
942
+ end
943
+ return
944
+ end
945
+
946
+ has_underscore_only = segment.field_mappings.all? { |m| m.target_field == "_" } &&
947
+ segment.field_mappings.any? && segment.children.empty?
948
+
557
949
  items.value.each_with_index do |item, idx|
558
- loop_ctx.current_item = item
559
- loop_ctx.loop_index = idx
560
- loop_ctx.loop_length = items.value.length
561
- loop_ctx.loop_vars["_item"] = item
562
- loop_ctx.loop_vars["_index"] = Types::DynValue.of_integer(idx)
563
- loop_ctx.loop_vars["_length"] = Types::DynValue.of_integer(items.value.length)
950
+ item_ctx = loop_ctx.dup_for_loop
951
+ item_ctx.loop_depth = loop_ctx.loop_depth
952
+ item_ctx.current_item = item
953
+ item_ctx.loop_index = idx
954
+ item_ctx.loop_length = items.value.length
955
+ item_ctx.loop_vars["_item"] = item
956
+ item_ctx.loop_vars["_index"] = Types::DynValue.of_integer(idx)
957
+ item_ctx.loop_vars["_length"] = Types::DynValue.of_integer(items.value.length)
958
+ item_ctx.aliases[spec[:alias]] = item if spec[:alias]
564
959
 
565
- if segment.counter_name
566
- loop_ctx.loop_vars[segment.counter_name] = Types::DynValue.of_integer(idx)
960
+ # :counter binds the innermost index and resets per outer item.
961
+ if segment.counter_name && is_innermost
962
+ item_ctx.loop_vars[segment.counter_name] = Types::DynValue.of_integer(idx)
963
+ end
964
+
965
+ unless is_innermost
966
+ iterate_loops(loops, depth + 1, segment, item, item_ctx, results,
967
+ modifier_prefix: modifier_prefix, on_item: on_item)
968
+ next
969
+ end
970
+
971
+ if on_item
972
+ on_item.call(item_ctx)
973
+ next
567
974
  end
568
975
 
569
976
  if has_underscore_only
570
- # Scalar array: evaluate the _ mapping and use the result as the array element
571
977
  val = Types::DynValue.of_null
572
978
  segment.field_mappings.each do |mapping|
573
- val = evaluate(mapping.expression, loop_ctx)
574
- # Apply extraction directives first (:pos, :len, :field) as a group
979
+ val = evaluate(mapping.expression, item_ctx)
575
980
  val = apply_extraction_directives(val, mapping.directives)
576
- # Apply remaining directives
577
981
  mapping.directives.each do |directive|
578
982
  next if %w[pos len field].include?(directive.name)
579
- val = apply_directive(val, directive, item, loop_ctx)
983
+ val = apply_directive(val, directive, item, item_ctx)
580
984
  end
581
985
  end
582
986
  results << val
583
987
  else
584
988
  item_result = {}
585
989
  segment.field_mappings.each do |mapping|
586
- process_mapping(mapping, item, loop_ctx, item_result, modifier_prefix: modifier_prefix)
990
+ process_mapping(mapping, item, item_ctx, item_result, modifier_prefix: modifier_prefix)
587
991
  end
588
-
589
- # Process children
590
992
  segment.children.each do |child|
591
- process_segment(child, item, loop_ctx, item_result, modifier_prefix: modifier_prefix)
993
+ process_segment(child, item, item_ctx, item_result, modifier_prefix: modifier_prefix)
592
994
  end
593
-
594
995
  results << item_result if item_result.any?
595
996
  end
596
997
  end
998
+ end
597
999
 
598
- seg_name = segment.name
599
- # Always set the array in output, even if empty (Java parity)
600
- set_output_path(output, seg_name, results)
1000
+ # Resolve the array for a loop level. Outermost resolves against the source
1001
+ # root; inner levels resolve relative (.field) or aliased paths against the
1002
+ # current item.
1003
+ def resolve_loop_items(path, is_outermost, source, context)
1004
+ p = path.to_s
1005
+ p = p[1..] if p.start_with?("@")
1006
+
1007
+ items = if p.start_with?(".")
1008
+ base = context.in_loop? && context.current_item ? context.current_item : source
1009
+ resolve_dotted_path(base, p[1..])
1010
+ elsif is_outermost
1011
+ resolve_path_from_string(p, source, context)
1012
+ else
1013
+ first = p.split(".").first
1014
+ if context.aliases.key?(first)
1015
+ aliased = context.aliases[first]
1016
+ rest = p.include?(".") ? p[(first.length + 1)..] : ""
1017
+ rest.empty? ? aliased : resolve_dotted_path(aliased, rest)
1018
+ else
1019
+ base = context.in_loop? && context.current_item ? context.current_item : source
1020
+ resolve_dotted_path(base, p)
1021
+ end
1022
+ end
1023
+
1024
+ items
1025
+ end
1026
+
1027
+ # Precompiled :validate / :enum / :range data for a mapping.
1028
+ CompiledValidation = Struct.new(
1029
+ :pattern, :regex, :regex_error,
1030
+ :enum_allowed, :enum_label,
1031
+ :range_str, :range_min, :range_max,
1032
+ keyword_init: true
1033
+ )
1034
+
1035
+ # Per-mapping directive references and flags, derived once from the mapping's
1036
+ # data-independent directives/modifiers and reused across all executions.
1037
+ MappingMods = Struct.new(
1038
+ :if_dir, :unless_dir, :object_dir,
1039
+ :has_default, :has_raw, :has_array,
1040
+ :extraction_dir_names, :required,
1041
+ :validate_dir, :enum_dir, :range_dir,
1042
+ :validation, :validation_active,
1043
+ keyword_init: true
1044
+ )
1045
+
1046
+ # Build (or reuse) the precomputed modifier data for a mapping, memoized on
1047
+ # the mapping object so the directive list is scanned only once.
1048
+ def mapping_mods(mapping)
1049
+ cached = mapping.instance_variable_get(:@__mods)
1050
+ return cached if cached
1051
+
1052
+ directives = mapping.directives
1053
+ validate_dir = directives.find { |d| d.name == "validate" }
1054
+ enum_dir = directives.find { |d| d.name == "enum" }
1055
+ range_dir = directives.find { |d| d.name == "range" }
1056
+
1057
+ mods = MappingMods.new(
1058
+ if_dir: directives.find { |d| d.name == "if" },
1059
+ unless_dir: directives.find { |d| d.name == "unless" },
1060
+ object_dir: directives.find { |d| d.name == "object" },
1061
+ has_default: directives.any? { |d| d.name == "default" },
1062
+ has_raw: directives.any? { |d| d.name == "raw" },
1063
+ has_array: directives.any? { |d| d.name == "array" },
1064
+ extraction_dir_names: directives.map(&:name) & %w[pos len field trim],
1065
+ required: mapping.modifiers.include?(FieldModifier::REQUIRED),
1066
+ validate_dir: validate_dir,
1067
+ enum_dir: enum_dir,
1068
+ range_dir: range_dir,
1069
+ validation: compile_validation(validate_dir, enum_dir, range_dir),
1070
+ validation_active: !validate_dir.nil? || !enum_dir.nil? || !range_dir.nil?
1071
+ )
1072
+ mapping.instance_variable_set(:@__mods, mods)
1073
+ mods
1074
+ end
1075
+
1076
+ # Precompile the regex / enum set / range bounds for validation directives.
1077
+ def compile_validation(validate_dir, enum_dir, range_dir)
1078
+ cv = CompiledValidation.new
1079
+
1080
+ if validate_dir && !validate_dir.value.nil?
1081
+ cv.pattern = validate_dir.value.to_s
1082
+ begin
1083
+ cv.regex = Regexp.new(cv.pattern)
1084
+ rescue RegexpError
1085
+ cv.regex_error = true
1086
+ end
1087
+ end
1088
+
1089
+ if enum_dir && !enum_dir.value.nil?
1090
+ allowed = enum_dir.value.to_s.split(",").map { |v| v.strip.gsub(/\A["']|["']\z/, "") }
1091
+ cv.enum_allowed = allowed
1092
+ cv.enum_label = allowed.join(", ")
1093
+ end
1094
+
1095
+ if range_dir && !range_dir.value.nil?
1096
+ cv.range_str = range_dir.value.to_s
1097
+ parts = cv.range_str.split("..")
1098
+ cv.range_min = (Float(parts[0]) rescue nil)
1099
+ cv.range_max = (Float(parts[1]) rescue nil)
1100
+ end
1101
+
1102
+ cv
601
1103
  end
602
1104
 
603
1105
  def process_mapping(mapping, source, context, output, modifier_prefix: "")
604
1106
  target = mapping.target_field
605
1107
 
606
- # Handle _pass directive and other underscore-prefixed targets
607
- # but still evaluate `_` (bare underscore) for side effects like accumulate
608
- if target == "_"
1108
+ # An unrecognized `_`-prefixed target is a computation-only sink: it runs
1109
+ # for side effects (accumulators, counters) but is never emitted. This
1110
+ # lets several %accumulate/%set advance in one loop pass.
1111
+ if target.start_with?("_")
609
1112
  begin
610
1113
  evaluate(mapping.expression, context)
611
1114
  rescue StandardError => e
@@ -613,38 +1116,106 @@ module Odin
613
1116
  end
614
1117
  return
615
1118
  end
616
- return if target.start_with?("_")
617
1119
 
618
1120
  begin
619
- # Evaluate expression
620
- # Extraction directives (pos, len, field, trim) only apply for extraction-compatible
621
- # source formats. For output formats like fixed-width, these directives are used by
622
- # the formatter for positioning, NOT for input extraction.
623
- src_fmt = context.source_format
624
- extraction_compatible = %w[fixed-width csv delimited flat].include?(src_fmt)
625
- has_extraction = extraction_compatible &&
626
- mapping.directives.any? { |d| %w[pos len field trim].include?(d.name) }
627
-
628
- # Check if CopyExpr already has its own extraction directives
629
- # (applied during evaluate() for compatible source formats)
630
- expr_has_own_extraction = extraction_compatible && expr_has_extraction_directives?(mapping.expression)
631
-
632
- if has_extraction && mapping.expression.is_a?(VerbExpr) && !expr_has_own_extraction
633
- # For verb expressions with extraction directives, apply extraction
634
- # to the first CopyExpr argument before calling the verb (matching Java behavior)
635
- val = evaluate_verb_with_extraction(mapping.expression, context, mapping.directives)
1121
+ mods = mapping_mods(mapping)
1122
+
1123
+ # Field-level :if / :unless gate the assignment on a comparison expression.
1124
+ if_dir = mods.if_dir
1125
+ if if_dir
1126
+ return unless evaluate_condition(if_dir.value.to_s, source, context)
1127
+ end
1128
+ unless_dir = mods.unless_dir
1129
+ if unless_dir
1130
+ return if evaluate_condition(unless_dir.value.to_s, source, context)
1131
+ end
1132
+
1133
+ # A :default modifier handles a missing lookup; suppress errors raised during evaluation.
1134
+ has_default = mods.has_default
1135
+ errors_before = has_default ? context.errors.length : 0
1136
+
1137
+ # :object builds a nested object from an inline {key = @path, …} spec.
1138
+ object_dir = mods.object_dir
1139
+ if object_dir
1140
+ val = build_inline_object(object_dir.value.to_s, context)
636
1141
  else
637
- val = evaluate(mapping.expression, context)
638
- # Apply extraction directives only if expression doesn't handle its own extraction
639
- if has_extraction && !expr_has_own_extraction
640
- val = apply_extraction_directives(val, mapping.directives)
1142
+ # Evaluate expression
1143
+ # Extraction directives (pos, len, field, trim) only apply for extraction-compatible
1144
+ # source formats. For output formats like fixed-width, these directives are used by
1145
+ # the formatter for positioning, NOT for input extraction.
1146
+ src_fmt = context.source_format
1147
+ extraction_compatible = %w[fixed-width csv delimited flat].include?(src_fmt)
1148
+ has_extraction = extraction_compatible &&
1149
+ mapping.directives.any? { |d| %w[pos len field trim].include?(d.name) }
1150
+
1151
+ # Check if CopyExpr already has its own extraction directives
1152
+ # (applied during evaluate() for compatible source formats)
1153
+ expr_has_own_extraction = extraction_compatible && expr_has_extraction_directives?(mapping.expression)
1154
+
1155
+ if has_extraction && mapping.expression.is_a?(VerbExpr) && !expr_has_own_extraction
1156
+ # For verb expressions with extraction directives, apply extraction
1157
+ # to the first CopyExpr argument before calling the verb
1158
+ val = evaluate_verb_with_extraction(mapping.expression, context, mapping.directives)
1159
+ else
1160
+ val = evaluate(mapping.expression, context)
1161
+ # Apply extraction directives only if expression doesn't handle its own extraction
1162
+ if has_extraction && !expr_has_own_extraction
1163
+ val = apply_extraction_directives(val, mapping.directives)
1164
+ end
641
1165
  end
1166
+
1167
+ # Apply remaining directives (non-extraction: type, default, upper, lower, etc.)
1168
+ mapping.directives.each do |directive|
1169
+ next if %w[pos len field trim if unless object raw array cdata validate enum range].include?(directive.name)
1170
+ val = apply_directive(val, directive, source, context)
1171
+ end
1172
+ end
1173
+
1174
+ # If a :default rescued a null result, drop errors raised during evaluation.
1175
+ if has_default && context.errors.length > errors_before
1176
+ context.errors.slice!(errors_before..)
1177
+ end
1178
+
1179
+ # Validation modifiers: :validate / :enum / :range (honors onValidation policy).
1180
+ return unless validate_field_value(val, mapping, context)
1181
+
1182
+ # :raw emits inline JSON structurally instead of an escaped string.
1183
+ if mods.has_raw
1184
+ val = parse_raw_json_value(val)
642
1185
  end
643
1186
 
644
- # Apply remaining directives (non-extraction: type, default, upper, lower, etc.)
645
- mapping.directives.each do |directive|
646
- next if %w[pos len field trim].include?(directive.name)
647
- val = apply_directive(val, directive, source, context)
1187
+ # :array wraps the value in a single-element array.
1188
+ if mods.has_array
1189
+ val = Types::DynValue.of_array([val.is_a?(Types::DynValue) ? val : Types::DynValue.from_ruby(val)])
1190
+ end
1191
+
1192
+ # Missing source path: a :required field always fails (T005); an ordinary
1193
+ # field honors the onMissing policy (fail -> T005, warn -> warning,
1194
+ # skip/default -> keep null). A path that is merely null is not "missing".
1195
+ required = mods.required
1196
+ val_null = val.is_a?(Types::DynValue) && val.null?
1197
+ if val_null && copy_source_absent?(mapping, source, context)
1198
+ raw_path = mapping.expression.is_a?(CopyExpr) ? mapping.expression.source_path : target
1199
+ path = raw_path.to_s.start_with?(".") ? raw_path[1..] : raw_path.to_s
1200
+ if required
1201
+ context.errors << self.class.source_path_not_found_error(path)
1202
+ return
1203
+ end
1204
+ case context.on_missing
1205
+ when "fail"
1206
+ context.errors << self.class.source_path_not_found_error(path)
1207
+ return
1208
+ when "warn"
1209
+ context.warnings << TransformWarning.new(
1210
+ "Source path not found: #{path}", code: ErrorCodes::T005_SOURCE_PATH_NOT_FOUND
1211
+ )
1212
+ end
1213
+ elsif required && val_null
1214
+ # Required field present but explicitly null.
1215
+ context.errors << TransformError.new(
1216
+ "Required field '#{target}' is missing or null", code: SOURCE_MISSING
1217
+ )
1218
+ return
648
1219
  end
649
1220
 
650
1221
  # Track field modifiers with full path
@@ -656,9 +1227,87 @@ module Odin
656
1227
  # Store DynValue directly to preserve type information (date, timestamp, etc.)
657
1228
  dv_val = val.is_a?(Types::DynValue) ? val : Types::DynValue.from_ruby(val)
658
1229
  set_path(output, target, dv_val)
1230
+ rescue CodedTransformError => e
1231
+ # Coded errors carry a stable T-code; preserve it under fail/warn.
1232
+ coded = e.transform_error
1233
+ coded.field = target
1234
+ case context.on_error
1235
+ when "warn"
1236
+ context.warnings << TransformWarning.new(coded.message, code: coded.code).tap { |w| w.field = target }
1237
+ when "skip"
1238
+ # drop silently
1239
+ else
1240
+ context.errors << coded
1241
+ end
659
1242
  rescue StandardError => e
660
- context.errors << TransformError.new(e.message)
1243
+ case context.on_error
1244
+ when "warn"
1245
+ context.warnings << e.message
1246
+ when "skip"
1247
+ # drop silently
1248
+ else
1249
+ context.errors << TransformError.new(e.message)
1250
+ end
1251
+ end
1252
+ end
1253
+
1254
# Reports whether a mapping is a plain copy of a source path that does not
# exist at all, as opposed to a path that exists and holds null.
#
# Only CopyExpr mappings can qualify. The answer is always false when the
# mapping carries a :default or :object directive, when the path is empty,
# starts with "$", is "_index", or names a loop variable.
#
# @return [Boolean]
def copy_source_absent?(mapping, source, context)
  expr = mapping.expression
  return false unless expr.is_a?(CopyExpr)

  # :default and :object provide their own fallback value.
  fallback_names = %w[default object]
  return false if mapping.directives.any? { |dir| fallback_names.include?(dir.name) }

  path = expr.source_path.to_s
  return false if path.empty? || path.start_with?("$") || path == "_index"
  return false if context.loop_vars.key?(path)

  # Pick the value to navigate from and the path relative to it.
  base = source.is_a?(Types::DynValue) ? source : context.source
  target_path = path
  if path.start_with?(".")
    # Leading dot: relative to the current loop item when one is active.
    base = context.current_item if context.in_loop? && context.current_item
    target_path = path[1..]
  else
    head = path.split(".").first
    if context.aliases.key?(head)
      # Alias-led path: navigate from the aliased value, dropping the alias segment.
      base = context.aliases[head]
      target_path = path.include?(".") ? path[(head.length + 1)..] : ""
    end
  end

  resolved = target_path.to_s.empty? ? base : resolve_dotted_path(base, target_path)
  # A non-null result means the path exists; a null result is ambiguous
  # (absent or explicit null), so the key-presence walk decides.
  return false unless resolved.is_a?(Types::DynValue) && resolved.null?

  !path_present?(base, target_path)
end
1288
+
1289
# Walks target_path from base and reports whether every segment is explicitly
# present. A key whose value is null still counts as present; a missing key,
# an out-of-range index, or a non-container along the way does not.
# An empty path is always present.
#
# @return [Boolean]
def path_present?(base, target_path)
  return true if target_path.to_s.empty?
  return false unless base.is_a?(Types::DynValue)

  node = base
  parse_path_segments(target_path).all? do |segment|
    next false unless node.is_a?(Types::DynValue)

    if segment.is_a?(Integer)
      # Integer segments index into arrays.
      next false unless node.array? && segment < node.value.length

      node = node.get_index(segment)
    else
      # Other segments are object keys.
      next false unless node.object? && node.value.key?(segment)

      node = node.get(segment)
    end
    true
  end
end
663
1312
 
664
1313
  # ── Path Assignment (nested object creation for dotted paths) ──
@@ -736,7 +1385,11 @@ module Odin
736
1385
 
737
1386
  if path.start_with?("$accumulator.") || path.start_with?("$accumulators.")
738
1387
  key = path.sub(/\A\$(?:accumulator|accumulators)\./, "")
739
- return context.get_accumulator(key)
1388
+ acc = context.get_accumulator(key)
1389
+ return acc unless acc.null?
1390
+ # Loop counters declared via :counter are also readable through the accumulator reference.
1391
+ return context.loop_vars[key] if context.loop_vars.key?(key)
1392
+ return acc
740
1393
  end
741
1394
 
742
1395
  # _index, _length loop vars
@@ -750,6 +1403,10 @@ module Odin
750
1403
  return context.loop_vars[path]
751
1404
  end
752
1405
 
1406
+ # A leading :loop :as alias resolves against its bound item.
1407
+ aliased = resolve_via_alias(path, context)
1408
+ return aliased unless aliased.nil?
1409
+
753
1410
  # Determine source to navigate
754
1411
  current_source = if context.in_loop? && context.current_item
755
1412
  context.current_item
@@ -761,6 +1418,19 @@ module Odin
761
1418
  resolve_dotted_path(current_source, path)
762
1419
  end
763
1420
 
1421
# Resolves an alias-led path: when the first dotted segment of +path+ is a key
# in context.aliases, the rest of the path is resolved against the aliased
# value (or the aliased value itself is returned when nothing follows).
#
# @return [Object, nil] the resolved value, or nil when the context has no
#   aliases or the first segment is not an alias
def resolve_via_alias(path, context)
  return nil unless context.respond_to?(:aliases)

  table = context.aliases
  return nil if !table || table.empty?

  head = path.split(".").first
  return nil unless table.key?(head)

  bound = table[head]
  # Everything after "<alias>." — empty when the path is just the alias.
  remainder = path.include?(".") ? path[(head.length + 1)..] : ""
  return bound if remainder.empty?

  resolve_dotted_path(bound, remainder)
end
1433
+
764
1434
  def resolve_path_from_string(path_str, source, context)
765
1435
  return source if path_str.nil? || path_str.empty?
766
1436
 
@@ -773,6 +1443,10 @@ module Odin
773
1443
  return context.in_loop? && context.current_item ? context.current_item : source
774
1444
  end
775
1445
 
1446
+ # A leading :loop :as alias resolves against its bound item.
1447
+ aliased = resolve_via_alias(sub_path, context)
1448
+ return aliased unless aliased.nil?
1449
+
776
1450
  current_source = if context.in_loop? && context.current_item
777
1451
  context.current_item
778
1452
  else
@@ -858,17 +1532,10 @@ module Odin
858
1532
  verb_name = expr.verb_name
859
1533
  args = expr.arguments
860
1534
 
861
- # Lazy evaluation for conditional verbs — apply extraction to result
862
- case verb_name
863
- when "ifElse"
864
- val = evaluate_if_else(args, context)
865
- return apply_extraction_directives(val, extraction_directives)
866
- when "cond"
867
- val = evaluate_cond(args, context)
868
- return apply_extraction_directives(val, extraction_directives)
869
- when "switch"
870
- val = evaluate_switch(args, context)
871
- return apply_extraction_directives(val, extraction_directives)
1535
+ # Lazy evaluation for control-flow verbs — apply extraction to result.
1536
+ if !context.strict_types && LAZY_VERBS.include?(verb_name) && !(expr.respond_to?(:custom) && expr.custom)
1537
+ handled, val = evaluate_lazy_verb(verb_name, args, context)
1538
+ return apply_extraction_directives(val, extraction_directives) if handled
872
1539
  end
873
1540
 
874
1541
  # Eager evaluation: apply extraction directives to CopyExpr arguments
@@ -905,18 +1572,19 @@ module Odin
905
1572
  invoke_verb(verb_name, evaluated_args, context)
906
1573
  end
907
1574
 
1575
# Verbs dispatched lazily: the condition is evaluated first and only the
# selected branch runs, while and/or/coalesce short-circuit. In strict-types
# mode these verbs are evaluated eagerly instead so every argument is validated.
LAZY_VERBS = %w[
  ifElse ifNull ifEmpty coalesce
  and or cond switch
].freeze
1579
+
908
1580
  def evaluate_verb(expr, context)
909
1581
  verb_name = expr.verb_name
910
1582
  args = expr.arguments
911
1583
 
912
- # Lazy evaluation for conditional verbs
913
- case verb_name
914
- when "ifElse"
915
- return evaluate_if_else(args, context)
916
- when "cond"
917
- return evaluate_cond(args, context)
918
- when "switch"
919
- return evaluate_switch(args, context)
1584
+ # Lazy evaluation for control-flow verbs (skipped under strict types).
1585
+ if !context.strict_types && LAZY_VERBS.include?(verb_name) && !(expr.respond_to?(:custom) && expr.custom)
1586
+ handled, value = evaluate_lazy_verb(verb_name, args, context)
1587
+ return value if handled
920
1588
  end
921
1589
 
922
1590
  # Eager evaluation for all other verbs
@@ -935,10 +1603,58 @@ module Odin
935
1603
  return evaluated_args.first || Types::DynValue.of_null
936
1604
  end
937
1605
 
1606
+ # T002: enforce verb argument types under strictTypes.
1607
+ if context.strict_types
1608
+ check_verb_arg_types!(verb_name, evaluated_args)
1609
+ end
1610
+
938
1611
  # Look up and invoke verb
939
1612
  invoke_verb(verb_name, evaluated_args, context)
940
1613
  end
941
1614
 
1615
# Evaluates a control-flow verb lazily, touching only the arguments it needs.
#
# @param verb_name [String] verb to dispatch
# @param args [Array] unevaluated argument expressions
# @param context [Object] passed through to evaluate / evaluate_cond / evaluate_switch
# @return [Array(Boolean, Object)] [handled, value]; handled is false (with a
#   null value) for an unknown verb or too few arguments, so the caller can
#   fall back to eager evaluation
def evaluate_lazy_verb(verb_name, args, context)
  arg = ->(idx) { evaluate(args[idx], context) }
  not_handled = -> { [false, Types::DynValue.of_null] }
  arity = args.length

  case verb_name
  when "ifElse"
    return not_handled.call if arity < 3

    # Only the chosen branch is evaluated.
    chosen = arg.call(0).truthy? ? 1 : 2
    [true, arg.call(chosen)]
  when "ifNull"
    return not_handled.call if arity < 2

    primary = arg.call(0)
    [true, primary.null? ? arg.call(1) : primary]
  when "ifEmpty"
    return not_handled.call if arity < 2

    primary = arg.call(0)
    [true, lazy_empty?(primary) ? arg.call(1) : primary]
  when "coalesce"
    # First non-null argument wins; later arguments are never evaluated.
    args.each_index do |idx|
      candidate = arg.call(idx)
      return [true, candidate] unless candidate.null?
    end
    [true, Types::DynValue.of_null]
  when "and"
    return not_handled.call if arity < 2

    both = arg.call(0).truthy? ? arg.call(1).truthy? : false
    [true, Types::DynValue.of_bool(both)]
  when "or"
    return not_handled.call if arity < 2

    either = arg.call(0).truthy? ? true : arg.call(1).truthy?
    [true, Types::DynValue.of_bool(either)]
  when "cond"
    [true, evaluate_cond(args, context)]
  when "switch"
    [true, evaluate_switch(args, context)]
  else
    not_handled.call
  end
end
1653
+
1654
# Emptiness test used by the lazy ifEmpty verb: true for nil, for a null
# value, and for a string value whose content is empty.
def lazy_empty?(v)
  return true if v.nil?

  blank_string = -> { v.string? && v.value.empty? }
  v.null? || blank_string.call
end
1657
+
942
1658
  def evaluate_if_else(args, context)
943
1659
  return Types::DynValue.of_null if args.length < 3
944
1660
 
@@ -1009,17 +1725,37 @@ module Odin
1009
1725
  return increment
1010
1726
  end
1011
1727
 
1012
- # Numeric accumulation
1013
- new_val = Types::DynValue.of_float(current.to_number + increment.to_number)
1014
- # Preserve integer type if both are integers
1015
- if current.integer? && increment.integer?
1016
- new_val = Types::DynValue.of_integer(current.to_number + increment.to_number)
1728
+ sum = current.to_number + increment.to_number
1729
+
1730
+ # T008: the result exceeds representable numeric capacity (non-finite, or
1731
+ # an integer accumulator beyond the safe-integer magnitude where precision
1732
+ # is lost). Retain the last valid value.
1733
+ if accumulator_overflow?(current, sum)
1734
+ context.errors << self.class.accumulator_overflow_error(name, sum)
1735
+ return current
1017
1736
  end
1018
1737
 
1738
+ # Preserve integer type if both are integers
1739
+ new_val = if current.integer? && increment.integer?
1740
+ Types::DynValue.of_integer(sum)
1741
+ else
1742
+ Types::DynValue.of_float(sum.to_f)
1743
+ end
1744
+
1019
1745
  context.set_accumulator(name, new_val)
1020
1746
  new_val
1021
1747
  end
1022
1748
 
1749
# Largest integer magnitude that survives a round-trip through a double (2^53 - 1).
MAX_SAFE_INTEGER = (2**53) - 1

# Decides whether an accumulated sum has overflowed: either it is not a finite
# number, or the accumulator is an integer and the sum's magnitude exceeds
# MAX_SAFE_INTEGER.
#
# @param current [#integer?] accumulator value before the addition
# @param sum [Numeric] candidate new total
def accumulator_overflow?(current, sum)
  as_float = sum.to_f
  return true unless as_float.finite?

  current.integer? && sum.abs > MAX_SAFE_INTEGER
end
1758
+
1023
1759
  def handle_set(raw_args, evaluated_args, context)
1024
1760
  return Types::DynValue.of_null if raw_args.length < 2
1025
1761
 
@@ -1099,8 +1835,9 @@ module Odin
1099
1835
  elsif str.match?(/\A-?\d+\.\d+\z/)
1100
1836
  Types::DynValue.of_float(str.to_f)
1101
1837
  else
1102
- # Treat as path without @
1103
- resolve_dotted_path(source.is_a?(Types::DynValue) ? source : context.source, str)
1838
+ # Treat as path without @; an unresolved bare word is a string literal.
1839
+ resolved = resolve_dotted_path(source.is_a?(Types::DynValue) ? source : context.source, str)
1840
+ resolved.is_a?(Types::DynValue) && resolved.null? ? Types::DynValue.of_string(str) : resolved
1104
1841
  end
1105
1842
  end
1106
1843
 
@@ -1350,6 +2087,14 @@ module Odin
1350
2087
  end
1351
2088
 
1352
2089
  def coerce_to_currency(val, dp = 2, currency_code = nil)
2090
+ if val.type == :currency || val.type == :currency_raw
2091
+ existing_dp = val.decimal_places || dp
2092
+ code = currency_code || val.currency_code
2093
+ if val.type == :currency_raw
2094
+ return Types::DynValue.of_currency_raw(val.value, existing_dp, code)
2095
+ end
2096
+ return Types::DynValue.of_currency(val.value.to_f, existing_dp, code)
2097
+ end
1353
2098
  if val.type == :float || val.type == :float_raw
1354
2099
  f = val.to_number.to_f
1355
2100
  formatted = format("%.#{dp}f", f)
@@ -1443,6 +2188,153 @@ module Odin
1443
2188
  end
1444
2189
  end
1445
2190
 
2191
+ # ── Inline Object / Raw JSON / Validation ──
2192
+
2193
# Builds an object value from an inline ":object {key = expr, …}" spec.
#
# The outer braces are removed, the body is split into pairs, and each
# "key = expr" pair has its right-hand side parsed and evaluated against
# +context+. Pairs without "=" or with an empty key are ignored.
#
# @param spec [String] raw directive text
# @return [Types::DynValue] object value (empty when the spec has no pairs)
def build_inline_object(spec, context)
  body = spec.strip.sub(/\A\{/, "").sub(/\}\z/, "")
  return Types::DynValue.of_object({}) if body.strip.empty?

  fields = split_object_pairs(body).each_with_object({}) do |pair, acc|
    separator = pair.index("=")
    next unless separator

    name = pair[0...separator].strip
    next if name.empty?

    source_text = pair[(separator + 1)..].strip
    # Only the first element of the parser's return value is used.
    parsed, = TransformParser.new.parse_expression_string(source_text)
    acc[name] = evaluate(parsed, context)
  end
  Types::DynValue.of_object(fields)
end
2212
+
2213
# Split an inline-object body into its "key = expr" pairs.
#
# A comma separates pairs only at the top level: commas nested inside braces
# or inside a double-quoted string literal stay part of the current pair.
# Inside a quoted string a backslash escapes the next character, so an
# escaped quote does not end the string.
#
# @param body [String] object body without the outer braces
# @return [Array<String>] pair strings, untrimmed; a trailing blank pair is dropped
def split_object_pairs(body)
  pairs = []
  depth = 0
  in_string = false
  escaped = false
  current = +""

  body.each_char do |ch|
    if in_string
      # Inside a string every character is literal, including commas and braces.
      current << ch
      if escaped
        escaped = false
      elsif ch == "\\"
        escaped = true
      elsif ch == '"'
        in_string = false
      end
      next
    end

    case ch
    when '"' then in_string = true
    when "{" then depth += 1
    when "}" then depth -= 1
    end

    if ch == "," && depth.zero?
      pairs << current
      current = +""
    else
      current << ch
    end
  end

  pairs << current unless current.strip.empty?
  pairs
end
2231
+
2232
# Turns a string value holding JSON text into a structural value for :raw.
#
# Anything that is not a string DynValue is returned untouched, and so is a
# string that fails to parse or convert (best effort: errors are swallowed).
#
# @return [Types::DynValue, Object] the parsed structure, or +val+ itself
def parse_raw_json_value(val)
  textual = val.is_a?(Types::DynValue) && val.string?
  return val unless textual

  begin
    decoded = JSON.parse(val.value)
    Types::DynValue.from_ruby(decoded)
  rescue StandardError
    val
  end
end
2242
+
2243
# Validate a value against the mapping's :validate / :enum / :range directives.
#
# Null values are never validated. When at least one check fails, the outcome
# follows the onValidation policy (defaulting to "fail"):
#   * "warn" - record a T013 warning and still emit the field
#   * "skip" - drop the field without recording anything
#   * other  - record a T013 error and drop the field
#
# @param val [Types::DynValue, Object] evaluated field value
# @param mapping [Object] field mapping carrying the compiled validation
# @param context [Object] supplies on_validation, errors and warnings
# @return [Boolean] true when the field should be emitted, false when dropped
def validate_field_value(val, mapping, context)
  return true if val.is_a?(Types::DynValue) && val.null?

  cv = mapping_mods(mapping).validation
  policy = context.on_validation || "fail"
  failures = []

  if cv.pattern
    str = dynvalue_string(val)
    if cv.regex_error
      # The pattern itself failed to compile; report that rather than a mismatch.
      failures << "invalid validation pattern '#{cv.pattern}'"
    elsif !cv.regex.match?(str)
      failures << "value '#{str}' does not match pattern '#{cv.pattern}'"
    end
  end

  if cv.enum_allowed
    str = dynvalue_string(val)
    failures << "value '#{str}' is not one of [#{cv.enum_label}]" unless cv.enum_allowed.include?(str)
  end

  if cv.range_str
    num = numeric_of(val)
    if num.nil?
      failures << "value '#{dynvalue_string(val)}' is not numeric for range #{cv.range_str}"
    elsif (cv.range_min && num < cv.range_min) || (cv.range_max && num > cv.range_max)
      # A nil bound means that side of the range is unchecked.
      failures << "value #{num} is outside range #{cv.range_str}"
    end
  end

  return true if failures.empty?

  message = "Validation failed for '#{mapping.target_field}': #{failures.join('; ')}"
  case policy
  when "warn"
    # Record the failure so the warn policy is observable, then still emit.
    context.warnings << TransformWarning.new(message, code: "T013")
    true
  when "skip"
    false
  else
    context.errors << TransformError.new(message, code: "T013")
    false
  end
end
2289
+
2290
# Numeric view of a value, used for :range validation.
#
# Numeric-typed DynValues are converted through to_number; string DynValues
# are parsed as a float. Everything else has no numeric view.
#
# @param val [Types::DynValue, Object]
# @return [Float, nil] nil for non-DynValues, other types, or unparsable strings
def numeric_of(val)
  return nil unless val.is_a?(Types::DynValue)

  case val.type
  when :integer, :float, :float_raw, :currency, :currency_raw, :percent
    val.to_number.to_f
  when :string
    # Non-numeric text yields nil instead of raising.
    Float(val.value, exception: false)
  end
end
2302
+
2303
# String form of a value: DynValues are rendered through FormatExporters'
# dynvalue_to_string (invoked via send); anything else uses #to_s.
def dynvalue_string(val)
  if val.is_a?(Types::DynValue)
    FormatExporters.send(:dynvalue_to_string, val)
  else
    val.to_s
  end
end
2308
+
2309
# Upper bound on the ${...} occurrences processed in a single template.
MAX_INTERPOLATIONS = 320

# Expands ${...} placeholders inside a string template.
#
#   ${@path}       - replaced with the string form of the resolved path
#   ${%verb args}  - parsed and evaluated as an expression
#   \${...}        - backslash-escaped; emitted as a literal ${...}
#
# Any other placeholder is left as written, as is every occurrence past
# MAX_INTERPOLATIONS (escaped occurrences count toward the limit too).
#
# @param template [String]
# @return [Types::DynValue] string value with placeholders expanded
def interpolate_string(template, context)
  seen = 0
  rendered = template.gsub(/\\?\$\{([^}]+)\}/) do |whole|
    # Capture the group before any nested evaluation runs.
    inner = Regexp.last_match(1)
    seen += 1

    if seen > MAX_INTERPOLATIONS
      whole
    elsif whole.start_with?("\\")
      # Escaped placeholder: drop the backslash, keep the rest verbatim.
      "${#{inner}}"
    else
      token = inner.strip
      case token[0]
      when "%"
        tokens = TransformParser.new.send(:tokenize_expression, token)
        parsed, = TransformParser.new.send(:parse_expr_from_tokens, tokens)
        parsed ? dynvalue_string(evaluate(parsed, context)) : whole
      when "@"
        dynvalue_string(resolve_path(token[1..], context))
      else
        whole
      end
    end
  end
  Types::DynValue.of_string(rendered)
end
2337
+
1446
2338
  # ── Object Expression Evaluation ──
1447
2339
 
1448
2340
  def evaluate_object(expr, context)
@@ -1456,10 +2348,36 @@ module Odin
1456
2348
 
1457
2349
  # ── Format Output ──
1458
2350
 
2351
# Target formats that have a registered formatter. A format outside this list
# raises T006 instead of silently falling back to JSON.
KNOWN_OUTPUT_FORMATS = %w[
  json odin xml csv
  fixed-width flat properties
].freeze
2354
+
1459
2355
  def format_output(output_dv, transform_def, context = nil)
1460
2356
  target_format = transform_def.target_format
1461
2357
  return nil unless target_format
1462
2358
 
2359
+ unless KNOWN_OUTPUT_FORMATS.include?(target_format)
2360
+ context.errors << self.class.invalid_output_format_error(target_format) if context
2361
+ return ""
2362
+ end
2363
+
2364
+ # T007: positional layout directives (:pos/:len) only apply to fixed-width
2365
+ # output; on any other target they are invalid for the format.
2366
+ if context && target_format != "fixed-width"
2367
+ transform_def.segments.each do |segment|
2368
+ segment.field_mappings.each do |mapping|
2369
+ next unless mapping.directives.any? { |d| %w[pos len].include?(d.name) }
2370
+
2371
+ err = TransformError.new(
2372
+ "Modifier ':pos/:len' is not valid for #{target_format} output",
2373
+ code: ErrorCodes::T007_INVALID_MODIFIER
2374
+ )
2375
+ err.field = mapping.target_field
2376
+ context.errors << err
2377
+ end
2378
+ end
2379
+ end
2380
+
1463
2381
  case target_format
1464
2382
  when "json"
1465
2383
  topts = transform_def.header.target_options
@@ -1488,7 +2406,7 @@ module Odin
1488
2406
  end
1489
2407
  FormatExporters.to_csv(csv_dv, delimiter: delimiter, header: include_header)
1490
2408
  when "fixed-width"
1491
- format_fixed_width_output(output_dv, transform_def)
2409
+ format_fixed_width_output(output_dv, transform_def, context)
1492
2410
  when "flat", "properties"
1493
2411
  style = transform_def.header.target_options["style"]
1494
2412
  if style == "yaml"
@@ -1502,12 +2420,35 @@ module Odin
1502
2420
  end
1503
2421
  end
1504
2422
 
1505
- # Format output as fixed-width text (segment-based, matching TypeScript)
1506
- def format_fixed_width_output(output_dv, transform_def)
1507
- line_width = 80
2423
+ # Format output as fixed-width text (segment-based)
2424
+ def format_fixed_width_output(output_dv, transform_def, context = nil)
1508
2425
  lw = transform_def.header.target_options["lineWidth"]
1509
- line_width = lw.to_i if lw && lw.to_i > 0
2426
+ has_line_width = !lw.nil? && parse_target_int(lw, 0) > 0
2427
+ line_width = has_line_width ? parse_target_int(lw, 80) : 80
2428
+
2429
+ # T010: a field whose pos+len exceeds the configured line width overflows.
2430
+ if has_line_width && context
2431
+ transform_def.segments.each do |segment|
2432
+ segment.field_mappings.each do |mapping|
2433
+ pos_dir = mapping.directives.find { |d| d.name == "pos" }
2434
+ len_dir = mapping.directives.find { |d| d.name == "len" }
2435
+ next unless pos_dir && len_dir
2436
+
2437
+ pos = pos_dir.value.to_i
2438
+ len = len_dir.value.to_i
2439
+ next unless pos + len > line_width
2440
+
2441
+ err = TransformError.new(
2442
+ "Field '#{mapping.target_field}' position #{pos} + length #{len} exceeds line width #{line_width}",
2443
+ code: ErrorCodes::T010_POSITION_OVERFLOW
2444
+ )
2445
+ err.field = mapping.target_field
2446
+ context.errors << err
2447
+ end
2448
+ end
2449
+ end
1510
2450
  default_pad = transform_def.header.target_options["padChar"] || " "
2451
+ truncate = transform_def.header.target_options["truncate"] == "true"
1511
2452
  line_ending = transform_def.header.target_options["lineEnding"] || "\n"
1512
2453
 
1513
2454
  lines = []
@@ -1516,16 +2457,22 @@ module Odin
1516
2457
  seg_name = segment.name
1517
2458
  seg_data = resolve_segment_data(output_dv, seg_name)
1518
2459
 
2460
+ literal_lines = extract_literal_lines(seg_data)
2461
+ if literal_lines
2462
+ literal_lines.each { |l| lines << l }
2463
+ next
2464
+ end
2465
+
1519
2466
  if segment.is_array && seg_data.is_a?(Array)
1520
2467
  # Array segment: one line per item
1521
2468
  seg_data.each do |item|
1522
2469
  data = item.is_a?(Types::DynValue) ? dynvalue_to_flat_hash(item) : (item.is_a?(Hash) ? item : {})
1523
- lines << format_fwf_line(segment.field_mappings, data, line_width, default_pad)
2470
+ lines << format_fwf_line(segment.field_mappings, data, line_width, default_pad, has_line_width, truncate)
1524
2471
  end
1525
2472
  elsif segment.is_array && seg_data.is_a?(Types::DynValue) && seg_data.array?
1526
2473
  seg_data.value.each do |item|
1527
2474
  data = dynvalue_to_flat_hash(item)
1528
- lines << format_fwf_line(segment.field_mappings, data, line_width, default_pad)
2475
+ lines << format_fwf_line(segment.field_mappings, data, line_width, default_pad, has_line_width, truncate)
1529
2476
  end
1530
2477
  else
1531
2478
  # Single segment: one line
@@ -1536,7 +2483,7 @@ module Odin
1536
2483
  else
1537
2484
  dynvalue_to_flat_hash(output_dv)
1538
2485
  end
1539
- lines << format_fwf_line(segment.field_mappings, data, line_width, default_pad)
2486
+ lines << format_fwf_line(segment.field_mappings, data, line_width, default_pad, has_line_width, truncate)
1540
2487
  end
1541
2488
  end
1542
2489
 
@@ -1553,6 +2500,16 @@ module Odin
1553
2500
  default_val
1554
2501
  end
1555
2502
 
2503
# Pulls the verbatim lines out of a rendered :literal segment.
#
# A literal segment is an object value whose "__literalLines" entry is an
# array; each element is converted to a string (to_string for DynValues,
# to_s otherwise).
#
# @return [Array<String>, nil] the lines, or nil when seg_data is not such a segment
def extract_literal_lines(seg_data)
  literal = seg_data.get("__literalLines") if seg_data.is_a?(Types::DynValue) && seg_data.object?
  return nil unless literal.is_a?(Types::DynValue) && literal.array?

  literal.value.map do |entry|
    entry.is_a?(Types::DynValue) ? entry.to_string : entry.to_s
  end
end
2512
+
1556
2513
  def resolve_segment_data(output_dv, seg_name)
1557
2514
  return output_dv unless output_dv.is_a?(Types::DynValue) && output_dv.object?
1558
2515
 
@@ -1575,7 +2532,7 @@ module Odin
1575
2532
  result
1576
2533
  end
1577
2534
 
1578
- def format_fwf_line(mappings, data, line_width, default_pad)
2535
+ def format_fwf_line(mappings, data, line_width, default_pad, has_line_width = false, truncate = false)
1579
2536
  # Sort mappings by :pos for deterministic output
1580
2537
  sorted = mappings.sort_by do |m|
1581
2538
  pos_dir = m.directives.find { |d| d.name == "pos" }
@@ -1638,10 +2595,19 @@ module Odin
1638
2595
  end
1639
2596
  end
1640
2597
 
2598
+ # Pad the record to the configured fixed line width.
2599
+ if has_line_width
2600
+ if line.length < line_width
2601
+ line += default_pad * (line_width - line.length)
2602
+ elsif line.length > line_width && truncate
2603
+ line = line[0...line_width]
2604
+ end
2605
+ end
2606
+
1641
2607
  line
1642
2608
  end
1643
2609
 
1644
- # ── XML Output Formatting (segment-based, matching TypeScript) ──
2610
+ # ── XML Output Formatting (segment-based) ──
1645
2611
 
1646
2612
  def format_xml_output(output_dv, transform_def, context)
1647
2613
  topts = transform_def.header.target_options
@@ -1650,17 +2616,25 @@ module Odin
1650
2616
  indent_val = topts["indent"]
1651
2617
  indent_size = indent_val ? parse_target_int(indent_val, 2) : 2
1652
2618
  indent_str = " " * indent_size
2619
+ # emitTypeHints=false produces plain XML with no odin: attributes/namespace
2620
+ eth_val = topts["emitTypeHints"]
2621
+ emit_type_hints = eth_val != "false" && eth_val != "?false"
2622
+ namespaces = transform_def.header.target_namespaces || {}
1653
2623
 
1654
2624
  xml = +""
1655
2625
  xml << %{<?xml version="1.0" encoding="UTF-8"?>\n} if include_declaration
1656
2626
 
1657
- # Collect which fields have :attr directive per segment
2627
+ # Collect per-field :attr, :ns and :cdata directives per segment
1658
2628
  attr_fields = {}
2629
+ ns_fields = {}
2630
+ cdata_fields = {}
1659
2631
  transform_def.segments.each do |segment|
1660
2632
  segment.field_mappings.each do |mapping|
1661
- if mapping.directives.any? { |d| d.name == "attr" }
1662
- attr_fields["#{segment.name}.#{mapping.target_field}"] = true
1663
- end
2633
+ full = "#{segment.name}.#{mapping.target_field}"
2634
+ attr_fields[full] = true if mapping.directives.any? { |d| d.name == "attr" }
2635
+ cdata_fields[full] = true if mapping.directives.any? { |d| d.name == "cdata" }
2636
+ ns_dir = mapping.directives.find { |d| d.name == "ns" }
2637
+ ns_fields[full] = ns_dir.value if ns_dir
1664
2638
  end
1665
2639
  end
1666
2640
 
@@ -1677,7 +2651,9 @@ module Odin
1677
2651
  []
1678
2652
  end
1679
2653
  items.each do |item|
1680
- xml << render_xml_segment_element(seg_name, item, segment, attr_fields, is_array: true, indent_str: indent_str)
2654
+ xml << render_xml_segment_element(seg_name, item, segment, attr_fields, ns_fields, cdata_fields,
2655
+ is_array: true, indent_str: indent_str,
2656
+ emit_type_hints: emit_type_hints, namespaces: namespaces)
1681
2657
  end
1682
2658
  else
1683
2659
  data = if seg_data.is_a?(Types::DynValue)
@@ -1687,14 +2663,16 @@ module Odin
1687
2663
  else
1688
2664
  output_dv
1689
2665
  end
1690
- xml << render_xml_segment_element(seg_name, data, segment, attr_fields, is_array: false, indent_str: indent_str)
2666
+ xml << render_xml_segment_element(seg_name, data, segment, attr_fields, ns_fields, cdata_fields,
2667
+ is_array: false, indent_str: indent_str,
2668
+ emit_type_hints: emit_type_hints, namespaces: namespaces)
1691
2669
  end
1692
2670
  end
1693
2671
 
1694
2672
  xml
1695
2673
  end
1696
2674
 
1697
- def render_xml_segment_element(seg_name, data, segment, attr_fields, is_array: false, indent_str: " ")
2675
+ def render_xml_segment_element(seg_name, data, segment, attr_fields, ns_fields, cdata_fields = {}, is_array: false, indent_str: " ", emit_type_hints: true, namespaces: {})
1698
2676
  return "" unless data.is_a?(Types::DynValue) && data.object?
1699
2677
 
1700
2678
  entries = data.value
@@ -1718,21 +2696,40 @@ module Odin
1718
2696
  end
1719
2697
  end
1720
2698
 
1721
- # Non-array segments always get xmlns:odin namespace
1722
- ns = !is_array ? ' xmlns:odin="https://odin.foundation/ns"' : ""
2699
+ # xmlns:odin only when type hints are emitted; omitted on namespaced roots without typed content
2700
+ include_odin_ns = emit_type_hints && !is_array && (namespaces.empty? || has_typed)
2701
+ odin_ns = include_odin_ns ? ' xmlns:odin="https://odin.foundation/ns"' : ""
2702
+ ns_decls = !is_array ? build_xml_namespace_decls(namespaces) : ""
1723
2703
  attrs = attr_parts.empty? ? "" : " #{attr_parts.join(' ')}"
1724
2704
 
1725
- xml = +"<#{seg_name}#{ns}#{attrs}>\n"
2705
+ xml = +"<#{seg_name}#{odin_ns}#{ns_decls}#{attrs}>\n"
1726
2706
  child_keys.each do |key|
1727
2707
  val = entries[key]
1728
2708
  next unless val
1729
- type_attr = xml_type_attr(val)
1730
- xml << "#{indent_str}<#{key}#{type_attr}>#{xml_escape_attr(val_to_xml_string(val))}</#{key}>\n"
2709
+ tag = ns_qualify_xml(key, ns_fields["#{seg_name}.#{key}"])
2710
+ type_attr = emit_type_hints ? xml_type_attr(val) : ""
2711
+ text = if cdata_fields["#{seg_name}.#{key}"]
2712
+ "<![CDATA[#{val_to_xml_string(val)}]]>"
2713
+ else
2714
+ xml_escape_attr(val_to_xml_string(val))
2715
+ end
2716
+ xml << "#{indent_str}<#{tag}#{type_attr}>#{text}</#{tag}>\n"
1731
2717
  end
1732
2718
  xml << "</#{seg_name}>\n"
1733
2719
  xml
1734
2720
  end
1735
2721
 
2722
# Render the target namespaces as a run of xmlns:<prefix>="<uri>" attributes,
# each preceded by a single space, in the hash's insertion order.
# Returns "" for a nil or empty mapping. URIs pass through xml_escape_attr;
# prefixes are emitted verbatim.
def build_xml_namespace_decls(namespaces)
  return "" if namespaces.nil? || namespaces.empty?

  namespaces.each_with_object(+"") do |(prefix, uri), decls|
    decls << " xmlns:#{prefix}=\"#{xml_escape_attr(uri)}\""
  end
end
2727
+
2728
# Return the element name to emit for +key+: "<prefix>:<key>" when an :ns
# prefix was supplied, otherwise +key+ itself.
def ns_qualify_xml(key, prefix)
  return key unless prefix

  "#{prefix}:#{key}"
end
2732
+
1736
2733
  def xml_type_attr(dv)
1737
2734
  case dv.type
1738
2735
  when :null then ' odin:type="null"'
@@ -1742,8 +2739,8 @@ module Odin
1742
2739
  v = dv.value.to_f
1743
2740
  v == v.to_i.to_f && v.abs < 1e15 ? ' odin:type="integer"' : ' odin:type="number"'
1744
2741
  when :currency, :currency_raw
1745
- v = dv.value.to_f
1746
- v == v.to_i.to_f && v.abs < 1e15 ? ' odin:type="integer"' : ' odin:type="number"'
2742
+ # every currency is first-class; a coded currency also carries its ISO code
2743
+ dv.currency_code ? " odin:type=\"currency\" odin:currencyCode=\"#{dv.currency_code}\"" : ' odin:type="currency"'
1747
2744
  when :percent then ' odin:type="percent"'
1748
2745
  else ""
1749
2746
  end
@@ -1757,8 +2754,12 @@ module Odin
1757
2754
  when :float then FormatExporters.send(:format_number, dv.value)
1758
2755
  when :string then dv.value
1759
2756
  when :currency
1760
- v = dv.value.to_f
1761
- v == v.to_i && v.abs < 1e15 ? v.to_i.to_s : v.to_s
2757
+ # render at the value's decimal scale (default 2), preserving precision
2758
+ dp = dv.decimal_places || 2
2759
+ format("%.#{dp}f", dv.value.to_f)
2760
+ when :currency_raw
2761
+ # raw string already carries exact precision
2762
+ dv.value.to_s
1762
2763
  else FormatExporters.send(:dynvalue_to_string, dv)
1763
2764
  end
1764
2765
  end
@@ -1771,6 +2772,31 @@ module Odin
1771
2772
  .gsub("'", "&apos;")
1772
2773
  end
1773
2774
 
2775
# Check the backslash escapes in the body of a JSON string (the text between
# the quotes). Returns true when every backslash starts either a single-
# character escape (\" \\ \/ \b \f \n \r \t) or \u followed by exactly four
# hex digits; returns false for a trailing backslash or any other sequence.
# Characters that are not part of an escape are not inspected.
def valid_json_escapes?(s)
  single_char = ["\"", "\\", "/", "b", "f", "n", "r", "t"]
  cursor = 0
  # Jump from backslash to backslash; everything in between is plain text.
  while (slash = s.index("\\", cursor))
    marker = s[slash + 1]
    return false if marker.nil?

    if marker == "u"
      hex = s[slash + 2, 4]
      return false unless hex && hex.length >= 4 && hex =~ /\A[0-9a-fA-F]{4}\z/

      cursor = slash + 6
    elsif single_char.include?(marker)
      cursor = slash + 2
    else
      return false
    end
  end
  true
end
2799
+
1774
2800
  # ── Verb Registry ──
1775
2801
 
1776
2802
  def build_verb_registry
@@ -1787,6 +2813,7 @@ module Odin
1787
2813
  Verbs::AggregationVerbs.register(registry)
1788
2814
  Verbs::ObjectVerbs.register(registry)
1789
2815
  Verbs::GeoVerbs.register(registry)
2816
+ Verbs::ExtraVerbs.register(registry)
1790
2817
 
1791
2818
  registry
1792
2819
  end
@@ -1799,7 +2826,7 @@ module Odin
1799
2826
  registry["capitalize"] = ->(args, _ctx) {
1800
2827
  if args[0]&.string?
1801
2828
  s = args[0].value
1802
- Types::DynValue.of_string(s.empty? ? s : s[0].upcase + s[1..])
2829
+ Types::DynValue.of_string(s.empty? ? s : s[0].upcase + s[1..].downcase)
1803
2830
  else
1804
2831
  args[0] || Types::DynValue.of_null
1805
2832
  end
@@ -1860,7 +2887,19 @@ module Odin
1860
2887
  # Type checks
1861
2888
  registry["typeOf"] = ->(args, _ctx) {
1862
2889
  v = args[0]
1863
- type_str = v.nil? ? "null" : v.type.to_s
2890
+ type_str =
2891
+ if v.nil?
2892
+ "null"
2893
+ else
2894
+ {
2895
+ null: "null", bool: "boolean", integer: "integer",
2896
+ float: "number", float_raw: "number", string: "string",
2897
+ array: "array", object: "object", date: "date",
2898
+ timestamp: "timestamp", time: "time", duration: "duration",
2899
+ currency: "currency", currency_raw: "currency", percent: "percent",
2900
+ reference: "reference", binary: "binary"
2901
+ }.fetch(v.type, "unknown")
2902
+ end
1864
2903
  Types::DynValue.of_string(type_str)
1865
2904
  }
1866
2905
  registry["isString"] = ->(args, _ctx) { Types::DynValue.of_bool(args[0]&.string? || false) }
@@ -1872,9 +2911,9 @@ module Odin
1872
2911
 
1873
2912
  # Coercion
1874
2913
  registry["coerceString"] = ->(args, _ctx) { Types::DynValue.of_string(args[0]&.to_string || "") }
1875
- registry["coerceNumber"] = ->(args, _ctx) { Types::DynValue.of_float(args[0]&.to_number&.to_f || 0.0) }
1876
- registry["coerceInteger"] = ->(args, _ctx) { Types::DynValue.of_integer(args[0]&.to_number&.to_i || 0) }
1877
- registry["coerceBoolean"] = ->(args, _ctx) { Types::DynValue.of_bool(args[0]&.truthy? || false) }
2914
+ registry["coerceNumber"] = ->(args, _ctx) { Verbs::NumericVerbs.numeric_result(Verbs::NumericVerbs.to_number(args[0])) }
2915
+ registry["coerceInteger"] = ->(args, _ctx) { Types::DynValue.of_integer(Verbs::NumericVerbs.to_number(args[0]).floor) }
2916
+ registry["coerceBoolean"] = ->(args, _ctx) { Types::DynValue.of_bool(Verbs::NumericVerbs.coerce_boolean(args[0])) }
1878
2917
 
1879
2918
  # Arithmetic
1880
2919
  registry["add"] = ->(args, _ctx) {
@@ -1924,13 +2963,19 @@ module Odin
1924
2963
  ctx.set_accumulator(name, increment)
1925
2964
  increment
1926
2965
  else
1927
- new_val = if current.integer? && increment.integer?
1928
- Types::DynValue.of_integer(current.to_number + increment.to_number)
1929
- else
1930
- Types::DynValue.of_float(current.to_number.to_f + increment.to_number.to_f)
1931
- end
1932
- ctx.set_accumulator(name, new_val)
1933
- new_val
2966
+ sum = current.to_number + increment.to_number
2967
+ if accumulator_overflow?(current, sum)
2968
+ ctx.errors << self.class.accumulator_overflow_error(name, sum)
2969
+ current
2970
+ else
2971
+ new_val = if current.integer? && increment.integer?
2972
+ Types::DynValue.of_integer(sum)
2973
+ else
2974
+ Types::DynValue.of_float(sum.to_f)
2975
+ end
2976
+ ctx.set_accumulator(name, new_val)
2977
+ new_val
2978
+ end
1934
2979
  end
1935
2980
  }
1936
2981
 
@@ -1962,16 +3007,23 @@ module Odin
1962
3007
  table_name = table_ref[0...dot_index]
1963
3008
  return_column = table_ref[(dot_index + 1)..]
1964
3009
 
1965
- table = ctx.get_table(table_name)
1966
- return Types::DynValue.of_null unless table
1967
-
1968
3010
  # Get match values (all args after table ref)
1969
3011
  match_values = args[1..].map { |a| a&.to_string || "" }
3012
+ match_key = match_values.join(", ")
3013
+
3014
+ table = ctx.get_table(table_name)
3015
+ unless table
3016
+ report_table_not_found(ctx, table_name)
3017
+ return Types::DynValue.of_null
3018
+ end
1970
3019
 
1971
3020
  # Build list of match columns (all columns except return column)
1972
3021
  columns = table.columns
1973
3022
  return_col_index = columns.index(return_column)
1974
- return Types::DynValue.of_null unless return_col_index
3023
+ unless return_col_index
3024
+ report_lookup_miss(ctx, table_name, match_key)
3025
+ return Types::DynValue.of_null
3026
+ end
1975
3027
 
1976
3028
  match_col_names = columns.reject { |c| c == return_column }
1977
3029
 
@@ -1993,6 +3045,7 @@ module Odin
1993
3045
  end
1994
3046
  end
1995
3047
 
3048
+ report_lookup_miss(ctx, table_name, match_key)
1996
3049
  Types::DynValue.of_null
1997
3050
  }
1998
3051
 
@@ -2045,15 +3098,23 @@ module Odin
2045
3098
 
2046
3099
  # Sequence
2047
3100
  registry["sequence"] = ->(args, ctx) {
2048
- name = args[0]&.to_string || "default"
2049
- val = ctx.next_sequence(name)
2050
- Types::DynValue.of_integer(val)
3101
+ return Types::DynValue.of_integer(1) if args.empty?
3102
+
3103
+ name = args[0].to_string
3104
+ start_value = args.length > 1 ? (args[1]&.to_number || 1).floor : 1
3105
+ current = ctx.sequences[name]
3106
+ current = current.nil? ? start_value : current + 1
3107
+ ctx.sequences[name] = current
3108
+ Types::DynValue.of_integer(current)
2051
3109
  }
2052
3110
 
2053
3111
  registry["resetSequence"] = ->(args, ctx) {
2054
- name = args[0]&.to_string || "default"
2055
- ctx.reset_sequence(name)
2056
- Types::DynValue.of_integer(0)
3112
+ return Types::DynValue.of_null if args.empty?
3113
+
3114
+ name = args[0].to_string
3115
+ value = args.length > 1 ? (args[1]&.to_number || 0).floor : 0
3116
+ ctx.sequences[name] = value
3117
+ Types::DynValue.of_integer(value)
2057
3118
  }
2058
3119
 
2059
3120
  # Min/Max of variadic
@@ -2140,19 +3201,20 @@ module Odin
2140
3201
  Types::DynValue.of_bool(args[0]&.truthy? || args[1]&.truthy?)
2141
3202
  }
2142
3203
 
2143
- # Comparison
2144
- registry["lt"] = ->(args, _ctx) {
2145
- Types::DynValue.of_bool((args[0]&.to_number || 0) < (args[1]&.to_number || 0))
2146
- }
2147
- registry["lte"] = ->(args, _ctx) {
2148
- Types::DynValue.of_bool((args[0]&.to_number || 0) <= (args[1]&.to_number || 0))
2149
- }
2150
- registry["gt"] = ->(args, _ctx) {
2151
- Types::DynValue.of_bool((args[0]&.to_number || 0) > (args[1]&.to_number || 0))
2152
- }
2153
- registry["gte"] = ->(args, _ctx) {
2154
- Types::DynValue.of_bool((args[0]&.to_number || 0) >= (args[1]&.to_number || 0))
2155
- }
3204
# Comparison verbs (lt, lte, gt, gte).
# `compare` yields the <=> ordering of two operands: numeric when
# Verbs::NumericVerbs.to_double returns a non-nil value for both, otherwise
# an ordering of their string forms (a missing operand counts as "").
compare = lambda do |a, b|
  left = Verbs::NumericVerbs.to_double(a)
  right = Verbs::NumericVerbs.to_double(b)
  if left.nil? || right.nil?
    (a&.to_string || "") <=> (b&.to_string || "")
  else
    left <=> right
  end
end
# Each verb tests the ordering against zero with its own operator.
{ "lt" => :<, "lte" => :<=, "gt" => :>, "gte" => :>= }.each do |verb, operator|
  registry[verb] = ->(args, _ctx) {
    ordering = compare.call(args[0], args[1])
    Types::DynValue.of_bool(ordering.public_send(operator, 0))
  }
end
2156
3218
 
2157
3219
  # String operations
2158
3220
  registry["contains"] = ->(args, _ctx) {
@@ -2226,13 +3288,10 @@ module Odin
2226
3288
  end
2227
3289
  }
2228
3290
 
2229
- # Assertions
3291
# assert: returns its first argument unchanged when that argument is truthy,
# and a null value otherwise (including when the argument is missing).
registry["assert"] = ->(args, _ctx) {
  subject = args[0]
  next Types::DynValue.of_null unless subject&.truthy?

  subject
}
2237
3296
 
2238
3297
  # Switch/cond handled via lazy evaluation in evaluate_verb
@@ -2280,16 +3339,24 @@ module Odin
2280
3339
  }
2281
3340
 
2282
3341
  registry["titleCase"] = ->(args, _ctx) {
2283
- s = args[0]&.to_string || ""
2284
- Types::DynValue.of_string(s.gsub(/\b\w/) { |m| m.upcase })
3342
+ v = args[0]
3343
+ next Types::DynValue.of_null if v.nil? || v.null?
3344
+ s = v.to_string
3345
+ next Types::DynValue.of_string("") if s.empty?
3346
+ result = s.split(/\s+/).map { |w| w.empty? ? "" : w[0].upcase + w[1..].downcase }.join(" ")
3347
+ Types::DynValue.of_string(result)
2285
3348
  }
2286
3349
 
2287
3350
  registry["slugify"] = ->(args, _ctx) {
2288
- s = args[0]&.to_string || ""
3351
+ v = args[0]
3352
+ next Types::DynValue.of_null if v.nil? || v.null?
3353
+ s = v.to_string
3354
+ next Types::DynValue.of_string("") if s.empty?
2289
3355
  result = s.downcase
2290
- .gsub(/[^a-z0-9\s-]/, "")
2291
- .strip
2292
- .gsub(/[\s-]+/, "-")
3356
+ .gsub(/[^a-z0-9_\s-]/, "")
3357
+ .gsub(/[\s_]+/, "-")
3358
+ .gsub(/-+/, "-")
3359
+ .gsub(/\A-+|-+\z/, "")
2293
3360
  Types::DynValue.of_string(result)
2294
3361
  }
2295
3362
 
@@ -2366,12 +3433,15 @@ module Odin
2366
3433
  }
2367
3434
 
2368
3435
  registry["split"] = ->(args, _ctx) {
2369
- s = args[0]&.to_string || ""
2370
- delimiter = args[1]&.to_string || ","
2371
- parts = s.split(delimiter, -1)
2372
- # If a third argument (index) is provided, return that element
3436
+ v = args[0]
3437
+ next Types::DynValue.of_null if v.nil? || v.null?
3438
+ s = v.to_string
3439
+ delimiter = args[1]&.to_string || ""
3440
+ parts = delimiter.empty? ? [s] : s.split(delimiter, -1)
3441
+ # If a third argument (index) is provided, return that element.
2373
3442
  if args[2] && !args[2].null?
2374
- idx = args[2].to_number&.to_i || 0
3443
+ idx = Verbs::NumericVerbs.to_double(args[2])&.to_i || 0
3444
+ idx += parts.length if idx < 0
2375
3445
  if idx >= 0 && idx < parts.length
2376
3446
  Types::DynValue.of_string(parts[idx])
2377
3447
  else
@@ -2402,12 +3472,14 @@ module Odin
2402
3472
  }
2403
3473
 
2404
3474
  registry["match"] = ->(args, _ctx) {
3475
+ next Types::DynValue.of_null if args.length < 2
2405
3476
  s = args[0]&.to_string || ""
2406
3477
  pattern = args[1]&.to_string || ""
2407
3478
  begin
3479
+ next Types::DynValue.of_null if pattern.length > 256 || s.length > 100_000
2408
3480
  Types::DynValue.of_bool(!!(s =~ Regexp.new(pattern)))
2409
3481
  rescue RegexpError
2410
- Types::DynValue.of_bool(false)
3482
+ Types::DynValue.of_null
2411
3483
  end
2412
3484
  }
2413
3485
  registry["matches"] = registry["match"]
@@ -2451,20 +3523,24 @@ module Odin
2451
3523
  }
2452
3524
 
2453
3525
  registry["repeat"] = ->(args, _ctx) {
2454
- s = args[0]&.to_string || ""
2455
- count = args[1]&.to_number&.to_i || 0
2456
- count = 0 if count < 0
2457
- Types::DynValue.of_string(s * count)
3526
+ v = args[0]
3527
+ next Types::DynValue.of_null if v.nil? || v.null?
3528
+ count = Verbs::NumericVerbs.to_double(args[1])&.to_i
3529
+ next Types::DynValue.of_null if count.nil? || count < 0
3530
+ count = 100_000 if count > 100_000
3531
+ Types::DynValue.of_string(v.to_string * count)
2458
3532
  }
2459
3533
 
2460
3534
  registry["replaceRegex"] = ->(args, _ctx) {
2461
- s = args[0]&.to_string || ""
3535
+ next Types::DynValue.of_null if args.length < 3 || args[0].nil? || args[0].null?
3536
+ s = args[0].to_string
2462
3537
  pattern = args[1]&.to_string || ""
2463
3538
  replacement = args[2]&.to_string || ""
2464
3539
  begin
3540
+ next Types::DynValue.of_null if pattern.length > 256 || s.length > 100_000
2465
3541
  Types::DynValue.of_string(s.gsub(Regexp.new(pattern), replacement))
2466
3542
  rescue RegexpError
2467
- Types::DynValue.of_string(s)
3543
+ Types::DynValue.of_null
2468
3544
  end
2469
3545
  }
2470
3546
 
@@ -2477,10 +3553,27 @@ module Odin
2477
3553
  }
2478
3554
 
2479
3555
  registry["wrap"] = ->(args, _ctx) {
3556
+ return Types::DynValue.of_null if args.length < 2
3557
+
2480
3558
  s = args[0]&.to_string || ""
2481
- prefix = args[1]&.to_string || ""
2482
- suffix = args[2]&.to_string || ""
2483
- Types::DynValue.of_string(prefix + s + suffix)
3559
+ width = (args[1]&.to_number || 0).floor
3560
+ return Types::DynValue.of_null if width <= 0
3561
+ return Types::DynValue.of_string(s) if s.length <= width
3562
+
3563
+ lines = []
3564
+ current = +""
3565
+ s.split(/\s+/).each do |word|
3566
+ if current.empty?
3567
+ current = word.dup
3568
+ elsif current.length + 1 + word.length <= width
3569
+ current << " " << word
3570
+ else
3571
+ lines << current
3572
+ current = word.dup
3573
+ end
3574
+ end
3575
+ lines << current unless current.empty?
3576
+ Types::DynValue.of_string(lines.join("\n"))
2484
3577
  }
2485
3578
 
2486
3579
  registry["tokenize"] = ->(args, _ctx) {
@@ -2511,21 +3604,12 @@ module Odin
2511
3604
  }
2512
3605
 
2513
3606
  registry["between"] = ->(args, _ctx) {
2514
- s = args[0]&.to_string || ""
2515
- start_delim = args[1]&.to_string || ""
2516
- end_delim = args[2]&.to_string || ""
2517
- start_idx = s.index(start_delim)
2518
- if start_idx
2519
- after_start = start_idx + start_delim.length
2520
- end_idx = s.index(end_delim, after_start)
2521
- if end_idx
2522
- Types::DynValue.of_string(s[after_start...end_idx])
2523
- else
2524
- Types::DynValue.of_string("")
2525
- end
2526
- else
2527
- Types::DynValue.of_string("")
2528
- end
3607
+ return Types::DynValue.of_null if args.length < 3
3608
+
3609
+ value = args[0]&.to_number || 0
3610
+ min = args[1]&.to_number || 0
3611
+ max = args[2]&.to_number || 0
3612
+ Types::DynValue.of_bool(value >= min && value <= max)
2529
3613
  }
2530
3614
 
2531
3615
  # ── Encoding verbs ──
@@ -2537,10 +3621,17 @@ module Odin
2537
3621
  }
2538
3622
 
2539
3623
  registry["base64Decode"] = ->(args, _ctx) {
2540
- s = args[0]&.to_string || ""
3624
+ v = args[0]
3625
+ next Types::DynValue.of_null if v.nil? || v.null?
3626
+ s = v.to_string.tr("-_", "+/")
3627
+ pad = (4 - s.length % 4) % 4
3628
+ s += "=" * pad
2541
3629
  require "base64"
2542
3630
  begin
2543
- Types::DynValue.of_string(Base64.strict_decode64(s))
3631
+ decoded = Base64.strict_decode64(s)
3632
+ decoded.force_encoding("UTF-8")
3633
+ next Types::DynValue.of_null unless decoded.valid_encoding?
3634
+ Types::DynValue.of_string(decoded)
2544
3635
  rescue ArgumentError
2545
3636
  Types::DynValue.of_null
2546
3637
  end
@@ -2552,42 +3643,67 @@ module Odin
2552
3643
  }
2553
3644
 
2554
3645
  registry["hexDecode"] = ->(args, _ctx) {
2555
- s = args[0]&.to_string || ""
2556
- begin
2557
- Types::DynValue.of_string([s].pack("H*"))
2558
- rescue ArgumentError
2559
- Types::DynValue.of_null
2560
- end
3646
+ v = args[0]
3647
+ next Types::DynValue.of_null if v.nil? || v.null?
3648
+ s = v.to_string
3649
+ next Types::DynValue.of_null if s.length.odd? || s.match?(/[^0-9a-fA-F]/)
3650
+ decoded = [s].pack("H*")
3651
+ decoded.force_encoding("UTF-8")
3652
+ next Types::DynValue.of_null unless decoded.valid_encoding?
3653
+ Types::DynValue.of_string(decoded)
2561
3654
  }
2562
3655
 
2563
3656
  registry["urlEncode"] = ->(args, _ctx) {
2564
- s = args[0]&.to_string || ""
3657
+ v = args[0]
3658
+ next Types::DynValue.of_null if v.nil? || v.null?
2565
3659
  require "uri"
2566
- Types::DynValue.of_string(URI.encode_www_form_component(s).gsub("+", "%20"))
3660
+ Types::DynValue.of_string(URI.encode_www_form_component(v.to_string, "UTF-8").gsub("+", "%20").gsub("%7E", "~"))
2567
3661
  }
2568
3662
 
2569
3663
  registry["urlDecode"] = ->(args, _ctx) {
2570
- s = args[0]&.to_string || ""
3664
+ v = args[0]
3665
+ next Types::DynValue.of_null if v.nil? || v.null?
3666
+ s = v.to_string
3667
+ # Reject malformed percent-encoding (% not followed by two hex digits).
3668
+ next Types::DynValue.of_null if s.scan(/%(..?|.?)/).any? { |seq| seq[0].length != 2 || seq[0].match?(/[^0-9a-fA-F]/) }
2571
3669
  require "uri"
2572
3670
  begin
2573
- Types::DynValue.of_string(URI.decode_www_form_component(s))
3671
+ decoded = URI.decode_www_form_component(s, "UTF-8")
3672
+ next Types::DynValue.of_null unless decoded.valid_encoding?
3673
+ Types::DynValue.of_string(decoded)
2574
3674
  rescue ArgumentError
2575
- Types::DynValue.of_string(s)
3675
+ Types::DynValue.of_null
2576
3676
  end
2577
3677
  }
2578
3678
 
2579
3679
  registry["jsonEncode"] = ->(args, _ctx) {
2580
3680
  v = args[0]
3681
+ next Types::DynValue.of_null if v.nil? || v.null?
2581
3682
  require "json"
2582
- Types::DynValue.of_string(v.nil? || v.null? ? "null" : JSON.generate(v.to_ruby))
3683
+ if v.object? || v.array?
3684
+ next Types::DynValue.of_string(JSON.generate(v.to_ruby))
3685
+ end
3686
+ # Scalars: JSON-escape the string and drop the surrounding quotes.
3687
+ encoded = JSON.generate(v.to_string)
3688
+ Types::DynValue.of_string(encoded[1...-1])
2583
3689
  }
2584
3690
 
2585
3691
  registry["jsonDecode"] = ->(args, _ctx) {
2586
- s = args[0]&.to_string || ""
3692
+ v = args[0]
3693
+ next Types::DynValue.of_null if v.nil? || v.null?
3694
+ s = v.to_string
2587
3695
  require "json"
3696
+ if s.start_with?("{", "[")
3697
+ begin
3698
+ parsed = JSON.parse(s)
3699
+ next Types::DynValue.from_ruby(parsed) if parsed.is_a?(Hash) || parsed.is_a?(Array)
3700
+ rescue JSON::ParserError
3701
+ end
3702
+ end
3703
+ # Unescape as a JSON string; an invalid escape yields null.
3704
+ next Types::DynValue.of_null unless valid_json_escapes?(s)
2588
3705
  begin
2589
- parsed = JSON.parse(s)
2590
- Types::DynValue.from_ruby(parsed)
3706
+ Types::DynValue.of_string(JSON.parse("\"#{s}\""))
2591
3707
  rescue JSON::ParserError
2592
3708
  Types::DynValue.of_null
2593
3709
  end
@@ -2620,7 +3736,7 @@ module Odin
2620
3736
  registry["crc32"] = ->(args, _ctx) {
2621
3737
  s = args[0]&.to_string || ""
2622
3738
  require "zlib"
2623
- Types::DynValue.of_integer(Zlib.crc32(s))
3739
+ Types::DynValue.of_string(format("%08x", Zlib.crc32(s)))
2624
3740
  }
2625
3741
 
2626
3742
  # ── Logic verbs ──
@@ -2684,13 +3800,38 @@ module Odin
2684
3800
  registry["coerceDate"] = ->(args, _ctx) {
2685
3801
  v = args[0]
2686
3802
  return Types::DynValue.of_null if v.nil? || v.null?
3803
+ return v if v.type == :date
3804
+ if v.type == :timestamp
3805
+ s = v.to_string
3806
+ s = s[0...s.index("T")] if s.include?("T")
3807
+ next Types::DynValue.of_date(s)
3808
+ end
3809
+
2687
3810
  s = v.to_string.strip
2688
- begin
2689
- d = Date.parse(s)
2690
- Types::DynValue.of_date(d.strftime("%Y-%m-%d"))
2691
- rescue ArgumentError, TypeError
2692
- Types::DynValue.of_null
3811
+ next Types::DynValue.of_null if s.empty?
3812
+
3813
+ valid_ymd = ->(y, mo, d) { mo.between?(1, 12) && d >= 1 && Date.valid_date?(y, mo, d) }
3814
+
3815
+ # yyyy-MM-dd prefix
3816
+ if s.length >= 10 && s[4] == "-" && s[7] == "-" &&
3817
+ s[0, 4] =~ /\A\d{4}\z/ && s[5, 2] =~ /\A\d{2}\z/ && s[8, 2] =~ /\A\d{2}\z/
3818
+ next Types::DynValue.of_date(s[0, 10])
3819
+ end
3820
+
3821
+ # Compact YYYYMMDD
3822
+ if (m = s.match(/\A(\d{4})(\d{2})(\d{2})\z/))
3823
+ y, mo, d = m[1].to_i, m[2].to_i, m[3].to_i
3824
+ next valid_ymd.call(y, mo, d) ? Types::DynValue.of_date(format("%04d-%02d-%02d", y, mo, d)) : Types::DynValue.of_null
2693
3825
  end
3826
+
3827
+ # Slash MM/DD/YYYY (US), or DD/MM/YYYY when first > 12
3828
+ if (m = s.match(%r{\A(\d{1,2})/(\d{1,2})/(\d{4})\z}))
3829
+ first, second, y = m[1].to_i, m[2].to_i, m[3].to_i
3830
+ mo, d = first > 12 ? [second, first] : [first, second]
3831
+ next valid_ymd.call(y, mo, d) ? Types::DynValue.of_date(format("%04d-%02d-%02d", y, mo, d)) : Types::DynValue.of_null
3832
+ end
3833
+
3834
+ Types::DynValue.of_null
2694
3835
  }
2695
3836
 
2696
3837
  registry["coerceTimestamp"] = ->(args, _ctx) {
@@ -2721,15 +3862,19 @@ module Odin
2721
3862
  registry["toObject"] = ->(args, _ctx) {
2722
3863
  v = args[0]
2723
3864
  if v.nil? || v.null?
2724
- Types::DynValue.of_object({})
3865
+ Types::DynValue.of_null
2725
3866
  elsif v.object?
2726
3867
  v
2727
3868
  elsif v.array?
3869
+ items = v.value || []
2728
3870
  obj = {}
2729
- v.value.each_with_index { |item, i| obj[i.to_s] = item }
2730
- Types::DynValue.of_object(obj)
3871
+ items.each do |item|
3872
+ pair = to_object_pair(item)
3873
+ obj[pair[0]] = pair[1] if pair
3874
+ end
3875
+ obj.empty? ? Types::DynValue.of_null : Types::DynValue.of_object(obj)
2731
3876
  else
2732
- Types::DynValue.of_object({ "value" => v })
3877
+ Types::DynValue.of_null
2733
3878
  end
2734
3879
  }
2735
3880
 
@@ -2749,7 +3894,7 @@ module Odin
2749
3894
  end
2750
3895
 
2751
3896
  if seed_arg
2752
- # Deterministic UUID from seed — matches TypeScript's exact algorithm
3897
+ # Deterministic UUID from seed
2753
3898
  hash1 = 5381
2754
3899
  hash2 = 52711
2755
3900
  seed_arg.each_byte do |c|
@@ -2793,11 +3938,8 @@ module Odin
2793
3938
  digits = raw.gsub(/\D/, "")
2794
3939
  formatted = case country
2795
3940
  when "US", "CA"
2796
- if digits.length == 10
2797
- "(#{digits[0..2]}) #{digits[3..5]}-#{digits[6..9]}"
2798
- elsif digits.length == 11 && digits.start_with?("1")
2799
- "+1 (#{digits[1..3]}) #{digits[4..6]}-#{digits[7..10]}"
2800
- end
3941
+ d = (digits.length == 11 && digits.start_with?("1")) ? digits[1..] : digits
3942
+ "(#{d[0..2]}) #{d[3..5]}-#{d[6..9]}" if d.length == 10
2801
3943
  when "GB"
2802
3944
  if digits.length == 11 && digits.start_with?("0")
2803
3945
  "+44 #{digits[1..4]} #{digits[5..10]}"