kumi 0.0.12 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/BACKLOG.md +34 -0
  4. data/CHANGELOG.md +15 -0
  5. data/CLAUDE.md +4 -6
  6. data/README.md +0 -18
  7. data/config/functions.yaml +352 -0
  8. data/docs/dev/analyzer-debug.md +52 -0
  9. data/docs/dev/parse-command.md +64 -0
  10. data/docs/functions/analyzer_integration.md +199 -0
  11. data/docs/functions/signatures.md +171 -0
  12. data/examples/hash_objects_demo.rb +138 -0
  13. data/golden/array_operations/schema.kumi +17 -0
  14. data/golden/cascade_logic/schema.kumi +16 -0
  15. data/golden/mixed_nesting/schema.kumi +42 -0
  16. data/golden/simple_math/schema.kumi +10 -0
  17. data/lib/kumi/analyzer.rb +72 -21
  18. data/lib/kumi/core/analyzer/checkpoint.rb +72 -0
  19. data/lib/kumi/core/analyzer/debug.rb +167 -0
  20. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +1 -3
  21. data/lib/kumi/core/analyzer/passes/function_signature_pass.rb +199 -0
  22. data/lib/kumi/core/analyzer/passes/load_input_cse.rb +120 -0
  23. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +99 -151
  24. data/lib/kumi/core/analyzer/passes/toposorter.rb +37 -1
  25. data/lib/kumi/core/analyzer/state_serde.rb +64 -0
  26. data/lib/kumi/core/analyzer/structs/access_plan.rb +12 -10
  27. data/lib/kumi/core/compiler/access_planner.rb +3 -2
  28. data/lib/kumi/core/function_registry/collection_functions.rb +3 -1
  29. data/lib/kumi/core/functions/dimension.rb +98 -0
  30. data/lib/kumi/core/functions/dtypes.rb +20 -0
  31. data/lib/kumi/core/functions/errors.rb +11 -0
  32. data/lib/kumi/core/functions/kernel_adapter.rb +45 -0
  33. data/lib/kumi/core/functions/loader.rb +119 -0
  34. data/lib/kumi/core/functions/registry_v2.rb +68 -0
  35. data/lib/kumi/core/functions/shape.rb +70 -0
  36. data/lib/kumi/core/functions/signature.rb +122 -0
  37. data/lib/kumi/core/functions/signature_parser.rb +86 -0
  38. data/lib/kumi/core/functions/signature_resolver.rb +272 -0
  39. data/lib/kumi/core/ir/execution_engine/interpreter.rb +98 -7
  40. data/lib/kumi/core/ir/execution_engine/profiler.rb +202 -0
  41. data/lib/kumi/core/ir/execution_engine.rb +30 -1
  42. data/lib/kumi/dev/ir.rb +75 -0
  43. data/lib/kumi/dev/parse.rb +105 -0
  44. data/lib/kumi/dev/runner.rb +83 -0
  45. data/lib/kumi/frontends/ruby.rb +28 -0
  46. data/lib/kumi/frontends/text.rb +46 -0
  47. data/lib/kumi/frontends.rb +29 -0
  48. data/lib/kumi/kernels/ruby/aggregate_core.rb +105 -0
  49. data/lib/kumi/kernels/ruby/datetime_scalar.rb +21 -0
  50. data/lib/kumi/kernels/ruby/mask_scalar.rb +15 -0
  51. data/lib/kumi/kernels/ruby/scalar_core.rb +63 -0
  52. data/lib/kumi/kernels/ruby/string_scalar.rb +19 -0
  53. data/lib/kumi/kernels/ruby/vector_struct.rb +39 -0
  54. data/lib/kumi/runtime/executable.rb +63 -20
  55. data/lib/kumi/schema.rb +4 -4
  56. data/lib/kumi/support/diff.rb +22 -0
  57. data/lib/kumi/support/ir_render.rb +61 -0
  58. data/lib/kumi/version.rb +1 -1
  59. data/lib/kumi.rb +2 -0
  60. data/performance_results.txt +63 -0
  61. data/scripts/test_mixed_nesting_performance.rb +206 -0
  62. metadata +45 -5
  63. data/docs/features/javascript-transpiler.md +0 -148
  64. data/lib/kumi/js.rb +0 -23
  65. data/lib/kumi/support/ir_dump.rb +0 -491
@@ -0,0 +1,272 @@
1
# frozen_string_literal: true

require_relative "errors"
require_relative "shape"
require_relative "signature"

module Kumi
  module Core
    module Functions
      # Given a set of signatures and actual argument shapes, pick the best match.
      # Supports NEP 20 extensions: fixed-size, flexible, and broadcastable dimensions.
      #
      # Inputs:
      #   signatures : Array<Signature> (with Dimension objects)
      #   arg_shapes : Array<Array<Symbol|Integer>> e.g., [[:i], [:i]] or [[], [3]] or [[2, :i]]
      #
      # Returns:
      #   { signature:, result_axes:, join_policy:, dropped_axes:, effective_signature: }
      #
      # NEP 20 Matching rules:
      # - Arity must match exactly (before flexible dimension resolution).
      # - Fixed-size dimensions (integers) must match exactly.
      # - Flexible dimensions (?) can be omitted if not present in all operands.
      # - Broadcastable dimensions (|1) can match scalar or size-1 dimensions.
      # - For each param position, shapes are checked according to NEP 20 rules.
      # - We prefer exact matches, then flexible matches, then broadcast matches.
      #
      # NOTE(review): this class assumes Dimension responds to #name, #size,
      # #flexible?, #named?, #fixed_size?, #broadcastable? and defines value
      # equality (==) — confirm against dimension.rb, which is not in view here.
      class SignatureResolver
        class << self
          # Pick the cheapest matching signature for the given argument shapes.
          #
          # @param signatures [Array<Signature>] candidate signatures
          # @param arg_shapes [Array<Array<Symbol, Integer>>] one shape per argument;
          #   [] denotes a scalar argument. nil is treated as no arguments.
          # @return [Hash] the winning candidate, including :signature, :score,
          #   :result_axes, :join_policy, :dropped_axes, :env and
          #   :effective_signature (name-only view for analyzer/lowering).
          # @raise [SignatureMatchError] when arg_shapes is malformed or no
          #   signature matches.
          def choose(signatures:, arg_shapes:)
            # Handle empty arg_shapes for zero-arity functions
            arg_shapes = [] if arg_shapes.nil?
            sanity_check_args!(arg_shapes)

            candidates = signatures.map do |sig|
              score = match_score(sig, arg_shapes)
              next if score.nil?

              # Convert arg_shapes to normalized Dimension arrays for environment building
              # (re-normalized here and inside match_score; redundant but cheap)
              normalized_args = arg_shapes.map { |shape| normalize_shape(shape) }
              env = build_dimension_environment(sig, normalized_args)
              next if env.nil? # Skip candidates with dimension conflicts

              {
                signature: sig,
                score: score,
                result_axes: sig.out_shape.map(&:name), # Convert Dimension objects to names for backward compatibility
                join_policy: sig.join_policy,
                dropped_axes: sig.dropped_axes.map { |name| name.is_a?(Symbol) ? name : name.to_sym }, # Convert to symbols
                env: env
              }
            end.compact

            raise SignatureMatchError, mismatch_message(signatures, arg_shapes) if candidates.empty?

            # Lower score is better: 0 = exact-everywhere, then number of broadcasts.
            # Ties are broken by signature order (min_by keeps the first minimum).
            best = candidates.min_by { |c| c[:score] }

            # Add effective signature and environment for analyzer/lowering
            best[:effective_signature] = {
              in_shapes: best[:signature].in_shapes.map { |dims| dims.map(&:name) },
              out_shape: best[:signature].out_shape.map(&:name),
              join_policy: best[:signature].join_policy
            }
            # env is already included from candidate building

            best
          end

          private

          # Validate the arg_shapes structure: an array of arrays whose elements
          # are only Symbols (named dims) or Integers (fixed sizes).
          def sanity_check_args!(arg_shapes)
            unless arg_shapes.is_a?(Array) &&
                   arg_shapes.all? { |s| s.is_a?(Array) && s.all? { |a| a.is_a?(Symbol) || a.is_a?(Integer) } }
              raise SignatureMatchError, "arg_shapes must be an array of dimension arrays (symbols or integers), got: #{arg_shapes.inspect}"
            end
          end

          # Returns an integer "broadcast cost" or nil if not matchable.
          # Lower score = better match: 0 = exact, then increasing cost for broadcasts/flexibility
          def match_score(sig, arg_shapes)
            return nil unless sig.arity == arg_shapes.length

            # Convert arg_shapes to normalized Dimension arrays for comparison
            normalized_args = arg_shapes.map { |shape| normalize_shape(shape) }

            # Try to match each argument against its expected signature shape
            cost = 0
            sig.in_shapes.each_with_index do |expected_dims, idx|
              got_dims = normalized_args[idx]
              arg_cost = match_argument_cost(got: got_dims, expected: expected_dims)
              return nil if arg_cost.nil?

              cost += arg_cost
            end

            # Additional checks for join_policy constraints
            return nil unless valid_join_policy?(sig, normalized_args)

            cost
          end

          # (redundant: everything below the `private` marker above is already
          # private; kept as-is since removing it would alter the code)
          private

          # Convert a shape array (symbols/integers) to normalized Dimension array.
          # Both branches delegate to Dimension.new; the case distinguishes valid
          # element types from everything else.
          def normalize_shape(shape)
            shape.map do |dim|
              case dim
              when Symbol
                Dimension.new(dim)
              when Integer
                Dimension.new(dim)
              else
                raise SignatureMatchError, "Invalid dimension type: #{dim.class}"
              end
            end
          end

          # Calculate cost of matching one argument against expected dimensions.
          # Returns nil when the argument cannot match at all.
          def match_argument_cost(got:, expected:)
            # Handle scalar first
            if got.empty?
              return expected.empty? ? 0 : (expected.any?(&:flexible?) ? 10 : 1) # scalar broadcast or flexible-tail
            end

            # Try strict matching first if no flexible dimensions
            if !expected.any?(&:flexible?) && got.length == expected.length
              total = 0
              got.zip(expected).each do |g, e|
                c = match_dimension_cost(got: g, expected: e)
                return nil if c.nil?

                total += c
              end
              return total
            end

            # Use right-aligned flexible matching
            right_align_match(got: got, expected: expected)
          end

          # Right-aligned matching for flexible dimensions (NEP 20 ? modifier).
          # Walks both dim lists from the right; flexible expected dims may be
          # dropped (cost 10 each). Leftover got dims mean no match.
          def right_align_match(got:, expected:)
            gi = got.length - 1
            ei = expected.length - 1
            cost = 0

            while ei >= 0
              exp = expected[ei]

              if exp.flexible? && gi < 0
                # optional tail dimension that we don't have → ok, consume expected only
                ei -= 1
                cost += 10
                next
              end

              return nil if gi < 0 # ran out of got dims and exp wasn't flexible

              got_dim = got[gi]
              dim_cost = match_dimension_cost(got: got_dim, expected: exp)
              if dim_cost.nil?
                # if exp is flexible, we can try to drop it
                if exp.flexible?
                  ei -= 1
                  cost += 10
                  next
                else
                  return nil
                end
              else
                cost += dim_cost
                gi -= 1
                ei -= 1
              end
            end

            # if we still have leftover got dims, argument is longer than expected → not a match
            return nil if gi >= 0

            cost
          end

          # Calculate cost of matching one dimension against another.
          # Cost scale: 0 exact / name match, 2 fixed-vs-named same name,
          # 3 broadcast, nil no match.
          def match_dimension_cost(got:, expected:)
            return 0 if got == expected # Exact match

            # Fixed-size equality
            if got.fixed_size? && expected.fixed_size?
              return got.size == expected.size ? 0 : nil
            end

            # Same symbolic name (ignoring modifiers) → ok unless one is fixed and the other isn't (penalize)
            if got.named? && expected.named? && got.name == expected.name
              return (got.fixed_size? || expected.fixed_size?) ? 2 : 0
            end

            # Broadcastable expected dim accepts scalar or size-1
            if expected.broadcastable?
              # scalar at argument level would have been handled in match_argument_cost
              # so here we check for size-1 fixed dimensions
              return 3 if got.fixed_size? && got.size == 1
              # Named dimensions that could be size-1 at runtime also get broadcast cost
              return 3 if got.named?
            end

            nil # No match possible
          end

          # Check if join_policy constraints are satisfied.
          # A non-nil policy (:zip/:product) accepts any axes; a nil policy
          # requires identical axis names OR a consistent dimension environment.
          def valid_join_policy?(sig, normalized_args)
            return true if sig.join_policy # :zip or :product allows different axes

            # nil join_policy: check if dimension names are consistent
            non_scalar_args = normalized_args.reject { |a| Shape.scalar?(a) }
            return true if non_scalar_args.empty?

            # For nil join_policy, we allow different dimension names if:
            # 1. All args have same dimension names (element-wise operations), OR
            # 2. The constraint solver can validate cross-dimensional consistency (like matmul)
            first_names = non_scalar_args.first.map(&:name)
            same_names = non_scalar_args.all? { |arg| arg.map(&:name) == first_names }

            return true if same_names

            # If dimension names differ, check if constraint solver can handle it
            # This allows operations like matmul where dimensions are linked across arguments
            env = build_dimension_environment(sig, normalized_args)
            !env.nil?
          end

          # Human-readable error listing the tried signatures and given shapes.
          def mismatch_message(signatures, arg_shapes)
            sigs = signatures.map(&:inspect).join(", ")
            "no matching signature for shapes #{pp_shapes(arg_shapes)} among [#{sigs}]"
          end

          # Format shapes as "(i,j), (k)" for error messages.
          def pp_shapes(shapes)
            shapes.map { |ax| "(#{ax.join(',')})" }.join(", ")
          end

          # Build dimension environment by checking consistency of named dimensions across arguments.
          # Maps each named expected dimension to the concrete got dimension bound
          # at the same (left-aligned) position; returns nil on conflicting bindings.
          # NOTE(review): positions are compared left-aligned here, while
          # right_align_match aligns from the right — confirm this asymmetry is
          # intended for flexible signatures.
          def build_dimension_environment(sig, normalized_args)
            env = {}

            # Walk all expected dimensions across all arguments
            sig.in_shapes.each_with_index do |expected_shape, arg_idx|
              got_shape = normalized_args[arg_idx] || []

              expected_shape.each_with_index do |exp_dim, dim_idx|
                next unless exp_dim.named? && dim_idx < got_shape.length

                got_dim = got_shape[dim_idx]
                dim_name = exp_dim.name

                # Check for consistency: same dimension name must map to same concrete value
                if env.key?(dim_name)
                  # If we've seen this dimension name before, it must match
                  if env[dim_name] != got_dim
                    return nil # Inconsistent binding - signature doesn't match
                  end
                else
                  # First time seeing this dimension name - record the binding
                  env[dim_name] = got_dim
                end
              end
            end

            env
          end
        end
      end
    end
  end
end
@@ -9,20 +9,57 @@ module Kumi
9
9
  PRODUCES_SLOT = %i[const load_input ref array map reduce lift align_to switch].freeze
10
10
  NON_PRODUCERS = %i[guard_push guard_pop assign store].freeze
11
11
 
12
# Map each STORED name to the declaration that stores it, so a target can be
# resolved by stored name (covers "__vec twin" decls) rather than decl name.
# Later declarations storing the same name overwrite earlier ones.
def self.build_name_index(ir_module)
  ir_module.decls.each_with_object({}) do |decl, index|
    decl.ops.each do |op|
      next unless op.tag == :store

      stored_name = op.attrs[:name]
      index[stored_name] = decl if stored_name
    end
  end
end
23
+
12
24
  def self.run(ir_module, ctx, accessors:, registry:)
13
25
  # Validate registry is properly initialized
14
26
  raise ArgumentError, "Registry cannot be nil" if registry.nil?
15
27
  raise ArgumentError, "Registry must be a Hash, got #{registry.class}" unless registry.is_a?(Hash)
16
28
 
29
+ # --- PROFILER: init per run ---
30
+ Profiler.reset!(meta: { decls: ir_module.decls&.size || 0 }) if Profiler.enabled?
31
+
17
32
  outputs = {}
18
33
  target = ctx[:target]
19
34
  guard_stack = [true]
35
+
36
+ # Always ensure we have a declaration cache - either from caller or new for this VM run
37
+ declaration_cache = ctx[:declaration_cache] || {}
38
+
39
+ # Build name index for targeting by stored names
40
+ name_index = ctx[:name_index] || (target ? build_name_index(ir_module) : nil)
41
+
42
+ # Choose declarations to execute by stored name (not only decl name)
43
+ decls_to_run =
44
+ if target
45
+ # Prefer a decl that STORES the target (covers __vec twins)
46
+ d = name_index && name_index[target]
47
+ # Fallback: allow targeting by decl name (legacy behavior)
48
+ d ||= ir_module.decls.find { |dd| dd.name == target }
49
+ raise "Unknown target: #{target}" unless d
50
+ [d]
51
+ else
52
+ ir_module.decls
53
+ end
20
54
 
21
- ir_module.decls.each do |decl|
55
+ decls_to_run.each do |decl|
22
56
  slots = []
23
57
  guard_stack = [true] # reset per decl
24
58
 
25
59
  decl.ops.each_with_index do |op, op_index|
60
+ t0 = Profiler.enabled? ? Profiler.t0 : nil
61
+ cpu_t0 = Profiler.enabled? ? Profiler.cpu_t0 : nil
62
+ rows_touched = nil
26
63
  if ENV["ASSERT_VM_SLOTS"] == "1"
27
64
  expected = op_index
28
65
  unless slots.length == expected
@@ -47,17 +84,20 @@ module Kumi
47
84
  false
48
85
  end
49
86
  slots << nil # keep slot_id == op_index
87
+ Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "enter") if t0
50
88
  next
51
89
 
52
90
  when :guard_pop
53
91
  guard_stack.pop
54
92
  slots << nil
93
+ Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "exit") if t0
55
94
  next
56
95
  end
57
96
 
58
97
  # Skip body when guarded off, but keep indices aligned
59
98
  unless guard_stack.last
60
99
  slots << nil if PRODUCES_SLOT.include?(op.tag) || NON_PRODUCERS.include?(op.tag)
100
+ Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "skipped") if t0
61
101
  next
62
102
  end
63
103
 
@@ -69,35 +109,81 @@ module Kumi
69
109
  raise "assign: dst/src OOB" if dst >= slots.length || src >= slots.length
70
110
 
71
111
  slots[dst] = slots[src]
112
+ Profiler.record!(decl: decl.name, idx: op_index, tag: :assign, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
72
113
 
73
114
  when :const
74
115
  result = Values.scalar(op.attrs[:value])
75
116
  puts "DEBUG Const #{op.attrs[:value].inspect}: result=#{result}" if ENV["DEBUG_VM_ARGS"]
76
117
  slots << result
118
+ Profiler.record!(decl: decl.name, idx: op_index, tag: :const, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
77
119
 
78
120
  when :load_input
79
121
  plan_id = op.attrs[:plan_id]
80
122
  scope = op.attrs[:scope] || []
81
123
  scalar = op.attrs[:is_scalar]
82
124
  indexed = op.attrs[:has_idx]
83
- raw = accessors.fetch(plan_id).call(ctx[:input] || ctx["input"])
84
125
 
85
- puts "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect}" if ENV["DEBUG_VM_ARGS"]
126
+ # NEW: consult runtime accessor cache
127
+ acc_cache = ctx[:accessor_cache] || {}
128
+ input_obj = ctx[:input] || ctx["input"]
129
+ cache_key = [plan_id, input_obj.object_id]
130
+
131
+ if acc_cache.key?(cache_key)
132
+ raw = acc_cache[cache_key]
133
+ hit = true
134
+ else
135
+ raw = accessors.fetch(plan_id).call(input_obj)
136
+ acc_cache[cache_key] = raw
137
+ hit = false
138
+ end
139
+
140
+ puts "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect} cache_hit: #{hit}" if ENV["DEBUG_VM_ARGS"]
86
141
  slots << if scalar
87
142
  Values.scalar(raw)
88
143
  elsif indexed
144
+ rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
89
145
  Values.vec(scope, raw.map { |v, idx| { v: v, idx: Array(idx) } }, true)
90
146
  else
147
+ rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
91
148
  Values.vec(scope, raw.map { |v| { v: v } }, false)
92
149
  end
150
+ rows_touched ||= 1
151
+ cache_note = hit ? "hit:#{plan_id}" : "miss:#{plan_id}"
152
+ Profiler.record!(decl: decl.name, idx: op_index, tag: :load_input, op: op, t0: t0, cpu_t0: cpu_t0,
153
+ rows: rows_touched, note: cache_note) if t0
93
154
 
94
155
  when :ref
95
156
  name = op.attrs[:name]
96
- referenced_value = outputs.fetch(name) { raise "Missing output for reference: #{name}" }
157
+
158
+ if outputs.key?(name)
159
+ referenced = outputs[name]
160
+ elsif declaration_cache.key?(name)
161
+ referenced = declaration_cache[name]
162
+ else
163
+ # demand-compute the producing decl up to the store of `name`
164
+ active = (ctx[:active] ||= {})
165
+ raise "cycle detected: #{name}" if active[name]
166
+ active[name] = true
167
+
168
+ subctx = {
169
+ input: ctx[:input] || ctx["input"],
170
+ target: name, # target is the STORED NAME
171
+ accessor_cache: ctx[:accessor_cache],
172
+ declaration_cache: ctx[:declaration_cache],
173
+ name_index: name_index, # reuse map
174
+ active: active
175
+ }
176
+ referenced = self.run(ir_module, subctx, accessors: accessors, registry: registry).fetch(name)
177
+ active.delete(name)
178
+ end
179
+
97
180
  if ENV["DEBUG_VM_ARGS"]
98
- puts "DEBUG Ref #{name}: #{referenced_value[:k] == :scalar ? "scalar(#{referenced_value[:v].inspect})" : "#{referenced_value[:k]}(#{referenced_value[:rows]&.size || 0} rows)"}"
181
+ puts "DEBUG Ref #{name}: #{referenced[:k] == :scalar ? "scalar(#{referenced[:v].inspect})" : "#{referenced[:k]}(#{referenced[:rows]&.size || 0} rows)"}"
99
182
  end
100
- slots << referenced_value
183
+
184
+ slots << referenced
185
+ rows_touched = (referenced[:k] == :vec) ? (referenced[:rows]&.size || 0) : 1
186
+ Profiler.record!(decl: decl.name, idx: op_index, tag: :ref, op: op, t0: t0, cpu_t0: cpu_t0, rows: rows_touched) if t0
101
187
 
102
188
  when :array
103
189
  # Validate slot indices before accessing
@@ -216,7 +302,10 @@ module Kumi
216
302
  raise "Store operation '#{name}': source slot #{src} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
217
303
  end
218
304
 
219
- outputs[name] = slots[src]
305
+ result = slots[src]
306
+ outputs[name] = result
307
+ # Also store in declaration cache for future ref operations
308
+ declaration_cache[name] = result
220
309
 
221
310
  # keep slot_id == op_index invariant
222
311
  slots << nil
@@ -327,6 +416,8 @@ module Kumi
327
416
  end
328
417
  end
329
418
 
419
+ # --- end-of-run summary ---
420
+ Profiler.emit_summary! if Profiler.enabled?
330
421
  outputs
331
422
  end
332
423
  end
@@ -0,0 +1,202 @@
1
# frozen_string_literal: true

require "json"
require "fileutils"
require "set" # FIX: Set is used below; not autoloaded before Ruby 3.2
require "time"

module Kumi
  module Core
    module IR
      module ExecutionEngine
        # Opt-in per-op profiler for the IR interpreter.
        #
        # Enabled when ENV["KUMI_PROFILE"] == "1". Each interpreter op records a
        # wall/CPU timing event; events are also aggregated per (decl, idx, tag)
        # across runs so repeated executions can be averaged. Events can be
        # streamed as JSON lines to ENV["KUMI_PROFILE_FILE"] (default
        # "tmp/profile.jsonl") when ENV["KUMI_PROFILE_STREAM"] == "1".
        module Profiler
          class << self
            def enabled? = ENV["KUMI_PROFILE"] == "1"

            # Re-initialize per-run state. Aggregated stats survive across runs
            # (for multi-run averaging) unless KUMI_PROFILE_TRUNCATE=1, which
            # also truncates the output file.
            def reset!(meta: {})
              return unless enabled?

              @events = []
              @meta = meta
              @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
              @run_id = (@run_id || 0) + 1 # Track run number for averaging
              @aggregated_stats = (@aggregated_stats || Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } })

              if ENV["KUMI_PROFILE_TRUNCATE"] == "1"
                FileUtils.mkdir_p(File.dirname(@file))
                File.write(@file, "")
                @aggregated_stats.clear # Clear aggregated stats on truncate
              end
            end

            # Monotonic start time (seconds) — for measuring durations.
            def t0
              Process.clock_gettime(Process::CLOCK_MONOTONIC)
            end

            # CPU time start (process-wide, seconds).
            def cpu_t0
              Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
            end

            # Record one op execution with both wall time and CPU time.
            #
            # @param decl   declaration name (string/symbol)
            # @param idx    op index within the declaration
            # @param tag    op tag (symbol)
            # @param op     the op object (must respond to #attrs and #args)
            # @param t0     monotonic start from #t0
            # @param cpu_t0 CPU start from #cpu_t0 (falls back to wall time if nil)
            # @param rows   number of rows touched, if known
            # @param note   free-form annotation (e.g. cache "hit:"/"miss:")
            # @return [Hash, nil] the recorded event, or nil when disabled
            def record!(decl:, idx:, tag:, op:, t0:, cpu_t0: nil, rows: nil, note: nil)
              return unless enabled?

              wall_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000.0)
              cpu_ms = cpu_t0 ? ((Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID) - cpu_t0) * 1000.0) : wall_ms

              ev = {
                ts: Time.now.utc.iso8601(3),
                run: @run_id,
                decl: decl, # decl name (string/symbol)
                i: idx, # op index
                tag: tag, # op tag (symbol)
                wall_ms: wall_ms.round(4),
                cpu_ms: cpu_ms.round(4),
                rows: rows,
                note: note,
                key: op_key(decl, idx, tag, op), # stable key for grep/diff
                attrs: compact_attrs(op.attrs)
              }

              # Aggregate stats for multi-run averaging.
              # FIX: renamed local (was `op_key`) so it no longer shadows the
              # #op_key method.
              agg_key = "#{decl}@#{idx}:#{tag}"
              agg = @aggregated_stats[agg_key]
              agg[:count] += 1
              agg[:total_ms] += wall_ms
              agg[:total_cpu_ms] += cpu_ms
              agg[:rows] += (rows || 0)
              agg[:runs] << @run_id
              agg[:decl] = decl
              agg[:tag] = tag
              agg[:idx] = idx
              agg[:note] = note if note

              (@events ||= []) << ev
              stream(ev) if ENV["KUMI_PROFILE_STREAM"] == "1"
              ev
            end

            # Current-run summary: top ops by wall time, grouped by (decl, tag).
            def summary(top: 20)
              return {} unless enabled?

              # Current run summary (legacy format)
              current_agg = Hash.new { |h, k| h[k] = { count: 0, ms: 0.0, rows: 0 } }
              (@events || []).each do |e|
                k = [e[:decl], e[:tag]]
                a = current_agg[k]
                a[:count] += 1
                a[:ms] += (e[:wall_ms] || e[:ms] || 0)
                a[:rows] += (e[:rows] || 0)
              end
              current_ranked = current_agg.map { |(decl, tag), v|
                # FIX: also require ms > 0 — rows/0.0 yields Infinity and
                # Infinity.round raises FloatDomainError (and breaks to_json).
                { decl: decl, tag: tag, count: v[:count], ms: v[:ms].round(3), rows: v[:rows],
                  rps: (v[:rows] > 0 && v[:ms] > 0) ? (v[:rows] / v[:ms]).round(1) : nil }
              }.sort_by { |h| -h[:ms] }.first(top)

              { meta: @meta || {}, top: current_ranked,
                total_ms: ((@events || []).sum { |e| e[:wall_ms] || e[:ms] || 0 }).round(3),
                op_count: (@events || []).size,
                run_id: @run_id }
            end

            # Multi-run averaged analysis over the cross-run aggregated stats.
            def averaged_analysis(top: 20)
              return {} unless enabled? && @aggregated_stats&.any?

              # Convert aggregated stats to averaged metrics
              averaged = @aggregated_stats.map do |op_key, stats|
                num_runs = stats[:runs].size
                avg_wall_ms = stats[:total_ms] / stats[:count]
                avg_cpu_ms = stats[:total_cpu_ms] / stats[:count]
                total_wall_ms = stats[:total_ms]
                total_cpu_ms = stats[:total_cpu_ms]

                {
                  op_key: op_key,
                  decl: stats[:decl],
                  idx: stats[:idx],
                  tag: stats[:tag],
                  runs: num_runs,
                  total_calls: stats[:count],
                  calls_per_run: stats[:count] / num_runs.to_f,
                  avg_wall_ms: avg_wall_ms.round(4),
                  avg_cpu_ms: avg_cpu_ms.round(4),
                  total_wall_ms: total_wall_ms.round(3),
                  total_cpu_ms: total_cpu_ms.round(3),
                  cpu_efficiency: total_wall_ms > 0 ? (total_cpu_ms / total_wall_ms * 100).round(1) : 100,
                  rows_total: stats[:rows],
                  note: stats[:note]
                }
              end.sort_by { |s| -s[:total_wall_ms] }.first(top)

              {
                meta: @meta || {},
                total_runs: (@aggregated_stats.values.map { |s| s[:runs].size }.max || 0),
                averaged_ops: averaged,
                total_operations: @aggregated_stats.size
              }
            end

            # Identify potential cache overhead operations (refs, load_inputs,
            # and anything noted as cache-related).
            def cache_overhead_analysis
              return {} unless enabled? && @aggregated_stats&.any?

              # Look for operations that might be cache-related
              cache_ops = @aggregated_stats.select do |op_key, stats|
                op_key.include?("ref") || op_key.include?("load_input") || stats[:note]&.include?("cache")
              end

              cache_analysis = cache_ops.map do |op_key, stats|
                num_runs = stats[:runs].size
                avg_wall_ms = stats[:total_ms] / stats[:count]

                {
                  op_key: op_key,
                  decl: stats[:decl],
                  tag: stats[:tag],
                  avg_time_ms: avg_wall_ms.round(4),
                  total_time_ms: stats[:total_ms].round(3),
                  call_count: stats[:count],
                  overhead_per_call: avg_wall_ms.round(6)
                }
              end.sort_by { |s| -s[:total_time_ms] }

              {
                cache_operations: cache_analysis,
                total_cache_time: cache_analysis.sum { |op| op[:total_time_ms] }.round(3)
              }
            end

            # Append the end-of-run summary to the output file as one JSON line.
            def emit_summary!
              return unless enabled?

              stream({ ts: Time.now.utc.iso8601(3), kind: "summary", data: summary })
            end

            # Stable textual key for "match ops one by one" across runs/diffs.
            def op_key(decl, idx, tag, op)
              attrs = compact_attrs(op.attrs)
              args = op.args
              "#{decl}@#{idx}:#{tag}|#{attrs.keys.sort_by(&:to_s).map { |k| "#{k}=#{attrs[k].inspect}" }.join(",")}|args=#{args.inspect}"
            end

            # Reduce attr values to JSON-friendly primitives (stringify the rest).
            def compact_attrs(h)
              return {} unless h

              h.transform_values do |v|
                case v
                when Array, Hash, Symbol, String, Numeric, TrueClass, FalseClass, NilClass then v
                else v.to_s
                end
              end
            end

            # Append one object as a JSON line to @file (created on demand).
            def stream(obj)
              return unless @file

              FileUtils.mkdir_p(File.dirname(@file))
              File.open(@file, "a") { |f| f.puts(obj.to_json) }
            end
          end
        end
      end
    end
  end
end