RubyGems - kumi - Versions diffs - 0.0.14 → 0.0.16 - Mend

kumi 0.0.14 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

checksums.yaml +4 -4
data/CHANGELOG.md +40 -0
data/README.md +0 -27
data/docs/dev/vm-profiling.md +95 -0
data/docs/features/README.md +0 -7
data/lib/kumi/analyzer.rb +10 -2
data/lib/kumi/compiler.rb +6 -5
data/lib/kumi/core/analyzer/passes/ir_dependency_pass.rb +65 -0
data/lib/kumi/core/analyzer/passes/ir_execution_schedule_pass.rb +67 -0
data/lib/kumi/core/analyzer/passes/toposorter.rb +15 -50
data/lib/kumi/core/compiler/access_builder.rb +22 -9
data/lib/kumi/core/compiler/access_codegen.rb +61 -0
data/lib/kumi/core/compiler/access_emit/base.rb +173 -0
data/lib/kumi/core/compiler/access_emit/each_indexed.rb +56 -0
data/lib/kumi/core/compiler/access_emit/materialize.rb +45 -0
data/lib/kumi/core/compiler/access_emit/ravel.rb +50 -0
data/lib/kumi/core/compiler/access_emit/read.rb +32 -0
data/lib/kumi/core/ir/execution_engine/interpreter.rb +56 -189
data/lib/kumi/core/ir/execution_engine/profiler.rb +139 -11
data/lib/kumi/core/ir/execution_engine/values.rb +8 -8
data/lib/kumi/core/ir/execution_engine.rb +5 -30
data/lib/kumi/dev/parse.rb +12 -12
data/lib/kumi/dev/profile_aggregator.rb +301 -0
data/lib/kumi/dev/profile_runner.rb +199 -0
data/lib/kumi/dev/runner.rb +3 -1
data/lib/kumi/dev.rb +14 -0
data/lib/kumi/runtime/executable.rb +32 -153
data/lib/kumi/runtime/run.rb +105 -0
data/lib/kumi/schema.rb +15 -14
data/lib/kumi/version.rb +1 -1
data/lib/kumi.rb +4 -2
metadata +15 -3
data/docs/features/analysis-cascade-mutual-exclusion.md +0 -89

data/lib/kumi/core/ir/execution_engine/interpreter.rb CHANGED Viewed

@@ -8,57 +8,34 @@ module Kumi
         module Interpreter
           PRODUCES_SLOT = %i[const load_input ref array map reduce lift align_to switch].freeze
           NON_PRODUCERS = %i[guard_push guard_pop assign store].freeze
-          def self.build_name_index(ir_module)
-            index = {}
-            ir_module.decls.each do |decl|
-              decl.ops.each do |op|
-                next unless op.tag == :store
-                name = op.attrs[:name]
-                index[name] = decl if name
-              end
+          EMPTY_ARY = [].freeze
+          def self.run(schedule, input:, runtime:, accessors:, registry:)
+            prof = Profiler.enabled?
+            # --- PROFILER: init per run (but not in persistent mode) ---
+            if prof
+              schema_name = runtime[:schema_name] || "UnknownSchema"
+              # In persistent mode, just update schema name without full reset
+              Profiler.set_schema_name(schema_name)
             end
-            index
-          end
-          def self.run(ir_module, ctx, accessors:, registry:)
-            # Validate registry is properly initialized
-            raise ArgumentError, "Registry cannot be nil" if registry.nil?
-            raise ArgumentError, "Registry must be a Hash, got #{registry.class}" unless registry.is_a?(Hash)
-            # --- PROFILER: init per run ---
-            Profiler.reset!(meta: { decls: ir_module.decls&.size || 0 }) if Profiler.enabled?
             outputs = {}
-            target = ctx[:target]
+            target = runtime[:target]
             guard_stack = [true]
-            # Always ensure we have a declaration cache - either from caller or new for this VM run
-            declaration_cache = ctx[:declaration_cache] || {}
-            # Build name index for targeting by stored names
-            name_index = ctx[:name_index] || (target ? build_name_index(ir_module) : nil)
-            # Choose declarations to execute by stored name (not only decl name)
-            decls_to_run =
-              if target
-                # Prefer a decl that STORES the target (covers __vec twins)
-                d = name_index && name_index[target]
-                # Fallback: allow targeting by decl name (legacy behavior)
-                d ||= ir_module.decls.find { |dd| dd.name == target }
-                raise "Unknown target: #{target}" unless d
-                [d]
-              else
-                ir_module.decls
-              end
-            decls_to_run.each do |decl|
+            # Caches live in runtime (engine frame), not input
+            declaration_cache = runtime[:declaration_cache]
+            # Choose declarations to execute - prefer explicit schedule if present
+            # decls_to_run = runtime[:decls_to_run] || ir_module.decls
+            schedule.each do |decl|
               slots = []
               guard_stack = [true] # reset per decl
               decl.ops.each_with_index do |op, op_index|
-                t0 = Profiler.enabled? ? Profiler.t0 : nil
-                cpu_t0 = Profiler.enabled? ? Profiler.cpu_t0 : nil
+                t0 = prof ? Profiler.t0 : nil
+                cpu_t0 = prof ? Profiler.cpu_t0 : nil
                 rows_touched = nil
                 if ENV["ASSERT_VM_SLOTS"] == "1"
                   expected = op_index
@@ -84,7 +61,10 @@ module Kumi
                                    false
                                  end
                   slots << nil # keep slot_id == op_index
-                  Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "enter") if t0
+                  if prof
+                    Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0,
+                                     note: "enter")
+                  end
                   next
                 when :guard_pop
@@ -97,105 +77,55 @@ module Kumi
                 # Skip body when guarded off, but keep indices aligned
                 unless guard_stack.last
                   slots << nil if PRODUCES_SLOT.include?(op.tag) || NON_PRODUCERS.include?(op.tag)
-                  Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "skipped") if t0
+                  if t0
+                    Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0,
+                                     note: "skipped")
+                  end
                   next
                 end
                 case op.tag
-                when :assign
-                  dst = op.attrs[:dst]
-                  src = op.attrs[:src]
-                  raise "assign: dst/src OOB" if dst >= slots.length || src >= slots.length
-                  slots[dst] = slots[src]
-                  Profiler.record!(decl: decl.name, idx: op_index, tag: :assign, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
                 when :const
                   result = Values.scalar(op.attrs[:value])
-                  puts "DEBUG Const #{op.attrs[:value].inspect}: result=#{result}" if ENV["DEBUG_VM_ARGS"]
                   slots << result
                   Profiler.record!(decl: decl.name, idx: op_index, tag: :const, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
                 when :load_input
                   plan_id = op.attrs[:plan_id]
-                  scope = op.attrs[:scope] || []
+                  scope = op.attrs[:scope] || EMPTY_ARY
                   scalar = op.attrs[:is_scalar]
                   indexed = op.attrs[:has_idx]
-                  # NEW: consult runtime accessor cache
-                  acc_cache = ctx[:accessor_cache] || {}
-                  input_obj = ctx[:input] || ctx["input"]
-                  cache_key = [plan_id, input_obj.object_id]
+                  raw = accessors[plan_id].call(input) # <- memoized by ExecutionEngine
-                  if acc_cache.key?(cache_key)
-                    raw = acc_cache[cache_key]
-                    hit = true
-                  else
-                    raw = accessors.fetch(plan_id).call(input_obj)
-                    acc_cache[cache_key] = raw
-                    hit = false
-                  end
-                  puts "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect} cache_hit: #{hit}" if ENV["DEBUG_VM_ARGS"]
                   slots << if scalar
                              Values.scalar(raw)
                            elsif indexed
-                             rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
+                             rows_touched = prof && raw.respond_to?(:size) ? raw.size : raw.count
                              Values.vec(scope, raw.map { |v, idx| { v: v, idx: Array(idx) } }, true)
                            else
-                             rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
+                             rows_touched = prof && raw.respond_to?(:size) ? raw.size : raw.count
                              Values.vec(scope, raw.map { |v| { v: v } }, false)
                            end
                   rows_touched ||= 1
-                  cache_note = hit ? "hit:#{plan_id}" : "miss:#{plan_id}"
-                  Profiler.record!(decl: decl.name, idx: op_index, tag: :load_input, op: op, t0: t0, cpu_t0: cpu_t0,
-                                   rows: rows_touched, note: cache_note) if t0
+                  if t0
+                    Profiler.record!(decl: decl.name, idx: op_index, tag: :load_input, op: op, t0: t0, cpu_t0: cpu_t0,
+                                     rows: rows_touched, note: "ok")
+                  end
                 when :ref
                   name = op.attrs[:name]
-                  if outputs.key?(name)
-                    referenced = outputs[name]
-                  elsif declaration_cache.key?(name)
-                    referenced = declaration_cache[name]
-                  else
-                    # demand-compute the producing decl up to the store of `name`
-                    active = (ctx[:active] ||= {})
-                    raise "cycle detected: #{name}" if active[name]
-                    active[name] = true
-                    subctx = {
-                      input: ctx[:input] || ctx["input"],
-                      target: name,                         # target is the STORED NAME
-                      accessor_cache: ctx[:accessor_cache],
-                      declaration_cache: ctx[:declaration_cache],
-                      name_index: name_index,               # reuse map
-                      active: active
-                    }
-                    referenced = self.run(ir_module, subctx, accessors: accessors, registry: registry).fetch(name)
-                    active.delete(name)
-                  end
-                  if ENV["DEBUG_VM_ARGS"]
-                    puts "DEBUG Ref #{name}: #{referenced[:k] == :scalar ? "scalar(#{referenced[:v].inspect})" : "#{referenced[:k]}(#{referenced[:rows]&.size || 0} rows)"}"
-                  end
-                  slots << referenced
-                  rows_touched = (referenced[:k] == :vec) ? (referenced[:rows]&.size || 0) : 1
-                  Profiler.record!(decl: decl.name, idx: op_index, tag: :ref, op: op, t0: t0, cpu_t0: cpu_t0, rows: rows_touched) if t0
+                  referenced = outputs[name] { raise "unscheduled ref #{name}: producer not executed or dependency analysis failed" }
-                when :array
-                  # Validate slot indices before accessing
-                  op.args.each do |slot_idx|
-                    if slot_idx >= slots.length
-                      raise "Array operation: slot index #{slot_idx} out of bounds (slots.length=#{slots.length})"
-                    elsif slots[slot_idx].nil?
-                      raise "Array operation: slot #{slot_idx} is nil " \
-                            "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
-                    end
+                  slots << referenced
+                  rows_touched = referenced[:k] == :vec ? (referenced[:rows]&.size || 0) : 1
+                  if prof
+                    Profiler.record!(decl: decl.name, idx: op_index, tag: :ref, op: op, t0: t0, cpu_t0: cpu_t0,
+                                     rows: rows_touched, note: hit)
                   end
+                when :array
                   parts = op.args.map { |i| slots[i] }
                   if parts.all? { |p| p[:k] == :scalar }
                     slots << Values.scalar(parts.map { |p| p[:v] })
@@ -219,63 +149,43 @@ module Kumi
                   fn_name = op.attrs[:fn]
                   fn_entry = registry[fn_name] or raise "Function #{fn_name} not found in registry"
                   fn = fn_entry.fn
-                  puts "DEBUG Map #{fn_name}: args=#{op.args.inspect}" if ENV["DEBUG_VM_ARGS"]
                   # Validate slot indices before accessing
-                  op.args.each do |slot_idx|
-                    if slot_idx >= slots.length
-                      raise "Map operation #{fn_name}: slot index #{slot_idx} out of bounds (slots.length=#{slots.length})"
-                    elsif slots[slot_idx].nil?
-                      raise "Map operation #{fn_name}: slot #{slot_idx} is nil " \
-                            "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
-                    end
-                  end
+                  # op.args.each do |slot_idx|
+                  #   if slot_idx >= slots.length
+                  #     raise "Map operation #{fn_name}: slot index #{slot_idx} out of bounds (slots.length=#{slots.length})"
+                  #   elsif slots[slot_idx].nil?
+                  #     raise "Map operation #{fn_name}: slot #{slot_idx} is nil " \
+                  #           "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
+                  #   end
+                  # end
                   args = op.args.map { |slot_idx| slots[slot_idx] }
                   if args.all? { |a| a[:k] == :scalar }
-                    puts "DEBUG Scalar call #{fn_name}: args=#{args.map { |a| a[:v] }.inspect}" if ENV["DEBUG_VM_ARGS"]
                     scalar_args = args.map { |a| a[:v] }
                     result = fn.call(*scalar_args)
                     slots << Values.scalar(result)
                   else
                     base = args.find { |a| a[:k] == :vec } or raise "Map needs a vec carrier"
-                    puts "DEBUG Vec call #{fn_name}: base=#{base.inspect}" if ENV["DEBUG_VM_ARGS"]
                     # Preserve original order: broadcast scalars in-place
                     arg_vecs = args.map { |a| a[:k] == :scalar ? Combinators.broadcast_scalar(a, base) : a }
-                    puts "DEBUG Vec call #{fn_name}: arg_vecs=#{arg_vecs.inspect}" if ENV["DEBUG_VM_ARGS"]
                     scopes = arg_vecs.map { |v| v[:scope] }.uniq
-                    puts "DEBUG Vec call #{fn_name}: scopes=#{scopes.inspect}" if ENV["DEBUG_VM_ARGS"]
                     raise "Cross-scope Map without Join" unless scopes.size <= 1
                     zipped = Combinators.zip_same_scope(*arg_vecs)
-                    # if ENV["DEBUG_VM_ARGS"] && fn_name == :if
-                    #   puts "DEBUG Vec call #{fn_name}: zipped rows:"
-                    #   zipped[:rows].each_with_index do |row, i|
-                    #     puts "  [#{i}] args=#{Array(row[:v]).inspect}"
-                    #   end
-                    # end
-                    puts "DEBUG Vec call #{fn_name}: zipped rows=#{zipped[:rows].inspect}" if ENV["DEBUG_VM_ARGS"]
                     rows = zipped[:rows].map do |row|
                       row_args = Array(row[:v])
                       vr = fn.call(*row_args)
                       row.key?(:idx) ? { v: vr, idx: row[:idx] } : { v: vr }
                     end
-                    puts "DEBUG Vec call #{fn_name}: result rows=#{rows.inspect}" if ENV["DEBUG_VM_ARGS"]
                     slots << Values.vec(base[:scope], rows, base[:has_idx])
                   end
                 when :switch
                   chosen = op.attrs[:cases].find do |(cond_slot, _)|
-                    if cond_slot >= slots.length
-                      raise "Switch operation: condition slot #{cond_slot} out of bounds (slots.length=#{slots.length})"
-                    elsif slots[cond_slot].nil?
-                      raise "Switch operation: condition slot #{cond_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
-                    end
                     c = slots[cond_slot]
                     if c[:k] == :scalar
                       !!c[:v]
@@ -285,22 +195,12 @@ module Kumi
                     end
                   end
                   result_slot = chosen ? chosen[1] : op.attrs[:default]
-                  if result_slot >= slots.length
-                    raise "Switch operation: result slot #{result_slot} out of bounds (slots.length=#{slots.length})"
-                  elsif slots[result_slot].nil?
-                    raise "Switch operation: result slot #{result_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
-                  end
                   slots << slots[result_slot]
                 when :store
                   name = op.attrs[:name]
                   src  = op.args[0] or raise "store: missing source slot"
-                  if src >= slots.length
-                    raise "Store operation '#{name}': source slot #{src} out of bounds (slots.length=#{slots.length})"
-                  elsif slots[src].nil?
-                    raise "Store operation '#{name}': source slot #{src} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
-                  end
                   result = slots[src]
                   outputs[name] = result
@@ -317,10 +217,8 @@ module Kumi
                   fn = fn_entry.fn
                   src = slots[op.args[0]]
-                  raise "Reduce expects Vec" unless src[:k] == :vec
-                  result_scope = Array(op.attrs[:result_scope] || [])
-                  axis         = Array(op.attrs[:axis] || [])
+                  result_scope = op.attrs[:result_scope]
+                  axis         = op.attrs[:axis]
                   if result_scope.empty?
                     # === GLOBAL REDUCE ===
@@ -328,12 +226,6 @@ module Kumi
                     vals = src[:rows].map { |r| r[:v] }
                     slots << Values.scalar(fn.call(vals))
                   else
-                    # === GROUPED REDUCE ===
-                    # Must have indices to group by prefix keys.
-                    unless src[:has_idx]
-                      raise "Grouped reduce requires indexed input (got ravel) for #{op.attrs[:fn]} at #{result_scope.inspect}"
-                    end
                     group_len = result_scope.length
                     # Preserve stable source order so zips with other @result_scope vecs line up.
@@ -356,39 +248,17 @@ module Kumi
                 when :lift
                   src_slot = op.args[0]
-                  if src_slot >= slots.length
-                    raise "Lift operation: source slot #{src_slot} out of bounds (slots.length=#{slots.length})"
-                  elsif slots[src_slot].nil?
-                    raise "Lift operation: source slot #{src_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
-                  end
                   v = slots[src_slot]
-                  to_scope = op.attrs[:to_scope] || []
+                  to_scope = op.attrs[:to_scope] || EMPTY_ARY
                   depth    = [to_scope.length, v[:rank] || v[:rows].first&.dig(:idx)&.length || 0].min
                   slots << Values.scalar(Combinators.group_rows(v[:rows], depth))
                 when :align_to
-                  tgt_slot = op.args[0]
-                  src_slot = op.args[1]
-                  if tgt_slot >= slots.length
-                    raise "AlignTo operation: target slot #{tgt_slot} out of bounds (slots.length=#{slots.length})"
-                  elsif slots[tgt_slot].nil?
-                    raise "AlignTo operation: target slot #{tgt_slot} is nil " \
-                          "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
-                  end
-                  if src_slot >= slots.length
-                    raise "AlignTo operation: source slot #{src_slot} out of bounds (slots.length=#{slots.length})"
-                  elsif slots[src_slot].nil?
-                    raise "AlignTo operation: source slot #{src_slot} is nil " \
-                          "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
-                  end
+                  tgt = slots[op.args[0]]
+                  src = slots[op.args[1]]
-                  tgt = slots[tgt_slot]
-                  src = slots[src_slot]
-                  to_scope = op.attrs[:to_scope] || []
+                  to_scope = op.attrs[:to_scope] || EMPTY_ARY
                   require_unique = op.attrs[:require_unique] || false
                   on_missing = op.attrs[:on_missing] || :error
@@ -397,9 +267,6 @@ module Kumi
                                                            on_missing: on_missing)
                   slots << aligned
-                when :join
-                  raise NotImplementedError, "Join not implemented yet"
                 else
                   raise "Unknown operation: #{op.tag}"
                 end

data/lib/kumi/core/ir/execution_engine/profiler.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 require "json"
 require "fileutils"
 require "time"
+require "set"
 module Kumi
   module Core
@@ -11,19 +12,71 @@ module Kumi
         module Profiler
           class << self
             def enabled? = ENV["KUMI_PROFILE"] == "1"
+            def ops_enabled? = ENV.fetch("KUMI_PROFILE_OPS", "1") == "1"
+            def sample_rate = (ENV["KUMI_PROFILE_SAMPLE"]&.to_i || 1)
+            def persistent? = ENV["KUMI_PROFILE_PERSISTENT"] == "1"
+            def set_schema_name(name)
+              @schema_name = name
+              # Ensure profiler is initialized in persistent mode
+              unless @initialized
+                @events = []
+                @meta = {}
+                @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
+                @run_id ||= 1
+                @op_seq ||= 0
+                @aggregated_stats ||= Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } }
+                # Truncate file if needed
+                if ENV["KUMI_PROFILE_TRUNCATE"] == "1" && !@persistent_initialized
+                  FileUtils.mkdir_p(File.dirname(@file))
+                  File.write(@file, "")
+                  @aggregated_stats.clear
+                  @persistent_initialized = true
+                end
+                @initialized = true
+              end
+            end
             def reset!(meta: {})
+              set_schema_name(meta[:schema_name]) if meta[:schema_name]
               return unless enabled?
-              @events = []
-              @meta   = meta
-              @file   = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
-              @run_id = (@run_id || 0) + 1  # Track run number for averaging
-              @aggregated_stats = (@aggregated_stats || Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } })
-              if ENV["KUMI_PROFILE_TRUNCATE"] == "1"
-                FileUtils.mkdir_p(File.dirname(@file))
-                File.write(@file, "")
-                @aggregated_stats.clear  # Clear aggregated stats on truncate
+              # In persistent mode, don't reset aggregated stats or increment run_id
+              # This allows profiling across multiple schema creations
+              if persistent?
+                @events = []
+                @meta = (@meta || {}).merge(meta)
+                @schema_name = meta[:schema_name] if meta[:schema_name]
+                @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
+                @run_id ||= 1
+                @op_seq ||= 0
+                @aggregated_stats ||= Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } }
+                # Only truncate on very first reset in persistent mode
+                if ENV["KUMI_PROFILE_TRUNCATE"] == "1" && !@persistent_initialized
+                  FileUtils.mkdir_p(File.dirname(@file))
+                  File.write(@file, "")
+                  @aggregated_stats.clear
+                  @persistent_initialized = true
+                end
+              else
+                # Original behavior: full reset each time
+                @events = []
+                @meta   = meta
+                @schema_name = meta[:schema_name]
+                @file   = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
+                @run_id = (@run_id || 0) + 1
+                @op_seq = 0
+                @aggregated_stats = (@aggregated_stats || Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } })
+                if ENV["KUMI_PROFILE_TRUNCATE"] == "1"
+                  FileUtils.mkdir_p(File.dirname(@file))
+                  File.write(@file, "")
+                  @aggregated_stats.clear
+                end
               end
             end
@@ -37,9 +90,54 @@ module Kumi
               Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
             end
-            # Per-op record with both wall time and CPU time
-            def record!(decl:, idx:, tag:, op:, t0:, cpu_t0: nil, rows: nil, note: nil)
+            # Phase timing for coarse-grained operations
+            def phase(name, tags = {})
+              return yield unless enabled?
+              p0 = t0; c0 = cpu_t0
+              result = yield
+              wall_ms = (t0 - p0) * 1000.0
+              cpu_ms = (cpu_t0 - c0) * 1000.0
+              stream({
+                ts: Time.now.utc.iso8601(3),
+                kind: "phase",
+                name: name,
+                wall_ms: wall_ms.round(3),
+                cpu_ms: cpu_ms.round(3),
+                tags: tags,
+                run: @run_id
+              })
+              result
+            end
+            # Memory snapshot with GC statistics
+            def memory_snapshot(label, extra: {})
               return unless enabled?
+              s = GC.stat
+              stream({
+                ts: Time.now.utc.iso8601(3),
+                kind: "mem",
+                label: label,
+                heap_live: s[:heap_live_slots],
+                old_objects: s[:old_objects],
+                minor_gc: s[:minor_gc_count],
+                major_gc: s[:major_gc_count],
+                rss_mb: read_rss_mb,
+                run: @run_id,
+                **extra
+              })
+            end
+            def read_rss_mb
+              ((File.read("/proc/#{$$}/status")[/VmRSS:\s+(\d+)\skB/, 1].to_i) / 1024.0).round(2)
+            rescue
+              nil
+            end
+            # Per-op record with both wall time and CPU time (with sampling support)
+            def record!(decl:, idx:, tag:, op:, t0:, cpu_t0: nil, rows: nil, note: nil)
+              return unless enabled? && ops_enabled?
+              @op_seq += 1
+              return unless sample_rate <= 1 || (@op_seq % sample_rate).zero?
               wall_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000.0)
               cpu_ms = cpu_t0 ? ((Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID) - cpu_t0) * 1000.0) : wall_ms
@@ -47,6 +145,7 @@ module Kumi
               ev = {
                 ts:     Time.now.utc.iso8601(3),
                 run:    @run_id,
+                schema: @schema_name,  # schema identifier for multi-schema differentiation
                 decl:   decl,     # decl name (string/symbol)
                 i:      idx,      # op index
                 tag:    tag,      # op tag (symbol)
@@ -172,6 +271,35 @@ module Kumi
               stream({ ts: Time.now.utc.iso8601(3), kind: "summary", data: summary })
             end
+            def init_persistent!
+              return unless enabled? && persistent?
+              @persistent_initialized = false
+              reset!
+            end
+            def finalize!
+              return unless enabled?
+              # Emit final aggregated summary
+              if @aggregated_stats&.any?
+                stream({
+                  ts: Time.now.utc.iso8601(3),
+                  kind: "final_summary",
+                  data: averaged_analysis
+                })
+              end
+              # Emit cache analysis if available
+              cache_analysis = cache_overhead_analysis
+              if cache_analysis[:cache_operations]&.any?
+                stream({
+                  ts: Time.now.utc.iso8601(3),
+                  kind: "cache_analysis",
+                  data: cache_analysis
+                })
+              end
+            end
             # Stable textual key for "match ops one by one"
             def op_key(decl, idx, tag, op)
               attrs = compact_attrs(op.attrs)

data/lib/kumi/core/ir/execution_engine/values.rb CHANGED Viewed

@@ -13,14 +13,14 @@ module Kumi
           # Create a vector with scope and rows
           def self.vec(scope, rows, has_idx)
-            if has_idx
-              rank = rows.empty? ? 0 : rows.first[:idx].length
-              # TODO: > Make sure this is not costly
-              # raise if rows.any? { |r| r[:idx].length != rank }
-              rows = rows.sort_by { |r| r[:idx] } # one-time sort
-            else
-              rank = 0
-            end
+            rank = if has_idx
+                     rows.empty? ? 0 : rows.first[:idx].length
+                   # TODO: > Make sure this is not costly
+                   # raise if rows.any? { |r| r[:idx].length != rank }
+                   # rows = rows.sort_by { |r| r[:idx] } # one-time sort
+                   else
+                     0
+                   end
             { k: :vec, scope: scope, rows: rows, has_idx: has_idx, rank: rank }
           end

data/lib/kumi/core/ir/execution_engine.rb CHANGED Viewed

@@ -41,37 +41,12 @@ module Kumi
       # - DEBUG_VM_ARGS=1 prints per-op execution and arguments.
       # - DEBUG_GROUP_ROWS=1 prints grouping decisions during Lift.
       module ExecutionEngine
-        def self.run(ir_module, ctx, accessors:, registry:)
-          # Use persistent accessor cache if available, otherwise create temporary one
-          if ctx[:accessor_cache]
-            # Include input data in cache key to avoid cross-context pollution
-            input_key = ctx[:input]&.hash || ctx["input"]&.hash || 0
-            memoized_accessors = add_persistent_memoization(accessors, ctx[:accessor_cache], input_key)
-          else
-            memoized_accessors = add_temporary_memoization(accessors)
-          end
-          Interpreter.run(ir_module, ctx, accessors: memoized_accessors, registry: registry)
-        end
-        private
+        def self.run(schedule, input:, accessors:, registry:, runtime: {})
+          runtime[:accessor_cache] ||= {}
-        def self.add_persistent_memoization(accessors, cache, input_key)
-          accessors.map do |plan_id, accessor_fn|
-            [plan_id, lambda do |input_data|
-              cache_key = [plan_id, input_key]
-              cache[cache_key] ||= accessor_fn.call(input_data)
-            end]
-          end.to_h
-        end
-        def self.add_temporary_memoization(accessors)
-          cache = {}
-          accessors.map do |plan_id, accessor_fn|
-            [plan_id, lambda do |input_data|
-              cache[plan_id] ||= accessor_fn.call(input_data)
-            end]
-          end.to_h
+          Dev::Profiler.phase("engine.interpreter") do
+            Interpreter.run(schedule, input: input, runtime: runtime, accessors: accessors, registry: registry)
+          end
         end
       end
     end