kumi 0.0.12 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/BACKLOG.md +34 -0
  4. data/CHANGELOG.md +15 -0
  5. data/CLAUDE.md +4 -6
  6. data/README.md +0 -18
  7. data/config/functions.yaml +352 -0
  8. data/docs/dev/analyzer-debug.md +52 -0
  9. data/docs/dev/parse-command.md +64 -0
  10. data/docs/functions/analyzer_integration.md +199 -0
  11. data/docs/functions/signatures.md +171 -0
  12. data/examples/hash_objects_demo.rb +138 -0
  13. data/golden/array_operations/schema.kumi +17 -0
  14. data/golden/cascade_logic/schema.kumi +16 -0
  15. data/golden/mixed_nesting/schema.kumi +42 -0
  16. data/golden/simple_math/schema.kumi +10 -0
  17. data/lib/kumi/analyzer.rb +72 -21
  18. data/lib/kumi/core/analyzer/checkpoint.rb +72 -0
  19. data/lib/kumi/core/analyzer/debug.rb +167 -0
  20. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +1 -3
  21. data/lib/kumi/core/analyzer/passes/function_signature_pass.rb +199 -0
  22. data/lib/kumi/core/analyzer/passes/load_input_cse.rb +120 -0
  23. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +99 -151
  24. data/lib/kumi/core/analyzer/passes/toposorter.rb +37 -1
  25. data/lib/kumi/core/analyzer/state_serde.rb +64 -0
  26. data/lib/kumi/core/analyzer/structs/access_plan.rb +12 -10
  27. data/lib/kumi/core/compiler/access_planner.rb +3 -2
  28. data/lib/kumi/core/function_registry/collection_functions.rb +3 -1
  29. data/lib/kumi/core/functions/dimension.rb +98 -0
  30. data/lib/kumi/core/functions/dtypes.rb +20 -0
  31. data/lib/kumi/core/functions/errors.rb +11 -0
  32. data/lib/kumi/core/functions/kernel_adapter.rb +45 -0
  33. data/lib/kumi/core/functions/loader.rb +119 -0
  34. data/lib/kumi/core/functions/registry_v2.rb +68 -0
  35. data/lib/kumi/core/functions/shape.rb +70 -0
  36. data/lib/kumi/core/functions/signature.rb +122 -0
  37. data/lib/kumi/core/functions/signature_parser.rb +86 -0
  38. data/lib/kumi/core/functions/signature_resolver.rb +272 -0
  39. data/lib/kumi/core/ir/execution_engine/interpreter.rb +98 -7
  40. data/lib/kumi/core/ir/execution_engine/profiler.rb +202 -0
  41. data/lib/kumi/core/ir/execution_engine.rb +30 -1
  42. data/lib/kumi/dev/ir.rb +75 -0
  43. data/lib/kumi/dev/parse.rb +105 -0
  44. data/lib/kumi/dev/runner.rb +83 -0
  45. data/lib/kumi/frontends/ruby.rb +28 -0
  46. data/lib/kumi/frontends/text.rb +46 -0
  47. data/lib/kumi/frontends.rb +29 -0
  48. data/lib/kumi/kernels/ruby/aggregate_core.rb +105 -0
  49. data/lib/kumi/kernels/ruby/datetime_scalar.rb +21 -0
  50. data/lib/kumi/kernels/ruby/mask_scalar.rb +15 -0
  51. data/lib/kumi/kernels/ruby/scalar_core.rb +63 -0
  52. data/lib/kumi/kernels/ruby/string_scalar.rb +19 -0
  53. data/lib/kumi/kernels/ruby/vector_struct.rb +39 -0
  54. data/lib/kumi/runtime/executable.rb +63 -20
  55. data/lib/kumi/schema.rb +4 -4
  56. data/lib/kumi/support/diff.rb +22 -0
  57. data/lib/kumi/support/ir_render.rb +61 -0
  58. data/lib/kumi/version.rb +1 -1
  59. data/lib/kumi.rb +2 -0
  60. data/performance_results.txt +63 -0
  61. data/scripts/test_mixed_nesting_performance.rb +206 -0
  62. metadata +45 -5
  63. data/docs/features/javascript-transpiler.md +0 -148
  64. data/lib/kumi/js.rb +0 -23
  65. data/lib/kumi/support/ir_dump.rb +0 -491
@@ -0,0 +1,272 @@
1
# frozen_string_literal: true

require_relative "errors"
require_relative "shape"
require_relative "signature"

module Kumi
  module Core
    module Functions
      # Given a set of signatures and actual argument shapes, pick the best match.
      # Supports NEP 20 extensions: fixed-size, flexible, and broadcastable dimensions.
      #
      # Inputs:
      #   signatures : Array<Signature> (with Dimension objects)
      #   arg_shapes : Array<Array<Symbol|Integer>> e.g., [[:i], [:i]] or [[], [3]] or [[2, :i]]
      #
      # Returns:
      #   { signature:, result_axes:, join_policy:, dropped_axes:, effective_signature: }
      #
      # NEP 20 Matching rules:
      # - Arity must match exactly (before flexible dimension resolution).
      # - Fixed-size dimensions (integers) must match exactly.
      # - Flexible dimensions (?) can be omitted if not present in all operands.
      # - Broadcastable dimensions (|1) can match scalar or size-1 dimensions.
      # - For each param position, shapes are checked according to NEP 20 rules.
      # - We prefer exact matches, then flexible matches, then broadcast matches.
      #
      # NOTE(review): this class assumes Dimension responds to #name, #size,
      # #flexible?, #named?, #fixed_size?, #broadcastable? and defines value
      # equality (==) — confirm against dimension.rb, which is not in view here.
      class SignatureResolver
        class << self
          # Pick the cheapest matching signature for the given argument shapes.
          #
          # @param signatures [Array<Signature>] candidate signatures
          # @param arg_shapes [Array<Array<Symbol, Integer>>] one shape per argument;
          #   [] denotes a scalar argument. nil is treated as no arguments.
          # @return [Hash] the winning candidate, including :signature, :score,
          #   :result_axes, :join_policy, :dropped_axes, :env and
          #   :effective_signature (name-only view for analyzer/lowering).
          # @raise [SignatureMatchError] when arg_shapes is malformed or no
          #   signature matches.
          def choose(signatures:, arg_shapes:)
            # Handle empty arg_shapes for zero-arity functions
            arg_shapes = [] if arg_shapes.nil?
            sanity_check_args!(arg_shapes)

            candidates = signatures.map do |sig|
              score = match_score(sig, arg_shapes)
              next if score.nil?

              # Convert arg_shapes to normalized Dimension arrays for environment building
              # (re-normalized here and inside match_score; redundant but cheap)
              normalized_args = arg_shapes.map { |shape| normalize_shape(shape) }
              env = build_dimension_environment(sig, normalized_args)
              next if env.nil? # Skip candidates with dimension conflicts

              {
                signature: sig,
                score: score,
                result_axes: sig.out_shape.map(&:name), # Convert Dimension objects to names for backward compatibility
                join_policy: sig.join_policy,
                dropped_axes: sig.dropped_axes.map { |name| name.is_a?(Symbol) ? name : name.to_sym }, # Convert to symbols
                env: env
              }
            end.compact

            raise SignatureMatchError, mismatch_message(signatures, arg_shapes) if candidates.empty?

            # Lower score is better: 0 = exact-everywhere, then number of broadcasts.
            # Ties are broken by signature order (min_by keeps the first minimum).
            best = candidates.min_by { |c| c[:score] }

            # Add effective signature and environment for analyzer/lowering
            best[:effective_signature] = {
              in_shapes: best[:signature].in_shapes.map { |dims| dims.map(&:name) },
              out_shape: best[:signature].out_shape.map(&:name),
              join_policy: best[:signature].join_policy
            }
            # env is already included from candidate building

            best
          end

          private

          # Validate the arg_shapes structure: an array of arrays whose elements
          # are only Symbols (named dims) or Integers (fixed sizes).
          def sanity_check_args!(arg_shapes)
            unless arg_shapes.is_a?(Array) &&
                   arg_shapes.all? { |s| s.is_a?(Array) && s.all? { |a| a.is_a?(Symbol) || a.is_a?(Integer) } }
              raise SignatureMatchError, "arg_shapes must be an array of dimension arrays (symbols or integers), got: #{arg_shapes.inspect}"
            end
          end

          # Returns an integer "broadcast cost" or nil if not matchable.
          # Lower score = better match: 0 = exact, then increasing cost for broadcasts/flexibility
          def match_score(sig, arg_shapes)
            return nil unless sig.arity == arg_shapes.length

            # Convert arg_shapes to normalized Dimension arrays for comparison
            normalized_args = arg_shapes.map { |shape| normalize_shape(shape) }

            # Try to match each argument against its expected signature shape
            cost = 0
            sig.in_shapes.each_with_index do |expected_dims, idx|
              got_dims = normalized_args[idx]
              arg_cost = match_argument_cost(got: got_dims, expected: expected_dims)
              return nil if arg_cost.nil?

              cost += arg_cost
            end

            # Additional checks for join_policy constraints
            return nil unless valid_join_policy?(sig, normalized_args)

            cost
          end

          # (redundant: everything below the `private` marker above is already
          # private; kept as-is since removing it would alter the code)
          private

          # Convert a shape array (symbols/integers) to normalized Dimension array.
          # Both branches delegate to Dimension.new; the case distinguishes valid
          # element types from everything else.
          def normalize_shape(shape)
            shape.map do |dim|
              case dim
              when Symbol
                Dimension.new(dim)
              when Integer
                Dimension.new(dim)
              else
                raise SignatureMatchError, "Invalid dimension type: #{dim.class}"
              end
            end
          end

          # Calculate cost of matching one argument against expected dimensions.
          # Returns nil when the argument cannot match at all.
          def match_argument_cost(got:, expected:)
            # Handle scalar first
            if got.empty?
              return expected.empty? ? 0 : (expected.any?(&:flexible?) ? 10 : 1) # scalar broadcast or flexible-tail
            end

            # Try strict matching first if no flexible dimensions
            if !expected.any?(&:flexible?) && got.length == expected.length
              total = 0
              got.zip(expected).each do |g, e|
                c = match_dimension_cost(got: g, expected: e)
                return nil if c.nil?

                total += c
              end
              return total
            end

            # Use right-aligned flexible matching
            right_align_match(got: got, expected: expected)
          end

          # Right-aligned matching for flexible dimensions (NEP 20 ? modifier).
          # Walks both dim lists from the right; flexible expected dims may be
          # dropped (cost 10 each). Leftover got dims mean no match.
          def right_align_match(got:, expected:)
            gi = got.length - 1
            ei = expected.length - 1
            cost = 0

            while ei >= 0
              exp = expected[ei]

              if exp.flexible? && gi < 0
                # optional tail dimension that we don't have → ok, consume expected only
                ei -= 1
                cost += 10
                next
              end

              return nil if gi < 0 # ran out of got dims and exp wasn't flexible

              got_dim = got[gi]
              dim_cost = match_dimension_cost(got: got_dim, expected: exp)
              if dim_cost.nil?
                # if exp is flexible, we can try to drop it
                if exp.flexible?
                  ei -= 1
                  cost += 10
                  next
                else
                  return nil
                end
              else
                cost += dim_cost
                gi -= 1
                ei -= 1
              end
            end

            # if we still have leftover got dims, argument is longer than expected → not a match
            return nil if gi >= 0

            cost
          end

          # Calculate cost of matching one dimension against another.
          # Cost scale: 0 exact / name match, 2 fixed-vs-named same name,
          # 3 broadcast, nil no match.
          def match_dimension_cost(got:, expected:)
            return 0 if got == expected # Exact match

            # Fixed-size equality
            if got.fixed_size? && expected.fixed_size?
              return got.size == expected.size ? 0 : nil
            end

            # Same symbolic name (ignoring modifiers) → ok unless one is fixed and the other isn't (penalize)
            if got.named? && expected.named? && got.name == expected.name
              return (got.fixed_size? || expected.fixed_size?) ? 2 : 0
            end

            # Broadcastable expected dim accepts scalar or size-1
            if expected.broadcastable?
              # scalar at argument level would have been handled in match_argument_cost
              # so here we check for size-1 fixed dimensions
              return 3 if got.fixed_size? && got.size == 1
              # Named dimensions that could be size-1 at runtime also get broadcast cost
              return 3 if got.named?
            end

            nil # No match possible
          end

          # Check if join_policy constraints are satisfied.
          # A non-nil policy (:zip/:product) accepts any axes; a nil policy
          # requires identical axis names OR a consistent dimension environment.
          def valid_join_policy?(sig, normalized_args)
            return true if sig.join_policy # :zip or :product allows different axes

            # nil join_policy: check if dimension names are consistent
            non_scalar_args = normalized_args.reject { |a| Shape.scalar?(a) }
            return true if non_scalar_args.empty?

            # For nil join_policy, we allow different dimension names if:
            # 1. All args have same dimension names (element-wise operations), OR
            # 2. The constraint solver can validate cross-dimensional consistency (like matmul)
            first_names = non_scalar_args.first.map(&:name)
            same_names = non_scalar_args.all? { |arg| arg.map(&:name) == first_names }

            return true if same_names

            # If dimension names differ, check if constraint solver can handle it
            # This allows operations like matmul where dimensions are linked across arguments
            env = build_dimension_environment(sig, normalized_args)
            !env.nil?
          end

          # Human-readable error listing the tried signatures and given shapes.
          def mismatch_message(signatures, arg_shapes)
            sigs = signatures.map(&:inspect).join(", ")
            "no matching signature for shapes #{pp_shapes(arg_shapes)} among [#{sigs}]"
          end

          # Format shapes as "(i,j), (k)" for error messages.
          def pp_shapes(shapes)
            shapes.map { |ax| "(#{ax.join(',')})" }.join(", ")
          end

          # Build dimension environment by checking consistency of named dimensions across arguments.
          # Maps each named expected dimension to the concrete got dimension bound
          # at the same (left-aligned) position; returns nil on conflicting bindings.
          # NOTE(review): positions are compared left-aligned here, while
          # right_align_match aligns from the right — confirm this asymmetry is
          # intended for flexible signatures.
          def build_dimension_environment(sig, normalized_args)
            env = {}

            # Walk all expected dimensions across all arguments
            sig.in_shapes.each_with_index do |expected_shape, arg_idx|
              got_shape = normalized_args[arg_idx] || []

              expected_shape.each_with_index do |exp_dim, dim_idx|
                next unless exp_dim.named? && dim_idx < got_shape.length

                got_dim = got_shape[dim_idx]
                dim_name = exp_dim.name

                # Check for consistency: same dimension name must map to same concrete value
                if env.key?(dim_name)
                  # If we've seen this dimension name before, it must match
                  if env[dim_name] != got_dim
                    return nil # Inconsistent binding - signature doesn't match
                  end
                else
                  # First time seeing this dimension name - record the binding
                  env[dim_name] = got_dim
                end
              end
            end

            env
          end
        end
      end
    end
  end
end
@@ -9,20 +9,57 @@ module Kumi
9
9
  PRODUCES_SLOT = %i[const load_input ref array map reduce lift align_to switch].freeze
10
10
  NON_PRODUCERS = %i[guard_push guard_pop assign store].freeze
11
11
 
12
# Map each STORED name to the declaration that stores it, so a target can be
# resolved by stored name (covers "__vec twin" decls) rather than decl name.
# Later declarations storing the same name overwrite earlier ones.
def self.build_name_index(ir_module)
  ir_module.decls.each_with_object({}) do |decl, index|
    decl.ops.each do |op|
      next unless op.tag == :store

      stored_name = op.attrs[:name]
      index[stored_name] = decl if stored_name
    end
  end
end
23
+
12
24
  def self.run(ir_module, ctx, accessors:, registry:)
13
25
  # Validate registry is properly initialized
14
26
  raise ArgumentError, "Registry cannot be nil" if registry.nil?
15
27
  raise ArgumentError, "Registry must be a Hash, got #{registry.class}" unless registry.is_a?(Hash)
16
28
 
29
+ # --- PROFILER: init per run ---
30
+ Profiler.reset!(meta: { decls: ir_module.decls&.size || 0 }) if Profiler.enabled?
31
+
17
32
  outputs = {}
18
33
  target = ctx[:target]
19
34
  guard_stack = [true]
35
+
36
+ # Always ensure we have a declaration cache - either from caller or new for this VM run
37
+ declaration_cache = ctx[:declaration_cache] || {}
38
+
39
+ # Build name index for targeting by stored names
40
+ name_index = ctx[:name_index] || (target ? build_name_index(ir_module) : nil)
41
+
42
+ # Choose declarations to execute by stored name (not only decl name)
43
+ decls_to_run =
44
+ if target
45
+ # Prefer a decl that STORES the target (covers __vec twins)
46
+ d = name_index && name_index[target]
47
+ # Fallback: allow targeting by decl name (legacy behavior)
48
+ d ||= ir_module.decls.find { |dd| dd.name == target }
49
+ raise "Unknown target: #{target}" unless d
50
+ [d]
51
+ else
52
+ ir_module.decls
53
+ end
20
54
 
21
- ir_module.decls.each do |decl|
55
+ decls_to_run.each do |decl|
22
56
  slots = []
23
57
  guard_stack = [true] # reset per decl
24
58
 
25
59
  decl.ops.each_with_index do |op, op_index|
60
+ t0 = Profiler.enabled? ? Profiler.t0 : nil
61
+ cpu_t0 = Profiler.enabled? ? Profiler.cpu_t0 : nil
62
+ rows_touched = nil
26
63
  if ENV["ASSERT_VM_SLOTS"] == "1"
27
64
  expected = op_index
28
65
  unless slots.length == expected
@@ -47,17 +84,20 @@ module Kumi
47
84
  false
48
85
  end
49
86
  slots << nil # keep slot_id == op_index
87
+ Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "enter") if t0
50
88
  next
51
89
 
52
90
  when :guard_pop
53
91
  guard_stack.pop
54
92
  slots << nil
93
+ Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "exit") if t0
55
94
  next
56
95
  end
57
96
 
58
97
  # Skip body when guarded off, but keep indices aligned
59
98
  unless guard_stack.last
60
99
  slots << nil if PRODUCES_SLOT.include?(op.tag) || NON_PRODUCERS.include?(op.tag)
100
+ Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "skipped") if t0
61
101
  next
62
102
  end
63
103
 
@@ -69,35 +109,81 @@ module Kumi
69
109
  raise "assign: dst/src OOB" if dst >= slots.length || src >= slots.length
70
110
 
71
111
  slots[dst] = slots[src]
112
+ Profiler.record!(decl: decl.name, idx: op_index, tag: :assign, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
72
113
 
73
114
  when :const
74
115
  result = Values.scalar(op.attrs[:value])
75
116
  puts "DEBUG Const #{op.attrs[:value].inspect}: result=#{result}" if ENV["DEBUG_VM_ARGS"]
76
117
  slots << result
118
+ Profiler.record!(decl: decl.name, idx: op_index, tag: :const, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
77
119
 
78
120
  when :load_input
79
121
  plan_id = op.attrs[:plan_id]
80
122
  scope = op.attrs[:scope] || []
81
123
  scalar = op.attrs[:is_scalar]
82
124
  indexed = op.attrs[:has_idx]
83
- raw = accessors.fetch(plan_id).call(ctx[:input] || ctx["input"])
84
125
 
85
- puts "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect}" if ENV["DEBUG_VM_ARGS"]
126
+ # NEW: consult runtime accessor cache
127
+ acc_cache = ctx[:accessor_cache] || {}
128
+ input_obj = ctx[:input] || ctx["input"]
129
+ cache_key = [plan_id, input_obj.object_id]
130
+
131
+ if acc_cache.key?(cache_key)
132
+ raw = acc_cache[cache_key]
133
+ hit = true
134
+ else
135
+ raw = accessors.fetch(plan_id).call(input_obj)
136
+ acc_cache[cache_key] = raw
137
+ hit = false
138
+ end
139
+
140
+ puts "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect} cache_hit: #{hit}" if ENV["DEBUG_VM_ARGS"]
86
141
  slots << if scalar
87
142
  Values.scalar(raw)
88
143
  elsif indexed
144
+ rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
89
145
  Values.vec(scope, raw.map { |v, idx| { v: v, idx: Array(idx) } }, true)
90
146
  else
147
+ rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
91
148
  Values.vec(scope, raw.map { |v| { v: v } }, false)
92
149
  end
150
+ rows_touched ||= 1
151
+ cache_note = hit ? "hit:#{plan_id}" : "miss:#{plan_id}"
152
+ Profiler.record!(decl: decl.name, idx: op_index, tag: :load_input, op: op, t0: t0, cpu_t0: cpu_t0,
153
+ rows: rows_touched, note: cache_note) if t0
93
154
 
94
155
  when :ref
95
156
  name = op.attrs[:name]
96
- referenced_value = outputs.fetch(name) { raise "Missing output for reference: #{name}" }
157
+
158
+ if outputs.key?(name)
159
+ referenced = outputs[name]
160
+ elsif declaration_cache.key?(name)
161
+ referenced = declaration_cache[name]
162
+ else
163
+ # demand-compute the producing decl up to the store of `name`
164
+ active = (ctx[:active] ||= {})
165
+ raise "cycle detected: #{name}" if active[name]
166
+ active[name] = true
167
+
168
+ subctx = {
169
+ input: ctx[:input] || ctx["input"],
170
+ target: name, # target is the STORED NAME
171
+ accessor_cache: ctx[:accessor_cache],
172
+ declaration_cache: ctx[:declaration_cache],
173
+ name_index: name_index, # reuse map
174
+ active: active
175
+ }
176
+ referenced = self.run(ir_module, subctx, accessors: accessors, registry: registry).fetch(name)
177
+ active.delete(name)
178
+ end
179
+
97
180
  if ENV["DEBUG_VM_ARGS"]
98
- puts "DEBUG Ref #{name}: #{referenced_value[:k] == :scalar ? "scalar(#{referenced_value[:v].inspect})" : "#{referenced_value[:k]}(#{referenced_value[:rows]&.size || 0} rows)"}"
181
+ puts "DEBUG Ref #{name}: #{referenced[:k] == :scalar ? "scalar(#{referenced[:v].inspect})" : "#{referenced[:k]}(#{referenced[:rows]&.size || 0} rows)"}"
99
182
  end
100
- slots << referenced_value
183
+
184
+ slots << referenced
185
+ rows_touched = (referenced[:k] == :vec) ? (referenced[:rows]&.size || 0) : 1
186
+ Profiler.record!(decl: decl.name, idx: op_index, tag: :ref, op: op, t0: t0, cpu_t0: cpu_t0, rows: rows_touched) if t0
101
187
 
102
188
  when :array
103
189
  # Validate slot indices before accessing
@@ -216,7 +302,10 @@ module Kumi
216
302
  raise "Store operation '#{name}': source slot #{src} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
217
303
  end
218
304
 
219
- outputs[name] = slots[src]
305
+ result = slots[src]
306
+ outputs[name] = result
307
+ # Also store in declaration cache for future ref operations
308
+ declaration_cache[name] = result
220
309
 
221
310
  # keep slot_id == op_index invariant
222
311
  slots << nil
@@ -327,6 +416,8 @@ module Kumi
327
416
  end
328
417
  end
329
418
 
419
+ # --- end-of-run summary ---
420
+ Profiler.emit_summary! if Profiler.enabled?
330
421
  outputs
331
422
  end
332
423
  end
@@ -0,0 +1,202 @@
1
# frozen_string_literal: true

require "json"
require "fileutils"
require "set" # FIX: Set is used below; not autoloaded before Ruby 3.2
require "time"

module Kumi
  module Core
    module IR
      module ExecutionEngine
        # Opt-in per-op profiler for the IR interpreter.
        #
        # Enabled when ENV["KUMI_PROFILE"] == "1". Each interpreter op records a
        # wall/CPU timing event; events are also aggregated per (decl, idx, tag)
        # across runs so repeated executions can be averaged. Events can be
        # streamed as JSON lines to ENV["KUMI_PROFILE_FILE"] (default
        # "tmp/profile.jsonl") when ENV["KUMI_PROFILE_STREAM"] == "1".
        module Profiler
          class << self
            def enabled? = ENV["KUMI_PROFILE"] == "1"

            # Re-initialize per-run state. Aggregated stats survive across runs
            # (for multi-run averaging) unless KUMI_PROFILE_TRUNCATE=1, which
            # also truncates the output file.
            def reset!(meta: {})
              return unless enabled?

              @events = []
              @meta = meta
              @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
              @run_id = (@run_id || 0) + 1 # Track run number for averaging
              @aggregated_stats = (@aggregated_stats || Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } })

              if ENV["KUMI_PROFILE_TRUNCATE"] == "1"
                FileUtils.mkdir_p(File.dirname(@file))
                File.write(@file, "")
                @aggregated_stats.clear # Clear aggregated stats on truncate
              end
            end

            # Monotonic start time (seconds) — for measuring durations.
            def t0
              Process.clock_gettime(Process::CLOCK_MONOTONIC)
            end

            # CPU time start (process-wide, seconds).
            def cpu_t0
              Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
            end

            # Record one op execution with both wall time and CPU time.
            #
            # @param decl   declaration name (string/symbol)
            # @param idx    op index within the declaration
            # @param tag    op tag (symbol)
            # @param op     the op object (must respond to #attrs and #args)
            # @param t0     monotonic start from #t0
            # @param cpu_t0 CPU start from #cpu_t0 (falls back to wall time if nil)
            # @param rows   number of rows touched, if known
            # @param note   free-form annotation (e.g. cache "hit:"/"miss:")
            # @return [Hash, nil] the recorded event, or nil when disabled
            def record!(decl:, idx:, tag:, op:, t0:, cpu_t0: nil, rows: nil, note: nil)
              return unless enabled?

              wall_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000.0)
              cpu_ms = cpu_t0 ? ((Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID) - cpu_t0) * 1000.0) : wall_ms

              ev = {
                ts: Time.now.utc.iso8601(3),
                run: @run_id,
                decl: decl, # decl name (string/symbol)
                i: idx, # op index
                tag: tag, # op tag (symbol)
                wall_ms: wall_ms.round(4),
                cpu_ms: cpu_ms.round(4),
                rows: rows,
                note: note,
                key: op_key(decl, idx, tag, op), # stable key for grep/diff
                attrs: compact_attrs(op.attrs)
              }

              # Aggregate stats for multi-run averaging.
              # FIX: renamed local (was `op_key`) so it no longer shadows the
              # #op_key method.
              agg_key = "#{decl}@#{idx}:#{tag}"
              agg = @aggregated_stats[agg_key]
              agg[:count] += 1
              agg[:total_ms] += wall_ms
              agg[:total_cpu_ms] += cpu_ms
              agg[:rows] += (rows || 0)
              agg[:runs] << @run_id
              agg[:decl] = decl
              agg[:tag] = tag
              agg[:idx] = idx
              agg[:note] = note if note

              (@events ||= []) << ev
              stream(ev) if ENV["KUMI_PROFILE_STREAM"] == "1"
              ev
            end

            # Current-run summary: top ops by wall time, grouped by (decl, tag).
            def summary(top: 20)
              return {} unless enabled?

              # Current run summary (legacy format)
              current_agg = Hash.new { |h, k| h[k] = { count: 0, ms: 0.0, rows: 0 } }
              (@events || []).each do |e|
                k = [e[:decl], e[:tag]]
                a = current_agg[k]
                a[:count] += 1
                a[:ms] += (e[:wall_ms] || e[:ms] || 0)
                a[:rows] += (e[:rows] || 0)
              end
              current_ranked = current_agg.map { |(decl, tag), v|
                # FIX: also require ms > 0 — rows/0.0 yields Infinity and
                # Infinity.round raises FloatDomainError (and breaks to_json).
                { decl: decl, tag: tag, count: v[:count], ms: v[:ms].round(3), rows: v[:rows],
                  rps: (v[:rows] > 0 && v[:ms] > 0) ? (v[:rows] / v[:ms]).round(1) : nil }
              }.sort_by { |h| -h[:ms] }.first(top)

              { meta: @meta || {}, top: current_ranked,
                total_ms: ((@events || []).sum { |e| e[:wall_ms] || e[:ms] || 0 }).round(3),
                op_count: (@events || []).size,
                run_id: @run_id }
            end

            # Multi-run averaged analysis over the cross-run aggregated stats.
            def averaged_analysis(top: 20)
              return {} unless enabled? && @aggregated_stats&.any?

              # Convert aggregated stats to averaged metrics
              averaged = @aggregated_stats.map do |op_key, stats|
                num_runs = stats[:runs].size
                avg_wall_ms = stats[:total_ms] / stats[:count]
                avg_cpu_ms = stats[:total_cpu_ms] / stats[:count]
                total_wall_ms = stats[:total_ms]
                total_cpu_ms = stats[:total_cpu_ms]

                {
                  op_key: op_key,
                  decl: stats[:decl],
                  idx: stats[:idx],
                  tag: stats[:tag],
                  runs: num_runs,
                  total_calls: stats[:count],
                  calls_per_run: stats[:count] / num_runs.to_f,
                  avg_wall_ms: avg_wall_ms.round(4),
                  avg_cpu_ms: avg_cpu_ms.round(4),
                  total_wall_ms: total_wall_ms.round(3),
                  total_cpu_ms: total_cpu_ms.round(3),
                  cpu_efficiency: total_wall_ms > 0 ? (total_cpu_ms / total_wall_ms * 100).round(1) : 100,
                  rows_total: stats[:rows],
                  note: stats[:note]
                }
              end.sort_by { |s| -s[:total_wall_ms] }.first(top)

              {
                meta: @meta || {},
                total_runs: (@aggregated_stats.values.map { |s| s[:runs].size }.max || 0),
                averaged_ops: averaged,
                total_operations: @aggregated_stats.size
              }
            end

            # Identify potential cache overhead operations (refs, load_inputs,
            # and anything noted as cache-related).
            def cache_overhead_analysis
              return {} unless enabled? && @aggregated_stats&.any?

              # Look for operations that might be cache-related
              cache_ops = @aggregated_stats.select do |op_key, stats|
                op_key.include?("ref") || op_key.include?("load_input") || stats[:note]&.include?("cache")
              end

              cache_analysis = cache_ops.map do |op_key, stats|
                num_runs = stats[:runs].size
                avg_wall_ms = stats[:total_ms] / stats[:count]

                {
                  op_key: op_key,
                  decl: stats[:decl],
                  tag: stats[:tag],
                  avg_time_ms: avg_wall_ms.round(4),
                  total_time_ms: stats[:total_ms].round(3),
                  call_count: stats[:count],
                  overhead_per_call: avg_wall_ms.round(6)
                }
              end.sort_by { |s| -s[:total_time_ms] }

              {
                cache_operations: cache_analysis,
                total_cache_time: cache_analysis.sum { |op| op[:total_time_ms] }.round(3)
              }
            end

            # Append the end-of-run summary to the output file as one JSON line.
            def emit_summary!
              return unless enabled?

              stream({ ts: Time.now.utc.iso8601(3), kind: "summary", data: summary })
            end

            # Stable textual key for "match ops one by one" across runs/diffs.
            def op_key(decl, idx, tag, op)
              attrs = compact_attrs(op.attrs)
              args = op.args
              "#{decl}@#{idx}:#{tag}|#{attrs.keys.sort_by(&:to_s).map { |k| "#{k}=#{attrs[k].inspect}" }.join(",")}|args=#{args.inspect}"
            end

            # Reduce attr values to JSON-friendly primitives (stringify the rest).
            def compact_attrs(h)
              return {} unless h

              h.transform_values do |v|
                case v
                when Array, Hash, Symbol, String, Numeric, TrueClass, FalseClass, NilClass then v
                else v.to_s
                end
              end
            end

            # Append one object as a JSON line to @file (created on demand).
            def stream(obj)
              return unless @file

              FileUtils.mkdir_p(File.dirname(@file))
              File.open(@file, "a") { |f| f.puts(obj.to_json) }
            end
          end
        end
      end
    end
  end
end