kumi 0.0.14 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -0
- data/README.md +0 -27
- data/docs/dev/vm-profiling.md +95 -0
- data/docs/features/README.md +0 -7
- data/lib/kumi/analyzer.rb +10 -2
- data/lib/kumi/compiler.rb +6 -5
- data/lib/kumi/core/analyzer/passes/ir_dependency_pass.rb +65 -0
- data/lib/kumi/core/analyzer/passes/ir_execution_schedule_pass.rb +67 -0
- data/lib/kumi/core/analyzer/passes/toposorter.rb +15 -50
- data/lib/kumi/core/compiler/access_builder.rb +22 -9
- data/lib/kumi/core/compiler/access_codegen.rb +61 -0
- data/lib/kumi/core/compiler/access_emit/base.rb +173 -0
- data/lib/kumi/core/compiler/access_emit/each_indexed.rb +56 -0
- data/lib/kumi/core/compiler/access_emit/materialize.rb +45 -0
- data/lib/kumi/core/compiler/access_emit/ravel.rb +50 -0
- data/lib/kumi/core/compiler/access_emit/read.rb +32 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +56 -189
- data/lib/kumi/core/ir/execution_engine/profiler.rb +139 -11
- data/lib/kumi/core/ir/execution_engine/values.rb +8 -8
- data/lib/kumi/core/ir/execution_engine.rb +5 -30
- data/lib/kumi/dev/parse.rb +12 -12
- data/lib/kumi/dev/profile_aggregator.rb +301 -0
- data/lib/kumi/dev/profile_runner.rb +199 -0
- data/lib/kumi/dev/runner.rb +3 -1
- data/lib/kumi/dev.rb +14 -0
- data/lib/kumi/runtime/executable.rb +32 -153
- data/lib/kumi/runtime/run.rb +105 -0
- data/lib/kumi/schema.rb +15 -14
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +4 -2
- metadata +15 -3
- data/docs/features/analysis-cascade-mutual-exclusion.md +0 -89
data/lib/kumi/dev/parse.rb
CHANGED
@@ -35,20 +35,20 @@ module Kumi
|
|
35
35
|
end
|
36
36
|
|
37
37
|
# Report trace file if enabled
|
38
|
-
if opts[:trace] && res.respond_to?(:trace_file)
|
39
|
-
puts "Trace written to: #{res.trace_file}"
|
40
|
-
end
|
38
|
+
puts "Trace written to: #{res.trace_file}" if opts[:trace] && res.respond_to?(:trace_file)
|
41
39
|
|
42
40
|
# Determine file extension and renderer
|
43
41
|
extension = opts[:json] ? "json" : "txt"
|
44
|
-
|
42
|
+
|
43
|
+
file_name = File.basename(schema_path)
|
44
|
+
golden_path = File.join(File.dirname(schema_path), "expected", "#{file_name}_ir.#{extension}")
|
45
45
|
|
46
46
|
# Render IR
|
47
47
|
rendered = if opts[:json]
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
48
|
+
Dev::IR.to_json(res.ir, pretty: true)
|
49
|
+
else
|
50
|
+
Dev::IR.to_text(res.ir)
|
51
|
+
end
|
52
52
|
|
53
53
|
# Handle write mode
|
54
54
|
if opts[:write]
|
@@ -71,7 +71,7 @@ module Kumi
|
|
71
71
|
end
|
72
72
|
end
|
73
73
|
|
74
|
-
# Handle no-diff mode
|
74
|
+
# Handle no-diff mode
|
75
75
|
if opts[:no_diff]
|
76
76
|
puts rendered
|
77
77
|
return true
|
@@ -84,7 +84,7 @@ module Kumi
|
|
84
84
|
Tempfile.create(["actual", File.extname(golden_path)]) do |actual_file|
|
85
85
|
actual_file.write(rendered)
|
86
86
|
actual_file.flush
|
87
|
-
|
87
|
+
|
88
88
|
result = `diff -u --label=expected --label=actual #{golden_path} #{actual_file.path}`
|
89
89
|
if result.empty?
|
90
90
|
puts "No changes (#{golden_path})"
|
@@ -97,9 +97,9 @@ module Kumi
|
|
97
97
|
else
|
98
98
|
# No golden file exists, just print the output
|
99
99
|
puts rendered
|
100
|
-
|
100
|
+
true
|
101
101
|
end
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
105
|
-
end
|
105
|
+
end
|
@@ -0,0 +1,301 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module Kumi
|
6
|
+
module Dev
|
7
|
+
class ProfileAggregator
|
8
|
+
attr_reader :events, :phases, :operations, :memory_snapshots, :final_summary
|
9
|
+
|
10
|
+
def initialize(jsonl_file)
|
11
|
+
@jsonl_file = jsonl_file
|
12
|
+
@events = []
|
13
|
+
@phases = []
|
14
|
+
@operations = []
|
15
|
+
@memory_snapshots = []
|
16
|
+
@final_summary = nil
|
17
|
+
load_events
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.load(jsonl_file)
|
21
|
+
new(jsonl_file)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Core aggregation methods
|
25
|
+
def total_execution_time
|
26
|
+
script_phase = phases.find { |p| p["name"] == "script_execution" }
|
27
|
+
script_phase ? script_phase["wall_ms"] : 0
|
28
|
+
end
|
29
|
+
|
30
|
+
def vm_execution_time
|
31
|
+
vm_phases = phases.select { |p| p["name"] == "vm.run" }
|
32
|
+
vm_phases.sum { |p| p["wall_ms"] || 0 }
|
33
|
+
end
|
34
|
+
|
35
|
+
def vm_execution_count
|
36
|
+
phases.count { |p| p["name"] == "vm.run" }
|
37
|
+
end
|
38
|
+
|
39
|
+
def runs_analyzed
|
40
|
+
(operations + phases + memory_snapshots).map { |e| e["run"] }.compact.uniq.sort
|
41
|
+
end
|
42
|
+
|
43
|
+
def schema_breakdown
|
44
|
+
@schema_breakdown ||= operations.group_by { |op| op["schema"] || "Unknown" }.transform_values do |ops|
|
45
|
+
{
|
46
|
+
operations: ops.length,
|
47
|
+
time: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
|
48
|
+
declarations: ops.map { |op| op["decl"] }.uniq.compact.sort
|
49
|
+
}
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def operations_by_run
|
54
|
+
operations.group_by { |op| op["run"] }
|
55
|
+
end
|
56
|
+
|
57
|
+
def operation_stats_by_type
|
58
|
+
operations.group_by { |op| op["tag"] }.transform_values do |ops|
|
59
|
+
{
|
60
|
+
count: ops.length,
|
61
|
+
total_ms: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
|
62
|
+
avg_ms: ops.empty? ? 0 : (ops.sum { |op| op["wall_ms"] || 0 } / ops.length).round(6),
|
63
|
+
max_ms: ops.map { |op| op["wall_ms"] || 0 }.max || 0,
|
64
|
+
declarations: ops.map { |op| op["decl"] }.uniq.compact
|
65
|
+
}
|
66
|
+
end.sort_by { |_, stats| -stats[:total_ms] }
|
67
|
+
end
|
68
|
+
|
69
|
+
def operation_stats_by_declaration
|
70
|
+
operations.group_by { |op| op["decl"] }.transform_values do |ops|
|
71
|
+
{
|
72
|
+
count: ops.length,
|
73
|
+
total_ms: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
|
74
|
+
avg_ms: ops.empty? ? 0 : (ops.sum { |op| op["wall_ms"] || 0 } / ops.length).round(6),
|
75
|
+
operation_types: ops.map { |op| op["tag"] }.uniq.compact
|
76
|
+
}
|
77
|
+
end.sort_by { |_, stats| -stats[:total_ms] }
|
78
|
+
end
|
79
|
+
|
80
|
+
def hotspot_analysis(limit: 20)
|
81
|
+
operations.map do |op|
|
82
|
+
{
|
83
|
+
key: "#{op['decl']}@#{op['seq'] || 0}:#{op['tag']}",
|
84
|
+
decl: op["decl"],
|
85
|
+
tag: op["tag"],
|
86
|
+
wall_ms: op["wall_ms"] || 0,
|
87
|
+
cpu_ms: op["cpu_ms"] || 0,
|
88
|
+
rows: op["rows"] || 0
|
89
|
+
}
|
90
|
+
end.group_by { |op| op[:key] }.transform_values do |ops|
|
91
|
+
{
|
92
|
+
count: ops.length,
|
93
|
+
total_ms: ops.sum { |op| op[:wall_ms] }.round(4),
|
94
|
+
avg_ms: ops.empty? ? 0 : (ops.sum { |op| op[:wall_ms] } / ops.length).round(6),
|
95
|
+
decl: ops.first[:decl],
|
96
|
+
tag: ops.first[:tag]
|
97
|
+
}
|
98
|
+
end.sort_by { |_, stats| -stats[:total_ms] }.first(limit)
|
99
|
+
end
|
100
|
+
|
101
|
+
def reference_operation_analysis
|
102
|
+
ref_ops = operations.select { |op| op["tag"] == "ref" }
|
103
|
+
return { operations: 0, total_time: 0, avg_time: 0, by_declaration: [] } if ref_ops.empty?
|
104
|
+
|
105
|
+
{
|
106
|
+
operations: ref_ops.length,
|
107
|
+
total_time: ref_ops.sum { |op| op["wall_ms"] || 0 }.round(4),
|
108
|
+
avg_time: (ref_ops.sum { |op| op["wall_ms"] || 0 } / ref_ops.length).round(6),
|
109
|
+
by_declaration: ref_ops.group_by { |op| op["decl"] }.transform_values do |ops|
|
110
|
+
{
|
111
|
+
count: ops.length,
|
112
|
+
total_ms: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
|
113
|
+
avg_ms: (ops.sum { |op| op["wall_ms"] || 0 } / ops.length).round(6)
|
114
|
+
}
|
115
|
+
end.sort_by { |_, stats| -stats[:total_ms] }
|
116
|
+
}
|
117
|
+
end
|
118
|
+
|
119
|
+
def memory_analysis
|
120
|
+
return nil if memory_snapshots.length < 2
|
121
|
+
|
122
|
+
start_mem = memory_snapshots.first
|
123
|
+
end_mem = memory_snapshots.last
|
124
|
+
|
125
|
+
{
|
126
|
+
start: {
|
127
|
+
heap_live: start_mem["heap_live"],
|
128
|
+
rss_mb: start_mem["rss_mb"],
|
129
|
+
minor_gc: start_mem["minor_gc"],
|
130
|
+
major_gc: start_mem["major_gc"]
|
131
|
+
},
|
132
|
+
end: {
|
133
|
+
heap_live: end_mem["heap_live"],
|
134
|
+
rss_mb: end_mem["rss_mb"],
|
135
|
+
minor_gc: end_mem["minor_gc"],
|
136
|
+
major_gc: end_mem["major_gc"]
|
137
|
+
},
|
138
|
+
growth: {
|
139
|
+
heap_objects: end_mem["heap_live"] - start_mem["heap_live"],
|
140
|
+
heap_growth_pct: ((end_mem["heap_live"] - start_mem["heap_live"]).to_f / start_mem["heap_live"] * 100).round(1),
|
141
|
+
rss_mb: (end_mem["rss_mb"] - start_mem["rss_mb"]).round(2),
|
142
|
+
rss_growth_pct: ((end_mem["rss_mb"] - start_mem["rss_mb"]) / start_mem["rss_mb"] * 100).round(1),
|
143
|
+
minor_gcs: end_mem["minor_gc"] - start_mem["minor_gc"],
|
144
|
+
major_gcs: end_mem["major_gc"] - start_mem["major_gc"]
|
145
|
+
}
|
146
|
+
}
|
147
|
+
end
|
148
|
+
|
149
|
+
def phase_analysis
|
150
|
+
phases.group_by { |p| p["name"] }.transform_values do |phase_events|
|
151
|
+
{
|
152
|
+
count: phase_events.length,
|
153
|
+
total_ms: phase_events.sum { |p| p["wall_ms"] || 0 }.round(4),
|
154
|
+
avg_ms: phase_events.empty? ? 0 : (phase_events.sum { |p| p["wall_ms"] || 0 } / phase_events.length).round(4),
|
155
|
+
max_ms: phase_events.map { |p| p["wall_ms"] || 0 }.max || 0
|
156
|
+
}
|
157
|
+
end.sort_by { |_, stats| -stats[:total_ms] }
|
158
|
+
end
|
159
|
+
|
160
|
+
# Reporting methods
|
161
|
+
def summary_report
|
162
|
+
total_ops = operations.length
|
163
|
+
total_vm_time = vm_execution_time
|
164
|
+
ref_analysis = reference_operation_analysis
|
165
|
+
|
166
|
+
puts "=== PROFILE AGGREGATION SUMMARY ==="
|
167
|
+
puts "Total events: #{events.length}"
|
168
|
+
puts "VM operations: #{total_ops}"
|
169
|
+
puts "VM executions: #{vm_execution_count}"
|
170
|
+
|
171
|
+
# Schema differentiation
|
172
|
+
schema_stats = schema_breakdown
|
173
|
+
if schema_stats.any? && schema_stats.keys.first != "Unknown"
|
174
|
+
puts "Schemas analyzed: #{schema_stats.keys.join(", ")}"
|
175
|
+
schema_stats.each do |schema, stats|
|
176
|
+
puts " #{schema}: #{stats[:operations]} operations, #{stats[:time]}ms"
|
177
|
+
end
|
178
|
+
else
|
179
|
+
puts "Schema runs: #{runs_analyzed.length} (runs: #{runs_analyzed.join(', ')})"
|
180
|
+
end
|
181
|
+
|
182
|
+
puts "Total VM time: #{total_vm_time.round(4)}ms"
|
183
|
+
puts "Average per VM execution: #{vm_execution_count > 0 ? (total_vm_time / vm_execution_count).round(4) : 0}ms"
|
184
|
+
puts
|
185
|
+
|
186
|
+
if ref_analysis[:operations] && ref_analysis[:operations] > 0
|
187
|
+
puts "Reference Operations:"
|
188
|
+
puts " Count: #{ref_analysis[:operations]} (#{(ref_analysis[:operations].to_f / total_ops * 100).round(1)}% of all ops)"
|
189
|
+
puts " Time: #{ref_analysis[:total_time]}ms (#{total_vm_time > 0 ? (ref_analysis[:total_time] / total_vm_time * 100).round(1) : 0}% of VM time)"
|
190
|
+
puts " Avg: #{ref_analysis[:avg_time]}ms per reference"
|
191
|
+
end
|
192
|
+
|
193
|
+
mem = memory_analysis
|
194
|
+
if mem
|
195
|
+
puts
|
196
|
+
puts "Memory Growth:"
|
197
|
+
puts " Heap: +#{mem[:growth][:heap_objects]} objects (#{mem[:growth][:heap_growth_pct]}%)"
|
198
|
+
puts " RSS: +#{mem[:growth][:rss_mb]}MB (#{mem[:growth][:rss_growth_pct]}%)"
|
199
|
+
puts " GC: #{mem[:growth][:minor_gcs]} minor, #{mem[:growth][:major_gcs]} major"
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
def detailed_report(limit: 15)
|
204
|
+
summary_report
|
205
|
+
puts
|
206
|
+
puts "=== TOP #{limit} HOTSPOTS ==="
|
207
|
+
hotspots = hotspot_analysis(limit: limit)
|
208
|
+
hotspots.each_with_index do |(key, stats), i|
|
209
|
+
puts "#{(i+1).to_s.rjust(2)}. #{key.ljust(40)} #{stats[:total_ms].to_s.rjust(10)}ms (#{stats[:count]} calls, #{stats[:avg_ms]}ms avg)"
|
210
|
+
end
|
211
|
+
|
212
|
+
# Schema breakdown if available
|
213
|
+
schema_stats = schema_breakdown
|
214
|
+
if schema_stats.keys.length > 1 || (schema_stats.keys.first && schema_stats.keys.first != "Unknown")
|
215
|
+
puts
|
216
|
+
puts "=== SCHEMA BREAKDOWN ==="
|
217
|
+
schema_stats.each do |schema, stats|
|
218
|
+
puts "#{schema}:"
|
219
|
+
puts " Operations: #{stats[:operations]}"
|
220
|
+
puts " Total time: #{stats[:time]}ms"
|
221
|
+
puts " Declarations: #{stats[:declarations].join(", ")}"
|
222
|
+
puts
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
puts "=== OPERATION TYPE BREAKDOWN ==="
|
227
|
+
operation_stats_by_type.each do |op_type, stats|
|
228
|
+
puts "#{op_type.ljust(15)} #{stats[:count].to_s.rjust(8)} calls #{stats[:total_ms].to_s.rjust(12)}ms #{stats[:avg_ms].to_s.rjust(10)}ms avg"
|
229
|
+
end
|
230
|
+
|
231
|
+
puts
|
232
|
+
puts "=== TOP #{limit} DECLARATIONS BY TIME ==="
|
233
|
+
operation_stats_by_declaration.first(limit).each do |decl, stats|
|
234
|
+
puts "#{decl.to_s.ljust(35)} #{stats[:count].to_s.rjust(6)} ops #{stats[:total_ms].to_s.rjust(10)}ms"
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
def export_summary(filename)
|
239
|
+
summary = {
|
240
|
+
metadata: {
|
241
|
+
total_events: events.length,
|
242
|
+
vm_operations: operations.length,
|
243
|
+
vm_executions: vm_execution_count,
|
244
|
+
analysis_timestamp: Time.now.strftime("%Y-%m-%dT%H:%M:%SZ")
|
245
|
+
},
|
246
|
+
timing: {
|
247
|
+
total_execution_ms: total_execution_time,
|
248
|
+
vm_execution_ms: vm_execution_time,
|
249
|
+
avg_vm_execution_ms: vm_execution_count > 0 ? (vm_execution_time / vm_execution_count).round(4) : 0
|
250
|
+
},
|
251
|
+
operations: {
|
252
|
+
by_type: operation_stats_by_type,
|
253
|
+
by_declaration: operation_stats_by_declaration,
|
254
|
+
hotspots: hotspot_analysis(limit: 20)
|
255
|
+
},
|
256
|
+
references: reference_operation_analysis,
|
257
|
+
memory: memory_analysis,
|
258
|
+
phases: phase_analysis
|
259
|
+
}
|
260
|
+
|
261
|
+
File.write(filename, JSON.pretty_generate(summary))
|
262
|
+
puts "Summary exported to: #{filename}"
|
263
|
+
end
|
264
|
+
|
265
|
+
private
|
266
|
+
|
267
|
+
def load_events
|
268
|
+
return unless File.exist?(@jsonl_file)
|
269
|
+
|
270
|
+
File.readlines(@jsonl_file).each do |line|
|
271
|
+
begin
|
272
|
+
event = JSON.parse(line.strip)
|
273
|
+
next unless event && event.is_a?(Hash)
|
274
|
+
|
275
|
+
@events << event
|
276
|
+
|
277
|
+
case event["kind"]
|
278
|
+
when "phase"
|
279
|
+
@phases << event
|
280
|
+
when "mem"
|
281
|
+
@memory_snapshots << event
|
282
|
+
when "final_summary"
|
283
|
+
@final_summary = event
|
284
|
+
else
|
285
|
+
# VM operations don't have a "kind" field - they have ts, run, decl, i, tag, wall_ms, cpu_ms, etc.
|
286
|
+
# According to profiler.rb line 118-130, VM operations are identified by having decl + tag but no kind
|
287
|
+
if event["decl"] && event["tag"] && !event["kind"]
|
288
|
+
@operations << event
|
289
|
+
elsif event["kind"] && !["summary", "cache_analysis"].include?(event["kind"])
|
290
|
+
# Handle any future event types that have a kind but aren't known
|
291
|
+
@operations << event
|
292
|
+
end
|
293
|
+
end
|
294
|
+
rescue JSON::ParserError
|
295
|
+
# Skip malformed JSON lines
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require "fileutils"
|
5
|
+
require "benchmark"
|
6
|
+
|
7
|
+
module Kumi
|
8
|
+
module Dev
|
9
|
+
module ProfileRunner
|
10
|
+
module_function
|
11
|
+
|
12
|
+
def run(script_path, opts = {})
|
13
|
+
# Validate script exists
|
14
|
+
unless File.exist?(script_path)
|
15
|
+
puts "Error: Script not found: #{script_path}"
|
16
|
+
return false
|
17
|
+
end
|
18
|
+
|
19
|
+
# Set up profiling environment
|
20
|
+
setup_profiler_env(opts)
|
21
|
+
|
22
|
+
puts "Profiling: #{script_path}"
|
23
|
+
puts "Configuration:"
|
24
|
+
puts " Output: #{ENV['KUMI_PROFILE_FILE']}"
|
25
|
+
puts " Phases: enabled"
|
26
|
+
puts " Operations: #{ENV['KUMI_PROFILE_OPS'] == '1' ? 'enabled' : 'disabled'}"
|
27
|
+
puts " Sampling: #{ENV['KUMI_PROFILE_SAMPLE'] || '1'}"
|
28
|
+
puts " Persistent: #{ENV['KUMI_PROFILE_PERSISTENT'] == '1' ? 'yes' : 'no'}"
|
29
|
+
puts " Memory snapshots: #{opts[:memory] ? 'enabled' : 'disabled'}"
|
30
|
+
puts
|
31
|
+
|
32
|
+
# Initialize profiler
|
33
|
+
Dev::Profiler.init_persistent! if ENV["KUMI_PROFILE_PERSISTENT"] == "1"
|
34
|
+
|
35
|
+
# Add memory snapshot before execution
|
36
|
+
Dev::Profiler.memory_snapshot("script_start") if opts[:memory]
|
37
|
+
|
38
|
+
# Execute the script
|
39
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
40
|
+
begin
|
41
|
+
result = Dev::Profiler.phase("script_execution", script: File.basename(script_path)) do
|
42
|
+
# Execute in a clean environment to avoid polluting the current process
|
43
|
+
load(File.expand_path(script_path))
|
44
|
+
end
|
45
|
+
rescue StandardError => e
|
46
|
+
puts "Error executing script: #{e.message}"
|
47
|
+
puts e.backtrace.first(5).join("\n")
|
48
|
+
return false
|
49
|
+
ensure
|
50
|
+
execution_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
|
51
|
+
end
|
52
|
+
|
53
|
+
# Add memory snapshot after execution
|
54
|
+
Dev::Profiler.memory_snapshot("script_end") if opts[:memory]
|
55
|
+
|
56
|
+
# Finalize profiler to get aggregated data
|
57
|
+
Dev::Profiler.finalize!
|
58
|
+
|
59
|
+
puts "Script completed in #{execution_time.round(4)}s"
|
60
|
+
|
61
|
+
# Show analysis unless quiet
|
62
|
+
show_analysis(opts) unless opts[:quiet]
|
63
|
+
|
64
|
+
true
|
65
|
+
rescue LoadError => e
|
66
|
+
puts "Error loading script: #{e.message}"
|
67
|
+
false
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
def self.setup_profiler_env(opts)
|
73
|
+
# Always enable profiling
|
74
|
+
ENV["KUMI_PROFILE"] = "1"
|
75
|
+
|
76
|
+
# Output file
|
77
|
+
output_file = opts[:output] || "tmp/profile.jsonl"
|
78
|
+
ENV["KUMI_PROFILE_FILE"] = output_file
|
79
|
+
|
80
|
+
# Truncate if requested
|
81
|
+
ENV["KUMI_PROFILE_TRUNCATE"] = opts[:truncate] ? "1" : "0"
|
82
|
+
|
83
|
+
# Streaming
|
84
|
+
ENV["KUMI_PROFILE_STREAM"] = opts[:stream] ? "1" : "0"
|
85
|
+
|
86
|
+
# Operations profiling
|
87
|
+
if opts[:phases_only]
|
88
|
+
ENV["KUMI_PROFILE_OPS"] = "0"
|
89
|
+
elsif opts[:ops]
|
90
|
+
ENV["KUMI_PROFILE_OPS"] = "1"
|
91
|
+
else
|
92
|
+
# Default: phases only
|
93
|
+
ENV["KUMI_PROFILE_OPS"] = "0"
|
94
|
+
end
|
95
|
+
|
96
|
+
# Sampling
|
97
|
+
ENV["KUMI_PROFILE_SAMPLE"] = opts[:sample].to_s if opts[:sample]
|
98
|
+
|
99
|
+
# Persistent mode
|
100
|
+
ENV["KUMI_PROFILE_PERSISTENT"] = opts[:persistent] ? "1" : "0"
|
101
|
+
|
102
|
+
# Ensure output directory exists
|
103
|
+
FileUtils.mkdir_p(File.dirname(output_file))
|
104
|
+
end
|
105
|
+
|
106
|
+
def self.show_analysis(opts)
|
107
|
+
output_file = ENV["KUMI_PROFILE_FILE"]
|
108
|
+
|
109
|
+
unless File.exist?(output_file)
|
110
|
+
puts "No profile data generated"
|
111
|
+
return
|
112
|
+
end
|
113
|
+
|
114
|
+
puts "\n=== Profiling Analysis ==="
|
115
|
+
|
116
|
+
# Use ProfileAggregator for comprehensive analysis
|
117
|
+
require_relative "profile_aggregator"
|
118
|
+
aggregator = ProfileAggregator.new(output_file)
|
119
|
+
|
120
|
+
if opts[:json]
|
121
|
+
# Export full analysis to JSON and display
|
122
|
+
json_output = opts[:json_file] || "/tmp/profile_analysis.json"
|
123
|
+
aggregator.export_summary(json_output)
|
124
|
+
puts File.read(json_output)
|
125
|
+
return
|
126
|
+
end
|
127
|
+
|
128
|
+
# Show comprehensive analysis using ProfileAggregator
|
129
|
+
if opts[:detailed]
|
130
|
+
aggregator.detailed_report(limit: opts[:limit] || 15)
|
131
|
+
else
|
132
|
+
# Show summary + key insights
|
133
|
+
aggregator.summary_report
|
134
|
+
|
135
|
+
# Add some key insights for CLI users
|
136
|
+
puts
|
137
|
+
puts "=== KEY INSIGHTS ==="
|
138
|
+
|
139
|
+
# Show top hotspots
|
140
|
+
hotspots = aggregator.hotspot_analysis(limit: 3)
|
141
|
+
if hotspots.any?
|
142
|
+
puts "Top Performance Bottlenecks:"
|
143
|
+
hotspots.each_with_index do |(key, stats), i|
|
144
|
+
puts " #{i+1}. #{stats[:decl]} (#{stats[:tag]}): #{stats[:total_ms]}ms"
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
# Reference analysis summary
|
149
|
+
ref_analysis = aggregator.reference_operation_analysis
|
150
|
+
if ref_analysis[:operations] > 0
|
151
|
+
puts "Reference Operation Impact: #{(ref_analysis[:total_time] / aggregator.vm_execution_time * 100).round(1)}% of VM time"
|
152
|
+
end
|
153
|
+
|
154
|
+
# Memory impact
|
155
|
+
mem = aggregator.memory_analysis
|
156
|
+
if mem
|
157
|
+
puts "Memory Impact: #{mem[:growth][:heap_growth_pct]}% heap growth, #{mem[:growth][:rss_growth_pct]}% RSS growth"
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
puts
|
162
|
+
puts "Full profile: #{output_file}"
|
163
|
+
puts "For detailed analysis: bin/kumi profile #{ARGV.join(' ')} --detailed"
|
164
|
+
end
|
165
|
+
|
166
|
+
def self.analyze_phases(phase_events)
|
167
|
+
phase_events.group_by { |e| e["name"] }.transform_values do |events|
|
168
|
+
{
|
169
|
+
count: events.length,
|
170
|
+
total_ms: events.sum { |e| e["wall_ms"] }.round(3),
|
171
|
+
avg_ms: (events.sum { |e| e["wall_ms"] } / events.length).round(4)
|
172
|
+
}
|
173
|
+
end.sort_by { |_, stats| -stats[:total_ms] }.to_h
|
174
|
+
end
|
175
|
+
|
176
|
+
def self.analyze_events(events)
|
177
|
+
{
|
178
|
+
summary: {
|
179
|
+
total_events: events.length,
|
180
|
+
phase_events: events.count { |e| e["kind"] == "phase" },
|
181
|
+
memory_events: events.count { |e| e["kind"] == "mem" },
|
182
|
+
operation_events: events.count { |e| !%w[phase mem summary final_summary cache_analysis].include?(e["kind"]) }
|
183
|
+
},
|
184
|
+
phases: analyze_phases(events.select { |e| e["kind"] == "phase" }),
|
185
|
+
memory_snapshots: events.select { |e| e["kind"] == "mem" }.map do |e|
|
186
|
+
{
|
187
|
+
label: e["label"],
|
188
|
+
heap_live: e["heap_live"],
|
189
|
+
rss_mb: e["rss_mb"],
|
190
|
+
timestamp: e["ts"]
|
191
|
+
}
|
192
|
+
end,
|
193
|
+
final_analysis: events.find { |e| e["kind"] == "final_summary" }&.dig("data"),
|
194
|
+
cache_analysis: events.find { |e| e["kind"] == "cache_analysis" }&.dig("data")
|
195
|
+
}
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
data/lib/kumi/dev/runner.rb
CHANGED
@@ -19,7 +19,9 @@ module Kumi
|
|
19
19
|
errors = []
|
20
20
|
|
21
21
|
begin
|
22
|
-
final_state =
|
22
|
+
final_state = Dev::Profiler.phase("text.analyzer") do
|
23
|
+
Kumi::Analyzer.run_analysis_passes(schema, Kumi::Analyzer::DEFAULT_PASSES, state, errors)
|
24
|
+
end
|
23
25
|
ir = final_state[:ir_module]
|
24
26
|
|
25
27
|
result = Result.new(
|
data/lib/kumi/dev.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Dev
|
5
|
+
# Alias to the execution engine profiler for cross-layer access
|
6
|
+
Profiler = Kumi::Core::IR::ExecutionEngine::Profiler
|
7
|
+
|
8
|
+
# Load profile runner for CLI
|
9
|
+
autoload :ProfileRunner, "kumi/dev/profile_runner"
|
10
|
+
|
11
|
+
# Load profile aggregator for data analysis
|
12
|
+
autoload :ProfileAggregator, "kumi/dev/profile_aggregator"
|
13
|
+
end
|
14
|
+
end
|