kumi 0.0.13 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/BACKLOG.md +34 -0
  4. data/CHANGELOG.md +33 -0
  5. data/CLAUDE.md +4 -6
  6. data/README.md +0 -45
  7. data/config/functions.yaml +352 -0
  8. data/docs/dev/analyzer-debug.md +52 -0
  9. data/docs/dev/parse-command.md +64 -0
  10. data/docs/dev/vm-profiling.md +95 -0
  11. data/docs/features/README.md +0 -7
  12. data/docs/functions/analyzer_integration.md +199 -0
  13. data/docs/functions/signatures.md +171 -0
  14. data/examples/hash_objects_demo.rb +138 -0
  15. data/golden/array_operations/schema.kumi +17 -0
  16. data/golden/cascade_logic/schema.kumi +16 -0
  17. data/golden/mixed_nesting/schema.kumi +42 -0
  18. data/golden/simple_math/schema.kumi +10 -0
  19. data/lib/kumi/analyzer.rb +76 -22
  20. data/lib/kumi/compiler.rb +6 -5
  21. data/lib/kumi/core/analyzer/checkpoint.rb +72 -0
  22. data/lib/kumi/core/analyzer/debug.rb +167 -0
  23. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +1 -3
  24. data/lib/kumi/core/analyzer/passes/function_signature_pass.rb +199 -0
  25. data/lib/kumi/core/analyzer/passes/ir_dependency_pass.rb +67 -0
  26. data/lib/kumi/core/analyzer/passes/load_input_cse.rb +120 -0
  27. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +72 -157
  28. data/lib/kumi/core/analyzer/passes/toposorter.rb +40 -36
  29. data/lib/kumi/core/analyzer/state_serde.rb +64 -0
  30. data/lib/kumi/core/analyzer/structs/access_plan.rb +12 -10
  31. data/lib/kumi/core/compiler/access_planner.rb +3 -2
  32. data/lib/kumi/core/function_registry/collection_functions.rb +3 -1
  33. data/lib/kumi/core/functions/dimension.rb +98 -0
  34. data/lib/kumi/core/functions/dtypes.rb +20 -0
  35. data/lib/kumi/core/functions/errors.rb +11 -0
  36. data/lib/kumi/core/functions/kernel_adapter.rb +45 -0
  37. data/lib/kumi/core/functions/loader.rb +119 -0
  38. data/lib/kumi/core/functions/registry_v2.rb +68 -0
  39. data/lib/kumi/core/functions/shape.rb +70 -0
  40. data/lib/kumi/core/functions/signature.rb +122 -0
  41. data/lib/kumi/core/functions/signature_parser.rb +86 -0
  42. data/lib/kumi/core/functions/signature_resolver.rb +272 -0
  43. data/lib/kumi/core/ir/execution_engine/interpreter.rb +110 -7
  44. data/lib/kumi/core/ir/execution_engine/profiler.rb +330 -0
  45. data/lib/kumi/core/ir/execution_engine.rb +6 -15
  46. data/lib/kumi/dev/ir.rb +75 -0
  47. data/lib/kumi/dev/parse.rb +105 -0
  48. data/lib/kumi/dev/profile_aggregator.rb +301 -0
  49. data/lib/kumi/dev/profile_runner.rb +199 -0
  50. data/lib/kumi/dev/runner.rb +85 -0
  51. data/lib/kumi/dev.rb +14 -0
  52. data/lib/kumi/frontends/ruby.rb +28 -0
  53. data/lib/kumi/frontends/text.rb +46 -0
  54. data/lib/kumi/frontends.rb +29 -0
  55. data/lib/kumi/kernels/ruby/aggregate_core.rb +105 -0
  56. data/lib/kumi/kernels/ruby/datetime_scalar.rb +21 -0
  57. data/lib/kumi/kernels/ruby/mask_scalar.rb +15 -0
  58. data/lib/kumi/kernels/ruby/scalar_core.rb +63 -0
  59. data/lib/kumi/kernels/ruby/string_scalar.rb +19 -0
  60. data/lib/kumi/kernels/ruby/vector_struct.rb +39 -0
  61. data/lib/kumi/runtime/executable.rb +108 -45
  62. data/lib/kumi/schema.rb +12 -6
  63. data/lib/kumi/support/diff.rb +22 -0
  64. data/lib/kumi/support/ir_render.rb +61 -0
  65. data/lib/kumi/version.rb +1 -1
  66. data/lib/kumi.rb +3 -0
  67. data/performance_results.txt +63 -0
  68. data/scripts/test_mixed_nesting_performance.rb +206 -0
  69. metadata +50 -6
  70. data/docs/features/analysis-cascade-mutual-exclusion.md +0 -89
  71. data/docs/features/javascript-transpiler.md +0 -148
  72. data/lib/kumi/js.rb +0 -23
  73. data/lib/kumi/support/ir_dump.rb +0 -491
@@ -0,0 +1,301 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module Kumi
6
+ module Dev
7
+ class ProfileAggregator
8
+ attr_reader :events, :phases, :operations, :memory_snapshots, :final_summary
9
+
10
# Eagerly parses the JSONL profile at +jsonl_file+ so every accessor
# operates on fully loaded data.
def initialize(jsonl_file)
  @jsonl_file = jsonl_file
  @events, @phases, @operations, @memory_snapshots = [], [], [], []
  @final_summary = nil
  load_events
end
19
+
20
# Convenience factory: ProfileAggregator.load(path) is equivalent to .new(path).
def self.load(jsonl_file)
  new(jsonl_file)
end
23
+
24
+ # Core aggregation methods
25
# Wall-clock milliseconds of the "script_execution" phase; 0 when that
# phase was never recorded.
def total_execution_time
  script = phases.find { |entry| entry["name"] == "script_execution" }
  return 0 unless script

  script["wall_ms"]
end
29
+
30
# Total wall time (ms) accumulated across all "vm.run" phase events.
def vm_execution_time
  phases.sum { |phase| phase["name"] == "vm.run" ? phase["wall_ms"] || 0 : 0 }
end
34
+
35
# Number of recorded "vm.run" phase events.
def vm_execution_count
  phases.count { |phase| phase["name"] == "vm.run" }
end
38
+
39
# Sorted, de-duplicated run identifiers seen across operations, phases
# and memory snapshots (events without a "run" key are ignored).
def runs_analyzed
  seen = []
  [operations, phases, memory_snapshots].each do |bucket|
    bucket.each { |event| seen << event["run"] }
  end
  seen.compact.uniq.sort
end
42
+
43
# Per-schema rollup: operation count, total wall time, and the sorted
# set of declarations touched. Memoized after the first call.
def schema_breakdown
  @schema_breakdown ||= begin
    by_schema = operations.group_by { |op| op["schema"] || "Unknown" }
    by_schema.transform_values do |ops|
      {
        operations: ops.length,
        time: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
        declarations: ops.map { |op| op["decl"] }.uniq.compact.sort
      }
    end
  end
end
52
+
53
# Operations bucketed by the run identifier they belong to.
def operations_by_run
  operations.group_by { |operation| operation["run"] }
end
56
+
57
# Per-tag statistics (count, total/avg/max wall ms, declarations),
# returned as [[tag, stats], ...] pairs sorted by descending total time.
def operation_stats_by_type
  stats = operations.group_by { |op| op["tag"] }.transform_values do |ops|
    durations = ops.map { |op| op["wall_ms"] || 0 }
    total = durations.sum
    {
      count: ops.length,
      total_ms: total.round(4),
      avg_ms: ops.empty? ? 0 : (total / ops.length).round(6),
      max_ms: durations.max || 0,
      declarations: ops.map { |op| op["decl"] }.uniq.compact
    }
  end
  stats.sort_by { |_, entry| -entry[:total_ms] }
end
68
+
69
# Per-declaration statistics (count, total/avg wall ms, tags seen),
# returned as [[decl, stats], ...] pairs sorted by descending total time.
def operation_stats_by_declaration
  stats = operations.group_by { |op| op["decl"] }.transform_values do |ops|
    total = ops.sum { |op| op["wall_ms"] || 0 }
    {
      count: ops.length,
      total_ms: total.round(4),
      avg_ms: ops.empty? ? 0 : (total / ops.length).round(6),
      operation_types: ops.map { |op| op["tag"] }.uniq.compact
    }
  end
  stats.sort_by { |_, entry| -entry[:total_ms] }
end
79
+
80
# Aggregates individual operation timings into per-site hotspots keyed
# by "decl@seq:tag", sorted by total wall time and truncated to +limit+.
def hotspot_analysis(limit: 20)
  normalized = operations.map do |op|
    {
      key: "#{op['decl']}@#{op['seq'] || 0}:#{op['tag']}",
      decl: op["decl"],
      tag: op["tag"],
      wall_ms: op["wall_ms"] || 0,
      cpu_ms: op["cpu_ms"] || 0,
      rows: op["rows"] || 0
    }
  end

  grouped = normalized.group_by { |entry| entry[:key] }
  summarized = grouped.transform_values do |entries|
    total = entries.sum { |entry| entry[:wall_ms] }
    {
      count: entries.length,
      total_ms: total.round(4),
      avg_ms: entries.empty? ? 0 : (total / entries.length).round(6),
      decl: entries.first[:decl],
      tag: entries.first[:tag]
    }
  end

  summarized.sort_by { |_, stats| -stats[:total_ms] }.first(limit)
end
100
+
101
# Focused stats for "ref"-tagged operations: overall count/time plus a
# per-declaration breakdown sorted by total time. Returns a zeroed
# structure when no references were recorded.
def reference_operation_analysis
  refs = operations.select { |op| op["tag"] == "ref" }
  return { operations: 0, total_time: 0, avg_time: 0, by_declaration: [] } if refs.empty?

  total = refs.sum { |op| op["wall_ms"] || 0 }
  per_decl = refs.group_by { |op| op["decl"] }.transform_values do |ops|
    decl_total = ops.sum { |op| op["wall_ms"] || 0 }
    {
      count: ops.length,
      total_ms: decl_total.round(4),
      avg_ms: (decl_total / ops.length).round(6)
    }
  end

  {
    operations: refs.length,
    total_time: total.round(4),
    avg_time: (total / refs.length).round(6),
    by_declaration: per_decl.sort_by { |_, stats| -stats[:total_ms] }
  }
end
118
+
119
# Compares the first and last memory snapshots and reports absolute and
# percentage growth. Returns nil when fewer than two snapshots exist.
# NOTE(review): growth percentages assume non-zero baseline values.
def memory_analysis
  return nil if memory_snapshots.length < 2

  first_snap = memory_snapshots.first
  last_snap = memory_snapshots.last

  heap_delta = last_snap["heap_live"] - first_snap["heap_live"]
  rss_delta = last_snap["rss_mb"] - first_snap["rss_mb"]

  snapshot = lambda do |snap|
    {
      heap_live: snap["heap_live"],
      rss_mb: snap["rss_mb"],
      minor_gc: snap["minor_gc"],
      major_gc: snap["major_gc"]
    }
  end

  {
    start: snapshot.call(first_snap),
    end: snapshot.call(last_snap),
    growth: {
      heap_objects: heap_delta,
      heap_growth_pct: (heap_delta.to_f / first_snap["heap_live"] * 100).round(1),
      rss_mb: rss_delta.round(2),
      rss_growth_pct: (rss_delta / first_snap["rss_mb"] * 100).round(1),
      minor_gcs: last_snap["minor_gc"] - first_snap["minor_gc"],
      major_gcs: last_snap["major_gc"] - first_snap["major_gc"]
    }
  }
end
148
+
149
# Per-phase-name statistics (count, total/avg/max wall ms), returned as
# [[name, stats], ...] pairs sorted by descending total time.
def phase_analysis
  phases.group_by { |phase| phase["name"] }.map do |name, group|
    durations = group.map { |phase| phase["wall_ms"] || 0 }
    total = durations.sum
    stats = {
      count: group.length,
      total_ms: total.round(4),
      avg_ms: group.empty? ? 0 : (total / group.length).round(4),
      max_ms: durations.max || 0
    }
    [name, stats]
  end.sort_by { |_, stats| -stats[:total_ms] }
end
159
+
160
+ # Reporting methods
161
# Prints a human-readable summary of the aggregated profile: headline
# counts, schema/run attribution, VM timing, reference-operation impact
# and memory growth (when snapshots are present).
def summary_report
  total_ops = operations.length
  total_vm_time = vm_execution_time
  ref_analysis = reference_operation_analysis

  puts "=== PROFILE AGGREGATION SUMMARY ==="
  puts "Total events: #{events.length}"
  puts "VM operations: #{total_ops}"
  puts "VM executions: #{vm_execution_count}"

  schema_stats = schema_breakdown
  if schema_stats.any? && schema_stats.keys.first != "Unknown"
    puts "Schemas analyzed: #{schema_stats.keys.join(", ")}"
    schema_stats.each do |schema, stats|
      puts "  #{schema}: #{stats[:operations]} operations, #{stats[:time]}ms"
    end
  else
    puts "Schema runs: #{runs_analyzed.length} (runs: #{runs_analyzed.join(', ')})"
  end

  puts "Total VM time: #{total_vm_time.round(4)}ms"
  puts "Average per VM execution: #{vm_execution_count > 0 ? (total_vm_time / vm_execution_count).round(4) : 0}ms"
  puts

  if ref_analysis[:operations] && ref_analysis[:operations] > 0
    puts "Reference Operations:"
    puts "  Count: #{ref_analysis[:operations]} (#{(ref_analysis[:operations].to_f / total_ops * 100).round(1)}% of all ops)"
    puts "  Time: #{ref_analysis[:total_time]}ms (#{total_vm_time > 0 ? (ref_analysis[:total_time] / total_vm_time * 100).round(1) : 0}% of VM time)"
    puts "  Avg: #{ref_analysis[:avg_time]}ms per reference"
  end

  mem = memory_analysis
  return unless mem

  puts
  puts "Memory Growth:"
  puts "  Heap: +#{mem[:growth][:heap_objects]} objects (#{mem[:growth][:heap_growth_pct]}%)"
  puts "  RSS: +#{mem[:growth][:rss_mb]}MB (#{mem[:growth][:rss_growth_pct]}%)"
  puts "  GC: #{mem[:growth][:minor_gcs]} minor, #{mem[:growth][:major_gcs]} major"
end
202
+
203
# Prints the summary plus hotspot, schema, operation-type and
# per-declaration breakdowns, each truncated/padded for terminal output.
def detailed_report(limit: 15)
  summary_report

  puts
  puts "=== TOP #{limit} HOTSPOTS ==="
  hotspot_analysis(limit: limit).each_with_index do |(key, stats), index|
    rank = (index + 1).to_s.rjust(2)
    puts "#{rank}. #{key.ljust(40)} #{stats[:total_ms].to_s.rjust(10)}ms (#{stats[:count]} calls, #{stats[:avg_ms]}ms avg)"
  end

  schema_stats = schema_breakdown
  multiple_schemas = schema_stats.keys.length > 1
  named_schema = schema_stats.keys.first && schema_stats.keys.first != "Unknown"
  if multiple_schemas || named_schema
    puts
    puts "=== SCHEMA BREAKDOWN ==="
    schema_stats.each do |schema, stats|
      puts "#{schema}:"
      puts "  Operations: #{stats[:operations]}"
      puts "  Total time: #{stats[:time]}ms"
      puts "  Declarations: #{stats[:declarations].join(", ")}"
      puts
    end
  end

  puts "=== OPERATION TYPE BREAKDOWN ==="
  operation_stats_by_type.each do |op_type, stats|
    puts "#{op_type.ljust(15)} #{stats[:count].to_s.rjust(8)} calls #{stats[:total_ms].to_s.rjust(12)}ms #{stats[:avg_ms].to_s.rjust(10)}ms avg"
  end

  puts
  puts "=== TOP #{limit} DECLARATIONS BY TIME ==="
  operation_stats_by_declaration.first(limit).each do |decl, stats|
    puts "#{decl.to_s.ljust(35)} #{stats[:count].to_s.rjust(6)} ops #{stats[:total_ms].to_s.rjust(10)}ms"
  end
end
237
+
238
# Writes the full aggregated analysis to +filename+ as pretty-printed
# JSON and reports the destination path on stdout.
def export_summary(filename)
  summary = {
    metadata: {
      total_events: events.length,
      vm_operations: operations.length,
      vm_executions: vm_execution_count,
      # BUG FIX: the format string claims UTC ("Z" suffix), so convert
      # to UTC instead of stamping local time with a Z.
      analysis_timestamp: Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
    },
    timing: {
      total_execution_ms: total_execution_time,
      vm_execution_ms: vm_execution_time,
      avg_vm_execution_ms: vm_execution_count > 0 ? (vm_execution_time / vm_execution_count).round(4) : 0
    },
    operations: {
      by_type: operation_stats_by_type,
      by_declaration: operation_stats_by_declaration,
      hotspots: hotspot_analysis(limit: 20)
    },
    references: reference_operation_analysis,
    memory: memory_analysis,
    phases: phase_analysis
  }

  File.write(filename, JSON.pretty_generate(summary))
  # BUG FIX: the interpolation was broken ("#(unknown)") and never
  # printed the actual destination path.
  puts "Summary exported to: #{filename}"
end
264
+
265
+ private
266
+
267
# Loads and classifies JSONL profiler events from @jsonl_file into
# @events plus the per-kind buckets (@phases, @memory_snapshots,
# @operations, @final_summary). Malformed JSON lines are skipped;
# non-Hash payloads are ignored.
def load_events
  return unless File.exist?(@jsonl_file)

  # IMPROVED: stream line-by-line (File.foreach) instead of
  # File.readlines, so a large profile is never held in memory whole.
  File.foreach(@jsonl_file) do |line|
    event = JSON.parse(line.strip)
    # `is_a?(Hash)` already rejects nil, so the former `event &&` guard
    # was redundant.
    next unless event.is_a?(Hash)

    @events << event

    case event["kind"]
    when "phase" then @phases << event
    when "mem" then @memory_snapshots << event
    when "final_summary" then @final_summary = event
    else
      # VM operations carry decl/tag fields but no "kind" key
      # (see profiler.rb emission format).
      if event["decl"] && event["tag"] && !event["kind"]
        @operations << event
      elsif event["kind"] && !%w[summary cache_analysis].include?(event["kind"])
        # Unknown kinds are kept as operations so new event types are
        # not silently dropped.
        @operations << event
      end
    end
  rescue JSON::ParserError
    # Best-effort load: skip malformed lines rather than aborting.
  end
end
299
+ end
300
+ end
301
+ end
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "fileutils"
5
+ require "benchmark"
6
+
7
+ module Kumi
8
+ module Dev
9
+ module ProfileRunner
10
+ module_function
11
+
12
# Profiles the Ruby script at +script_path+: configures the profiler via
# ENV switches, loads the script inside a "script_execution" phase, then
# finalizes the profiler and prints an analysis.
#
# opts: :output, :truncate, :stream, :ops/:phases_only, :sample,
#       :persistent, :memory, :quiet (see setup_profiler_env).
#
# Returns true on success, false when the script is missing or fails.
def run(script_path, opts = {})
  unless File.exist?(script_path)
    puts "Error: Script not found: #{script_path}"
    return false
  end

  setup_profiler_env(opts)

  puts "Profiling: #{script_path}"
  puts "Configuration:"
  puts "  Output: #{ENV['KUMI_PROFILE_FILE']}"
  puts "  Phases: enabled"
  puts "  Operations: #{ENV['KUMI_PROFILE_OPS'] == '1' ? 'enabled' : 'disabled'}"
  puts "  Sampling: #{ENV['KUMI_PROFILE_SAMPLE'] || '1'}"
  puts "  Persistent: #{ENV['KUMI_PROFILE_PERSISTENT'] == '1' ? 'yes' : 'no'}"
  puts "  Memory snapshots: #{opts[:memory] ? 'enabled' : 'disabled'}"
  puts

  Dev::Profiler.init_persistent! if ENV["KUMI_PROFILE_PERSISTENT"] == "1"
  Dev::Profiler.memory_snapshot("script_start") if opts[:memory]

  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  begin
    # NOTE: Kernel#load executes the script in the current process; the
    # old comment claiming a "clean environment" was misleading.
    # IMPROVED: dropped the unused `result =` local the old code kept.
    Dev::Profiler.phase("script_execution", script: File.basename(script_path)) do
      load(File.expand_path(script_path))
    end
  rescue StandardError => e
    puts "Error executing script: #{e.message}"
    puts e.backtrace.first(5).join("\n")
    return false
  ensure
    # Always measured, even on failure, so the ensure path stays cheap.
    execution_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
  end

  Dev::Profiler.memory_snapshot("script_end") if opts[:memory]

  # Flush/aggregate profiler output before reading it back.
  Dev::Profiler.finalize!

  puts "Script completed in #{execution_time.round(4)}s"

  show_analysis(opts) unless opts[:quiet]

  true
rescue LoadError => e
  # Kernel#load raises LoadError (a ScriptError, not StandardError), so
  # it bypasses the inner rescue and is handled here.
  puts "Error loading script: #{e.message}"
  false
end
69
+
70
+ private
71
+
72
# Maps CLI options onto the profiler's ENV switches and ensures the
# output directory exists. Defaults: phases-only profiling into
# tmp/profile.jsonl; :phases_only wins over :ops.
def self.setup_profiler_env(opts)
  target = opts[:output] || "tmp/profile.jsonl"

  flags = {
    "KUMI_PROFILE" => "1",
    "KUMI_PROFILE_FILE" => target,
    "KUMI_PROFILE_TRUNCATE" => opts[:truncate] ? "1" : "0",
    "KUMI_PROFILE_STREAM" => opts[:stream] ? "1" : "0",
    # Operations profiling is opt-in and suppressed by --phases-only.
    "KUMI_PROFILE_OPS" => !opts[:phases_only] && opts[:ops] ? "1" : "0",
    "KUMI_PROFILE_PERSISTENT" => opts[:persistent] ? "1" : "0"
  }
  flags.each { |key, value| ENV[key] = value }

  # Sampling rate is only overridden when explicitly requested.
  ENV["KUMI_PROFILE_SAMPLE"] = opts[:sample].to_s if opts[:sample]

  FileUtils.mkdir_p(File.dirname(target))
end
105
+
106
# Prints the post-run analysis for the profile at ENV["KUMI_PROFILE_FILE"].
#
# opts: :json/:json_file (export and dump JSON), :detailed/:limit
#       (full report), otherwise summary plus key insights.
def self.show_analysis(opts)
  output_file = ENV["KUMI_PROFILE_FILE"]

  unless File.exist?(output_file)
    puts "No profile data generated"
    return
  end

  puts "\n=== Profiling Analysis ==="

  require_relative "profile_aggregator"
  aggregator = ProfileAggregator.new(output_file)

  if opts[:json]
    json_output = opts[:json_file] || "/tmp/profile_analysis.json"
    aggregator.export_summary(json_output)
    puts File.read(json_output)
    return
  end

  if opts[:detailed]
    aggregator.detailed_report(limit: opts[:limit] || 15)
  else
    aggregator.summary_report

    puts
    puts "=== KEY INSIGHTS ==="

    hotspots = aggregator.hotspot_analysis(limit: 3)
    if hotspots.any?
      puts "Top Performance Bottlenecks:"
      hotspots.each_with_index do |(_key, stats), i|
        puts "  #{i + 1}. #{stats[:decl]} (#{stats[:tag]}): #{stats[:total_ms]}ms"
      end
    end

    ref_analysis = aggregator.reference_operation_analysis
    vm_time = aggregator.vm_execution_time
    # BUG FIX: guard vm_time so a profile containing ref operations but
    # no recorded vm.run phases cannot divide by zero (or round an
    # Infinity, which raises FloatDomainError).
    if ref_analysis[:operations] > 0 && vm_time > 0
      puts "Reference Operation Impact: #{(ref_analysis[:total_time] / vm_time * 100).round(1)}% of VM time"
    end

    mem = aggregator.memory_analysis
    if mem
      puts "Memory Impact: #{mem[:growth][:heap_growth_pct]}% heap growth, #{mem[:growth][:rss_growth_pct]}% RSS growth"
    end
  end

  puts
  puts "Full profile: #{output_file}"
  puts "For detailed analysis: bin/kumi profile #{ARGV.join(' ')} --detailed"
end
165
+
166
# Summarizes phase events by name (count, total and average wall ms),
# ordered by descending total time. Returns a Hash.
def self.analyze_phases(phase_events)
  grouped = phase_events.group_by { |event| event["name"] }
  summarized = grouped.transform_values do |group|
    total = group.sum { |event| event["wall_ms"] }
    {
      count: group.length,
      total_ms: total.round(3),
      avg_ms: (total / group.length).round(4)
    }
  end
  summarized.sort_by { |_, stats| -stats[:total_ms] }.to_h
end
175
+
176
# Builds a structured report from raw profiler events: headline counts,
# per-phase stats, memory snapshots and any final/cache analysis blobs.
def self.analyze_events(events)
  phase_events = events.select { |event| event["kind"] == "phase" }
  memory_events = events.select { |event| event["kind"] == "mem" }
  non_operation_kinds = %w[phase mem summary final_summary cache_analysis]

  {
    summary: {
      total_events: events.length,
      phase_events: phase_events.length,
      memory_events: memory_events.length,
      operation_events: events.count { |event| !non_operation_kinds.include?(event["kind"]) }
    },
    phases: analyze_phases(phase_events),
    memory_snapshots: memory_events.map do |event|
      {
        label: event["label"],
        heap_live: event["heap_live"],
        rss_mb: event["rss_mb"],
        timestamp: event["ts"]
      }
    end,
    final_analysis: events.find { |event| event["kind"] == "final_summary" }&.dig("data"),
    cache_analysis: events.find { |event| event["kind"] == "cache_analysis" }&.dig("data")
  }
end
+ end
198
+ end
199
+ end
@@ -0,0 +1,85 @@
1
# frozen_string_literal: true

module Kumi
  module Dev
    # Drives the analyzer pass pipeline for dev tooling, wiring debug
    # and checkpoint behavior through ENV variables.
    module Runner
      # Outcome wrapper: final analyzer state, lowered IR, and errors.
      Result = Struct.new(:state, :ir, :errors, keyword_init: true) do
        # True when analysis produced no errors.
        def ok?
          errors.empty?
        end
      end

      module_function

      # Runs all analyzer passes over +schema+.
      #
      # opts: :trace (enable JSONL state tracing), :snap, :snap_dir,
      #       :resume_from, :resume_at, :stop_after (checkpoints).
      #
      # Returns a Result; StandardErrors raised during analysis are
      # captured into Result#errors instead of propagating.
      def run(schema, opts = {})
        setup_env_vars(opts)

        state = Core::Analyzer::AnalysisState.new
        errors = []

        begin
          final_state = Dev::Profiler.phase("text.analyzer") do
            Kumi::Analyzer.run_analysis_passes(schema, Kumi::Analyzer::DEFAULT_PASSES, state, errors)
          end

          result = Result.new(state: final_state, ir: final_state[:ir_module], errors: errors)

          # Expose the trace file path on the result when tracing ran.
          if opts[:trace] && defined?(@trace_file) && @trace_file
            trace_file_path = @trace_file
            result.define_singleton_method(:trace_file) { trace_file_path }
          end

          result
        rescue StandardError => e
          errors << e.message unless errors.include?(e.message)
          Result.new(state: state, ir: nil, errors: errors)
        end
      end

      # Maps CLI options onto the ENV switches read by the analyzer's
      # debug and checkpoint subsystems.
      def self.setup_env_vars(opts)
        if opts[:trace]
          ENV["KUMI_DEBUG_STATE"] = "1"
          trace_file = ENV["KUMI_DEBUG_FILE"] || "tmp/state_trace.jsonl"
          ENV["KUMI_DEBUG_FILE"] = trace_file
          @trace_file = trace_file # remembered so #run can report it
        end

        ENV["KUMI_CHECKPOINT_PHASES"] = opts[:snap] if opts[:snap]
        ENV["KUMI_CHECKPOINT_DIR"] = opts[:snap_dir] if opts[:snap_dir]
        ENV["KUMI_CHECKPOINT_RESUME_FROM"] = opts[:resume_from] if opts[:resume_from]
        ENV["KUMI_CHECKPOINT_RESUME_AT"] = opts[:resume_at] if opts[:resume_at]
        ENV["KUMI_CHECKPOINT_STOP_AFTER"] = opts[:stop_after] if opts[:stop_after]
      end

      # BUG FIX: the original used a bare `private`, which has no effect
      # on singleton methods defined with `def self.`; the helper was
      # accidentally public API.
      private_class_method :setup_env_vars
    end
  end
end
data/lib/kumi/dev.rb ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Dev
5
+ # Alias to the execution engine profiler for cross-layer access
6
+ Profiler = Kumi::Core::IR::ExecutionEngine::Profiler
7
+
8
+ # Load profile runner for CLI
9
+ autoload :ProfileRunner, "kumi/dev/profile_runner"
10
+
11
+ # Load profile aggregator for data analysis
12
+ autoload :ProfileAggregator, "kumi/dev/profile_aggregator"
13
+ end
14
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Frontends
5
+ module Ruby
6
+ module_function
7
+
8
# Evaluates a Ruby schema file and returns [syntax_tree, inputs].
#
# The file is evaluated inside an anonymous module extended with
# Kumi::Schema; the AST comes either from a GoldenSchema constant
# (built on demand) or from the module's own schema definition.
# Raises when the file never declared a schema.
def load(path:, inputs: {})
  host = Module.new
  host.extend(Kumi::Schema)
  host.module_eval(File.read(path), path)

  ast =
    if host.const_defined?(:GoldenSchema)
      golden = host.const_get(:GoldenSchema)
      golden.build if golden.respond_to?(:build)
      golden.__syntax_tree__
    elsif host.__syntax_tree__
      host.__syntax_tree__
    else
      raise "No schema AST found. Make sure the .rb file calls 'schema do...end'"
    end

  [ast, inputs]
end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Frontends
5
+ module Text
6
+ module_function
7
+
8
# Parses a text schema file via the kumi-parser gem and returns
# [ast, inputs]. Parse failures are re-raised with file:line:column
# context and a small code frame; a missing gem raises with install
# instructions.
def load(path:, inputs: {})
  src = File.read(path)

  begin
    require "kumi-parser"
    ast = Kumi::Parser::TextParser.parse(src)
    Core::Analyzer::Debug.info(:parse, kind: :text, file: path, ok: true) if Core::Analyzer::Debug.enabled?
    [ast, inputs]
  rescue LoadError
    raise "kumi-parser gem not available. Install: gem install kumi-parser"
  rescue StandardError => e
    location = (e.respond_to?(:location) && e.location) || {}
    line, col = location.values_at(:line, :column)
    frame = code_frame(src, line, col)
    raise StandardError, "#{path}:#{line || '?'}:#{col || '?'}: #{e.message}\n#{frame}"
  end
end
25
+
26
+ private
27
+
28
# Renders a small excerpt of +src+ around 1-based +line+, marking the
# target line with "➤" and, when +col+ is given, adding a caret row
# beneath it. Returns "" when no line is known.
def self.code_frame(src, line, col, context: 2)
  return "" unless line

  rows = src.lines
  first = [line - 1 - context, 0].max
  last = [line - 1 + context, rows.length - 1].min

  (first..last).flat_map do |idx|
    number = idx + 1
    marker = number == line ? "➤" : " "
    rendered = ["#{marker} %4d | %s" % [number, rows[idx].rstrip]]
    rendered << " | %s^" % (" " * (col - 1)) if number == line && col
    rendered
  end.join("\n")
end
44
+ end
45
+ end
46
+ end