kumi 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/BACKLOG.md +34 -0
  4. data/CLAUDE.md +4 -6
  5. data/README.md +0 -18
  6. data/config/functions.yaml +352 -0
  7. data/docs/dev/analyzer-debug.md +52 -0
  8. data/docs/dev/parse-command.md +64 -0
  9. data/docs/functions/analyzer_integration.md +199 -0
  10. data/docs/functions/signatures.md +171 -0
  11. data/examples/hash_objects_demo.rb +138 -0
  12. data/golden/array_operations/schema.kumi +17 -0
  13. data/golden/cascade_logic/schema.kumi +16 -0
  14. data/golden/mixed_nesting/schema.kumi +42 -0
  15. data/golden/simple_math/schema.kumi +10 -0
  16. data/lib/kumi/analyzer.rb +72 -21
  17. data/lib/kumi/core/analyzer/checkpoint.rb +72 -0
  18. data/lib/kumi/core/analyzer/debug.rb +167 -0
  19. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +1 -3
  20. data/lib/kumi/core/analyzer/passes/function_signature_pass.rb +199 -0
  21. data/lib/kumi/core/analyzer/passes/load_input_cse.rb +120 -0
  22. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +72 -157
  23. data/lib/kumi/core/analyzer/passes/toposorter.rb +37 -1
  24. data/lib/kumi/core/analyzer/state_serde.rb +64 -0
  25. data/lib/kumi/core/analyzer/structs/access_plan.rb +12 -10
  26. data/lib/kumi/core/compiler/access_planner.rb +3 -2
  27. data/lib/kumi/core/function_registry/collection_functions.rb +3 -1
  28. data/lib/kumi/core/functions/dimension.rb +98 -0
  29. data/lib/kumi/core/functions/dtypes.rb +20 -0
  30. data/lib/kumi/core/functions/errors.rb +11 -0
  31. data/lib/kumi/core/functions/kernel_adapter.rb +45 -0
  32. data/lib/kumi/core/functions/loader.rb +119 -0
  33. data/lib/kumi/core/functions/registry_v2.rb +68 -0
  34. data/lib/kumi/core/functions/shape.rb +70 -0
  35. data/lib/kumi/core/functions/signature.rb +122 -0
  36. data/lib/kumi/core/functions/signature_parser.rb +86 -0
  37. data/lib/kumi/core/functions/signature_resolver.rb +272 -0
  38. data/lib/kumi/core/ir/execution_engine/interpreter.rb +98 -7
  39. data/lib/kumi/core/ir/execution_engine/profiler.rb +202 -0
  40. data/lib/kumi/dev/ir.rb +75 -0
  41. data/lib/kumi/dev/parse.rb +105 -0
  42. data/lib/kumi/dev/runner.rb +83 -0
  43. data/lib/kumi/frontends/ruby.rb +28 -0
  44. data/lib/kumi/frontends/text.rb +46 -0
  45. data/lib/kumi/frontends.rb +29 -0
  46. data/lib/kumi/kernels/ruby/aggregate_core.rb +105 -0
  47. data/lib/kumi/kernels/ruby/datetime_scalar.rb +21 -0
  48. data/lib/kumi/kernels/ruby/mask_scalar.rb +15 -0
  49. data/lib/kumi/kernels/ruby/scalar_core.rb +63 -0
  50. data/lib/kumi/kernels/ruby/string_scalar.rb +19 -0
  51. data/lib/kumi/kernels/ruby/vector_struct.rb +39 -0
  52. data/lib/kumi/runtime/executable.rb +57 -26
  53. data/lib/kumi/schema.rb +4 -4
  54. data/lib/kumi/support/diff.rb +22 -0
  55. data/lib/kumi/support/ir_render.rb +61 -0
  56. data/lib/kumi/version.rb +1 -1
  57. data/lib/kumi.rb +2 -0
  58. data/performance_results.txt +63 -0
  59. data/scripts/test_mixed_nesting_performance.rb +206 -0
  60. metadata +45 -5
  61. data/docs/features/javascript-transpiler.md +0 -148
  62. data/lib/kumi/js.rb +0 -23
  63. data/lib/kumi/support/ir_dump.rb +0 -491
@@ -1,491 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Kumi
4
- module Support
5
- # Pretty printer for IR modules - makes IR debugging much more readable
6
- module IRDump
7
- class << self
8
- def pretty_print(ir_module, show_inputs: true, analysis_state: nil)
9
- @analysis_state = analysis_state # Store for use in other methods
10
- output = []
11
-
12
- if analysis_state
13
- output << "=" * 60
14
- output << "ANALYSIS STATE"
15
- output << "=" * 60
16
- output << format_analysis_state(analysis_state)
17
- output << ""
18
- end
19
-
20
- if show_inputs
21
- output << "=" * 60
22
- output << "INPUT METADATA"
23
- output << "=" * 60
24
- ir_module.inputs.each do |name, meta|
25
- output << format_input(name, meta)
26
- end
27
- output << ""
28
- end
29
-
30
- output << "=" * 60
31
- output << "IR DECLARATIONS (#{ir_module.decls.size})"
32
- output << "=" * 60
33
-
34
- ir_module.decls.each_with_index do |decl, decl_idx|
35
- output << ""
36
- output << format_declaration_header(decl, decl_idx)
37
- output << format_declaration_body(decl)
38
- end
39
-
40
- output.join("\n")
41
- end
42
-
43
- private
44
-
45
- def format_analysis_state(state)
46
- output = []
47
-
48
- # IR Lowering Overview
49
- output << "IR LOWERING STRATEGY:"
50
- output << " • Plan selection: choose :read/:ravel/:each_indexed/:materialize per input path"
51
- output << " • Shape tracking: Scalar vs Vec(scope, has_idx) for every slot"
52
- output << " • Auto-alignment: AlignTo for elementwise maps across compatible scopes"
53
- output << " • Twin generation: Vec declarations get __vec twin + Lift/Reduce for public access"
54
- output << " • Reducers: aggregate functions (sum, max, etc.) use Reduce ops"
55
- output << " • Cascades: compile to nested if/switch with lazy evaluation via guards"
56
- output << ""
57
-
58
- # Evaluation Order
59
- if state[:evaluation_order]
60
- order = state[:evaluation_order]
61
- output << "EVALUATION ORDER (#{order.size} declarations):"
62
- output << " Topologically sorted: #{order.join(' → ')}"
63
- output << ""
64
- end
65
-
66
- # Access Plans
67
- if state[:access_plans]
68
- plans = state[:access_plans]
69
- total_plans = plans.values.map(&:size).sum
70
- output << "ACCESS PLANS (#{plans.size} input paths, #{total_plans} total plans):"
71
- output << " Modes: :read (scalar), :ravel (flat vec), :each_indexed (indexed vec), :materialize (nested)"
72
- plans.each do |path, path_plans|
73
- output << " #{path}:"
74
- path_plans.each do |plan|
75
- mode_info = case plan.mode
76
- when :read then "scalar read (no traversal)"
77
- when :ravel then "flattened vector (leaf values only)"
78
- when :each_indexed then "indexed vector (with hierarchical indices)"
79
- when :materialize then "structured data (preserves nesting)"
80
- else plan.mode.to_s
81
- end
82
- scope_info = plan.scope.empty? ? "" : " @#{plan.scope.join('.')}"
83
- output << " #{plan.accessor_key} → #{mode_info}#{scope_info} (depth=#{plan.depth})"
84
- end
85
- end
86
- output << ""
87
- end
88
-
89
- # Join/Reduce Plans
90
- if state[:join_reduce_plans] && !state[:join_reduce_plans].empty?
91
- plans = state[:join_reduce_plans]
92
- output << "JOIN/REDUCE PLANS (#{plans.size} declarations):"
93
- plans.each do |name, plan|
94
- if plan.is_a?(Kumi::Core::Analyzer::Plans::Join)
95
- # Extract readable info from the Join struct
96
- policy = plan.policy
97
- target_scope = plan.target_scope
98
-
99
- parts = []
100
- parts << "policy=#{policy}"
101
- parts << "target_scope=#{target_scope}" unless target_scope.empty?
102
-
103
- output << " #{name}: #{parts.join(', ')}"
104
- next
105
- elsif !plan.is_a?(Kumi::Core::Analyzer::Plans::Reduce)
106
- output << " #{name}: (unknown plan type: #{plan.class})"
107
- next
108
- end
109
-
110
- # Extract readable info from the Reduce struct
111
- function = plan.function if
112
- axis = plan.axis
113
- source_scope = plan.source_scope
114
- result_scope = plan.result_scope
115
- flatten_args = plan.flatten_args
116
-
117
- parts = []
118
- parts << "function=#{function}"
119
- parts << "axis=#{axis}"
120
- parts << "source_scope=#{source_scope}"
121
- parts << "result_scope=#{result_scope}"
122
- parts << "flatten_args=#{flatten_args}"
123
-
124
- output << " #{name}: #{parts.join(', ')}"
125
- end
126
- output << ""
127
- end
128
-
129
- # Scope Plans
130
- if state[:scope_plans] && !state[:scope_plans].empty?
131
- plans = state[:scope_plans]
132
- output << "SCOPE PLANS (#{plans.size} declarations):"
133
- plans.each do |name, plan|
134
- # Extract readable info from the Scope struct
135
- scope = plan.scope
136
- lifts = plan.lifts
137
- join_hint = plan.join_hint
138
- arg_shapes = plan.arg_shapes
139
-
140
- parts = []
141
- parts << "scope=#{scope}"
142
- parts << "lifts=#{lifts}"
143
- parts << "join_hint=#{join_hint}"
144
- parts << "arg_shapes=#{arg_shapes}"
145
-
146
- if parts.empty?
147
- output << " #{name}: (default scope)"
148
- else
149
- output << " #{name}: #{parts.join(', ')}"
150
- end
151
- end
152
- output << ""
153
- end
154
-
155
- # Dependencies
156
- if state[:dependencies]
157
- deps = state[:dependencies]
158
- output << "DEPENDENCIES (#{deps.size} declarations):"
159
- deps.each do |name, dep_list|
160
- if dep_list.empty?
161
- output << " #{name}: (no dependencies)"
162
- else
163
- # Extract readable info from dependency edges
164
- dep_info = dep_list.map do |dep|
165
- if dep.respond_to?(:name) && dep.respond_to?(:kind)
166
- "#{dep.name} (#{dep.kind})"
167
- elsif dep.respond_to?(:name)
168
- dep.name.to_s
169
- elsif dep.respond_to?(:to_s)
170
- dep.to_s.split('::').last || dep.to_s
171
- else
172
- dep.inspect
173
- end
174
- end
175
- output << " #{name}: depends on #{dep_info.join(', ')}"
176
- end
177
- end
178
- output << ""
179
- end
180
-
181
- # Type Information
182
- if state[:type_metadata]
183
- types = state[:type_metadata]
184
- output << "TYPE METADATA (#{types.size} declarations):"
185
- types.each do |name, type_info|
186
- type_str = case type_info
187
- when Symbol then type_info.to_s
188
- when Hash then type_info.inspect
189
- else type_info.to_s
190
- end
191
- output << " #{name}: #{type_str}"
192
- end
193
- output << ""
194
- end
195
-
196
- # Functions Required
197
- if state[:functions_required]
198
- funcs = state[:functions_required]
199
- output << "FUNCTIONS REQUIRED (#{funcs.size} unique functions):"
200
- output << " #{funcs.sort.join(', ')}"
201
- output << ""
202
- end
203
-
204
- # Declarations
205
- if state[:declarations]
206
- decls = state[:declarations]
207
- output << "DECLARATIONS (#{decls.size} total):"
208
- decls.each do |name, decl|
209
- kind = decl.is_a?(Kumi::Syntax::ValueDeclaration) ? "VALUE" : "TRAIT"
210
- expr_type = decl.expression.class.name.split('::').last
211
- output << " #{name}: #{kind} (#{expr_type})"
212
- end
213
- output << ""
214
- end
215
-
216
- # Vector Twin Tracking (internal state)
217
- if state[:vec_meta]
218
- vec_meta = state[:vec_meta] || {}
219
-
220
- output << "VECTOR TWINS (internal tracking):"
221
- if vec_meta.empty?
222
- output << " (no vector declarations)"
223
- else
224
- twin_names = vec_meta.keys.sort
225
- output << " Twins created: #{twin_names.join(', ')}"
226
- vec_meta.each do |twin_name, meta|
227
- scope_info = meta[:scope].empty? ? "[]" : "[:#{meta[:scope].join(', :')}]"
228
- idx_info = meta[:has_idx] ? "indexed" : "ravel"
229
- output << " #{twin_name}: vec[#{idx_info}]#{scope_info}"
230
- end
231
- end
232
- output << ""
233
- end
234
-
235
- # Analysis Errors (if any)
236
- if state[:errors] && !state[:errors].empty?
237
- errors = state[:errors]
238
- output << "ANALYSIS ERRORS (#{errors.size}):"
239
- errors.each_with_index do |error, idx|
240
- output << " [#{idx + 1}] #{error}"
241
- end
242
- output << ""
243
- end
244
-
245
- output.join("\n")
246
- end
247
-
248
- def format_input(name, meta)
249
- type_info = meta[:type] ? " : #{meta[:type]}" : ""
250
- domain_info = meta[:domain] ? " ∈ #{meta[:domain]}" : ""
251
- " #{name}#{type_info}#{domain_info}"
252
- end
253
-
254
- def format_declaration_header(decl, decl_idx)
255
- # Enhanced shape annotation with scope and type information
256
- vec_twin_name = :"#{decl.name}__vec"
257
- vec_meta = @analysis_state&.dig(:vec_meta)
258
-
259
- # Get type information from analysis state
260
- inferred_types = @analysis_state&.dig(:inferred_types) || {}
261
- inferred_type = inferred_types[decl.name]
262
- type_annotation = format_type_annotation(inferred_type, decl)
263
-
264
- if decl.shape == :vec && vec_meta && vec_meta[vec_twin_name]
265
- has_idx = vec_meta[vec_twin_name][:has_idx]
266
- scope = vec_meta[vec_twin_name][:scope] || []
267
- scope_str = scope.empty? ? "" : " by :#{scope.join(', :')}"
268
-
269
- if has_idx
270
- public_surface = "nested_arrays#{type_annotation}#{scope_str}"
271
- twin_annotation = "vec[indexed][:#{scope.join(', :')}]"
272
- else
273
- public_surface = "flat_array#{type_annotation}#{scope_str}"
274
- twin_annotation = "vec[ravel][:#{scope.join(', :')}]"
275
- end
276
-
277
- shape_info = " [public: #{public_surface}] (twin: #{twin_annotation})"
278
- elsif decl.shape == :vec
279
- public_surface = "vector#{type_annotation}"
280
- shape_info = " [public: #{public_surface}] (twin: vec[unknown])"
281
- else
282
- shape_info = " [public: scalar#{type_annotation}]"
283
- end
284
-
285
- kind_info = decl.kind.to_s.upcase
286
-
287
- # Count operation types for summary
288
- op_counts = decl.ops.group_by(&:tag).transform_values(&:size)
289
- op_summary = " (#{decl.ops.size} ops: #{op_counts.map { |k, v| "#{k}=#{v}" }.join(', ')})"
290
-
291
- "[#{decl_idx}] #{kind_info} #{decl.name}#{shape_info}#{op_summary}"
292
- end
293
-
294
- def format_declaration_body(decl)
295
- lines = []
296
- @decl_ops_context = decl.ops # Store for broadcast detection
297
-
298
- decl.ops.each_with_index do |op, op_idx|
299
- lines << format_operation(op, op_idx)
300
- end
301
-
302
- lines.map { |line| " #{line}" }
303
- end
304
-
305
- def format_operation(op, op_idx)
306
- case op.tag
307
- when :const
308
- value = op.attrs[:value]
309
- type_hint = case value
310
- when String then " (str)"
311
- when Integer then " (int)"
312
- when Float then " (float)"
313
- when TrueClass, FalseClass then " (bool)"
314
- else ""
315
- end
316
- "#{op_idx}: CONST #{value.inspect}#{type_hint} → s#{op_idx}"
317
-
318
- when :load_input
319
- plan_id = op.attrs[:plan_id]
320
- scope = op.attrs[:scope] || []
321
- is_scalar = op.attrs[:is_scalar]
322
- has_idx = op.attrs[:has_idx]
323
-
324
- # Parse plan_id to show what it's accessing
325
- path_info = plan_id.to_s.split(':')
326
- path = path_info[0]
327
- mode = path_info[1] || "read"
328
-
329
- if is_scalar
330
- shape_info = "scalar"
331
- else
332
- idx_info = has_idx ? "indexed" : "ravel"
333
- scope_info = scope.empty? ? "[]" : "[:#{scope.join(', :')}]"
334
- shape_info = "vec[#{idx_info}]#{scope_info}"
335
- end
336
-
337
- "#{op_idx}: #{path} → #{shape_info} → s#{op_idx}"
338
-
339
- when :ref
340
- name = op.attrs[:name]
341
- is_twin = name.to_s.end_with?("__vec")
342
-
343
- if is_twin
344
- # Look up scope information for twin
345
- vec_meta = @analysis_state&.dig(:vec_meta)
346
- if vec_meta && vec_meta[name]
347
- scope = vec_meta[name][:scope] || []
348
- has_idx = vec_meta[name][:has_idx]
349
- shape_info = has_idx ? "vec[indexed]" : "vec[ravel]"
350
- scope_info = scope.empty? ? "" : "[:#{scope.join(', :')}]"
351
- "#{op_idx}: REF #{name} → #{shape_info}#{scope_info} → s#{op_idx}"
352
- else
353
- "#{op_idx}: REF #{name} → vec[unknown] → s#{op_idx}"
354
- end
355
- else
356
- "#{op_idx}: REF #{name} → scalar → s#{op_idx}"
357
- end
358
-
359
- when :map
360
- fn_name = op.attrs[:fn]
361
- argc = op.attrs[:argc]
362
- args_str = op.args.map { |slot| "s#{slot}" }.join(", ")
363
-
364
- # Add function type information
365
- fn_type = case fn_name
366
- when :multiply, :add, :subtract, :divide then " (math)"
367
- when :>, :<, :>=, :<=, :==, :!= then " (comparison)"
368
- when :and, :or, :not then " (logic)"
369
- when :if then " (conditional)"
370
- else ""
371
- end
372
-
373
- # Check if this represents scalar-to-vector broadcast
374
- broadcast_note = ""
375
- if argc == 2 && op.args.size == 2
376
- # Look at the previous operations to see if we have scalar + vector
377
- # This is a heuristic - we'd need more context for perfect detection
378
- if @analysis_state
379
- # Try to detect scalar broadcast pattern: const followed by map
380
- prev_ops = @decl_ops_context
381
- if prev_ops && prev_ops[op.args[0]]&.tag == :const && prev_ops[op.args[1]]&.tag == :load_input
382
- broadcast_note = " [scalar broadcast]"
383
- elsif prev_ops && prev_ops[op.args[1]]&.tag == :const && prev_ops[op.args[0]]&.tag == :load_input
384
- broadcast_note = " [scalar broadcast]"
385
- end
386
- end
387
- end
388
-
389
- "#{op_idx}: MAP #{fn_name}#{fn_type}(#{args_str})#{broadcast_note} → s#{op_idx}"
390
-
391
- when :reduce
392
- fn_name = op.attrs[:fn]
393
- axis = op.attrs[:axis] || []
394
- result_scope = op.attrs[:result_scope] || []
395
- flatten_args = op.attrs[:flatten_args] || []
396
- args_str = op.args.map { |slot| "s#{slot}" }.join(", ")
397
-
398
- # Show grouping information with cleaner format
399
- if result_scope.empty?
400
- result_shape = "scalar"
401
- else
402
- result_shape = "grouped_vec[:#{result_scope.join(', :')}]"
403
- end
404
-
405
- axis_str = axis.empty? ? "" : " axis=[:#{axis.join(', :')}]"
406
- "#{op_idx}: REDUCE #{fn_name}(#{args_str})#{axis_str} → #{result_shape} → s#{op_idx}"
407
-
408
- when :array
409
- size = op.attrs[:size] || op.args.size
410
- args_str = op.args.map { |slot| "s#{slot}" }.join(", ")
411
- "#{op_idx}: ARRAY [#{args_str}] (#{size} elements) → s#{op_idx}"
412
-
413
- when :switch
414
- cases = op.attrs[:cases] || []
415
- default = op.attrs[:default]
416
- cases_str = cases.map { |(cond, val)| "s#{cond}→s#{val}" }.join(", ")
417
- default_str = default ? " else s#{default}" : ""
418
- "#{op_idx}: SWITCH {#{cases_str}#{default_str}} → s#{op_idx}"
419
-
420
- when :lift
421
- to_scope = op.attrs[:to_scope] || []
422
- args_str = op.args.map { |slot| "s#{slot}" }.join(", ")
423
- depth = to_scope.length
424
- scope_str = to_scope.empty? ? "" : " @:#{to_scope.join(', :')}"
425
- "#{op_idx}: LIFT #{args_str}#{scope_str} depth=#{depth} (→ nested_arrays[|#{to_scope.join('|')}|]) → s#{op_idx}"
426
-
427
- when :align_to
428
- to_scope = op.attrs[:to_scope] || []
429
- require_unique = op.attrs[:require_unique]
430
- on_missing = op.attrs[:on_missing]
431
- target_slot = op.args[0]
432
- source_slot = op.args[1]
433
-
434
- flags = []
435
- flags << "unique" if require_unique
436
- flags << "on_missing=#{on_missing}" if on_missing && on_missing != :error
437
- flag_str = flags.empty? ? "" : " (#{flags.join(', ')})"
438
-
439
- scope_str = to_scope.empty? ? "" : "[:#{to_scope.join(', :')}]"
440
- "#{op_idx}: ALIGN_TO target=s#{target_slot} source=s#{source_slot} to #{scope_str}#{flag_str} → s#{op_idx}"
441
-
442
- when :store
443
- name = op.attrs[:name]
444
- source_slot = op.args[0]
445
- is_twin = name.to_s.end_with?("__vec")
446
- store_type = is_twin ? " (vec twin)" : " (public)"
447
- "#{op_idx}: STORE #{name}#{store_type} ← s#{source_slot}"
448
-
449
- when :guard_push
450
- cond_slot = op.attrs[:cond_slot]
451
- "#{op_idx}: GUARD_PUSH s#{cond_slot} (enable if s#{cond_slot} is truthy)"
452
-
453
- when :guard_pop
454
- "#{op_idx}: GUARD_POP (restore previous guard state)"
455
-
456
- else
457
- # Fallback for unknown operations with enhanced information
458
- attrs_items = op.attrs.map { |k, v| "#{k}=#{v.inspect}" }
459
- attrs_str = attrs_items.empty? ? "" : " {#{attrs_items.join(', ')}}"
460
- args_str = op.args.empty? ? "" : " args=[#{op.args.join(', ')}]"
461
- "#{op_idx}: #{op.tag.to_s.upcase}#{attrs_str}#{args_str} → s#{op_idx}"
462
- end
463
- end
464
-
465
- def format_type_annotation(inferred_type, decl)
466
- return "" unless inferred_type
467
-
468
- case inferred_type
469
- when Symbol
470
- "(#{inferred_type.to_s.capitalize})"
471
- when Hash
472
- if inferred_type.key?(:array)
473
- element_type = inferred_type[:array]
474
- element_name = element_type.is_a?(Symbol) ? element_type.to_s.capitalize : element_type.to_s
475
- "(#{element_name})"
476
- else
477
- "(#{inferred_type.inspect})"
478
- end
479
- else
480
- # Fallback based on declaration type
481
- if decl.is_a?(Kumi::Syntax::TraitDeclaration)
482
- "(Boolean)"
483
- else
484
- "(#{inferred_type.class.name.split('::').last})"
485
- end
486
- end
487
- end
488
- end
489
- end
490
- end
491
- end