kumi 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/CLAUDE.md +18 -258
  4. data/README.md +188 -121
  5. data/docs/AST.md +1 -1
  6. data/docs/FUNCTIONS.md +52 -8
  7. data/docs/VECTOR_SEMANTICS.md +286 -0
  8. data/docs/compiler_design_principles.md +86 -0
  9. data/docs/features/README.md +15 -2
  10. data/docs/features/hierarchical-broadcasting.md +349 -0
  11. data/docs/features/javascript-transpiler.md +148 -0
  12. data/docs/features/performance.md +1 -3
  13. data/docs/features/s-expression-printer.md +2 -2
  14. data/docs/schema_metadata.md +7 -7
  15. data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
  16. data/examples/game_of_life.rb +2 -4
  17. data/lib/kumi/analyzer.rb +34 -14
  18. data/lib/kumi/compiler.rb +4 -283
  19. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +717 -66
  20. data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
  21. data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
  22. data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -99
  23. data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
  24. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
  25. data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
  26. data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
  27. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +28 -0
  28. data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
  29. data/lib/kumi/core/analyzer/passes/type_checker.rb +9 -5
  30. data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
  31. data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
  32. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +92 -48
  33. data/lib/kumi/core/analyzer/plans.rb +52 -0
  34. data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
  35. data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
  36. data/lib/kumi/core/compiler/access_builder.rb +36 -0
  37. data/lib/kumi/core/compiler/access_planner.rb +219 -0
  38. data/lib/kumi/core/compiler/accessors/base.rb +69 -0
  39. data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
  40. data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
  41. data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
  42. data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
  43. data/lib/kumi/core/compiler_base.rb +137 -0
  44. data/lib/kumi/core/error_reporter.rb +6 -5
  45. data/lib/kumi/core/errors.rb +4 -0
  46. data/lib/kumi/core/explain.rb +157 -205
  47. data/lib/kumi/core/export/node_builders.rb +2 -2
  48. data/lib/kumi/core/export/node_serializers.rb +1 -1
  49. data/lib/kumi/core/function_registry/collection_functions.rb +100 -6
  50. data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
  51. data/lib/kumi/core/function_registry/function_builder.rb +142 -53
  52. data/lib/kumi/core/function_registry/logical_functions.rb +173 -3
  53. data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
  54. data/lib/kumi/core/function_registry.rb +138 -98
  55. data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
  56. data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
  57. data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
  58. data/lib/kumi/core/ir/execution_engine.rb +50 -0
  59. data/lib/kumi/core/ir.rb +58 -0
  60. data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
  61. data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
  62. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +37 -16
  63. data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
  64. data/lib/kumi/core/ruby_parser/parser.rb +1 -1
  65. data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
  66. data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
  67. data/lib/kumi/errors.rb +2 -0
  68. data/lib/kumi/js.rb +23 -0
  69. data/lib/kumi/registry.rb +17 -22
  70. data/lib/kumi/runtime/executable.rb +213 -0
  71. data/lib/kumi/schema.rb +15 -4
  72. data/lib/kumi/schema_metadata.rb +2 -2
  73. data/lib/kumi/support/ir_dump.rb +491 -0
  74. data/lib/kumi/support/s_expression_printer.rb +17 -16
  75. data/lib/kumi/syntax/array_expression.rb +6 -6
  76. data/lib/kumi/syntax/call_expression.rb +4 -4
  77. data/lib/kumi/syntax/cascade_expression.rb +4 -4
  78. data/lib/kumi/syntax/case_expression.rb +4 -4
  79. data/lib/kumi/syntax/declaration_reference.rb +4 -4
  80. data/lib/kumi/syntax/hash_expression.rb +4 -4
  81. data/lib/kumi/syntax/input_declaration.rb +6 -5
  82. data/lib/kumi/syntax/input_element_reference.rb +5 -5
  83. data/lib/kumi/syntax/input_reference.rb +5 -5
  84. data/lib/kumi/syntax/literal.rb +4 -4
  85. data/lib/kumi/syntax/location.rb +5 -0
  86. data/lib/kumi/syntax/node.rb +33 -34
  87. data/lib/kumi/syntax/root.rb +6 -6
  88. data/lib/kumi/syntax/trait_declaration.rb +4 -4
  89. data/lib/kumi/syntax/value_declaration.rb +4 -4
  90. data/lib/kumi/version.rb +1 -1
  91. data/lib/kumi.rb +6 -15
  92. data/scripts/analyze_broadcast_methods.rb +68 -0
  93. data/scripts/analyze_cascade_methods.rb +74 -0
  94. data/scripts/check_broadcasting_coverage.rb +51 -0
  95. data/scripts/find_dead_code.rb +114 -0
  96. metadata +36 -9
  97. data/docs/features/array-broadcasting.md +0 -170
  98. data/lib/kumi/cli.rb +0 -449
  99. data/lib/kumi/core/compiled_schema.rb +0 -43
  100. data/lib/kumi/core/evaluation_wrapper.rb +0 -40
  101. data/lib/kumi/core/schema_instance.rb +0 -111
  102. data/lib/kumi/core/vectorization_metadata.rb +0 -110
  103. data/migrate_to_core_iterative.rb +0 -938
data/lib/kumi/analyzer.rb CHANGED
@@ -4,20 +4,22 @@ module Kumi
4
4
  module Analyzer
5
5
  Result = Struct.new(:definitions, :dependency_graph, :leaf_map, :topo_order, :decl_types, :state, keyword_init: true)
6
6
 
7
- module_function
8
-
9
7
  DEFAULT_PASSES = [
10
8
  Core::Analyzer::Passes::NameIndexer, # 1. Finds all names and checks for duplicates.
11
9
  Core::Analyzer::Passes::InputCollector, # 2. Collects field metadata from input declarations.
12
- Core::Analyzer::Passes::DeclarationValidator, # 3. Checks the basic structure of each rule.
13
- Core::Analyzer::Passes::SemanticConstraintValidator, # 4. Validates DSL semantic constraints at AST level.
14
- Core::Analyzer::Passes::DependencyResolver, # 5. Builds the dependency graph with conditional dependencies.
15
- Core::Analyzer::Passes::UnsatDetector, # 6. Detects unsatisfiable constraints and analyzes cascade mutual exclusion.
16
- Core::Analyzer::Passes::Toposorter, # 7. Creates the final evaluation order, allowing safe cycles.
17
- Core::Analyzer::Passes::BroadcastDetector, # 8. Detects which operations should be broadcast over arrays (must run before type inference).
18
- Core::Analyzer::Passes::TypeInferencer, # 9. Infers types for all declarations (uses vectorization metadata).
19
- Core::Analyzer::Passes::TypeConsistencyChecker, # 10. Validates declared vs inferred type consistency.
20
- Core::Analyzer::Passes::TypeChecker # 11. Validates types using inferred information.
10
+ Core::Analyzer::Passes::DeclarationValidator, # 4. Checks the basic structure of each rule.
11
+ Core::Analyzer::Passes::SemanticConstraintValidator, # 5. Validates DSL semantic constraints at AST level.
12
+ Core::Analyzer::Passes::DependencyResolver, # 6. Builds the dependency graph with conditional dependencies.
13
+ Core::Analyzer::Passes::UnsatDetector, # 7. Detects unsatisfiable constraints and analyzes cascade mutual exclusion.
14
+ Core::Analyzer::Passes::Toposorter, # 8. Creates the final evaluation order, allowing safe cycles.
15
+ Core::Analyzer::Passes::BroadcastDetector, # 9. Detects which operations should be broadcast over arrays.
16
+ Core::Analyzer::Passes::TypeInferencerPass, # 10. Infers types for all declarations (uses vectorization metadata).
17
+ Core::Analyzer::Passes::TypeConsistencyChecker, # 11. Validates declared vs inferred type consistency.
18
+ Core::Analyzer::Passes::TypeChecker, # 12. Validates types using inferred information.
19
+ Core::Analyzer::Passes::InputAccessPlannerPass, # 13. Plans access strategies for input fields.
20
+ Core::Analyzer::Passes::ScopeResolutionPass, # 14. Plans execution scope and lifting needs for declarations.
21
+ Core::Analyzer::Passes::JoinReducePlanningPass, # 15. Plans join/reduce operations (Generates IR Structs)
22
+ Core::Analyzer::Passes::LowerToIRPass # 16. Lowers the schema to IR (Generates IR Structs)
21
23
  ].freeze
22
24
 
23
25
  def self.analyze!(schema, passes: DEFAULT_PASSES, **opts)
@@ -35,7 +37,13 @@ module Kumi
35
37
  begin
36
38
  state = pass_instance.run(errors)
37
39
  rescue StandardError => e
38
- errors << Core::ErrorReporter.create_error(e.message, location: nil, type: :semantic)
40
+ # TODO: - GREATLY improve this, need to capture the context of the error
41
+ # and the pass that failed and line number if relevant
42
+ pass_name = pass_class.name.split("::").last
43
+ message = "Error in Analysis Pass(#{pass_name}): #{e.message}"
44
+ errors << Core::ErrorReporter.create_error(message, location: nil, type: :semantic, backtrace: e.backtrace)
45
+
46
+ raise
39
47
  end
40
48
  end
41
49
  state
@@ -43,11 +51,14 @@ module Kumi
43
51
 
44
52
  def self.handle_analysis_errors(errors)
45
53
  type_errors = errors.select { |e| e.type == :type }
54
+ semantic_errors = errors.select { |e| e.type == :semantic }
46
55
  first_error_location = errors.first.location
47
56
 
48
57
  raise Errors::TypeError.new(format_errors(errors), first_error_location) if type_errors.any?
49
58
 
50
- raise Errors::SemanticError.new(format_errors(errors), first_error_location)
59
+ raise Errors::SemanticError.new(format_errors(errors), first_error_location) if first_error_location || semantic_errors
60
+
61
+ raise Errors::AnalysisError.new(format_errors(errors))
51
62
  end
52
63
 
53
64
  def self.create_analysis_result(state)
@@ -65,7 +76,16 @@ module Kumi
65
76
  def self.format_errors(errors)
66
77
  return "" if errors.empty?
67
78
 
68
- errors.map(&:to_s).join("\n")
79
+ backtrace = errors.first.backtrace
80
+
81
+ message = errors.map(&:to_s).join("\n")
82
+
83
+ message.tap do |msg|
84
+ if backtrace && !backtrace.empty?
85
+ msg << "\n\nBacktrace:\n"
86
+ msg << backtrace[0..10].join("\n") # Limit to first 10 lines for readability
87
+ end
88
+ end
69
89
  end
70
90
  end
71
91
  end
data/lib/kumi/compiler.rb CHANGED
@@ -2,298 +2,19 @@
2
2
 
3
3
  module Kumi
4
4
  # Compiles an analyzed schema into executable lambdas
5
- class Compiler
6
- # ExprCompilers holds per-node compile implementations
7
- module ExprCompilers
8
- def compile_literal(expr)
9
- v = expr.value
10
- ->(_ctx) { v }
11
- end
12
-
13
- def compile_field_node(expr)
14
- compile_field(expr)
15
- end
16
-
17
- def compile_element_field_reference(expr)
18
- path = expr.path
19
-
20
- lambda do |ctx|
21
- # Start with the top-level collection from the context.
22
- collection = ctx[path.first]
23
-
24
- # Recursively map over the nested collections.
25
- # The `dig_and_map` helper will handle any level of nesting.
26
- dig_and_map(collection, path[1..])
27
- end
28
- end
29
-
30
- def compile_binding_node(expr)
31
- name = expr.name
32
- # Handle forward references in cycles by deferring binding lookup to runtime
33
- lambda do |ctx|
34
- fn = @bindings[name].last
35
- fn.call(ctx)
36
- end
37
- end
38
-
39
- def compile_list(expr)
40
- fns = expr.elements.map { |e| compile_expr(e) }
41
- ->(ctx) { fns.map { |fn| fn.call(ctx) } }
42
- end
43
-
44
- def compile_call(expr)
45
- fn_name = expr.fn_name
46
- arg_fns = expr.args.map { |a| compile_expr(a) }
47
-
48
- # Check if this is a vectorized operation
49
- if vectorized_operation?(expr)
50
- ->(ctx) { invoke_vectorized_function(fn_name, arg_fns, ctx, expr.loc) }
51
- else
52
- ->(ctx) { invoke_function(fn_name, arg_fns, ctx, expr.loc) }
53
- end
54
- end
55
-
56
- def compile_cascade(expr)
57
- # Check if current declaration is vectorized
58
- broadcast_meta = @analysis.state[:broadcasts]
59
- is_vectorized = @current_declaration && broadcast_meta&.dig(:vectorized_operations, @current_declaration)
60
-
61
- # For vectorized cascades, we need to transform conditions that use all?
62
- pairs = if is_vectorized
63
- expr.cases.map do |c|
64
- condition_fn = transform_vectorized_condition(c.condition)
65
- result_fn = compile_expr(c.result)
66
- [condition_fn, result_fn]
67
- end
68
- else
69
- expr.cases.map { |c| [compile_expr(c.condition), compile_expr(c.result)] }
70
- end
71
-
72
- if is_vectorized
73
- lambda do |ctx|
74
- # This cascade can be vectorized - check if we actually need to at runtime
75
- # Evaluate all conditions and results to check for arrays
76
- cond_results = pairs.map { |cond, _res| cond.call(ctx) }
77
- res_results = pairs.map { |_cond, res| res.call(ctx) }
78
-
79
- # Check if any conditions or results are arrays (vectorized)
80
- has_vectorized_data = (cond_results + res_results).any?(Array)
81
-
82
- if has_vectorized_data
83
- # Apply element-wise cascade evaluation
84
- array_length = cond_results.find { |v| v.is_a?(Array) }&.length ||
85
- res_results.find { |v| v.is_a?(Array) }&.length || 1
86
-
87
- (0...array_length).map do |i|
88
- pairs.each_with_index do |(_cond, _res), pair_idx|
89
- cond_val = cond_results[pair_idx].is_a?(Array) ? cond_results[pair_idx][i] : cond_results[pair_idx]
90
-
91
- if cond_val
92
- res_val = res_results[pair_idx].is_a?(Array) ? res_results[pair_idx][i] : res_results[pair_idx]
93
- break res_val
94
- end
95
- end || nil
96
- end
97
- else
98
- # All data is scalar - use regular cascade evaluation
99
- pairs.each_with_index do |(_cond, _res), pair_idx|
100
- return res_results[pair_idx] if cond_results[pair_idx]
101
- end
102
- nil
103
- end
104
- end
105
- else
106
- lambda do |ctx|
107
- pairs.each { |cond, res| return res.call(ctx) if cond.call(ctx) }
108
- nil
109
- end
110
- end
111
- end
112
-
113
- def transform_vectorized_condition(condition_expr)
114
- # If this is fn(:all?, [trait_ref]), extract the trait_ref for vectorized cascades
115
- if condition_expr.is_a?(Kumi::Syntax::CallExpression) &&
116
- condition_expr.fn_name == :all? &&
117
- condition_expr.args.length == 1
118
-
119
- arg = condition_expr.args.first
120
- if arg.is_a?(Kumi::Syntax::ArrayExpression) && arg.elements.length == 1
121
- trait_ref = arg.elements.first
122
- return compile_expr(trait_ref)
123
- end
124
- end
125
-
126
- # Otherwise compile normally
127
- compile_expr(condition_expr)
128
- end
129
- end
130
-
131
- include ExprCompilers
132
-
133
- # Map node classes to compiler methods
134
- DISPATCH = {
135
- Kumi::Syntax::Literal => :compile_literal,
136
- Kumi::Syntax::InputReference => :compile_field_node,
137
- Kumi::Syntax::InputElementReference => :compile_element_field_reference,
138
- Kumi::Syntax::DeclarationReference => :compile_binding_node,
139
- Kumi::Syntax::ArrayExpression => :compile_list,
140
- Kumi::Syntax::CallExpression => :compile_call,
141
- Kumi::Syntax::CascadeExpression => :compile_cascade
142
- }.freeze
143
-
5
+ class Compiler < Core::CompilerBase
144
6
  def self.compile(schema, analyzer:)
145
7
  new(schema, analyzer).compile
146
8
  end
147
9
 
148
10
  def initialize(schema, analyzer)
149
- @schema = schema
150
- @analysis = analyzer
11
+ super
151
12
  @bindings = {}
152
13
  end
153
14
 
154
15
  def compile
155
- build_index
156
- @analysis.topo_order.each do |name|
157
- decl = @index[name] or raise("Unknown binding #{name}")
158
- compile_declaration(decl)
159
- end
160
-
161
- Core::CompiledSchema.new(@bindings.freeze)
162
- end
163
-
164
- private
165
-
166
- def build_index
167
- @index = {}
168
- @schema.attributes.each { |a| @index[a.name] = a }
169
- @schema.traits.each { |t| @index[t.name] = t }
170
- end
171
-
172
- def dig_and_map(collection, path_segments)
173
- return collection unless collection.is_a?(Array)
174
-
175
- current_segment = path_segments.first
176
- remaining_segments = path_segments[1..]
177
-
178
- collection.map do |element|
179
- value = element[current_segment]
180
-
181
- # If there are more segments, recurse. Otherwise, return the value.
182
- if remaining_segments.empty?
183
- value
184
- else
185
- dig_and_map(value, remaining_segments)
186
- end
187
- end
188
- end
189
-
190
- def compile_declaration(decl)
191
- @current_declaration = decl.name
192
- kind = decl.is_a?(Kumi::Syntax::TraitDeclaration) ? :trait : :attr
193
- fn = compile_expr(decl.expression)
194
- @bindings[decl.name] = [kind, fn]
195
- @current_declaration = nil
196
- end
197
-
198
- # Dispatch to the appropriate compile_* method
199
- def compile_expr(expr)
200
- method = DISPATCH.fetch(expr.class)
201
- send(method, expr)
202
- end
203
-
204
- def compile_field(node)
205
- name = node.name
206
- loc = node.loc
207
- lambda do |ctx|
208
- return ctx[name] if ctx.respond_to?(:key?) && ctx.key?(name)
209
-
210
- raise Errors::RuntimeError,
211
- "Key '#{name}' not found at #{loc}. Available: #{ctx.respond_to?(:keys) ? ctx.keys.join(', ') : 'N/A'}"
212
- end
213
- end
214
-
215
- def vectorized_operation?(expr)
216
- # Check if this operation uses vectorized inputs
217
- broadcast_meta = @analysis.state[:broadcasts]
218
- return false unless broadcast_meta
219
-
220
- # Reduction functions are NOT vectorized operations - they consume arrays
221
- return false if Kumi::Registry.reducer?(expr.fn_name)
222
-
223
- expr.args.any? do |arg|
224
- case arg
225
- when Kumi::Syntax::InputElementReference
226
- broadcast_meta[:array_fields]&.key?(arg.path.first)
227
- when Kumi::Syntax::DeclarationReference
228
- broadcast_meta[:vectorized_operations]&.key?(arg.name)
229
- else
230
- false
231
- end
232
- end
233
- end
234
-
235
- def invoke_vectorized_function(name, arg_fns, ctx, loc)
236
- # Evaluate arguments
237
- values = arg_fns.map { |fn| fn.call(ctx) }
238
-
239
- # Check if any argument is vectorized (array)
240
- has_vectorized_args = values.any?(Array)
241
-
242
- if has_vectorized_args
243
- # Apply function with broadcasting to all vectorized arguments
244
- vectorized_function_call(name, values)
245
- else
246
- # All arguments are scalars - regular function call
247
- fn = Kumi::Registry.fetch(name)
248
- fn.call(*values)
249
- end
250
- rescue StandardError => e
251
- enhanced_message = "Error calling fn(:#{name}) at #{loc}: #{e.message}"
252
- runtime_error = Errors::RuntimeError.new(enhanced_message)
253
- runtime_error.set_backtrace(e.backtrace)
254
- runtime_error.define_singleton_method(:cause) { e }
255
- raise runtime_error
256
- end
257
-
258
- def vectorized_function_call(fn_name, values)
259
- # Get the function from registry
260
- fn = Kumi::Registry.fetch(fn_name)
261
-
262
- # Find array dimensions for broadcasting
263
- array_values = values.select { |v| v.is_a?(Array) }
264
- return fn.call(*values) if array_values.empty?
265
-
266
- # All arrays should have the same length (validation could be added)
267
- array_length = array_values.first.size
268
-
269
- # Broadcast and apply function element-wise
270
- (0...array_length).map do |i|
271
- element_args = values.map do |v|
272
- v.is_a?(Array) ? v[i] : v # Broadcast scalars
273
- end
274
- fn.call(*element_args)
275
- end
276
- end
277
-
278
- def invoke_function(name, arg_fns, ctx, loc)
279
- fn = Kumi::Registry.fetch(name)
280
- values = arg_fns.map { |fn| fn.call(ctx) }
281
- fn.call(*values)
282
- rescue StandardError => e
283
- # Preserve original error class and backtrace while adding context
284
- enhanced_message = "Error calling fn(:#{name}) at #{loc}: #{e.message}"
285
-
286
- if e.is_a?(Kumi::Core::Errors::Error)
287
- # Re-raise Kumi errors with enhanced message but preserve type
288
- e.define_singleton_method(:message) { enhanced_message }
289
- raise e
290
- else
291
- # For non-Kumi errors, wrap in RuntimeError but preserve original error info
292
- runtime_error = Errors::RuntimeError.new(enhanced_message)
293
- runtime_error.set_backtrace(e.backtrace)
294
- runtime_error.define_singleton_method(:cause) { e }
295
- raise runtime_error
296
- end
16
+ # Switch to LIR: Use the analysis state instead of old compilation
17
+ Runtime::Executable.from_analysis(@analysis.state)
297
18
  end
298
19
  end
299
20
  end