kumi 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +160 -8
  3. data/README.md +278 -200
  4. data/{documents → docs}/AST.md +29 -29
  5. data/{documents → docs}/DSL.md +3 -3
  6. data/{documents → docs}/SYNTAX.md +107 -24
  7. data/docs/features/README.md +45 -0
  8. data/docs/features/analysis-cascade-mutual-exclusion.md +89 -0
  9. data/docs/features/analysis-type-inference.md +42 -0
  10. data/docs/features/analysis-unsat-detection.md +71 -0
  11. data/docs/features/array-broadcasting.md +170 -0
  12. data/docs/features/input-declaration-system.md +42 -0
  13. data/docs/features/performance.md +16 -0
  14. data/examples/federal_tax_calculator_2024.rb +43 -40
  15. data/examples/game_of_life.rb +97 -0
  16. data/examples/simple_rpg_game.rb +1000 -0
  17. data/examples/static_analysis_errors.rb +178 -0
  18. data/examples/wide_schema_compilation_and_evaluation_benchmark.rb +1 -1
  19. data/lib/kumi/analyzer/analysis_state.rb +37 -0
  20. data/lib/kumi/analyzer/constant_evaluator.rb +22 -16
  21. data/lib/kumi/analyzer/passes/broadcast_detector.rb +251 -0
  22. data/lib/kumi/analyzer/passes/{definition_validator.rb → declaration_validator.rb} +8 -7
  23. data/lib/kumi/analyzer/passes/dependency_resolver.rb +106 -26
  24. data/lib/kumi/analyzer/passes/input_collector.rb +105 -23
  25. data/lib/kumi/analyzer/passes/name_indexer.rb +2 -2
  26. data/lib/kumi/analyzer/passes/pass_base.rb +11 -28
  27. data/lib/kumi/analyzer/passes/semantic_constraint_validator.rb +110 -0
  28. data/lib/kumi/analyzer/passes/toposorter.rb +45 -9
  29. data/lib/kumi/analyzer/passes/type_checker.rb +34 -11
  30. data/lib/kumi/analyzer/passes/type_consistency_checker.rb +2 -1
  31. data/lib/kumi/analyzer/passes/type_inferencer.rb +128 -21
  32. data/lib/kumi/analyzer/passes/unsat_detector.rb +312 -13
  33. data/lib/kumi/analyzer/passes/visitor_pass.rb +4 -3
  34. data/lib/kumi/analyzer.rb +41 -24
  35. data/lib/kumi/atom_unsat_solver.rb +45 -0
  36. data/lib/kumi/cli.rb +449 -0
  37. data/lib/kumi/compiler.rb +194 -16
  38. data/lib/kumi/constraint_relationship_solver.rb +638 -0
  39. data/lib/kumi/domain/validator.rb +0 -4
  40. data/lib/kumi/error_reporter.rb +6 -6
  41. data/lib/kumi/evaluation_wrapper.rb +20 -4
  42. data/lib/kumi/explain.rb +28 -28
  43. data/lib/kumi/export/node_registry.rb +26 -12
  44. data/lib/kumi/export/node_serializers.rb +1 -1
  45. data/lib/kumi/function_registry/collection_functions.rb +117 -9
  46. data/lib/kumi/function_registry/function_builder.rb +4 -3
  47. data/lib/kumi/function_registry.rb +8 -2
  48. data/lib/kumi/input/type_matcher.rb +3 -0
  49. data/lib/kumi/input/validator.rb +0 -3
  50. data/lib/kumi/parser/declaration_reference_proxy.rb +36 -0
  51. data/lib/kumi/parser/dsl_cascade_builder.rb +19 -8
  52. data/lib/kumi/parser/expression_converter.rb +80 -12
  53. data/lib/kumi/parser/input_builder.rb +40 -9
  54. data/lib/kumi/parser/input_field_proxy.rb +46 -0
  55. data/lib/kumi/parser/input_proxy.rb +3 -3
  56. data/lib/kumi/parser/nested_input.rb +15 -0
  57. data/lib/kumi/parser/parser.rb +2 -0
  58. data/lib/kumi/parser/schema_builder.rb +10 -9
  59. data/lib/kumi/parser/sugar.rb +171 -18
  60. data/lib/kumi/schema.rb +3 -1
  61. data/lib/kumi/schema_instance.rb +69 -3
  62. data/lib/kumi/syntax/array_expression.rb +15 -0
  63. data/lib/kumi/syntax/call_expression.rb +11 -0
  64. data/lib/kumi/syntax/cascade_expression.rb +11 -0
  65. data/lib/kumi/syntax/case_expression.rb +11 -0
  66. data/lib/kumi/syntax/declaration_reference.rb +11 -0
  67. data/lib/kumi/syntax/hash_expression.rb +11 -0
  68. data/lib/kumi/syntax/input_declaration.rb +12 -0
  69. data/lib/kumi/syntax/input_element_reference.rb +12 -0
  70. data/lib/kumi/syntax/input_reference.rb +12 -0
  71. data/lib/kumi/syntax/literal.rb +11 -0
  72. data/lib/kumi/syntax/root.rb +1 -0
  73. data/lib/kumi/syntax/trait_declaration.rb +11 -0
  74. data/lib/kumi/syntax/value_declaration.rb +11 -0
  75. data/lib/kumi/types/compatibility.rb +8 -0
  76. data/lib/kumi/types/validator.rb +1 -1
  77. data/lib/kumi/vectorization_metadata.rb +108 -0
  78. data/lib/kumi/version.rb +1 -1
  79. data/scripts/generate_function_docs.rb +22 -10
  80. metadata +38 -17
  81. data/CHANGELOG.md +0 -25
  82. data/lib/kumi/domain.rb +0 -8
  83. data/lib/kumi/input.rb +0 -8
  84. data/lib/kumi/syntax/declarations.rb +0 -23
  85. data/lib/kumi/syntax/expressions.rb +0 -30
  86. data/lib/kumi/syntax/terminal_expressions.rb +0 -27
  87. data/lib/kumi/syntax.rb +0 -9
  88. data/test_impossible_cascade.rb +0 -51
  89. data/{documents → docs}/FUNCTIONS.md +0 -0
@@ -4,14 +4,15 @@ module Kumi
4
4
  module Analyzer
5
5
  module Passes
6
6
  # RESPONSIBILITY: Validate function call arity and argument types against FunctionRegistry
7
- # DEPENDENCIES: None (can run independently)
7
+ # DEPENDENCIES: :decl_types from TypeInferencer
8
8
  # PRODUCES: None (validation only)
9
9
  # INTERFACE: new(schema, state).run(errors)
10
10
  class TypeChecker < VisitorPass
11
11
  def run(errors)
12
- visit_nodes_of_type(Expressions::CallExpression, errors: errors) do |node, _decl, errs|
12
+ visit_nodes_of_type(Kumi::Syntax::CallExpression, errors: errors) do |node, _decl, errs|
13
13
  validate_function_call(node, errs)
14
14
  end
15
+ state
15
16
  end
16
17
 
17
18
  private
@@ -46,11 +47,33 @@ module Kumi
46
47
  types = signature[:param_types]
47
48
  return if types.nil? || (signature[:arity].negative? && node.args.empty?)
48
49
 
50
+ # Skip type checking for vectorized operations
51
+ broadcast_meta = get_state(:broadcast_metadata, required: false)
52
+ if broadcast_meta && is_part_of_vectorized_operation?(node, broadcast_meta)
53
+ return
54
+ end
55
+
49
56
  node.args.each_with_index do |arg, i|
50
57
  validate_argument_type(arg, i, types[i], node.fn_name, errors)
51
58
  end
52
59
  end
53
60
 
61
+ def is_part_of_vectorized_operation?(node, broadcast_meta)
62
+ # Check if this node is part of a vectorized or reduction operation
63
+ # This is a simplified check - in a real implementation we'd need to track context
64
+ node.args.any? do |arg|
65
+ case arg
66
+ when Kumi::Syntax::DeclarationReference
67
+ broadcast_meta[:vectorized_operations]&.key?(arg.name) ||
68
+ broadcast_meta[:reduction_operations]&.key?(arg.name)
69
+ when Kumi::Syntax::InputElementReference
70
+ broadcast_meta[:array_fields]&.key?(arg.path.first)
71
+ else
72
+ false
73
+ end
74
+ end
75
+ end
76
+
54
77
  def validate_argument_type(arg, index, expected_type, fn_name, errors)
55
78
  return if expected_type.nil? || expected_type == Kumi::Types::ANY
56
79
 
@@ -66,15 +89,15 @@ module Kumi
66
89
 
67
90
  def get_expression_type(expr)
68
91
  case expr
69
- when TerminalExpressions::Literal
92
+ when Kumi::Syntax::Literal
70
93
  # Inferred type from literal value
71
94
  Kumi::Types.infer_from_value(expr.value)
72
95
 
73
- when TerminalExpressions::FieldRef
96
+ when Kumi::Syntax::InputReference
74
97
  # Declared type from input block (user-specified)
75
98
  get_declared_field_type(expr.name)
76
99
 
77
- when TerminalExpressions::Binding
100
+ when Kumi::Syntax::DeclarationReference
78
101
  # Inferred type from type inference results
79
102
  get_inferred_declaration_type(expr.name)
80
103
 
@@ -100,10 +123,10 @@ module Kumi
100
123
 
101
124
  def describe_expression_type(expr, type)
102
125
  case expr
103
- when TerminalExpressions::Literal
126
+ when Kumi::Syntax::Literal
104
127
  "`#{expr.value}` of type #{type} (literal value)"
105
128
 
106
- when TerminalExpressions::FieldRef
129
+ when Kumi::Syntax::InputReference
107
130
  input_meta = get_state(:input_meta, required: false) || {}
108
131
  field_meta = input_meta[expr.name]
109
132
 
@@ -116,17 +139,17 @@ module Kumi
116
139
  "undeclared input field `#{expr.name}` (inferred as #{type})"
117
140
  end
118
141
 
119
- when TerminalExpressions::Binding
142
+ when Kumi::Syntax::DeclarationReference
120
143
  # This type was inferred from the declaration's expression
121
144
  "reference to declaration `#{expr.name}` of inferred type #{type}"
122
145
 
123
- when Expressions::CallExpression
146
+ when Kumi::Syntax::CallExpression
124
147
  "result of function `#{expr.fn_name}` returning #{type}"
125
148
 
126
- when Expressions::ListExpression
149
+ when Kumi::Syntax::ArrayExpression
127
150
  "list expression of type #{type}"
128
151
 
129
- when Expressions::CascadeExpression
152
+ when Kumi::Syntax::CascadeExpression
130
153
  "cascade expression of type #{type}"
131
154
 
132
155
  else
@@ -16,6 +16,7 @@ module Kumi
16
16
 
17
17
  # Then check basic consistency (placeholder for now)
18
18
  # In a full implementation, this would do sophisticated usage analysis
19
+ state
19
20
  end
20
21
 
21
22
  private
@@ -30,7 +31,7 @@ module Kumi
30
31
  field_decl = find_input_field_declaration(field_name)
31
32
  location = field_decl&.loc
32
33
 
33
- add_error(errors, location, "Invalid type declaration for field :#{field_name}: #{declared_type.inspect}")
34
+ report_type_error(errors, "Invalid type declaration for field :#{field_name}: #{declared_type.inspect}", location: location)
34
35
  end
35
36
  end
36
37
 
@@ -4,16 +4,17 @@ module Kumi
4
4
  module Analyzer
5
5
  module Passes
6
6
  # RESPONSIBILITY: Infer types for all declarations based on expression analysis
7
- # DEPENDENCIES: Toposorter (needs topo_order), DefinitionValidator (needs definitions)
7
+ # DEPENDENCIES: Toposorter (needs topo_order), DeclarationValidator (needs definitions)
8
8
  # PRODUCES: decl_types hash mapping declaration names to inferred types
9
9
  # INTERFACE: new(schema, state).run(errors)
10
10
  class TypeInferencer < PassBase
11
11
  def run(errors)
12
- return if state[:decl_types] # Already run
13
-
14
12
  types = {}
15
13
  topo_order = get_state(:topo_order)
16
14
  definitions = get_state(:definitions)
15
+
16
+ # Get broadcast metadata from broadcast detector
17
+ broadcast_meta = get_state(:broadcast_metadata, required: false) || {}
17
18
 
18
19
  # Process declarations in topological order to ensure dependencies are resolved
19
20
  topo_order.each do |name|
@@ -21,44 +22,67 @@ module Kumi
21
22
  next unless decl
22
23
 
23
24
  begin
24
- inferred_type = infer_expression_type(decl.expression, types)
25
- types[name] = inferred_type
25
+ # Check if this declaration is marked as vectorized
26
+ if broadcast_meta[:vectorized_operations]&.key?(name)
27
+ # Infer the element type and wrap in array
28
+ element_type = infer_vectorized_element_type(decl.expression, types, broadcast_meta)
29
+ types[name] = decl.is_a?(Kumi::Syntax::TraitDeclaration) ? { array: :boolean } : { array: element_type }
30
+ else
31
+ # Normal type inference
32
+ inferred_type = infer_expression_type(decl.expression, types, broadcast_meta, name)
33
+ types[name] = inferred_type
34
+ end
26
35
  rescue StandardError => e
27
- add_error(errors, decl&.loc, "Type inference failed: #{e.message}")
36
+ report_type_error(errors, "Type inference failed: #{e.message}", location: decl&.loc)
28
37
  end
29
38
  end
30
39
 
31
- set_state(:decl_types, types)
40
+ state.with(:decl_types, types)
32
41
  end
33
42
 
34
43
  private
35
44
 
36
- def infer_expression_type(expr, type_context = {})
45
+ def infer_expression_type(expr, type_context = {}, broadcast_metadata = {}, current_decl_name = nil)
37
46
  case expr
38
47
  when Literal
39
48
  Types.infer_from_value(expr.value)
40
- when FieldRef
49
+ when InputReference
41
50
  # Look up type from field metadata
42
51
  input_meta = get_state(:input_meta, required: false) || {}
43
52
  meta = input_meta[expr.name]
44
53
  meta&.dig(:type) || :any
45
- when Binding
54
+ when DeclarationReference
46
55
  type_context[expr.name] || :any
47
56
  when CallExpression
48
- infer_call_type(expr, type_context)
49
- when ListExpression
50
- infer_list_type(expr, type_context)
57
+ infer_call_type(expr, type_context, broadcast_metadata, current_decl_name)
58
+ when ArrayExpression
59
+ infer_list_type(expr, type_context, broadcast_metadata, current_decl_name)
51
60
  when CascadeExpression
52
- infer_cascade_type(expr, type_context)
61
+ infer_cascade_type(expr, type_context, broadcast_metadata, current_decl_name)
62
+ when InputElementReference
63
+ # Element reference returns the field type
64
+ infer_element_reference_type(expr)
53
65
  else
54
66
  :any
55
67
  end
56
68
  end
57
69
 
58
- def infer_call_type(call_expr, type_context)
59
- fn_name = call_expr.fn_name
70
+ def infer_call_type(call_expr, type_context, broadcast_metadata = {}, current_decl_name = nil)
71
+ fn_name = call_expr.fn_name
60
72
  args = call_expr.args
61
73
 
74
+ # Check broadcast metadata first
75
+ if current_decl_name && broadcast_metadata[:vectorized_values]&.key?(current_decl_name)
76
+ # This declaration is marked as vectorized, so it produces an array
77
+ element_type = infer_vectorized_element_type(call_expr, type_context, broadcast_metadata)
78
+ return { array: element_type }
79
+ end
80
+
81
+ if current_decl_name && broadcast_metadata[:reducer_values]&.key?(current_decl_name)
82
+ # This declaration is marked as a reducer, get the result from the function
83
+ return infer_function_return_type(fn_name, args, type_context, broadcast_metadata)
84
+ end
85
+
62
86
  # Check if function exists in registry
63
87
  unless FunctionRegistry.supported?(fn_name)
64
88
  # Don't push error here - let existing TypeChecker handle it
@@ -74,7 +98,7 @@ module Kumi
74
98
  end
75
99
 
76
100
  # Infer argument types
77
- arg_types = args.map { |arg| infer_expression_type(arg, type_context) }
101
+ arg_types = args.map { |arg| infer_expression_type(arg, type_context, broadcast_metadata, current_decl_name) }
78
102
 
79
103
  # Validate parameter types (warn but don't fail)
80
104
  param_types = signature[:param_types] || []
@@ -92,10 +116,29 @@ module Kumi
92
116
  signature[:return_type] || :any
93
117
  end
94
118
 
95
- def infer_list_type(list_expr, type_context)
119
+ def infer_vectorized_element_type(call_expr, type_context, broadcast_metadata)
120
+ # For vectorized arithmetic operations, infer the element type
121
+ # For now, assume arithmetic operations on floats produce floats
122
+ case call_expr.fn_name
123
+ when :multiply, :add, :subtract, :divide
124
+ :float
125
+ else
126
+ :any
127
+ end
128
+ end
129
+
130
+ def infer_function_return_type(fn_name, args, type_context, broadcast_metadata)
131
+ # Get the function signature
132
+ return :any unless FunctionRegistry.supported?(fn_name)
133
+
134
+ signature = FunctionRegistry.signature(fn_name)
135
+ signature[:return_type] || :any
136
+ end
137
+
138
+ def infer_list_type(list_expr, type_context, broadcast_metadata = {}, current_decl_name = nil)
96
139
  return Types.array(:any) if list_expr.elements.empty?
97
140
 
98
- element_types = list_expr.elements.map { |elem| infer_expression_type(elem, type_context) }
141
+ element_types = list_expr.elements.map { |elem| infer_expression_type(elem, type_context, broadcast_metadata, current_decl_name) }
99
142
 
100
143
  # Try to unify all element types
101
144
  unified_type = element_types.reduce { |acc, type| Types.unify(acc, type) }
@@ -105,11 +148,75 @@ module Kumi
105
148
  Types.array(:any)
106
149
  end
107
150
 
108
- def infer_cascade_type(cascade_expr, type_context)
151
+ def infer_vectorized_element_type(expr, type_context, vectorization_meta)
152
+ # For vectorized operations, we need to infer the element type
153
+ case expr
154
+ when InputElementReference
155
+ # Get the field type from metadata
156
+ input_meta = get_state(:input_meta, required: false) || {}
157
+ array_name = expr.path.first
158
+ field_name = expr.path[1]
159
+
160
+ array_meta = input_meta[array_name]
161
+ return :any unless array_meta&.dig(:type) == :array
162
+
163
+ array_meta.dig(:children, field_name, :type) || :any
164
+
165
+ when CallExpression
166
+ # For arithmetic operations, infer from operands
167
+ if %i[add subtract multiply divide].include?(expr.fn_name)
168
+ # Get types of operands
169
+ arg_types = expr.args.map do |arg|
170
+ if arg.is_a?(InputElementReference)
171
+ infer_vectorized_element_type(arg, type_context, vectorization_meta)
172
+ elsif arg.is_a?(DeclarationReference)
173
+ # Get the element type if it's vectorized
174
+ ref_type = type_context[arg.name]
175
+ if ref_type.is_a?(Hash) && ref_type.key?(:array)
176
+ ref_type[:array]
177
+ else
178
+ ref_type || :any
179
+ end
180
+ else
181
+ infer_expression_type(arg, type_context, vectorization_meta)
182
+ end
183
+ end
184
+
185
+ # Unify types for arithmetic
186
+ Types.unify(*arg_types) || :float
187
+ else
188
+ :any
189
+ end
190
+
191
+ else
192
+ :any
193
+ end
194
+ end
195
+
196
+ def infer_element_reference_type(expr)
197
+ # Get array field metadata
198
+ input_meta = get_state(:input_meta, required: false) || {}
199
+
200
+ return :any unless expr.path.size >= 2
201
+
202
+ array_name = expr.path.first
203
+ field_name = expr.path[1]
204
+
205
+ array_meta = input_meta[array_name]
206
+ return :any unless array_meta&.dig(:type) == :array
207
+
208
+ # Get the field type from children metadata
209
+ field_type = array_meta.dig(:children, field_name, :type) || :any
210
+
211
+ # Return array of field type (vectorized)
212
+ { array: field_type }
213
+ end
214
+
215
+ def infer_cascade_type(cascade_expr, type_context, broadcast_metadata = {}, current_decl_name = nil)
109
216
  return :any if cascade_expr.cases.empty?
110
217
 
111
218
  result_types = cascade_expr.cases.map do |case_stmt|
112
- infer_expression_type(case_stmt.result, type_context)
219
+ infer_expression_type(case_stmt.result, type_context, broadcast_metadata, current_decl_name)
113
220
  end
114
221
 
115
222
  # Reduce all possible types into a single unified type