RubyGems - kumi - Versions diffs - 0.0.9 → 0.0.11 - Mend

kumi 0.0.9 → 0.0.11

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (103)

checksums.yaml +4 -4
data/CHANGELOG.md +18 -0
data/CLAUDE.md +18 -258
data/README.md +188 -121
data/docs/AST.md +1 -1
data/docs/FUNCTIONS.md +52 -8
data/docs/VECTOR_SEMANTICS.md +286 -0
data/docs/compiler_design_principles.md +86 -0
data/docs/features/README.md +15 -2
data/docs/features/hierarchical-broadcasting.md +349 -0
data/docs/features/javascript-transpiler.md +148 -0
data/docs/features/performance.md +1 -3
data/docs/features/s-expression-printer.md +2 -2
data/docs/schema_metadata.md +7 -7
data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
data/examples/game_of_life.rb +2 -4
data/lib/kumi/analyzer.rb +34 -14
data/lib/kumi/compiler.rb +4 -283
data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +717 -66
data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -99
data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +28 -0
data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
data/lib/kumi/core/analyzer/passes/type_checker.rb +9 -5
data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
data/lib/kumi/core/analyzer/passes/unsat_detector.rb +92 -48
data/lib/kumi/core/analyzer/plans.rb +52 -0
data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
data/lib/kumi/core/compiler/access_builder.rb +36 -0
data/lib/kumi/core/compiler/access_planner.rb +219 -0
data/lib/kumi/core/compiler/accessors/base.rb +69 -0
data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
data/lib/kumi/core/compiler_base.rb +137 -0
data/lib/kumi/core/error_reporter.rb +6 -5
data/lib/kumi/core/errors.rb +4 -0
data/lib/kumi/core/explain.rb +157 -205
data/lib/kumi/core/export/node_builders.rb +2 -2
data/lib/kumi/core/export/node_serializers.rb +1 -1
data/lib/kumi/core/function_registry/collection_functions.rb +100 -6
data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
data/lib/kumi/core/function_registry/function_builder.rb +142 -53
data/lib/kumi/core/function_registry/logical_functions.rb +173 -3
data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
data/lib/kumi/core/function_registry.rb +138 -98
data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
data/lib/kumi/core/ir/execution_engine.rb +50 -0
data/lib/kumi/core/ir.rb +58 -0
data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +37 -16
data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
data/lib/kumi/core/ruby_parser/parser.rb +1 -1
data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
data/lib/kumi/errors.rb +2 -0
data/lib/kumi/js.rb +23 -0
data/lib/kumi/registry.rb +17 -22
data/lib/kumi/runtime/executable.rb +213 -0
data/lib/kumi/schema.rb +15 -4
data/lib/kumi/schema_metadata.rb +2 -2
data/lib/kumi/support/ir_dump.rb +491 -0
data/lib/kumi/support/s_expression_printer.rb +17 -16
data/lib/kumi/syntax/array_expression.rb +6 -6
data/lib/kumi/syntax/call_expression.rb +4 -4
data/lib/kumi/syntax/cascade_expression.rb +4 -4
data/lib/kumi/syntax/case_expression.rb +4 -4
data/lib/kumi/syntax/declaration_reference.rb +4 -4
data/lib/kumi/syntax/hash_expression.rb +4 -4
data/lib/kumi/syntax/input_declaration.rb +6 -5
data/lib/kumi/syntax/input_element_reference.rb +5 -5
data/lib/kumi/syntax/input_reference.rb +5 -5
data/lib/kumi/syntax/literal.rb +4 -4
data/lib/kumi/syntax/location.rb +5 -0
data/lib/kumi/syntax/node.rb +33 -34
data/lib/kumi/syntax/root.rb +6 -6
data/lib/kumi/syntax/trait_declaration.rb +4 -4
data/lib/kumi/syntax/value_declaration.rb +4 -4
data/lib/kumi/version.rb +1 -1
data/lib/kumi.rb +6 -15
data/scripts/analyze_broadcast_methods.rb +68 -0
data/scripts/analyze_cascade_methods.rb +74 -0
data/scripts/check_broadcasting_coverage.rb +51 -0
data/scripts/find_dead_code.rb +114 -0
metadata +36 -9
data/docs/features/array-broadcasting.md +0 -170
data/lib/kumi/cli.rb +0 -449
data/lib/kumi/core/compiled_schema.rb +0 -43
data/lib/kumi/core/evaluation_wrapper.rb +0 -40
data/lib/kumi/core/schema_instance.rb +0 -111
data/lib/kumi/core/vectorization_metadata.rb +0 -110
data/migrate_to_core_iterative.rb +0 -938

data/lib/kumi/analyzer.rb CHANGED Viewed

@@ -4,20 +4,22 @@ module Kumi
   module Analyzer
     Result = Struct.new(:definitions, :dependency_graph, :leaf_map, :topo_order, :decl_types, :state, keyword_init: true)
-    module_function
     DEFAULT_PASSES = [
       Core::Analyzer::Passes::NameIndexer,                     # 1. Finds all names and checks for duplicates.
       Core::Analyzer::Passes::InputCollector,                  # 2. Collects field metadata from input declarations.
-      Core::Analyzer::Passes::DeclarationValidator,            # 3. Checks the basic structure of each rule.
-      Core::Analyzer::Passes::SemanticConstraintValidator,     # 4. Validates DSL semantic constraints at AST level.
-      Core::Analyzer::Passes::DependencyResolver,              # 5. Builds the dependency graph with conditional dependencies.
-      Core::Analyzer::Passes::UnsatDetector,                   # 6. Detects unsatisfiable constraints and analyzes cascade mutual exclusion.
-      Core::Analyzer::Passes::Toposorter,                      # 7. Creates the final evaluation order, allowing safe cycles.
-      Core::Analyzer::Passes::BroadcastDetector, # 8. Detects which operations should be broadcast over arrays (must run before type inference).
-      Core::Analyzer::Passes::TypeInferencer,                  # 9. Infers types for all declarations (uses vectorization metadata).
-      Core::Analyzer::Passes::TypeConsistencyChecker,          # 10. Validates declared vs inferred type consistency.
-      Core::Analyzer::Passes::TypeChecker                      # 11. Validates types using inferred information.
+      Core::Analyzer::Passes::DeclarationValidator,            # 4. Checks the basic structure of each rule.
+      Core::Analyzer::Passes::SemanticConstraintValidator,     # 5. Validates DSL semantic constraints at AST level.
+      Core::Analyzer::Passes::DependencyResolver,              # 6. Builds the dependency graph with conditional dependencies.
+      Core::Analyzer::Passes::UnsatDetector,                   # 7. Detects unsatisfiable constraints and analyzes cascade mutual exclusion.
+      Core::Analyzer::Passes::Toposorter,                      # 8. Creates the final evaluation order, allowing safe cycles.
+      Core::Analyzer::Passes::BroadcastDetector,               # 9. Detects which operations should be broadcast over arrays.
+      Core::Analyzer::Passes::TypeInferencerPass,              # 10. Infers types for all declarations (uses vectorization metadata).
+      Core::Analyzer::Passes::TypeConsistencyChecker,          # 11. Validates declared vs inferred type consistency.
+      Core::Analyzer::Passes::TypeChecker,                     # 12. Validates types using inferred information.
+      Core::Analyzer::Passes::InputAccessPlannerPass,          # 13. Plans access strategies for input fields.
+      Core::Analyzer::Passes::ScopeResolutionPass,             # 14. Plans execution scope and lifting needs for declarations.
+      Core::Analyzer::Passes::JoinReducePlanningPass,          # 15. Plans join/reduce operations (Generates IR Structs)
+      Core::Analyzer::Passes::LowerToIRPass # 16. Lowers the schema to IR (Generates IR Structs)
     ].freeze
     def self.analyze!(schema, passes: DEFAULT_PASSES, **opts)
@@ -35,7 +37,13 @@ module Kumi
         begin
           state = pass_instance.run(errors)
         rescue StandardError => e
-          errors << Core::ErrorReporter.create_error(e.message, location: nil, type: :semantic)
+          # TODO: - GREATLY improve this, need to capture the context of the error
+          # and the pass that failed and line number if relevant
+          pass_name = pass_class.name.split("::").last
+          message = "Error in Analysis Pass(#{pass_name}): #{e.message}"
+          errors << Core::ErrorReporter.create_error(message, location: nil, type: :semantic, backtrace: e.backtrace)
+          raise
         end
       end
       state
@@ -43,11 +51,14 @@ module Kumi
     def self.handle_analysis_errors(errors)
       type_errors = errors.select { |e| e.type == :type }
+      semantic_errors = errors.select { |e| e.type == :semantic }
       first_error_location = errors.first.location
       raise Errors::TypeError.new(format_errors(errors), first_error_location) if type_errors.any?
-      raise Errors::SemanticError.new(format_errors(errors), first_error_location)
+      raise Errors::SemanticError.new(format_errors(errors), first_error_location) if first_error_location || semantic_errors
+      raise Errors::AnalysisError.new(format_errors(errors))
     end
     def self.create_analysis_result(state)
@@ -65,7 +76,16 @@ module Kumi
     def self.format_errors(errors)
       return "" if errors.empty?
-      errors.map(&:to_s).join("\n")
+      backtrace = errors.first.backtrace
+      message = errors.map(&:to_s).join("\n")
+      message.tap do |msg|
+        if backtrace && !backtrace.empty?
+          msg << "\n\nBacktrace:\n"
+          msg << backtrace[0..10].join("\n") # Limit to first 10 lines for readability
+        end
+      end
     end
   end
 end

data/lib/kumi/compiler.rb CHANGED Viewed

@@ -2,298 +2,19 @@
 module Kumi
   # Compiles an analyzed schema into executable lambdas
-  class Compiler
-    # ExprCompilers holds per-node compile implementations
-    module ExprCompilers
-      def compile_literal(expr)
-        v = expr.value
-        ->(_ctx) { v }
-      end
-      def compile_field_node(expr)
-        compile_field(expr)
-      end
-      def compile_element_field_reference(expr)
-        path = expr.path
-        lambda do |ctx|
-          # Start with the top-level collection from the context.
-          collection = ctx[path.first]
-          # Recursively map over the nested collections.
-          # The `dig_and_map` helper will handle any level of nesting.
-          dig_and_map(collection, path[1..])
-        end
-      end
-      def compile_binding_node(expr)
-        name = expr.name
-        # Handle forward references in cycles by deferring binding lookup to runtime
-        lambda do |ctx|
-          fn = @bindings[name].last
-          fn.call(ctx)
-        end
-      end
-      def compile_list(expr)
-        fns = expr.elements.map { |e| compile_expr(e) }
-        ->(ctx) { fns.map { |fn| fn.call(ctx) } }
-      end
-      def compile_call(expr)
-        fn_name = expr.fn_name
-        arg_fns = expr.args.map { |a| compile_expr(a) }
-        # Check if this is a vectorized operation
-        if vectorized_operation?(expr)
-          ->(ctx) { invoke_vectorized_function(fn_name, arg_fns, ctx, expr.loc) }
-        else
-          ->(ctx) { invoke_function(fn_name, arg_fns, ctx, expr.loc) }
-        end
-      end
-      def compile_cascade(expr)
-        # Check if current declaration is vectorized
-        broadcast_meta = @analysis.state[:broadcasts]
-        is_vectorized = @current_declaration && broadcast_meta&.dig(:vectorized_operations, @current_declaration)
-        # For vectorized cascades, we need to transform conditions that use all?
-        pairs = if is_vectorized
-                  expr.cases.map do |c|
-                    condition_fn = transform_vectorized_condition(c.condition)
-                    result_fn = compile_expr(c.result)
-                    [condition_fn, result_fn]
-                  end
-                else
-                  expr.cases.map { |c| [compile_expr(c.condition), compile_expr(c.result)] }
-                end
-        if is_vectorized
-          lambda do |ctx|
-            # This cascade can be vectorized - check if we actually need to at runtime
-            # Evaluate all conditions and results to check for arrays
-            cond_results = pairs.map { |cond, _res| cond.call(ctx) }
-            res_results = pairs.map { |_cond, res| res.call(ctx) }
-            # Check if any conditions or results are arrays (vectorized)
-            has_vectorized_data = (cond_results + res_results).any?(Array)
-            if has_vectorized_data
-              # Apply element-wise cascade evaluation
-              array_length = cond_results.find { |v| v.is_a?(Array) }&.length ||
-                             res_results.find { |v| v.is_a?(Array) }&.length || 1
-              (0...array_length).map do |i|
-                pairs.each_with_index do |(_cond, _res), pair_idx|
-                  cond_val = cond_results[pair_idx].is_a?(Array) ? cond_results[pair_idx][i] : cond_results[pair_idx]
-                  if cond_val
-                    res_val = res_results[pair_idx].is_a?(Array) ? res_results[pair_idx][i] : res_results[pair_idx]
-                    break res_val
-                  end
-                end || nil
-              end
-            else
-              # All data is scalar - use regular cascade evaluation
-              pairs.each_with_index do |(_cond, _res), pair_idx|
-                return res_results[pair_idx] if cond_results[pair_idx]
-              end
-              nil
-            end
-          end
-        else
-          lambda do |ctx|
-            pairs.each { |cond, res| return res.call(ctx) if cond.call(ctx) }
-            nil
-          end
-        end
-      end
-      def transform_vectorized_condition(condition_expr)
-        # If this is fn(:all?, [trait_ref]), extract the trait_ref for vectorized cascades
-        if condition_expr.is_a?(Kumi::Syntax::CallExpression) &&
-           condition_expr.fn_name == :all? &&
-           condition_expr.args.length == 1
-          arg = condition_expr.args.first
-          if arg.is_a?(Kumi::Syntax::ArrayExpression) && arg.elements.length == 1
-            trait_ref = arg.elements.first
-            return compile_expr(trait_ref)
-          end
-        end
-        # Otherwise compile normally
-        compile_expr(condition_expr)
-      end
-    end
-    include ExprCompilers
-    # Map node classes to compiler methods
-    DISPATCH = {
-      Kumi::Syntax::Literal => :compile_literal,
-      Kumi::Syntax::InputReference => :compile_field_node,
-      Kumi::Syntax::InputElementReference => :compile_element_field_reference,
-      Kumi::Syntax::DeclarationReference => :compile_binding_node,
-      Kumi::Syntax::ArrayExpression => :compile_list,
-      Kumi::Syntax::CallExpression => :compile_call,
-      Kumi::Syntax::CascadeExpression => :compile_cascade
-    }.freeze
+  class Compiler < Core::CompilerBase
     def self.compile(schema, analyzer:)
       new(schema, analyzer).compile
     end
     def initialize(schema, analyzer)
-      @schema = schema
-      @analysis = analyzer
+      super
       @bindings = {}
     end
     def compile
-      build_index
-      @analysis.topo_order.each do |name|
-        decl = @index[name] or raise("Unknown binding #{name}")
-        compile_declaration(decl)
-      end
-      Core::CompiledSchema.new(@bindings.freeze)
-    end
-    private
-    def build_index
-      @index = {}
-      @schema.attributes.each { |a| @index[a.name] = a }
-      @schema.traits.each     { |t| @index[t.name] = t }
-    end
-    def dig_and_map(collection, path_segments)
-      return collection unless collection.is_a?(Array)
-      current_segment = path_segments.first
-      remaining_segments = path_segments[1..]
-      collection.map do |element|
-        value = element[current_segment]
-        # If there are more segments, recurse. Otherwise, return the value.
-        if remaining_segments.empty?
-          value
-        else
-          dig_and_map(value, remaining_segments)
-        end
-      end
-    end
-    def compile_declaration(decl)
-      @current_declaration = decl.name
-      kind = decl.is_a?(Kumi::Syntax::TraitDeclaration) ? :trait : :attr
-      fn = compile_expr(decl.expression)
-      @bindings[decl.name] = [kind, fn]
-      @current_declaration = nil
-    end
-    # Dispatch to the appropriate compile_* method
-    def compile_expr(expr)
-      method = DISPATCH.fetch(expr.class)
-      send(method, expr)
-    end
-    def compile_field(node)
-      name = node.name
-      loc  = node.loc
-      lambda do |ctx|
-        return ctx[name] if ctx.respond_to?(:key?) && ctx.key?(name)
-        raise Errors::RuntimeError,
-              "Key '#{name}' not found at #{loc}. Available: #{ctx.respond_to?(:keys) ? ctx.keys.join(', ') : 'N/A'}"
-      end
-    end
-    def vectorized_operation?(expr)
-      # Check if this operation uses vectorized inputs
-      broadcast_meta = @analysis.state[:broadcasts]
-      return false unless broadcast_meta
-      # Reduction functions are NOT vectorized operations - they consume arrays
-      return false if Kumi::Registry.reducer?(expr.fn_name)
-      expr.args.any? do |arg|
-        case arg
-        when Kumi::Syntax::InputElementReference
-          broadcast_meta[:array_fields]&.key?(arg.path.first)
-        when Kumi::Syntax::DeclarationReference
-          broadcast_meta[:vectorized_operations]&.key?(arg.name)
-        else
-          false
-        end
-      end
-    end
-    def invoke_vectorized_function(name, arg_fns, ctx, loc)
-      # Evaluate arguments
-      values = arg_fns.map { |fn| fn.call(ctx) }
-      # Check if any argument is vectorized (array)
-      has_vectorized_args = values.any?(Array)
-      if has_vectorized_args
-        # Apply function with broadcasting to all vectorized arguments
-        vectorized_function_call(name, values)
-      else
-        # All arguments are scalars - regular function call
-        fn = Kumi::Registry.fetch(name)
-        fn.call(*values)
-      end
-    rescue StandardError => e
-      enhanced_message = "Error calling fn(:#{name}) at #{loc}: #{e.message}"
-      runtime_error = Errors::RuntimeError.new(enhanced_message)
-      runtime_error.set_backtrace(e.backtrace)
-      runtime_error.define_singleton_method(:cause) { e }
-      raise runtime_error
-    end
-    def vectorized_function_call(fn_name, values)
-      # Get the function from registry
-      fn = Kumi::Registry.fetch(fn_name)
-      # Find array dimensions for broadcasting
-      array_values = values.select { |v| v.is_a?(Array) }
-      return fn.call(*values) if array_values.empty?
-      # All arrays should have the same length (validation could be added)
-      array_length = array_values.first.size
-      # Broadcast and apply function element-wise
-      (0...array_length).map do |i|
-        element_args = values.map do |v|
-          v.is_a?(Array) ? v[i] : v # Broadcast scalars
-        end
-        fn.call(*element_args)
-      end
-    end
-    def invoke_function(name, arg_fns, ctx, loc)
-      fn = Kumi::Registry.fetch(name)
-      values = arg_fns.map { |fn| fn.call(ctx) }
-      fn.call(*values)
-    rescue StandardError => e
-      # Preserve original error class and backtrace while adding context
-      enhanced_message = "Error calling fn(:#{name}) at #{loc}: #{e.message}"
-      if e.is_a?(Kumi::Core::Errors::Error)
-        # Re-raise Kumi errors with enhanced message but preserve type
-        e.define_singleton_method(:message) { enhanced_message }
-        raise e
-      else
-        # For non-Kumi errors, wrap in RuntimeError but preserve original error info
-        runtime_error = Errors::RuntimeError.new(enhanced_message)
-        runtime_error.set_backtrace(e.backtrace)
-        runtime_error.define_singleton_method(:cause) { e }
-        raise runtime_error
-      end
+      # Switch to LIR: Use the analysis state instead of old compilation
+      Runtime::Executable.from_analysis(@analysis.state)
     end
   end
 end