RubyGems - kumi - Versions diffs - 0.0.20 → 0.0.22 - Mend

kumi 0.0.20 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/lib/kumi/core/functions/loader.rb +1 -1
data/lib/kumi/version.rb +1 -1
metadata +2 -11
data/lib/kumi/compiler.rb +0 -21
data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +0 -816
data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +0 -907
data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +0 -349
data/lib/kumi/core/analyzer/passes/type_checker.rb +0 -179
data/lib/kumi/core/analyzer/passes/type_inferencer_pass.rb +0 -234
data/lib/kumi/core/compiler_base.rb +0 -137
data/lib/kumi/core/explain.rb +0 -254
data/lib/kumi/schema_metadata.rb +0 -524
data/lib/kumi/core/functions/{model.rb → function_spec.rb} +0 -0

data/lib/kumi/core/analyzer/passes/type_inferencer_pass.rb DELETED Viewed

@@ -1,234 +0,0 @@
-# frozen_string_literal: true
-module Kumi
-  module Core
-    module Analyzer
-      module Passes
-        # RESPONSIBILITY: Infer types for all declarations based on expression analysis
-        # DEPENDENCIES: Toposorter (needs evaluation_order), DeclarationValidator (needs declarations)
-        # PRODUCES: inferred_types hash mapping declaration names to inferred types
-        # INTERFACE: new(schema, state).run(errors)
-        class TypeInferencerPass < PassBase
-          def run(errors)
-            types = {}
-            topo_order = get_state(:evaluation_order)
-            definitions = get_state(:declarations)
-            # Get broadcast metadata from broadcast detector
-            broadcast_meta = get_state(:broadcasts, required: false) || {}
-            # Process declarations in topological order to ensure dependencies are resolved
-            topo_order.each do |name|
-              decl = definitions[name]
-              next unless decl
-              # Check if this declaration is marked as vectorized
-              if broadcast_meta[:vectorized_operations]&.key?(name)
-                # Infer the element type and wrap in array
-                element_type = infer_vectorized_element_type(decl.expression, types, broadcast_meta)
-                types[name] = decl.is_a?(Kumi::Syntax::TraitDeclaration) ? { array: :boolean } : { array: element_type }
-              else
-                # Normal type inference
-                inferred_type = infer_expression_type(decl.expression, types, broadcast_meta, name)
-                types[name] = inferred_type
-              end
-              # rescue StandardError => e
-              # report_type_error(errors, "Type inference failed: #{e.message}", location: decl&.loc)
-            end
-            state.with(:inferred_types, types)
-          end
-          private
-          def infer_expression_type(expr, type_context = {}, broadcast_metadata = {}, current_decl_name = nil)
-            case expr
-            when Literal
-              Types.infer_from_value(expr.value)
-            when InputReference
-              # Look up type from field metadata
-              input_meta = get_state(:input_metadata, required: false) || {}
-              meta = input_meta[expr.name]
-              meta&.dig(:type) || :any
-            when DeclarationReference
-              type_context[expr.name] || :any
-            when CallExpression
-              infer_call_type(expr, type_context, broadcast_metadata, current_decl_name)
-            when ArrayExpression
-              infer_list_type(expr, type_context, broadcast_metadata, current_decl_name)
-            when CascadeExpression
-              infer_cascade_type(expr, type_context, broadcast_metadata, current_decl_name)
-            when InputElementReference
-              # Element reference returns the field type
-              infer_element_reference_type(expr)
-            else
-              :any
-            end
-          end
-          def infer_call_type(call_expr, type_context, broadcast_metadata = {}, current_decl_name = nil)
-            fn_name = call_expr.fn_name
-            args = call_expr.args
-            # Check broadcast metadata first
-            if current_decl_name && broadcast_metadata[:vectorized_values]&.key?(current_decl_name)
-              # This declaration is marked as vectorized, so it produces an array
-              element_type = infer_vectorized_element_type(call_expr, type_context, broadcast_metadata)
-              return { array: element_type }
-            end
-            if current_decl_name && broadcast_metadata[:reducer_values]&.key?(current_decl_name)
-              # This declaration is marked as a reducer, get the result from the function
-              return infer_function_return_type(fn_name, args, type_context, broadcast_metadata)
-            end
-            # Check if function exists in registry
-            unless Kumi::Registry.supported?(fn_name)
-              # Don't push error here - let existing TypeChecker handle it
-              return :any
-            end
-            signature = Kumi::Registry.signature(fn_name)
-            # Validate arity if not variable
-            if signature[:arity] >= 0 && args.size != signature[:arity]
-              # Don't push error here - let existing TypeChecker handle it
-              return :any
-            end
-            # Infer argument types
-            arg_types = args.map { |arg| infer_expression_type(arg, type_context, broadcast_metadata, current_decl_name) }
-            # Validate parameter types (warn but don't fail)
-            param_types = signature[:param_types] || []
-            if signature[:arity] >= 0 && param_types.size.positive?
-              arg_types.each_with_index do |arg_type, i|
-                expected_type = param_types[i] || param_types.last
-                next if expected_type.nil?
-                # unless Types.compatible?(arg_type, expected_type)
-                # Could add warning here in future, but for now just infer best type
-                # end
-              end
-            end
-            signature[:return_type] || :any
-          end
-          def infer_vectorized_element_type(call_expr, _type_context, _broadcast_metadata)
-            # For vectorized arithmetic operations, infer the element type
-            # For now, assume arithmetic operations on floats produce floats
-            case call_expr.fn_name
-            when :multiply, :add, :subtract, :divide
-              :float
-            else
-              :any
-            end
-          end
-          def infer_function_return_type(fn_name, _args, _type_context, _broadcast_metadata)
-            # Get the function signature
-            return :any unless Kumi::Registry.supported?(fn_name)
-            signature = Kumi::Registry.signature(fn_name)
-            signature[:return_type] || :any
-          end
-          def infer_list_type(list_expr, type_context, broadcast_metadata = {}, current_decl_name = nil)
-            return Types.array(:any) if list_expr.elements.empty?
-            element_types = list_expr.elements.map do |elem|
-              infer_expression_type(elem, type_context, broadcast_metadata, current_decl_name)
-            end
-            # Try to unify all element types
-            unified_type = element_types.reduce { |acc, type| Types.unify(acc, type) }
-            Types.array(unified_type)
-          rescue StandardError
-            # If unification fails, fall back to generic array
-            Types.array(:any)
-          end
-          def infer_vectorized_element_type(expr, type_context, vectorization_meta)
-            # For vectorized operations, we need to infer the element type
-            case expr
-            when InputElementReference
-              # Get the field type from metadata
-              input_meta = get_state(:input_metadata, required: false) || {}
-              array_name = expr.path.first
-              field_name = expr.path[1]
-              array_meta = input_meta[array_name]
-              return :any unless array_meta&.dig(:type) == :array
-              array_meta.dig(:children, field_name, :type) || :any
-            when CallExpression
-              # For arithmetic operations, infer from operands
-              if %i[add subtract multiply divide].include?(expr.fn_name)
-                # Get types of operands
-                arg_types = expr.args.map do |arg|
-                  if arg.is_a?(InputElementReference)
-                    infer_vectorized_element_type(arg, type_context, vectorization_meta)
-                  elsif arg.is_a?(DeclarationReference)
-                    # Get the element type if it's vectorized
-                    ref_type = type_context[arg.name]
-                    if ref_type.is_a?(Hash) && ref_type.key?(:array)
-                      ref_type[:array]
-                    else
-                      ref_type || :any
-                    end
-                  else
-                    infer_expression_type(arg, type_context, vectorization_meta)
-                  end
-                end
-                # Unify types for arithmetic
-                Types.unify(*arg_types) || :float
-              else
-                :any
-              end
-            else
-              :any
-            end
-          end
-          def infer_element_reference_type(expr)
-            # Get array field metadata
-            input_meta = get_state(:input_metadata, required: false) || {}
-            return :any unless expr.path.size >= 2
-            array_name = expr.path.first
-            field_name = expr.path[1]
-            array_meta = input_meta[array_name]
-            return :any unless array_meta&.dig(:type) == :array
-            # Get the field type from children metadata
-            field_type = array_meta.dig(:children, field_name, :type) || :any
-            # Return array of field type (vectorized)
-            { array: field_type }
-          end
-          def infer_cascade_type(cascade_expr, type_context, broadcast_metadata = {}, current_decl_name = nil)
-            return :any if cascade_expr.cases.empty?
-            result_types = cascade_expr.cases.map do |case_stmt|
-              infer_expression_type(case_stmt.result, type_context, broadcast_metadata, current_decl_name)
-            end
-            # Reduce all possible types into a single unified type
-            result_types.reduce { |unified, type| Types.unify(unified, type) } || :any
-          rescue StandardError
-            # Check if unification fails, fall back to base type
-            # TODO: understand if this right to fallback or we should raise
-            :any
-          end
-        end
-      end
-    end
-  end
-end

data/lib/kumi/core/compiler_base.rb DELETED Viewed

@@ -1,137 +0,0 @@
-# frozen_string_literal: true
-module Kumi
-  module Core
-    # Base compiler class with shared compilation logic between Ruby and JS compilers
-    class CompilerBase
-      # Map node classes to compiler methods
-      DISPATCH = {
-        Kumi::Syntax::Literal => :compile_literal,
-        Kumi::Syntax::InputReference => :compile_field_node,
-        Kumi::Syntax::InputElementReference => :compile_element_field_reference,
-        Kumi::Syntax::DeclarationReference => :compile_binding_node,
-        Kumi::Syntax::ArrayExpression => :compile_list,
-        Kumi::Syntax::CallExpression => :compile_call,
-        Kumi::Syntax::CascadeExpression => :compile_cascade
-      }.freeze
-      def initialize(syntax_tree, analyzer_result)
-        @schema = syntax_tree
-        @analysis = analyzer_result
-      end
-      # Shared compilation logic
-      def build_index
-        @index = {}
-        @schema.values.each { |a| @index[a.name] = a }
-        @schema.traits.each { |t| @index[t.name] = t }
-      end
-      def determine_operation_mode_for_path(_path)
-        # Use pre-computed operation mode from analysis
-        compilation_meta = @analysis.state[:broadcasts]&.dig(:compilation_metadata, @current_declaration)
-        compilation_meta&.dig(:operation_mode) || :broadcast
-      end
-      def vectorized_operation?(expr)
-        # Use pre-computed vectorization decision from analysis
-        compilation_meta = @analysis.state[:broadcasts]&.dig(:compilation_metadata, @current_declaration)
-        return false unless compilation_meta
-        # Check if current declaration is vectorized
-        if compilation_meta[:is_vectorized]
-          # For vectorized declarations, check if this specific operation should be vectorized
-          vectorized_ops = @analysis.state[:broadcasts][:vectorized_operations] || {}
-          current_decl_info = vectorized_ops[@current_declaration]
-          # For cascade declarations, check individual operations within them
-          return true if current_decl_info && current_decl_info[:operation] == expr.fn_name
-          # For cascade_with_vectorized_conditions_or_results, allow nested operations
-          return true if current_decl_info && current_decl_info[:source] == :cascade_with_vectorized_conditions_or_results
-          # Check if this is a direct vectorized operation
-          return true if current_decl_info && current_decl_info[:operation]
-        end
-        # Fallback: Reduction functions are NOT vectorized operations - they consume arrays
-        return false if Kumi::Registry.reducer?(expr.fn_name)
-        # Use pre-computed vectorization context for remaining cases
-        compilation_meta.dig(:vectorization_context, :needs_broadcasting) || false
-      end
-      def is_cascade_vectorized?(_expr)
-        # Use metadata to determine if this cascade is vectorized
-        broadcast_meta = @analysis.state[:broadcasts]
-        cascade_info = @current_declaration && broadcast_meta&.dig(:vectorized_operations, @current_declaration)
-        cascade_info && cascade_info[:source] == :cascade_with_vectorized_conditions_or_results
-      end
-      def get_cascade_compilation_metadata
-        compilation_meta = @analysis.state[:broadcasts]&.dig(:compilation_metadata, @current_declaration)
-        cascade_info = compilation_meta&.dig(:cascade_info) || {}
-        [compilation_meta, cascade_info]
-      end
-      def get_cascade_strategy
-        @analysis.state[:broadcasts][:cascade_strategies][@current_declaration]
-      end
-      def get_function_call_strategy
-        compilation_meta = @analysis.state[:broadcasts]&.dig(:compilation_metadata, @current_declaration)
-        compilation_meta&.dig(:function_call_strategy) || {}
-      end
-      def needs_flattening?
-        function_strategy = get_function_call_strategy
-        function_strategy[:flattening_required]
-      end
-      def get_flattening_info
-        @analysis.state[:broadcasts][:flattening_declarations][@current_declaration]
-      end
-      def get_flatten_argument_indices
-        compilation_meta = @analysis.state[:broadcasts]&.dig(:compilation_metadata, @current_declaration)
-        compilation_meta&.dig(:function_call_strategy, :flatten_argument_indices) || []
-      end
-      # Dispatch to the appropriate compile_* method
-      def compile_expr(expr)
-        method = DISPATCH.fetch(expr.class)
-        send(method, expr)
-      end
-      # Abstract methods to be implemented by subclasses
-      def compile_literal(expr)
-        raise NotImplementedError, "Subclasses must implement compile_literal"
-      end
-      def compile_field_node(expr)
-        raise NotImplementedError, "Subclasses must implement compile_field_node"
-      end
-      def compile_element_field_reference(expr)
-        raise NotImplementedError, "Subclasses must implement compile_element_field_reference"
-      end
-      def compile_binding_node(expr)
-        raise NotImplementedError, "Subclasses must implement compile_binding_node"
-      end
-      def compile_list(expr)
-        raise NotImplementedError, "Subclasses must implement compile_list"
-      end
-      def compile_call(expr)
-        raise NotImplementedError, "Subclasses must implement compile_call"
-      end
-      def compile_cascade(expr)
-        raise NotImplementedError, "Subclasses must implement compile_cascade"
-      end
-    end
-  end
-end

data/lib/kumi/core/explain.rb DELETED Viewed

@@ -1,254 +0,0 @@
-# frozen_string_literal: true
-module Kumi
-  module Core
-    module Explain
-      class ExplanationGenerator
-        def initialize(syntax_tree, analysis_state, inputs, registry: Kumi::Registry)
-          @syntax_tree = syntax_tree
-          @state       = analysis_state
-          @inputs      = inputs
-          @definitions = analysis_state[:declarations] || {}
-          @registry    = registry
-          @program = Kumi::Runtime::Executable.from_analysis(@state, registry: nil)
-          @session = @program.read(@inputs, mode: :ruby)
-        end
-        def explain(target_name)
-          decl = @definitions[target_name] or raise ArgumentError, "Unknown declaration: #{target_name}"
-          expr = decl.expression
-          value = @session.get(target_name)
-          prefix = "#{target_name} = "
-          expr_str = format_expression(expr, indent_context: prefix.length)
-          "#{prefix}#{expr_str} => #{format_value(value)}"
-        end
-        private
-        # ---------- formatting ----------
-        def format_expression(expr, indent_context: 0, nested: false)
-          case expr
-          when Kumi::Syntax::InputReference
-            "input.#{expr.name}"
-          when Kumi::Syntax::InputElementReference
-            "input.#{expr.path.join('.')}"
-          when Kumi::Syntax::DeclarationReference
-            expr.name.to_s
-          when Kumi::Syntax::Literal
-            format_value(expr.value)
-          when Kumi::Syntax::ArrayExpression
-            "[" + expr.elements.map { |e| format_expression(e, indent_context:, nested:) }.join(", ") + "]"
-          when Kumi::Syntax::CascadeExpression
-            format_cascade(expr, indent_context:)
-          when Kumi::Syntax::CallExpression
-            format_call(expr, indent_context:, nested:)
-          else
-            expr.class.name.split("::").last
-          end
-        end
-        def format_call(expr, indent_context:, nested:)
-          fn = expr.fn_name
-          if pretty_print?(fn)
-            format_pretty(expr, fn, indent_context:, nested:)
-          else
-            format_generic(expr, indent_context:)
-          end
-        end
-        def pretty_print?(fn)
-          %i[add subtract multiply divide == != > < >= <= and or not].include?(fn)
-        end
-        def format_pretty(expr, fn, indent_context:, nested:)
-          if needs_eval?(expr.args) && !nested
-            if chain_of_same_op?(expr, fn)
-              ops = flatten_chain(expr, fn)
-              sym = op_symbol(fn)
-              sym_args = ops.map { |a| format_expression(a, indent_context:, nested: true) }
-              eval_args = ops.map { |a| eval_arg_for_display(a) }
-              "#{sym_args.join(" #{sym} ")} = #{eval_args.join(" #{sym} ")}"
-            else
-              sym_args = expr.args.map { |a| format_expression(a, indent_context:, nested: true) }
-              eval_args = expr.args.map { |a| eval_arg_for_display(a) }
-              display_fmt(fn, sym_args) + " = " + display_fmt(fn, eval_args)
-            end
-          else
-            display_fmt(fn, expr.args.map { |a| format_expression(a, indent_context:, nested: true) })
-          end
-        end
-        def format_generic(expr, indent_context:)
-          parts = expr.args.map do |a|
-            desc = format_expression(a, indent_context:)
-            if literalish?(a)
-              desc
-            else
-              val = evaluate(a)
-              "#{desc} = #{format_value(val)}"
-            end
-          end
-          if parts.length > 1
-            indent = " " * (indent_context + expr.fn_name.to_s.length + 1)
-            "#{expr.fn_name}(#{parts.join(",\n#{indent}")})"
-          else
-            "#{expr.fn_name}(#{parts.join(', ')})"
-          end
-        end
-        def format_cascade(expr, indent_context:)
-          lines = []
-          expr.cases.each do |c|
-            cond_val = evaluate(c.condition)
-            cond_desc = format_expression(c.condition, indent_context:)
-            res_desc  = format_expression(c.result, indent_context:)
-            lines << "  #{cond_val ? '✓' : '✗'} on #{cond_desc}, #{res_desc}"
-            break if cond_val
-          end
-          "\n" + lines.join("\n")
-        end
-        def literalish?(expr)
-          expr.is_a?(Kumi::Syntax::Literal) ||
-            (expr.is_a?(Kumi::Syntax::ArrayExpression) && expr.elements.all?(Kumi::Syntax::Literal))
-        end
-        def needs_eval?(args)
-          args.any? { |a| !literalish?(a) }
-        end
-        def chain_of_same_op?(expr, fn) = expr.args.any? { |a| a.is_a?(Kumi::Syntax::CallExpression) && a.fn_name == fn }
-        def flatten_chain(expr, fn)
-          expr.args.flat_map do |a|
-            a.is_a?(Kumi::Syntax::CallExpression) && a.fn_name == fn ? flatten_chain(a, fn) : [a]
-          end
-        end
-        def op_symbol(fn)
-          { add: "+", subtract: "-", multiply: "×", divide: "÷" }[fn] || fn.to_s
-        end
-        def display_fmt(fn, args)
-          case fn
-          when :add      then args.join(" + ")
-          when :subtract then args.join(" - ")
-          when :multiply then args.join(" × ")
-          when :divide   then args.join(" ÷ ")
-          when :==       then "#{args[0]} == #{args[1]}"
-          when :!=       then "#{args[0]} != #{args[1]}"
-          when :>        then "#{args[0]} > #{args[1]}"
-          when :<        then "#{args[0]} < #{args[1]}"
-          when :>=       then "#{args[0]} >= #{args[1]}"
-          when :<=       then "#{args[0]} <= #{args[1]}"
-          when :and      then args.join(" && ")
-          when :or       then args.join(" || ")
-          when :not      then "!#{args[0]}"
-          else                "#{fn}(#{args.join(', ')})"
-          end
-        end
-        def eval_arg_for_display(arg)
-          return format_expression(arg, indent_context: 0, nested: true) if literalish?(arg)
-          val = evaluate(arg)
-          if arg.is_a?(Kumi::Syntax::DeclarationReference)
-            "(#{format_expression(arg, indent_context: 0, nested: true)} = #{format_value(val)})"
-          else
-            format_value(val)
-          end
-        end
-        def format_value(v)
-          case v
-          when Float, Integer then format_number(v)
-          when String         then "\"#{v}\""
-          when Array          then if v.length <= 4
-                                     "[#{v.map { |x| format_value(x) }.join(', ')}]"
-                                   else
-                                     "[#{v.take(4).map { |x| format_value(x) }.join(', ')}, …]"
-                                   end
-          else v.to_s
-          end
-        end
-        def format_number(n)
-          return n.to_s unless n.is_a?(Numeric)
-          i = n.is_a?(Integer) || n == n.to_i ? n.to_i : nil
-          return n.to_s unless i
-          i.abs >= 1000 ? i.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1 ').reverse : i.to_s
-        end
-        # ---------- evaluation (Program + Registry) ----------
-        def evaluate(expr)
-          case expr
-          when Kumi::Syntax::DeclarationReference
-            @session.get(expr.name)
-          when Kumi::Syntax::InputReference
-            fetch_indifferent(@inputs, expr.name)
-          when Kumi::Syntax::InputElementReference
-            dig_path(@inputs, expr.path)
-          when Kumi::Syntax::Literal
-            expr.value
-          when Kumi::Syntax::ArrayExpression
-            expr.elements.map { |e| evaluate(e) }
-          when Kumi::Syntax::CascadeExpression
-            evaluate_cascade(expr)
-          when Kumi::Syntax::CallExpression
-            eval_call(expr)
-          else
-            raise "Unsupported expression: #{expr.class}"
-          end
-        end
-        def eval_call(expr)
-          entry = @registry.entry(expr.fn_name) or raise "Unknown function: #{expr.fn_name}"
-          fn = entry.fn
-          args = expr.args.map { |a| evaluate(a) }
-          fn.call(*args)
-        end
-        def evaluate_cascade(expr)
-          expr.cases.each do |c|
-            return evaluate(c.result) if evaluate(c.condition)
-          end
-          nil
-        end
-        def fetch_indifferent(h, k)
-          h[k] || h[k.to_s] || h[k.to_sym]
-        end
-        def dig_path(h, path)
-          node = h
-          path.each do |seg|
-            node = if node.is_a?(Hash)
-                     fetch_indifferent(node, seg)
-                   else
-                     # if arrays are in path, interpret seg as index when Integer-like
-                     seg.is_a?(Integer) ? node[seg] : nil
-                   end
-          end
-          node
-        end
-      end
-      module_function
-      def call(schema_class, target_name, inputs:)
-        syntax_tree     = schema_class.instance_variable_get(:@__syntax_tree__)
-        analysis_state  = schema_class.instance_variable_get(:@__analyzer_result__)&.state
-        raise ArgumentError, "Schema not found or not compiled" unless syntax_tree && analysis_state
-        ExplanationGenerator.new(syntax_tree, analysis_state, inputs).explain(target_name)
-      end
-    end
-  end
-end