desiru 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +7 -0
  2. data/.rspec +1 -0
  3. data/.rubocop.yml +55 -0
  4. data/CLAUDE.md +22 -0
  5. data/Gemfile +36 -0
  6. data/Gemfile.lock +255 -0
  7. data/LICENSE +21 -0
  8. data/README.md +343 -0
  9. data/Rakefile +18 -0
  10. data/desiru.gemspec +44 -0
  11. data/examples/README.md +55 -0
  12. data/examples/async_processing.rb +135 -0
  13. data/examples/few_shot_learning.rb +66 -0
  14. data/examples/graphql_api.rb +190 -0
  15. data/examples/graphql_integration.rb +114 -0
  16. data/examples/rag_retrieval.rb +80 -0
  17. data/examples/simple_qa.rb +31 -0
  18. data/examples/typed_signatures.rb +45 -0
  19. data/lib/desiru/async_capable.rb +170 -0
  20. data/lib/desiru/cache.rb +116 -0
  21. data/lib/desiru/configuration.rb +40 -0
  22. data/lib/desiru/field.rb +171 -0
  23. data/lib/desiru/graphql/data_loader.rb +210 -0
  24. data/lib/desiru/graphql/executor.rb +115 -0
  25. data/lib/desiru/graphql/schema_generator.rb +301 -0
  26. data/lib/desiru/jobs/async_predict.rb +52 -0
  27. data/lib/desiru/jobs/base.rb +53 -0
  28. data/lib/desiru/jobs/batch_processor.rb +71 -0
  29. data/lib/desiru/jobs/optimizer_job.rb +45 -0
  30. data/lib/desiru/models/base.rb +112 -0
  31. data/lib/desiru/models/raix_adapter.rb +210 -0
  32. data/lib/desiru/module.rb +204 -0
  33. data/lib/desiru/modules/chain_of_thought.rb +106 -0
  34. data/lib/desiru/modules/predict.rb +142 -0
  35. data/lib/desiru/modules/retrieve.rb +199 -0
  36. data/lib/desiru/optimizers/base.rb +130 -0
  37. data/lib/desiru/optimizers/bootstrap_few_shot.rb +212 -0
  38. data/lib/desiru/program.rb +106 -0
  39. data/lib/desiru/registry.rb +74 -0
  40. data/lib/desiru/signature.rb +322 -0
  41. data/lib/desiru/version.rb +5 -0
  42. data/lib/desiru.rb +67 -0
  43. metadata +184 -0
data/lib/desiru/modules/retrieve.rb
@@ -0,0 +1,199 @@
+# frozen_string_literal: true
+
+module Desiru
+  module Modules
+    # Retrieve module for RAG (Retrieval Augmented Generation)
+    # Implements vector search capabilities with pluggable backends
+    class Retrieve < Module
+      attr_reader :backend
+
+      def initialize(signature = nil, backend: nil, **)
+        # Default signature for retrieval operations
+        signature ||= 'query: string, k: integer? -> documents: list, scores: list'
+
+        super(signature, **)
+
+        # Initialize backend
+        @backend = backend || InMemoryBackend.new
+        validate_backend!
+      end
+
+      def forward(**inputs)
+        query = inputs[:query]
+        # Handle k parameter - it might come as nil if optional
+        k = inputs.fetch(:k, 5)
+        k = 5 if k.nil? # Ensure we have a value even if nil was passed
+
+        # Perform retrieval using the backend
+        results = backend.search(query, k: k)
+
+        # Separate documents and scores
+        documents = results.map { |r| r[:document] }
+        scores = results.map { |r| r[:score] }
+
+        { documents: documents, scores: scores }
+      end
+
+      # Add documents to the retrieval index
+      def add_documents(documents, embeddings: nil)
+        backend.add(documents, embeddings: embeddings)
+      end
+
+      # Clear the retrieval index
+      def clear_index
+        backend.clear
+      end
+
+      # Get the current document count
+      def document_count
+        backend.size
+      end
+
+      private
+
+      def validate_backend!
+        required_methods = %i[add search clear size]
+        missing_methods = required_methods.reject { |m| backend.respond_to?(m) }
+
+        return unless missing_methods.any?
+
+        raise ConfigurationError, "Backend must implement: #{missing_methods.join(', ')}"
+      end
+    end
+
+    # Abstract base class for retrieval backends
+    class Backend
+      def add(_documents, embeddings: nil)
+        raise NotImplementedError, 'Subclasses must implement #add'
+      end
+
+      def search(_query, k: 5)
+        raise NotImplementedError, 'Subclasses must implement #search'
+      end
+
+      def clear
+        raise NotImplementedError, 'Subclasses must implement #clear'
+      end
+
+      def size
+        raise NotImplementedError, 'Subclasses must implement #size'
+      end
+    end
+
+    # In-memory backend implementation for development and testing
+    class InMemoryBackend < Backend
+      def initialize(distance_metric: :cosine)
+        @documents = []
+        @embeddings = []
+        @distance_metric = distance_metric
+      end
+
+      def add(documents, embeddings: nil)
+        documents = Array(documents)
+
+        # If embeddings provided, they must match document count
+        if embeddings
+          embeddings = Array(embeddings)
+          if embeddings.size != documents.size
+            raise ArgumentError, "Embeddings count (#{embeddings.size}) must match documents count (#{documents.size})"
+          end
+        else
+          # Generate simple embeddings based on document content (for demo purposes)
+          embeddings = documents.map { |doc| generate_simple_embedding(doc) }
+        end
+
+        # Store documents and embeddings
+        @documents.concat(documents)
+        @embeddings.concat(embeddings)
+      end
+
+      def search(query, k: 5)
+        return [] if @documents.empty?
+
+        # Generate query embedding
+        query_embedding = generate_simple_embedding(query)
+
+        # Calculate distances to all documents
+        distances = @embeddings.map.with_index do |embedding, idx|
+          distance = calculate_distance(query_embedding, embedding)
+          { document: @documents[idx], score: distance, index: idx }
+        end
+
+        # Sort by distance (ascending for distance, descending for similarity)
+        sorted = case @distance_metric
+                 when :cosine
+                   # For cosine similarity, higher is better, so sort descending
+                   distances.sort_by { |d| -d[:score] }
+                 else
+                   # For distance metrics, lower is better
+                   distances.sort_by { |d| d[:score] }
+                 end
+
+        # Return top k results
+        sorted.first(k)
+      end
+
+      def clear
+        @documents.clear
+        @embeddings.clear
+      end
+
+      def size
+        @documents.size
+      end
+
+      private
+
+      def generate_simple_embedding(text)
+        # Simple embedding: character frequency vector
+        # In production, use proper embedding models
+        text = text.to_s.downcase
+
+        # Create a 26-dimensional vector for a-z frequency
+        embedding = Array.new(26, 0.0)
+
+        text.each_char do |char|
+          if char.between?('a', 'z')
+            idx = char.ord - 'a'.ord
+            embedding[idx] += 1.0
+          end
+        end
+
+        # Normalize the vector
+        magnitude = Math.sqrt(embedding.sum { |x| x**2 })
+        embedding.map! { |x| x / magnitude } if magnitude.positive?
+
+        embedding
+      end
+
+      def calculate_distance(vec1, vec2)
+        case @distance_metric
+        when :cosine
+          cosine_similarity(vec1, vec2)
+        when :euclidean
+          euclidean_distance(vec1, vec2)
+        else
+          raise ArgumentError, "Unknown distance metric: #{@distance_metric}"
+        end
+      end
+
+      def cosine_similarity(vec1, vec2)
+        # Cosine similarity: dot product of normalized vectors
+        # Since we pre-normalize embeddings, this is just dot product
+        # Returns similarity (1.0 = identical, 0.0 = orthogonal)
+        vec1.zip(vec2).sum { |a, b| a * b }
+      end
+
+      def euclidean_distance(vec1, vec2)
+        # Euclidean distance
+        Math.sqrt(vec1.zip(vec2).sum { |a, b| (a - b)**2 })
+      end
+    end
+  end
+end
+
+# Register in the main module namespace for convenience
+module Desiru
+  Retrieve = Modules::Retrieve
+end
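
For reference, a minimal usage sketch of the Retrieve module backed by the default InMemoryBackend. It assumes the base Module#call accepts keyword inputs and forwards them to #forward, and that results support hash-style access (as extract_answer in the optimizers does); the documents and query below are illustrative.

# Illustrative sketch: index a few documents, then query the default in-memory backend.
retrieve = Desiru::Retrieve.new

retrieve.add_documents([
  'Ruby is a dynamic, open source programming language.',
  'Retrieval Augmented Generation grounds answers in retrieved documents.',
  'Vector search ranks documents by embedding similarity.'
])

result = retrieve.call(query: 'how are documents ranked?', k: 2)
result[:documents] # => the two best-matching documents
result[:scores]    # => their cosine similarity scores (dot products of normalized vectors)
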
data/lib/desiru/optimizers/base.rb
@@ -0,0 +1,130 @@
+# frozen_string_literal: true
+
+module Desiru
+  module Optimizers
+    # Base class for all optimizers
+    class Base
+      attr_reader :metric, :config
+
+      def initialize(metric: :exact_match, **config)
+        @metric = normalize_metric(metric)
+        @config = default_config.merge(config)
+        @optimization_trace = []
+      end
+
+      def compile(program, trainset:, valset: nil)
+        raise NotImplementedError, 'Subclasses must implement #compile'
+      end
+
+      def optimize_module(module_instance, examples)
+        raise NotImplementedError, 'Subclasses must implement #optimize_module'
+      end
+
+      def evaluate(program, dataset)
+        scores = dataset.map do |example|
+          prediction = program.call(example.reject { |k, _| %i[answer output].include?(k) })
+          score_prediction(prediction, example)
+        end
+
+        {
+          average_score: scores.sum.to_f / scores.size,
+          scores: scores,
+          total: scores.size
+        }
+      end
+
+      protected
+
+      def default_config
+        {
+          max_bootstrapped_demos: 3,
+          max_labeled_demos: 16,
+          max_errors: 5,
+          num_candidates: 1,
+          stop_at_score: 1.0
+        }
+      end
+
+      def score_prediction(prediction, ground_truth)
+        case @metric
+        when Proc
+          @metric.call(prediction, ground_truth)
+        when :exact_match
+          exact_match_score(prediction, ground_truth)
+        when :f1
+          f1_score(prediction, ground_truth)
+        when :accuracy
+          accuracy_score(prediction, ground_truth)
+        else
+          raise OptimizerError, "Unknown metric: #{@metric}"
+        end
+      end
+
+      def exact_match_score(prediction, ground_truth)
+        pred_answer = extract_answer(prediction)
+        true_answer = extract_answer(ground_truth)
+
+        pred_answer.to_s.strip.downcase == true_answer.to_s.strip.downcase ? 1.0 : 0.0
+      end
+
+      def f1_score(prediction, ground_truth)
+        pred_tokens = tokenize(extract_answer(prediction))
+        true_tokens = tokenize(extract_answer(ground_truth))
+
+        return 0.0 if pred_tokens.empty? && true_tokens.empty?
+        return 0.0 if pred_tokens.empty? || true_tokens.empty?
+
+        precision = (pred_tokens & true_tokens).size.to_f / pred_tokens.size
+        recall = (pred_tokens & true_tokens).size.to_f / true_tokens.size
+
+        return 0.0 if (precision + recall).zero?
+
+        2 * (precision * recall) / (precision + recall)
+      end
+
+      def accuracy_score(prediction, ground_truth)
+        exact_match_score(prediction, ground_truth)
+      end
+
+      def extract_answer(data)
+        case data
+        when ModuleResult, ProgramResult
+          # Try common answer fields
+          data[:answer] || data[:output] || data[:result] || data.values.first
+        when Hash
+          data[:answer] || data[:output] || data[:result] || data.values.first
+        else
+          data
+        end
+      end
+
+      def tokenize(text)
+        text.to_s.downcase.split(/\W+/).reject(&:empty?)
+      end
+
+      def normalize_metric(metric)
+        case metric
+        when Symbol, String
+          metric.to_sym
+        when Proc
+          metric
+        else
+          raise OptimizerError, 'Metric must be a symbol or proc'
+        end
+      end
+
+      def trace_optimization(step, details)
+        @optimization_trace << {
+          step: step,
+          timestamp: Time.now,
+          details: details
+        }
+
+        Desiru.configuration.logger&.info("[Optimizer] #{step}: #{details}")
+      end
+    end
+
+    # Base error for optimizer-related issues
+    class OptimizerError < Error; end
+  end
+end
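
Besides the built-in :exact_match, :f1, and :accuracy metrics, score_prediction accepts a Proc. A minimal sketch, assuming `program` is any callable Desiru program and using an illustrative one-row dataset; BootstrapFewShot is the concrete subclass in the next file:

# Illustrative sketch: evaluate a program with a custom Proc metric.
# `program` is assumed to be a Desiru::Program (or module) that answers questions.
contains_answer = lambda do |prediction, example|
  prediction[:answer].to_s.downcase.include?(example[:answer].to_s.downcase) ? 1.0 : 0.0
end

optimizer = Desiru::Optimizers::BootstrapFewShot.new(metric: contains_answer)
report = optimizer.evaluate(program, [
  { question: 'What is 2 + 2?', answer: '4' }
])
report[:average_score] # => 1.0 when the program's answer contains '4'
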
data/lib/desiru/optimizers/bootstrap_few_shot.rb
@@ -0,0 +1,212 @@
+# frozen_string_literal: true
+
+module Desiru
+  module Optimizers
+    # Bootstrap Few-Shot optimizer - automatically selects effective demonstrations
+    class BootstrapFewShot < Base
+      def compile(program, trainset:, valset: nil)
+        trace_optimization('Starting BootstrapFewShot optimization', {
+                             trainset_size: trainset.size,
+                             valset_size: valset&.size || 0
+                           })
+
+        # Create a working copy of the program
+        optimized_program = deep_copy_program(program)
+
+        # Optimize each module in the program
+        optimize_modules(optimized_program, trainset, valset)
+
+        # Evaluate final performance
+        if valset
+          final_score = evaluate(optimized_program, valset)
+          trace_optimization('Final validation score', final_score)
+        end
+
+        optimized_program
+      end
+
+      def optimize_module(module_instance, examples)
+        trace_optimization('Optimizing module', {
+                             module: module_instance.class.name,
+                             examples_available: examples.size
+                           })
+
+        # Bootstrap demonstrations
+        bootstrapped_demos = bootstrap_demonstrations(module_instance, examples)
+
+        # Select best demonstrations
+        selected_demos = select_demonstrations(
+          module_instance,
+          bootstrapped_demos,
+          examples
+        )
+
+        # Return module with selected demonstrations
+        module_instance.with_demos(selected_demos)
+      end
+
+      private
+
+      def deep_copy_program(program)
+        # This is a simplified version - in practice, we'd need proper deep copying
+        program.class.new(config: program.config, metadata: program.metadata)
+      end
+
+      def optimize_modules(program, trainset, _valset)
+        # Get all modules from the program
+        modules_to_optimize = extract_modules(program)
+
+        modules_to_optimize.each do |module_name, module_instance|
+          trace_optimization('Processing module', { name: module_name })
+
+          # Create module-specific examples
+          module_examples = create_module_examples(module_instance, trainset)
+
+          # Optimize the module
+          optimized_module = optimize_module(module_instance, module_examples)
+
+          # Replace in program
+          replace_module(program, module_name, optimized_module)
+        end
+      end
+
+      def bootstrap_demonstrations(module_instance, examples)
+        demonstrations = []
+        errors = 0
+
+        examples.each do |example|
+          break if demonstrations.size >= config[:max_bootstrapped_demos]
+          break if errors >= config[:max_errors]
+
+          begin
+            # Get module prediction
+            inputs = example.reject { |k, _| %i[answer output].include?(k) }
+            prediction = module_instance.call(inputs)
+
+            # Score the prediction
+            score = score_prediction(prediction, example)
+
+            if score >= 0.5 # Configurable threshold
+              demonstrations << {
+                input: format_demo_input(inputs),
+                output: format_demo_output(prediction),
+                score: score
+              }
+            else
+              errors += 1
+            end
+          rescue StandardError => e
+            trace_optimization('Error during bootstrap', { error: e.message })
+            errors += 1
+          end
+        end
+
+        demonstrations
+      end
+
+      def select_demonstrations(_module_instance, bootstrapped, examples)
+        all_demos = bootstrapped
+
+        # Add labeled examples if available
+        labeled = examples.select { |ex| ex[:answer] || ex[:output] }
+        labeled_demos = labeled.first(config[:max_labeled_demos]).map do |ex|
+          inputs = ex.reject { |k, _| %i[answer output].include?(k) }
+          {
+            input: format_demo_input(inputs),
+            output: format_demo_output(ex),
+            score: 1.0 # Perfect score for labeled examples
+          }
+        end
+
+        all_demos += labeled_demos
+
+        # Sort by score and diversity
+        selected = select_diverse_demos(all_demos)
+
+        # Take top K
+        selected.first(config[:max_bootstrapped_demos])
+      end
+
+      def select_diverse_demos(demos)
+        # Simple diversity selection - could be improved
+        selected = []
+        remaining = demos.sort_by { |d| -d[:score] }
+
+        while selected.size < config[:max_bootstrapped_demos] && remaining.any?
+          # Take the best remaining
+          best = remaining.shift
+          selected << best
+
+          # Remove similar demos (simple text similarity)
+          remaining.reject! do |demo|
+            similarity(demo[:input], best[:input]) > 0.8
+          end
+        end
+
+        selected
+      end
+
+      def similarity(text1, text2)
+        # Very simple similarity - could use better metrics
+        tokens1 = tokenize(text1)
+        tokens2 = tokenize(text2)
+
+        return 0.0 if tokens1.empty? || tokens2.empty?
+
+        intersection = (tokens1 & tokens2).size
+        union = (tokens1 | tokens2).size
+
+        intersection.to_f / union
+      end
+
+      def format_demo_input(inputs)
+        inputs.map { |k, v| "#{k}: #{v}" }.join("\n")
+      end
+
+      def format_demo_output(output)
+        case output
+        when ModuleResult
+          output.to_h.map { |k, v| "#{k}: #{v}" }.join("\n")
+        when Hash
+          output.map { |k, v| "#{k}: #{v}" }.join("\n")
+        else
+          output.to_s
+        end
+      end
+
+      def extract_modules(program)
+        # This would need to be implemented based on program structure
+        # For now, return modules from instance variables
+        modules = {}
+
+        program.instance_variables.each do |var|
+          value = program.instance_variable_get(var)
+          modules[var.to_s.delete('@').to_sym] = value if value.is_a?(Module)
+        end
+
+        modules
+      end
+
+      def create_module_examples(_module_instance, trainset)
+        # Transform trainset to match module's signature
+        trainset.map do |example|
+          # This is simplified - would need proper field mapping
+          example
+        end
+      end
+
+      def replace_module(program, module_name, new_module)
+        # Replace the module in the program
+        var_name = "@#{module_name}"
+        return unless program.instance_variable_defined?(var_name)
+
+        program.instance_variable_set(var_name, new_module)
+      end
+    end
+  end
+end
+
+# Register in the main module namespace for convenience
+module Desiru
+  BootstrapFewShot = Optimizers::BootstrapFewShot
+end
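
A minimal compile sketch, assuming `qa_program` is an existing Desiru::Program subclass instance whose modules live in instance variables (which is how extract_modules discovers them); the training rows are illustrative:

# Illustrative sketch: bootstrap demonstrations for a program's modules.
trainset = [
  { question: 'What is the capital of France?', answer: 'Paris' },
  { question: 'What is the capital of Japan?', answer: 'Tokyo' }
]

optimizer = Desiru::BootstrapFewShot.new(metric: :exact_match, max_bootstrapped_demos: 2)
optimized_program = optimizer.compile(qa_program, trainset: trainset)

Desiru::BootstrapFewShot is the convenience alias registered at the bottom of the file; Desiru::Optimizers::BootstrapFewShot works equally well.
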
data/lib/desiru/program.rb
@@ -0,0 +1,106 @@
+# frozen_string_literal: true
+
+module Desiru
+  # Base class for composing multiple modules into programs
+  # Implements composition patterns for complex AI workflows
+  class Program
+    attr_reader :modules, :config, :metadata
+
+    def initialize(config: {}, metadata: {})
+      @modules = {}
+      @config = default_config.merge(config)
+      @metadata = metadata
+      @execution_trace = []
+
+      setup_modules
+    end
+
+    def call(inputs = {})
+      @execution_trace.clear
+      start_time = Time.now
+
+      result = forward(inputs)
+
+      execution_time = Time.now - start_time
+
+      ProgramResult.new(
+        result,
+        metadata: {
+          execution_time: execution_time,
+          trace: @execution_trace.dup,
+          program: self.class.name
+        }
+      )
+    rescue StandardError => e
+      handle_error(e)
+    end
+
+    def forward(_inputs)
+      raise NotImplementedError, 'Subclasses must implement #forward'
+    end
+
+    def reset
+      modules.each_value(&:reset)
+      @execution_trace.clear
+    end
+
+    def optimize(optimizer, trainset, valset = nil)
+      optimizer.compile(self, trainset: trainset, valset: valset)
+    end
+
+    def to_h
+      {
+        class: self.class.name,
+        modules: modules.transform_values(&:to_h),
+        config: config,
+        metadata: metadata
+      }
+    end
+
+    protected
+
+    def setup_modules
+      # Override in subclasses to initialize modules
+    end
+
+    def trace_execution(module_name, inputs, outputs)
+      @execution_trace << {
+        module: module_name,
+        inputs: inputs,
+        outputs: outputs.is_a?(ModuleResult) ? outputs.to_h : outputs,
+        timestamp: Time.now
+      }
+    end
+
+    def default_config
+      {
+        max_iterations: 10,
+        early_stopping: true,
+        trace_execution: true
+      }
+    end
+
+    private
+
+    def handle_error(error)
+      Desiru.configuration.logger&.error("Program execution failed: #{error.message}")
+
+      # Programs don't retry by default - let individual modules handle retries
+      raise ProgramError, "Program execution failed: #{error.message}"
+    end
+  end
+
+  # Result object for program outputs
+  class ProgramResult < ModuleResult
+    def trace
+      metadata[:trace] || []
+    end
+
+    def execution_time
+      metadata[:execution_time]
+    end
+  end
+
+  # Base error for program-related issues
+  class ProgramError < Error; end
+end
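
A sketch of a small subclass wiring two of the modules above into a pipeline. It assumes ChainOfThought accepts a signature string like Retrieve does and that Module#call accepts keyword inputs; the signature and question are illustrative:

# Illustrative sketch: a two-step RAG program built on Desiru::Program.
class RagProgram < Desiru::Program
  def forward(inputs)
    retrieved = @retrieve.call(query: inputs[:question], k: 3)
    trace_execution(:retrieve, inputs, retrieved)

    @generate.call(context: retrieved[:documents], question: inputs[:question])
  end

  protected

  def setup_modules
    # Instance variables let optimizers discover these via extract_modules.
    @retrieve = Desiru::Retrieve.new
    @generate = Desiru::Modules::ChainOfThought.new(
      'context: list, question: string -> answer: string'
    )
  end
end

result = RagProgram.new.call(question: 'What does retrieval add to generation?')
result.execution_time # => seconds spent in #forward
result.trace          # => per-module execution trace recorded by trace_execution
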