RubyGems - agentf - Versions diffs - 0.4.7 → 0.6.0 - Mend

agentf 0.4.7 → 0.6.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (34)

checksums.yaml +4 -4
data/lib/agentf/agents/architect.rb +7 -3
data/lib/agentf/agents/base.rb +31 -3
data/lib/agentf/agents/debugger.rb +30 -8
data/lib/agentf/agents/designer.rb +20 -8
data/lib/agentf/agents/documenter.rb +8 -2
data/lib/agentf/agents/explorer.rb +29 -11
data/lib/agentf/agents/reviewer.rb +12 -7
data/lib/agentf/agents/security.rb +27 -15
data/lib/agentf/agents/specialist.rb +34 -18
data/lib/agentf/agents/tester.rb +48 -8
data/lib/agentf/cli/agent.rb +95 -0
data/lib/agentf/cli/eval.rb +203 -0
data/lib/agentf/cli/install.rb +7 -0
data/lib/agentf/cli/memory.rb +138 -90
data/lib/agentf/cli/router.rb +16 -4
data/lib/agentf/cli/update.rb +9 -2
data/lib/agentf/commands/memory_reviewer.rb +22 -48
data/lib/agentf/commands/metrics.rb +18 -25
data/lib/agentf/commands/registry.rb +28 -0
data/lib/agentf/context_builder.rb +4 -14
data/lib/agentf/embedding_provider.rb +35 -0
data/lib/agentf/evals/report.rb +134 -0
data/lib/agentf/evals/runner.rb +771 -0
data/lib/agentf/evals/scenario.rb +211 -0
data/lib/agentf/installer.rb +498 -365
data/lib/agentf/mcp/server.rb +294 -114
data/lib/agentf/memory.rb +354 -214
data/lib/agentf/service/providers.rb +10 -62
data/lib/agentf/version.rb +1 -1
data/lib/agentf/workflow_engine.rb +205 -77
data/lib/agentf.rb +10 -3
metadata +9 -3
data/lib/agentf/packs.rb +0 -74

data/lib/agentf/cli/update.rb CHANGED Viewed

@@ -34,7 +34,8 @@ module Agentf
           scope: "all",
           global_root: Dir.home,
           local_root: Dir.pwd,
-          force: false
+          force: false,
+          opencode_runtime: "mcp"
         }
       end
@@ -78,6 +79,9 @@ module Agentf
         local_root = parse_single_option(args, "--local-root=")
         @options[:local_root] = File.expand_path(local_root) if local_root
+        opencode_runtime = parse_single_option(args, "--opencode-runtime=")
+        @options[:opencode_runtime] = opencode_runtime if opencode_runtime
       end
       def roots_for(scope)
@@ -113,7 +117,8 @@ module Agentf
         installer = @installer_class.new(
           global_root: root,
-          local_root: root
+          local_root: root,
+          opencode_runtime: @options[:opencode_runtime]
         )
         results = installer.install(
@@ -191,12 +196,14 @@ module Agentf
             --scope=SCOPE          Update scope: global|local|all (default: all)
             --global-root=PATH     Root for global installs (default: $HOME)
             --local-root=PATH      Root for local installs (default: current directory)
+            --opencode-runtime=MODE Opencode runtime: mcp|plugin (default: mcp)
             --force                Regenerate even if version matches
           Examples:
             agentf update
             agentf update --force
             agentf update --provider=opencode,copilot --scope=local
+            agentf update --provider=opencode --opencode-runtime=plugin
         HELP
       end
     end

data/lib/agentf/commands/memory_reviewer.rb CHANGED Viewed

@@ -12,14 +12,11 @@ module Agentf
       def self.manifest
         {
           "name" => NAME,
-          "description" => "Review and query Redis-stored memories, pitfalls, and learnings.",
+          "description" => "Review and query Redis-stored memories, episodes, and learnings.",
           "commands" => [
             { "name" => "get_recent_memories", "type" => "function" },
-            { "name" => "get_pitfalls", "type" => "function" },
+            { "name" => "get_episodes", "type" => "function" },
             { "name" => "get_lessons", "type" => "function" },
-            { "name" => "get_successes", "type" => "function" },
-            { "name" => "get_all_tags", "type" => "function" },
-            { "name" => "get_by_tag", "type" => "function" },
             { "name" => "get_by_type", "type" => "function" },
             { "name" => "get_by_agent", "type" => "function" },
             { "name" => "search", "type" => "function" },
@@ -30,9 +27,10 @@ module Agentf
         }
       end
-      def initialize(project: nil)
+      def initialize(project: nil, memory: nil)
         @project = project || Agentf.config.project_name
-        @memory = Agentf::Memory::RedisMemory.new(project: @project)
+        # Allow injecting a memory instance for testing; default to real RedisMemory
+        @memory = memory || Agentf::Memory::RedisMemory.new(project: @project)
       end
       # Get recent memories
@@ -43,10 +41,9 @@ module Agentf
         { "error" => e.message }
       end
-      # Get all pitfalls (things that went wrong)
-      def get_pitfalls(limit: 10)
-        pitfalls = @memory.get_pitfalls(limit: limit)
-        format_memories(pitfalls)
+      def get_episodes(limit: 10, outcome: nil)
+        episodes = @memory.get_episodes(limit: limit, outcome: outcome)
+        format_memories(episodes)
       rescue => e
         { "error" => e.message }
       end
@@ -59,14 +56,6 @@ module Agentf
         { "error" => e.message }
       end
-      # Get all successes
-      def get_successes(limit: 10)
-        successes = @memory.get_memories_by_type(type: "success", limit: limit)
-        format_memories(successes)
-      rescue => e
-        { "error" => e.message }
-      end
       def get_business_intents(limit: 10)
         intents = @memory.get_intents(kind: "business", limit: limit)
         format_memories(intents)
@@ -81,28 +70,17 @@ module Agentf
         { "error" => e.message }
       end
-      # Get all unique tags from memories
-      def get_all_tags
-        tags = @memory.get_all_tags
-        { "tags" => tags.sort, "count" => tags.length }
-      rescue => e
-        { "error" => e.message }
-      end
-      # Get memories by tag
-      def get_by_tag(tag, limit: 10)
-        memories = @memory.get_recent_memories(limit: 100)
-        filtered = memories.select { |m| m["tags"]&.include?(tag) }
-        format_memories(filtered.first(limit))
+      def get_intents(limit: 10)
+        intents = @memory.get_intents(limit: limit)
+        format_memories(intents)
       rescue => e
         { "error" => e.message }
       end
       # Get memories by type (pitfall, lesson, success)
       def get_by_type(type, limit: 10)
-        memories = @memory.get_recent_memories(limit: 100)
-        filtered = memories.select { |m| m["type"] == type }
-        format_memories(filtered.first(limit))
+        memories = @memory.get_memories_by_type(type: type, limit: limit)
+        format_memories(memories)
       rescue => e
         { "error" => e.message }
       end
@@ -118,14 +96,7 @@ module Agentf
       # Search memories by keyword in title or description
       def search(query, limit: 10)
-        memories = @memory.get_recent_memories(limit: 100)
-        q = query.downcase
-        filtered = memories.select do |m|
-          m["title"]&.downcase&.include?(q) ||
-            m["description"]&.downcase&.include?(q) ||
-            m["context"]&.downcase&.include?(q)
-        end
-        format_memories(filtered.first(limit))
+        format_memories(@memory.search_memories(query: query, limit: limit))
       rescue => e
         { "error" => e.message }
       end
@@ -133,19 +104,22 @@ module Agentf
       # Get summary statistics
       def get_summary
         memories = @memory.get_recent_memories(limit: 100)
-        tags = @memory.get_all_tags
         {
           "total_memories" => memories.length,
           "by_type" => {
-            "pitfall" => memories.count { |m| m["type"] == "pitfall" },
+            "episode" => memories.count { |m| m["type"] == "episode" },
             "lesson" => memories.count { |m| m["type"] == "lesson" },
-            "success" => memories.count { |m| m["type"] == "success" },
+            "playbook" => memories.count { |m| m["type"] == "playbook" },
             "business_intent" => memories.count { |m| m["type"] == "business_intent" },
             "feature_intent" => memories.count { |m| m["type"] == "feature_intent" }
           },
+          "by_outcome" => {
+            "positive" => memories.count { |m| m["outcome"] == "positive" },
+            "negative" => memories.count { |m| m["outcome"] == "negative" },
+            "neutral" => memories.count { |m| m["outcome"] == "neutral" }
+          },
           "by_agent" => memories.each_with_object(Hash.new(0)) { |m, h| h[m["agent"]] += 1 },
-          "unique_tags" => tags.length,
           "project" => @project
         }
       rescue => e
@@ -181,7 +155,7 @@ module Agentf
           "description" => m["description"],
           "context" => m["context"],
           "code_snippet" => m["code_snippet"],
-          "tags" => m["tags"],
+          "outcome" => m["outcome"],
           "agent" => m["agent"],
           "metadata" => m["metadata"],
           "entity_ids" => m["entity_ids"],

data/lib/agentf/commands/metrics.rb CHANGED Viewed

@@ -7,8 +7,6 @@ module Agentf
     class Metrics
       NAME = "metrics"
-      WORKFLOW_METRICS_TAG = "workflow_metric"
       def self.manifest
         {
           "name" => NAME,
@@ -28,20 +26,23 @@ module Agentf
       def record_workflow(workflow_state)
         metrics = extract_metrics(workflow_state)
-        @memory.store_episode(
-          type: "success",
-          title: metric_title(metrics),
-          description: metric_description(metrics),
-          context: metric_context(metrics),
-          tags: metric_tags(metrics),
-          agent: Agentf::AgentRoles::ORCHESTRATOR,
-          code_snippet: ""
-        )
-        { "status" => "recorded", "metrics" => metrics }
-      rescue StandardError => e
-        { "status" => "error", "error" => e.message }
+        begin
+          @memory.store_episode(
+            type: "episode",
+            title: metric_title(metrics),
+            description: metric_description(metrics),
+            context: metric_context(metrics),
+            agent: Agentf::AgentRoles::ORCHESTRATOR,
+            outcome: "positive",
+            metadata: { "workflow_metric" => true },
+            code_snippet: ""
+          )
+          { "status" => "recorded", "metrics" => metrics }
+        rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
+          { "status" => "confirmation_required", "confirmation_details" => e.details, "attempted" => { "action" => "record_workflow" } }
+        rescue StandardError => e
+          { "status" => "error", "error" => e.message }
+        end
       end
       def summary(limit: 100)
@@ -169,14 +170,6 @@ module Agentf
         }.to_json
       end
-      def metric_tags(metrics)
-        [
-          WORKFLOW_METRICS_TAG,
-          "provider:#{metrics['provider'].to_s.downcase}",
-          "workflow:#{metrics['workflow_type']}"
-        ]
-      end
       def top_contract_violations(records)
         counts = Hash.new(0)
         records.each do |record|
@@ -189,7 +182,7 @@ module Agentf
         memories = @memory.get_recent_memories(limit: limit)
         memories
-          .select { |m| Array(m["tags"]).include?(WORKFLOW_METRICS_TAG) }
+          .select { |m| m.dig("metadata", "workflow_metric") == true }
           .map do |m|
             context = parse_context_json(m["context"])
             context

data/lib/agentf/commands/registry.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+module Agentf
+  module Commands
+    class Registry
+      def initialize(map = {})
+        @map = map
+      end
+      def register(name, impl)
+        @map[name.to_s] = impl
+      end
+      def fetch(name)
+        @map.fetch(name.to_s)
+      end
+      def call(command_name, action, *args)
+        impl = fetch(command_name)
+        if impl.respond_to?(action)
+          impl.public_send(action, *args)
+        else
+          raise "Command #{command_name} does not implement #{action}"
+        end
+      end
+    end
+  end
+end

data/lib/agentf/context_builder.rb CHANGED Viewed

@@ -2,8 +2,9 @@
 module Agentf
   class ContextBuilder
-    def initialize(memory:)
+    def initialize(memory:, embedding_provider: Agentf::EmbeddingProvider.new)
       @memory = memory
+      @embedding_provider = embedding_provider
     end
     def build(agent:, workflow_state:, limit: 8)
@@ -13,23 +14,12 @@ module Agentf
       @memory.get_agent_context(
         agent: agent,
         task_type: task_type,
-        query_embedding: simple_embedding(task),
+        query_text: task,
+        query_embedding: @embedding_provider.embed(task),
         limit: limit
       )
     rescue StandardError
       { "agent" => agent, "intent" => [], "memories" => [], "similar_tasks" => [] }
     end
-    private
-    def simple_embedding(text)
-      normalized = text.to_s.downcase
-      [
-        normalized.include?("fix") || normalized.include?("bug") ? 1.0 : 0.0,
-        normalized.include?("feature") || normalized.include?("add") ? 1.0 : 0.0,
-        normalized.include?("security") ? 1.0 : 0.0,
-        normalized.length.to_f / 100.0
-      ]
-    end
   end
 end

data/lib/agentf/embedding_provider.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+require "digest"
+module Agentf
+  class EmbeddingProvider
+    DIMENSIONS = 64
+    def initialize(dimensions: DIMENSIONS)
+      @dimensions = dimensions
+    end
+    def embed(text)
+      tokens = tokenize(text)
+      return [] if tokens.empty?
+      vector = Array.new(@dimensions, 0.0)
+      tokens.each do |token|
+        hash = Digest::SHA256.hexdigest(token)[0, 8].to_i(16)
+        vector[hash % @dimensions] += 1.0
+      end
+      magnitude = Math.sqrt(vector.sum { |value| value * value })
+      return vector if magnitude.zero?
+      vector.map { |value| (value / magnitude).round(8) }
+    end
+    private
+    def tokenize(text)
+      text.to_s.downcase.scan(/[a-z0-9_]+/).reject { |token| token.length < 2 }
+    end
+  end
+end

data/lib/agentf/evals/report.rb ADDED Viewed

@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+require "json"
+module Agentf
+  module Evals
+    class Report
+      def initialize(output_root: Runner::DEFAULT_OUTPUT_ROOT)
+        @output_root = File.expand_path(output_root)
+      end
+      attr_reader :output_root
+      def generate(limit: nil, since: nil, scenario: nil)
+        records = load_history
+        records = filter_since(records, since)
+        records = filter_scenario(records, scenario)
+        records = records.last(limit) if limit && limit.positive?
+        {
+          "output_root" => output_root,
+          "history_path" => history_path,
+          "count" => records.length,
+          "passes" => records.count { |record| record["status"] == "passed" },
+          "failures" => records.count { |record| record["status"] == "failed" },
+          "retry_summary" => summarize_retries(records),
+          "memory_effectiveness" => summarize_memory_effectiveness(records),
+          "providers" => summarize_dimension(records, "providers"),
+          "models" => summarize_dimension(records, "models"),
+          "scenarios" => summarize_scenarios(records)
+        }
+      end
+      private
+      def history_path
+        File.join(output_root, "history.jsonl")
+      end
+      def load_history
+        return [] unless File.exist?(history_path)
+        File.readlines(history_path, chomp: true).filter_map do |line|
+          next if line.to_s.strip.empty?
+          JSON.parse(line)
+        rescue JSON::ParserError
+          nil
+        end
+      end
+      def filter_since(records, since)
+        return records unless since
+        cutoff = since.is_a?(Time) ? since : Time.parse(since.to_s)
+        records.select do |record|
+          recorded_at = record["recorded_at"]
+          recorded_at && Time.parse(recorded_at) >= cutoff
+        rescue ArgumentError
+          false
+        end
+      end
+      def filter_scenario(records, scenario)
+        return records if scenario.to_s.strip.empty?
+        records.select { |record| record["scenario"] == scenario }
+      end
+      def summarize_retries(records)
+        retried = records.count { |record| record["retry_count"].to_i.positive? }
+        {
+          "retried_runs" => retried,
+          "total_retries" => records.sum { |record| record["retry_count"].to_i },
+          "flaky_runs" => records.count { |record| record["flaky"] == true }
+        }
+      end
+      def summarize_dimension(records, key)
+        summary = Hash.new { |hash, name| hash[name] = base_stats }
+        records.each do |record|
+          Array(record[key]).each do |name|
+            entry = summary[name]
+            update_stats(entry, record)
+          end
+        end
+        summary
+      end
+      def summarize_scenarios(records)
+        summary = Hash.new { |hash, name| hash[name] = base_stats.merge("last_status" => nil, "last_recorded_at" => nil) }
+        records.each do |record|
+          entry = summary[record["scenario"]]
+          update_stats(entry, record)
+          update_memory_effectiveness(entry, record)
+          entry["last_status"] = record["status"]
+          entry["last_recorded_at"] = record["recorded_at"]
+        end
+        summary
+      end
+      def summarize_memory_effectiveness(records)
+        relevant = records.filter_map { |record| record["memory_effectiveness"] }
+        {
+          "tracked_runs" => relevant.length,
+          "retrieved_expected_memory" => relevant.count { |item| item["retrieved_expected_memory"] == true }
+        }
+      end
+      def base_stats
+        { "total" => 0, "passed" => 0, "failed" => 0, "retried" => 0, "flaky" => 0 }
+      end
+      def update_stats(entry, record)
+        entry["total"] += 1
+        entry[record["status"] == "passed" ? "passed" : "failed"] += 1
+        entry["retried"] += 1 if record["retry_count"].to_i.positive?
+        entry["flaky"] += 1 if record["flaky"] == true
+      end
+      def update_memory_effectiveness(entry, record)
+        effect = record["memory_effectiveness"]
+        return unless effect
+        entry["memory_tracked"] = entry.fetch("memory_tracked", 0) + 1
+        entry["memory_retrieved"] = entry.fetch("memory_retrieved", 0) + (effect["retrieved_expected_memory"] == true ? 1 : 0)
+      end
+    end
+  end
+end