RubyGems - agentf - Versions diffs - 0.4.6 → 0.5.0 - Mend

agentf 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +4 -4
data/lib/agentf/agents/architect.rb +4 -0
data/lib/agentf/agents/base.rb +29 -1
data/lib/agentf/agents/debugger.rb +33 -10
data/lib/agentf/agents/designer.rb +19 -8
data/lib/agentf/agents/documenter.rb +6 -0
data/lib/agentf/agents/explorer.rb +31 -12
data/lib/agentf/agents/reviewer.rb +5 -0
data/lib/agentf/agents/security.rb +26 -16
data/lib/agentf/agents/specialist.rb +32 -18
data/lib/agentf/agents/tester.rb +47 -8
data/lib/agentf/cli/agent.rb +95 -0
data/lib/agentf/cli/eval.rb +203 -0
data/lib/agentf/cli/install.rb +7 -0
data/lib/agentf/cli/memory.rb +82 -30
data/lib/agentf/cli/router.rb +15 -3
data/lib/agentf/cli/update.rb +9 -2
data/lib/agentf/commands/memory_reviewer.rb +10 -2
data/lib/agentf/commands/metrics.rb +16 -14
data/lib/agentf/commands/registry.rb +28 -0
data/lib/agentf/evals/report.rb +134 -0
data/lib/agentf/evals/runner.rb +771 -0
data/lib/agentf/evals/scenario.rb +211 -0
data/lib/agentf/installer.rb +486 -348
data/lib/agentf/mcp/server.rb +291 -49
data/lib/agentf/memory.rb +97 -19
data/lib/agentf/service/providers.rb +10 -62
data/lib/agentf/version.rb +1 -1
data/lib/agentf/workflow_engine.rb +204 -73
data/lib/agentf.rb +9 -3
metadata +8 -3
data/lib/agentf/packs.rb +0 -74

data/lib/agentf/cli/eval.rb ADDED Viewed

@@ -0,0 +1,203 @@
+# frozen_string_literal: true
+require_relative "arg_parser"
+require_relative "../evals/runner"
+module Agentf
+  module CLI
+    # Command-line front end for `agentf eval` (list / run / report).
+    #
+    # Option parsing uses parse_single_option and parse_integer_option, which
+    # are expected to come from the included ArgParser module (not visible here).
+    class Eval
+      include ArgParser
+      # @param runner [Object, nil] optional runner used instead of building an
+      #   Agentf::Evals::Runner; it must respond to #list, #root and #run.
+      def initialize(runner: nil)
+        @runner = runner
+        @json_output = false
+      end
+      # Entry point: strips `--json`, then dispatches on the first remaining
+      # argument ("help" when none is given).
+      #
+      # @param args [Array<String>] CLI arguments; mutated in place.
+      # @return [void] calls `exit 1` after printing help for an unknown command.
+      def run(args)
+        @json_output = !args.delete("--json").nil?
+        command = args.shift || "help"
+        case command
+        when "list"
+          list_scenarios(args)
+        when "run"
+          run_scenarios(args)
+        when "report"
+          report_results(args)
+        when "help", "--help", "-h"
+          show_help
+        else
+          $stderr.puts "Unknown eval command: #{command}"
+          $stderr.puts
+          show_help
+          exit 1
+        end
+      end
+      private
+      # Prints the scenarios known to the runner, as JSON (`--json`) or as a
+      # human-readable list with each scenario's execution target.
+      def list_scenarios(args)
+        runner = build_runner(args)
+        scenarios = runner.list
+        if @json_output
+          puts JSON.generate({ "count" => scenarios.length, "scenarios" => scenarios.map(&:to_h) })
+          return
+        end
+        if scenarios.empty?
+          puts "No eval scenarios found under #{runner.root}"
+          return
+        end
+        puts "Eval scenarios (#{scenarios.length}):"
+        scenarios.each do |scenario|
+          suffix = scenario.description.empty? ? "" : " - #{scenario.description}"
+          target = if scenario.execution_mode == "mcp"
+                     "mcp: #{scenario.mcp_tool}"
+                   elsif scenario.execution_mode == "provider"
+                     "provider: #{scenario.provider_name}"
+                   else
+                     "agent: #{scenario.agent}"
+                   end
+          puts "  - #{scenario.name} (#{target})#{suffix}"
+        end
+      end
+      # Runs one scenario (or "all") and prints the outcome.
+      #
+      # Exits with status 1 when the text summary reports failed scenarios.
+      # NOTE(review): in `--json` mode the method returns before that check, so
+      # the exit status stays 0 even when scenarios failed - confirm intended.
+      def run_scenarios(args)
+        keep_workspace = !args.delete("--keep-workspace").nil?
+        timeout_seconds = parse_integer_option(args, "--timeout=", default: 0)
+        runner = build_runner(args)
+        # Choose the scenario name only after the flags were handled, skipping
+        # anything that still looks like a flag. Taking args.shift first turned
+        # `agentf eval run --keep-workspace` into a run of a scenario literally
+        # named "--keep-workspace" instead of "all".
+        name_index = args.index { |arg| !arg.start_with?("-") }
+        name = name_index ? args.delete_at(name_index) : "all"
+        result = runner.run(name: name, keep_workspace: keep_workspace, timeout_seconds: timeout_seconds.positive? ? timeout_seconds : nil)
+        if @json_output
+          puts JSON.pretty_generate(result)
+          return
+        end
+        puts "Evals complete: #{result['passed']}/#{result['count']} passed"
+        result["results"].each do |scenario_result|
+          status = scenario_result["status"] == "passed" ? "PASS" : "FAIL"
+          detail = scenario_result["failure_step"] ? " (failed at #{scenario_result['failure_step']})" : ""
+          puts "  - [#{status}] #{scenario_result['scenario']}#{detail}"
+          puts "    artifacts: #{scenario_result['artifact_dir']}"
+        end
+        print_matrix_summary(result["matrix"])
+        exit 1 if result["failed"].positive?
+      end
+      # Returns the injected runner, or a new Agentf::Evals::Runner configured
+      # from `--root=` / `--output-dir=`. The options are parsed even when a
+      # runner was injected.
+      def build_runner(args)
+        root = parse_single_option(args, "--root=")
+        output_root = parse_single_option(args, "--output-dir=")
+        @runner || Agentf::Evals::Runner.new(root: root, output_root: output_root)
+      end
+      # Prints usage for the eval subcommands.
+      def show_help
+        puts <<~HELP
+          Usage: agentf eval <command> [options]
+          Commands:
+            list                          List available eval scenarios
+            run <scenario|all>            Run one scenario or all scenarios
+            report                        Summarize eval history
+          Options:
+            --root=<path>                 Scenario root directory (default: ./evals)
+            --output-dir=<path>           Artifact output directory (default: tmp/evals)
+            --timeout=<seconds>           Override per-scenario timeout
+            --keep-workspace              Keep temp workspace after run
+            --json                        Output structured JSON
+          Examples:
+            agentf eval list
+            agentf eval run engineer_store_success
+            agentf eval report
+            agentf eval run all --json
+        HELP
+      end
+      # Builds an Agentf::Evals::Report from `--output-dir=` (falling back to
+      # Runner::DEFAULT_OUTPUT_ROOT) and prints it, filtered by `--limit=`,
+      # `--since=` and `--scenario=`. A non-positive limit is passed as nil.
+      def report_results(args)
+        output_root = parse_single_option(args, "--output-dir=")
+        limit = parse_integer_option(args, "--limit=", default: 0)
+        since = parse_single_option(args, "--since=")
+        scenario = parse_single_option(args, "--scenario=")
+        report = Agentf::Evals::Report.new(output_root: output_root || Agentf::Evals::Runner::DEFAULT_OUTPUT_ROOT)
+        result = report.generate(limit: limit.positive? ? limit : nil, since: since, scenario: scenario)
+        if @json_output
+          puts JSON.pretty_generate(result)
+          return
+        end
+        puts "Eval history: #{result['passes']}/#{result['count']} passed"
+        puts "Retries: #{result.dig('retry_summary', 'total_retries')} total, #{result.dig('retry_summary', 'flaky_runs')} flaky passes"
+        if result["memory_effectiveness"]
+          puts "Memory retrieval: #{result.dig('memory_effectiveness', 'retrieved_expected_memory')}/#{result.dig('memory_effectiveness', 'tracked_runs')} tracked runs retrieved expected memory"
+        end
+        print_comparison_table("Providers", result["providers"])
+        print_comparison_table("Models", result["models"])
+        print_scenario_trends(result["scenarios"])
+        print_matrix_summary({ "providers" => result["providers"], "models" => result["models"] })
+      end
+      # Prints a pass/fail/retry/flaky table sorted by name; prints nothing when
+      # rows is nil or empty.
+      def print_comparison_table(title, rows)
+        return if rows.to_h.empty?
+        puts "#{title}:"
+        puts "  Name                 Pass  Fail  Retry  Flaky"
+        rows.sort.each do |name, stats|
+          puts format(
+            "  %-20s %4d  %4d  %5d  %5d",
+            name,
+            stats["passed"].to_i,
+            stats["failed"].to_i,
+            stats["retried"].to_i,
+            stats["flaky"].to_i
+          )
+        end
+      end
+      # Like print_comparison_table, with an extra yes/no column that is "yes"
+      # when the row's "memory_retrieved" count is positive.
+      def print_scenario_trends(rows)
+        return if rows.to_h.empty?
+        puts "Scenario trends:"
+        puts "  Scenario              Pass  Fail  Retry  Flaky  Mem"
+        rows.sort.each do |name, stats|
+          puts format(
+            "  %-20s %4d  %4d  %5d  %5d  %3s",
+            name,
+            stats["passed"].to_i,
+            stats["failed"].to_i,
+            stats["retried"].to_i,
+            stats["flaky"].to_i,
+            stats.fetch("memory_retrieved", 0).to_i.positive? ? "yes" : "no"
+          )
+        end
+      end
+      # Prints "passed/total" lines for the "providers" and "models" entries of
+      # matrix; does nothing unless matrix is a Hash.
+      def print_matrix_summary(matrix)
+        return unless matrix.is_a?(Hash)
+        print_matrix_section("Provider matrix:", matrix.fetch("providers", {}))
+        print_matrix_section("Model matrix:", matrix.fetch("models", {}))
+      end
+      # Prints one matrix section. A nil section is treated as empty because
+      # report_results forwards result["providers"] / result["models"] without
+      # checking that they are present.
+      def print_matrix_section(heading, entries)
+        return if entries.to_h.empty?
+        puts heading
+        entries.each do |label, stats|
+          puts "  - #{label}: #{stats['passed']}/#{stats['total']} passed"
+        end
+      end
+    end
+  end
+end

data/lib/agentf/cli/install.rb CHANGED Viewed

@@ -17,6 +17,7 @@ module Agentf
           local_root: Dir.pwd,
           dry_run: false,
           install_deps: true,
+          opencode_runtime: "mcp",
           only_agents: nil,
           only_commands: nil
         }
@@ -35,6 +36,7 @@ module Agentf
           local_root: @options[:local_root],
           dry_run: @options[:dry_run],
           install_deps: @options[:install_deps],
+          opencode_runtime: @options[:opencode_runtime],
           verbose: @options.fetch(:verbose, false)
         )
@@ -72,6 +74,9 @@ module Agentf
         # Extract --install-deps flag
         @options[:install_deps] = !args.delete("--install-deps").nil?
+        opencode_runtime = parse_single_option(args, "--opencode-runtime=")
+        @options[:opencode_runtime] = opencode_runtime if opencode_runtime
         # Extract --global-root and --local-root
         global_root = parse_single_option(args, "--global-root=")
         @options[:global_root] = File.expand_path(global_root) if global_root
@@ -107,6 +112,7 @@ module Agentf
             --local-root=PATH      Root for local installs (default: current directory)
             --agent=LIST           Only install specific agents (comma-separated)
             --command=LIST         Only install specific commands (comma-separated)
+            --opencode-runtime=MODE Opencode runtime: mcp|plugin (default: mcp)
             --dry-run              Show planned writes without writing files
           Examples:
@@ -114,6 +120,7 @@ module Agentf
             agentf install --provider=opencode,copilot --scope=local
             agentf install --provider=copilot --dry-run
             agentf install --agent=architect,specialist
+            agentf install --provider=opencode --opencode-runtime=plugin
         HELP
       end
     end

data/lib/agentf/cli/memory.rb CHANGED Viewed

@@ -145,18 +145,30 @@ module Agentf
         constraints = parse_list_option(args, "--constraints=")
         priority = parse_integer_option(args, "--priority=", default: 1)
-        intent_id = @memory.store_business_intent(
-          title: title,
-          description: description,
-          tags: tags,
-          constraints: constraints,
-          priority: priority
-        )
+        id = nil
+        res = safe_cli_memory_write(@memory, attempted: { command: "add-business-intent", args: { title: title, description: description, tags: tags, constraints: constraints, priority: priority } }) do
+          id = @memory.store_business_intent(
+            title: title,
+            description: description,
+            tags: tags,
+            constraints: constraints,
+            priority: priority
+          )
+        end
+        if res.is_a?(Hash) && res["confirmation_required"]
+          if @json_output
+            puts JSON.generate(res)
+          else
+            $stderr.puts "Confirmation required to store business intent: #{res['confirmation_details'].inspect}"
+          end
+          return
+        end
         if @json_output
-          puts JSON.generate({ "id" => intent_id, "type" => "business_intent", "status" => "stored" })
+          puts JSON.generate({ "id" => id, "type" => "business_intent", "status" => "stored" })
         else
-          puts "Stored business intent: #{intent_id}"
+          puts "Stored business intent: #{id}"
         end
       end
@@ -174,19 +186,31 @@ module Agentf
         non_goals = parse_list_option(args, "--non-goals=")
         related_task_id = parse_single_option(args, "--task=")
-        intent_id = @memory.store_feature_intent(
-          title: title,
-          description: description,
-          tags: tags,
-          acceptance_criteria: acceptance_criteria,
-          non_goals: non_goals,
-          related_task_id: related_task_id
-        )
+        id = nil
+        res = safe_cli_memory_write(@memory, attempted: { command: "add-feature-intent", args: { title: title, description: description, tags: tags, acceptance: acceptance_criteria, non_goals: non_goals, related_task_id: related_task_id } }) do
+          id = @memory.store_feature_intent(
+            title: title,
+            description: description,
+            tags: tags,
+            acceptance_criteria: acceptance_criteria,
+            non_goals: non_goals,
+            related_task_id: related_task_id
+          )
+        end
+        if res.is_a?(Hash) && res["confirmation_required"]
+          if @json_output
+            puts JSON.generate(res)
+          else
+            $stderr.puts "Confirmation required to store feature intent: #{res['confirmation_details'].inspect}"
+          end
+          return
+        end
         if @json_output
-          puts JSON.generate({ "id" => intent_id, "type" => "feature_intent", "status" => "stored" })
+          puts JSON.generate({ "id" => id, "type" => "feature_intent", "status" => "stored" })
         else
-          puts "Stored feature intent: #{intent_id}"
+          puts "Stored feature intent: #{id}"
         end
       end
@@ -204,20 +228,48 @@ module Agentf
         agent = parse_single_option(args, "--agent=") || Agentf::AgentRoles::ENGINEER
         code_snippet = parse_single_option(args, "--code=").to_s
-        intent_id = @memory.store_episode(
-          type: type,
-          title: title,
-          description: description,
-          context: context,
-          tags: tags,
-          agent: agent,
-          code_snippet: code_snippet
-        )
+        id = nil
+        res = safe_cli_memory_write(@memory, attempted: { command: "add-#{type}", args: { title: title, description: description, tags: tags, context: context, agent: agent, code: code_snippet } }) do
+          id = @memory.store_episode(
+            type: type,
+            title: title,
+            description: description,
+            context: context,
+            tags: tags,
+            agent: agent,
+            code_snippet: code_snippet
+          )
+        end
+        if res.is_a?(Hash) && res["confirmation_required"]
+          if @json_output
+            puts JSON.generate(res)
+          else
+            $stderr.puts "Confirmation required to store #{type}: #{res['confirmation_details'].inspect}"
+          end
+          return
+        end
         if @json_output
-          puts JSON.generate({ "id" => intent_id, "type" => type, "status" => "stored" })
+          puts JSON.generate({ "id" => id, "type" => type, "status" => "stored" })
         else
-          puts "Stored #{type}: #{intent_id}"
+          puts "Stored #{type}: #{id}"
+        end
+      end
+      # Helper to standardize CLI memory write confirmation handling.
+      #
+      # Yields to the given block (the actual write) and returns nil when the
+      # block completes. If the block raises
+      # Agentf::Memory::RedisMemory::ConfirmationRequired, returns a Hash
+      # describing the pending confirmation instead of propagating the error.
+      #
+      # @param memory the memory object being written to; not referenced in the
+      #   body of this helper
+      # @param attempted [Hash] description of the attempted command, echoed
+      #   back under the "attempted" key
+      # @return [nil, Hash] nil on success, or a Hash with
+      #   "confirmation_required" => true and the exception's details
+      def safe_cli_memory_write(memory, attempted: {})
+        begin
+          yield
+          nil
+        rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
+          {
+            "confirmation_required" => true,
+            "confirmation_details" => e.details,
+            "attempted" => attempted,
+            "confirmed_write_token" => "confirmed",
+            "confirmation_prompt" => "Ask the user whether to save this memory. If they approve, rerun the same command with confirmation enabled. If they decline, do not retry."
+          }
         end
       end

data/lib/agentf/cli/router.rb CHANGED Viewed

@@ -7,6 +7,7 @@ require_relative "install"
 require_relative "update"
 require_relative "metrics"
 require_relative "architecture"
+require_relative "eval"
 module Agentf
   module CLI
@@ -18,8 +19,8 @@ module Agentf
     #   agentf install --provider opencode,copilot
     #   agentf version
     #   agentf help
-    class Router
-      SUBCOMMANDS = %w[memory code metrics architecture install update mcp-server version help].freeze
+      class Router
+        SUBCOMMANDS = %w[memory code metrics architecture install update eval agent mcp-server version help].freeze
       def run(args)
         subcommand = args.shift || "help"
@@ -42,8 +43,14 @@ module Agentf
           Architecture.new.run(args)
         when "update"
           Update.new.run(args)
+        when "eval"
+          Eval.new.run(args)
         when "mcp-server"
           start_mcp_server
+        when "agent"
+          # agent <AGENT_NAME> [payload]
+          require_relative "agent"
+          Agent.new.run(args)
         when "version", "--version", "-v"
           puts "agentf #{Agentf::VERSION}"
         when "help", "--help", "-h"
@@ -74,6 +81,8 @@ module Agentf
             architecture Analyze architecture layers and violations
             install      Generate provider manifests (agents, commands, tools)
             update       Regenerate manifests when gem version changes
+            eval         Run black-box eval scenarios against `agentf agent`
+            agent        Run a single agent directly
             mcp-server   Start MCP server over stdio (for Copilot integration)
             version      Show version
@@ -87,7 +96,7 @@ module Agentf
             AGENTF_WORKFLOW_CONTRACT_MODE=advisory|enforcing|off   Contract behavior mode
             AGENTF_AGENT_CONTRACT_ENABLED=true|false   Enable/disable per-agent contract checks
             AGENTF_AGENT_CONTRACT_MODE=advisory|enforcing|off   Per-agent contract behavior mode
-            AGENTF_DEFAULT_PACK=generic|rails_standard|rails_37signals|rails_feature_spec
+  (AGENTF_DEFAULT_PACK no longer used — orchestrator uses internal profiles)
             AGENTF_GEM_PATH=/path/to/gem   Path to agentf gem (for OpenCode plugin binary resolution)
           Examples:
@@ -100,6 +109,9 @@ module Agentf
             agentf metrics parity --json
             agentf architecture analyze
             agentf architecture review --json
+            agentf eval list
+            agentf eval run all --json
+            agentf agent planner "Plan a refactor" --json
             agentf update
             agentf update --force --provider=opencode,copilot
             agentf mcp-server

data/lib/agentf/cli/update.rb CHANGED Viewed

@@ -34,7 +34,8 @@ module Agentf
           scope: "all",
           global_root: Dir.home,
           local_root: Dir.pwd,
-          force: false
+          force: false,
+          opencode_runtime: "mcp"
         }
       end
@@ -78,6 +79,9 @@ module Agentf
         local_root = parse_single_option(args, "--local-root=")
         @options[:local_root] = File.expand_path(local_root) if local_root
+        opencode_runtime = parse_single_option(args, "--opencode-runtime=")
+        @options[:opencode_runtime] = opencode_runtime if opencode_runtime
       end
       def roots_for(scope)
@@ -113,7 +117,8 @@ module Agentf
         installer = @installer_class.new(
           global_root: root,
-          local_root: root
+          local_root: root,
+          opencode_runtime: @options[:opencode_runtime]
         )
         results = installer.install(
@@ -191,12 +196,14 @@ module Agentf
             --scope=SCOPE          Update scope: global|local|all (default: all)
             --global-root=PATH     Root for global installs (default: $HOME)
             --local-root=PATH      Root for local installs (default: current directory)
+            --opencode-runtime=MODE Opencode runtime: mcp|plugin (default: mcp)
             --force                Regenerate even if version matches
           Examples:
             agentf update
             agentf update --force
             agentf update --provider=opencode,copilot --scope=local
+            agentf update --provider=opencode --opencode-runtime=plugin
         HELP
       end
     end

data/lib/agentf/commands/memory_reviewer.rb CHANGED Viewed

@@ -30,9 +30,10 @@ module Agentf
         }
       end
-      def initialize(project: nil)
+      def initialize(project: nil, memory: nil)
         @project = project || Agentf.config.project_name
-        @memory = Agentf::Memory::RedisMemory.new(project: @project)
+        # Allow injecting a memory instance for testing; default to real RedisMemory
+        @memory = memory || Agentf::Memory::RedisMemory.new(project: @project)
       end
       # Get recent memories
@@ -81,6 +82,13 @@ module Agentf
         { "error" => e.message }
       end
+      # Fetch intents from the memory backend and run them through
+      # format_memories.
+      #
+      # @param limit [Integer] passed through to @memory.get_intents (default 10)
+      # @return [Object, Hash] the format_memories result, or
+      #   { "error" => message } when a StandardError is raised along the way
+      def get_intents(limit: 10)
+        format_memories(@memory.get_intents(limit: limit))
+      rescue StandardError => error
+        { "error" => error.message }
+      end
       # Get all unique tags from memories
       def get_all_tags
         tags = @memory.get_all_tags

data/lib/agentf/commands/metrics.rb CHANGED Viewed

@@ -28,20 +28,22 @@ module Agentf
       def record_workflow(workflow_state)
         metrics = extract_metrics(workflow_state)
-        @memory.store_episode(
-          type: "success",
-          title: metric_title(metrics),
-          description: metric_description(metrics),
-          context: metric_context(metrics),
-          tags: metric_tags(metrics),
-          agent: Agentf::AgentRoles::ORCHESTRATOR,
-          code_snippet: ""
-        )
-        { "status" => "recorded", "metrics" => metrics }
-      rescue StandardError => e
-        { "status" => "error", "error" => e.message }
+        begin
+          @memory.store_episode(
+            type: "success",
+            title: metric_title(metrics),
+            description: metric_description(metrics),
+            context: metric_context(metrics),
+            tags: metric_tags(metrics),
+            agent: Agentf::AgentRoles::ORCHESTRATOR,
+            code_snippet: ""
+          )
+          { "status" => "recorded", "metrics" => metrics }
+        rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
+          { "status" => "confirmation_required", "confirmation_details" => e.details, "attempted" => { "action" => "record_workflow" } }
+        rescue StandardError => e
+          { "status" => "error", "error" => e.message }
+        end
       end
       def summary(limit: 100)

data/lib/agentf/commands/registry.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+module Agentf
+  module Commands
+    # Name-keyed lookup table for command implementations.
+    #
+    # Names are normalized with #to_s everywhere, so symbols and strings are
+    # interchangeable.
+    class Registry
+      # @param map [Hash] initial name => implementation entries. Keys are
+      #   normalized to strings (matching #register / #fetch) and the hash is
+      #   copied, so later #register calls do not mutate the caller's hash.
+      def initialize(map = {})
+        @map = map.to_h.transform_keys(&:to_s)
+      end
+      # Adds or replaces the implementation stored under name.
+      #
+      # @param name [#to_s]
+      # @param impl [Object] object whose public methods are the command's actions
+      # @return [Object] impl
+      def register(name, impl)
+        @map[name.to_s] = impl
+      end
+      # @param name [#to_s]
+      # @return [Object] the registered implementation
+      # @raise [KeyError] when nothing is registered under name
+      def fetch(name)
+        @map.fetch(name.to_s)
+      end
+      # Invokes a public action on a registered command.
+      #
+      # Keyword arguments and a block are forwarded explicitly: with only
+      # *args, Ruby 3 hands keywords to the action as a positional Hash, which
+      # breaks actions declared with keyword parameters.
+      #
+      # @param command_name [#to_s]
+      # @param action [Symbol, String] public method to call on the implementation
+      # @return [Object] whatever the action returns
+      # @raise [KeyError] when the command is not registered
+      # @raise [RuntimeError] when the implementation does not respond to action
+      def call(command_name, action, *args, **kwargs, &block)
+        impl = fetch(command_name)
+        raise "Command #{command_name} does not implement #{action}" unless impl.respond_to?(action)
+        impl.public_send(action, *args, **kwargs, &block)
+      end
+    end
+  end
+end