RubyGems - agentf - Versions diffs - 0.4.7 → 0.6.0 - Mend

agentf 0.4.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

checksums.yaml +4 -4
data/lib/agentf/agents/architect.rb +7 -3
data/lib/agentf/agents/base.rb +31 -3
data/lib/agentf/agents/debugger.rb +30 -8
data/lib/agentf/agents/designer.rb +20 -8
data/lib/agentf/agents/documenter.rb +8 -2
data/lib/agentf/agents/explorer.rb +29 -11
data/lib/agentf/agents/reviewer.rb +12 -7
data/lib/agentf/agents/security.rb +27 -15
data/lib/agentf/agents/specialist.rb +34 -18
data/lib/agentf/agents/tester.rb +48 -8
data/lib/agentf/cli/agent.rb +95 -0
data/lib/agentf/cli/eval.rb +203 -0
data/lib/agentf/cli/install.rb +7 -0
data/lib/agentf/cli/memory.rb +138 -90
data/lib/agentf/cli/router.rb +16 -4
data/lib/agentf/cli/update.rb +9 -2
data/lib/agentf/commands/memory_reviewer.rb +22 -48
data/lib/agentf/commands/metrics.rb +18 -25
data/lib/agentf/commands/registry.rb +28 -0
data/lib/agentf/context_builder.rb +4 -14
data/lib/agentf/embedding_provider.rb +35 -0
data/lib/agentf/evals/report.rb +134 -0
data/lib/agentf/evals/runner.rb +771 -0
data/lib/agentf/evals/scenario.rb +211 -0
data/lib/agentf/installer.rb +498 -365
data/lib/agentf/mcp/server.rb +294 -114
data/lib/agentf/memory.rb +354 -214
data/lib/agentf/service/providers.rb +10 -62
data/lib/agentf/version.rb +1 -1
data/lib/agentf/workflow_engine.rb +205 -77
data/lib/agentf.rb +10 -3
metadata +9 -3
data/lib/agentf/packs.rb +0 -74

data/lib/agentf/cli/eval.rb ADDED Viewed

@@ -0,0 +1,203 @@
+# frozen_string_literal: true
+require_relative "arg_parser"
+require_relative "../evals/runner"
+module Agentf
+  module CLI
+    class Eval
+      include ArgParser
+      def initialize(runner: nil)
+        @runner = runner
+        @json_output = false
+      end
+      def run(args)
+        @json_output = !args.delete("--json").nil?
+        command = args.shift || "help"
+        case command
+        when "list"
+          list_scenarios(args)
+        when "run"
+          run_scenarios(args)
+        when "report"
+          report_results(args)
+        when "help", "--help", "-h"
+          show_help
+        else
+          $stderr.puts "Unknown eval command: #{command}"
+          $stderr.puts
+          show_help
+          exit 1
+        end
+      end
+      private
+      def list_scenarios(args)
+        runner = build_runner(args)
+        scenarios = runner.list
+        if @json_output
+          puts JSON.generate({ "count" => scenarios.length, "scenarios" => scenarios.map(&:to_h) })
+          return
+        end
+        if scenarios.empty?
+          puts "No eval scenarios found under #{runner.root}"
+          return
+        end
+        puts "Eval scenarios (#{scenarios.length}):"
+        scenarios.each do |scenario|
+          suffix = scenario.description.empty? ? "" : " - #{scenario.description}"
+          target = if scenario.execution_mode == "mcp"
+                     "mcp: #{scenario.mcp_tool}"
+                   elsif scenario.execution_mode == "provider"
+                     "provider: #{scenario.provider_name}"
+                   else
+                     "agent: #{scenario.agent}"
+                   end
+          puts "  - #{scenario.name} (#{target})#{suffix}"
+        end
+      end
+      def run_scenarios(args)
+        name = args.shift || "all"
+        keep_workspace = args.delete("--keep-workspace")
+        timeout_seconds = parse_integer_option(args, "--timeout=", default: 0)
+        runner = build_runner(args)
+        result = runner.run(name: name, keep_workspace: !!keep_workspace, timeout_seconds: timeout_seconds.positive? ? timeout_seconds : nil)
+        if @json_output
+          puts JSON.pretty_generate(result)
+          return
+        end
+        puts "Evals complete: #{result['passed']}/#{result['count']} passed"
+        result["results"].each do |scenario_result|
+          status = scenario_result["status"] == "passed" ? "PASS" : "FAIL"
+          detail = scenario_result["failure_step"] ? " (failed at #{scenario_result['failure_step']})" : ""
+          puts "  - [#{status}] #{scenario_result['scenario']}#{detail}"
+          puts "    artifacts: #{scenario_result['artifact_dir']}"
+        end
+        print_matrix_summary(result["matrix"])
+        exit 1 if result["failed"].positive?
+      end
+      def build_runner(args)
+        root = parse_single_option(args, "--root=")
+        output_root = parse_single_option(args, "--output-dir=")
+        @runner || Agentf::Evals::Runner.new(root: root, output_root: output_root)
+      end
+      def show_help
+        puts <<~HELP
+          Usage: agentf eval <command> [options]
+          Commands:
+            list                          List available eval scenarios
+            run <scenario|all>            Run one scenario or all scenarios
+            report                        Summarize eval history
+          Options:
+            --root=<path>                 Scenario root directory (default: ./evals)
+            --output-dir=<path>           Artifact output directory (default: tmp/evals)
+            --timeout=<seconds>           Override per-scenario timeout
+            --keep-workspace              Keep temp workspace after run
+            --json                        Output structured JSON
+          Examples:
+            agentf eval list
+            agentf eval run engineer_episode_positive
+            agentf eval report
+            agentf eval run all --json
+        HELP
+      end
+      def report_results(args)
+        output_root = parse_single_option(args, "--output-dir=")
+        limit = parse_integer_option(args, "--limit=", default: 0)
+        since = parse_single_option(args, "--since=")
+        scenario = parse_single_option(args, "--scenario=")
+        report = Agentf::Evals::Report.new(output_root: output_root || Agentf::Evals::Runner::DEFAULT_OUTPUT_ROOT)
+        result = report.generate(limit: limit.positive? ? limit : nil, since: since, scenario: scenario)
+        if @json_output
+          puts JSON.pretty_generate(result)
+          return
+        end
+        puts "Eval history: #{result['passes']}/#{result['count']} passed"
+        puts "Retries: #{result.dig('retry_summary', 'total_retries')} total, #{result.dig('retry_summary', 'flaky_runs')} flaky passes"
+        if result["memory_effectiveness"]
+          puts "Memory retrieval: #{result.dig('memory_effectiveness', 'retrieved_expected_memory')}/#{result.dig('memory_effectiveness', 'tracked_runs')} tracked runs retrieved expected memory"
+        end
+        print_comparison_table("Providers", result["providers"])
+        print_comparison_table("Models", result["models"])
+        print_scenario_trends(result["scenarios"])
+        print_matrix_summary({ "providers" => result["providers"], "models" => result["models"] })
+      end
+      def print_comparison_table(title, rows)
+        return if rows.to_h.empty?
+        puts "#{title}:"
+        puts "  Name                 Pass  Fail  Retry  Flaky"
+        rows.sort.each do |name, stats|
+          puts format(
+            "  %-20s %4d  %4d  %5d  %5d",
+            name,
+            stats["passed"].to_i,
+            stats["failed"].to_i,
+            stats["retried"].to_i,
+            stats["flaky"].to_i
+          )
+        end
+      end
+      def print_scenario_trends(rows)
+        return if rows.to_h.empty?
+        puts "Scenario trends:"
+        puts "  Scenario              Pass  Fail  Retry  Flaky  Mem"
+        rows.sort.each do |name, stats|
+          puts format(
+            "  %-20s %4d  %4d  %5d  %5d  %3s",
+            name,
+            stats["passed"].to_i,
+            stats["failed"].to_i,
+            stats["retried"].to_i,
+            stats["flaky"].to_i,
+            stats.fetch("memory_retrieved", 0).to_i.positive? ? "yes" : "no"
+          )
+        end
+      end
+      def print_matrix_summary(matrix)
+        return unless matrix.is_a?(Hash)
+        providers = matrix.fetch("providers", {})
+        models = matrix.fetch("models", {})
+        unless providers.empty?
+          puts "Provider matrix:"
+          providers.each do |provider, stats|
+            puts "  - #{provider}: #{stats['passed']}/#{stats['total']} passed"
+          end
+        end
+        unless models.empty?
+          puts "Model matrix:"
+          models.each do |model, stats|
+            puts "  - #{model}: #{stats['passed']}/#{stats['total']} passed"
+          end
+        end
+      end
+    end
+  end
+end

data/lib/agentf/cli/install.rb CHANGED Viewed

@@ -17,6 +17,7 @@ module Agentf
           local_root: Dir.pwd,
           dry_run: false,
           install_deps: true,
+          opencode_runtime: "mcp",
           only_agents: nil,
           only_commands: nil
         }
@@ -35,6 +36,7 @@ module Agentf
           local_root: @options[:local_root],
           dry_run: @options[:dry_run],
           install_deps: @options[:install_deps],
+          opencode_runtime: @options[:opencode_runtime],
           verbose: @options.fetch(:verbose, false)
         )
@@ -72,6 +74,9 @@ module Agentf
         # Extract --install-deps flag
         @options[:install_deps] = !args.delete("--install-deps").nil?
+        opencode_runtime = parse_single_option(args, "--opencode-runtime=")
+        @options[:opencode_runtime] = opencode_runtime if opencode_runtime
         # Extract --global-root and --local-root
         global_root = parse_single_option(args, "--global-root=")
         @options[:global_root] = File.expand_path(global_root) if global_root
@@ -107,6 +112,7 @@ module Agentf
             --local-root=PATH      Root for local installs (default: current directory)
             --agent=LIST           Only install specific agents (comma-separated)
             --command=LIST         Only install specific commands (comma-separated)
+            --opencode-runtime=MODE Opencode runtime: mcp|plugin (default: mcp)
             --dry-run              Show planned writes without writing files
           Examples:
@@ -114,6 +120,7 @@ module Agentf
             agentf install --provider=opencode,copilot --scope=local
             agentf install --provider=copilot --dry-run
             agentf install --agent=architect,specialist
+            agentf install --provider=opencode --opencode-runtime=plugin
         HELP
       end
     end

data/lib/agentf/cli/memory.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module Agentf
     class Memory
       include ArgParser
-      VALID_EPISODE_TYPES = %w[pitfall lesson success business_intent feature_intent].freeze
+      VALID_EPISODE_TYPES = %w[episode lesson playbook business_intent feature_intent incident].freeze
       def initialize(reviewer: nil, memory: nil)
         @reviewer = reviewer || Commands::MemoryReviewer.new
@@ -28,12 +28,10 @@ module Agentf
         case command
         when "recent", "list"
           list_memories(args)
-        when "pitfalls"
-          list_pitfalls(args)
+        when "episodes"
+          list_episodes(args)
         when "lessons"
           list_lessons(args)
-        when "successes"
-          list_successes(args)
         when "intents"
           list_intents(args)
         when "business-intents"
@@ -44,14 +42,10 @@ module Agentf
           add_business_intent(args)
         when "add-feature-intent"
           add_feature_intent(args)
+        when "add-playbook"
+          add_playbook(args)
         when "add-lesson"
           add_episode("lesson", args)
-        when "add-success"
-          add_episode("success", args)
-        when "add-pitfall"
-          add_episode("pitfall", args)
-        when "tags"
-          list_tags
         when "search"
           search_memories(args)
         when "delete"
@@ -62,8 +56,6 @@ module Agentf
           subgraph(args)
         when "summary", "stats"
           show_summary
-        when "by-tag"
-          by_tag(args)
         when "by-agent"
           by_agent(args)
         when "by-type"
@@ -86,9 +78,10 @@ module Agentf
         output(result)
       end
-      def list_pitfalls(args)
+      def list_episodes(args)
         limit = extract_limit(args)
-        result = @reviewer.get_pitfalls(limit: limit)
+        outcome = parse_single_option(args, "--outcome=")
+        result = @reviewer.get_episodes(limit: limit, outcome: outcome)
         output(result)
       end
@@ -98,12 +91,6 @@ module Agentf
         output(result)
       end
-      def list_successes(args)
-        limit = extract_limit(args)
-        result = @reviewer.get_successes(limit: limit)
-        output(result)
-      end
       def list_intents(args)
         limit = extract_limit(args)
         kind = args.shift
@@ -141,22 +128,32 @@ module Agentf
           exit 1
         end
-        tags = parse_list_option(args, "--tags=")
         constraints = parse_list_option(args, "--constraints=")
         priority = parse_integer_option(args, "--priority=", default: 1)
-        intent_id = @memory.store_business_intent(
-          title: title,
-          description: description,
-          tags: tags,
-          constraints: constraints,
-          priority: priority
-        )
+        id = nil
+        res = safe_cli_memory_write(@memory, attempted: { command: "add-business-intent", args: { title: title, description: description, constraints: constraints, priority: priority } }) do
+          id = @memory.store_business_intent(
+            title: title,
+            description: description,
+            constraints: constraints,
+            priority: priority
+          )
+        end
+        if res.is_a?(Hash) && res["confirmation_required"]
+          if @json_output
+            puts JSON.generate(res)
+          else
+            $stderr.puts "Confirmation required to store business intent: #{res['confirmation_details'].inspect}"
+          end
+          return
+        end
         if @json_output
-          puts JSON.generate({ "id" => intent_id, "type" => "business_intent", "status" => "stored" })
+          puts JSON.generate({ "id" => id, "type" => "business_intent", "status" => "stored" })
         else
-          puts "Stored business intent: #{intent_id}"
+          puts "Stored business intent: #{id}"
         end
       end
@@ -169,24 +166,74 @@ module Agentf
           exit 1
         end
-        tags = parse_list_option(args, "--tags=")
         acceptance_criteria = parse_list_option(args, "--acceptance=")
         non_goals = parse_list_option(args, "--non-goals=")
         related_task_id = parse_single_option(args, "--task=")
-        intent_id = @memory.store_feature_intent(
-          title: title,
-          description: description,
-          tags: tags,
-          acceptance_criteria: acceptance_criteria,
-          non_goals: non_goals,
-          related_task_id: related_task_id
-        )
+        id = nil
+        res = safe_cli_memory_write(@memory, attempted: { command: "add-feature-intent", args: { title: title, description: description, acceptance: acceptance_criteria, non_goals: non_goals, related_task_id: related_task_id } }) do
+          id = @memory.store_feature_intent(
+            title: title,
+            description: description,
+            acceptance_criteria: acceptance_criteria,
+            non_goals: non_goals,
+            related_task_id: related_task_id
+          )
+        end
+        if res.is_a?(Hash) && res["confirmation_required"]
+          if @json_output
+            puts JSON.generate(res)
+          else
+            $stderr.puts "Confirmation required to store feature intent: #{res['confirmation_details'].inspect}"
+          end
+          return
+        end
         if @json_output
-          puts JSON.generate({ "id" => intent_id, "type" => "feature_intent", "status" => "stored" })
+          puts JSON.generate({ "id" => id, "type" => "feature_intent", "status" => "stored" })
         else
-          puts "Stored feature intent: #{intent_id}"
+          puts "Stored feature intent: #{id}"
+        end
+      end
+      def add_playbook(args)
+        title = args.shift
+        description = args.shift
+        if title.to_s.empty? || description.to_s.empty?
+          $stderr.puts "Error: add-playbook requires <title> <description>"
+          exit 1
+        end
+        steps = parse_list_option(args, "--steps=")
+        feature_area = parse_single_option(args, "--feature-area=")
+        agent = parse_single_option(args, "--agent=") || Agentf::AgentRoles::PLANNER
+        id = nil
+        res = safe_cli_memory_write(@memory, attempted: { command: "add-playbook", args: { title: title, description: description, steps: steps, feature_area: feature_area, agent: agent } }) do
+          id = @memory.store_playbook(
+            title: title,
+            description: description,
+            steps: steps,
+            feature_area: feature_area,
+            agent: agent
+          )
+        end
+        if res.is_a?(Hash) && res["confirmation_required"]
+          if @json_output
+            puts JSON.generate(res)
+          else
+            $stderr.puts "Confirmation required to store playbook: #{res['confirmation_details'].inspect}"
+          end
+          return
+        end
+        if @json_output
+          puts JSON.generate({ "id" => id, "type" => "playbook", "status" => "stored" })
+        else
+          puts "Stored playbook: #{id}"
         end
       end
@@ -199,40 +246,53 @@ module Agentf
           exit 1
         end
-        tags = parse_list_option(args, "--tags=")
         context = parse_single_option(args, "--context=").to_s
         agent = parse_single_option(args, "--agent=") || Agentf::AgentRoles::ENGINEER
         code_snippet = parse_single_option(args, "--code=").to_s
+        outcome = parse_single_option(args, "--outcome=")
+        id = nil
+        res = safe_cli_memory_write(@memory, attempted: { command: "add-#{type}", args: { title: title, description: description, context: context, agent: agent, code: code_snippet, outcome: outcome } }) do
+          id = @memory.store_episode(
+            type: type,
+            title: title,
+            description: description,
+            context: context,
+            agent: agent,
+            code_snippet: code_snippet,
+            outcome: outcome
+          )
+        end
-        intent_id = @memory.store_episode(
-          type: type,
-          title: title,
-          description: description,
-          context: context,
-          tags: tags,
-          agent: agent,
-          code_snippet: code_snippet
-        )
+        if res.is_a?(Hash) && res["confirmation_required"]
+          if @json_output
+            puts JSON.generate(res)
+          else
+            $stderr.puts "Confirmation required to store #{type}: #{res['confirmation_details'].inspect}"
+          end
+          return
+        end
         if @json_output
-          puts JSON.generate({ "id" => intent_id, "type" => type, "status" => "stored" })
+          puts JSON.generate({ "id" => id, "type" => type, "status" => "stored" })
         else
-          puts "Stored #{type}: #{intent_id}"
+          puts "Stored #{type}: #{id}"
         end
       end
-      def list_tags
-        result = @reviewer.get_all_tags
-        if @json_output
-          puts JSON.generate(result)
-          return
-        end
-        if result["tags"].empty?
-          puts "No tags found."
-        else
-          puts "Tags (#{result["count"]}):"
-          result["tags"].each { |tag| puts "  - #{tag}" }
+      # Helper to standardize CLI memory write confirmation handling.
+      def safe_cli_memory_write(memory, attempted: {})
+        begin
+          yield
+          nil
+        rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
+          {
+            "confirmation_required" => true,
+            "confirmation_details" => e.details,
+            "attempted" => attempted,
+            "confirmed_write_token" => "confirmed",
+            "confirmation_prompt" => "Ask the user whether to save this memory. If they approve, rerun the same command with confirmation enabled. If they decline, do not retry."
+          }
         end
       end
@@ -266,19 +326,12 @@ module Agentf
         puts ""
         puts "By agent:"
         result["by_agent"].each { |agent, count| puts "  #{agent}: #{count}" }
-        puts ""
-        puts "Unique tags: #{result["unique_tags"]}"
-      end
-      def by_tag(args)
-        tag = args.shift
-        if tag.nil? || tag.empty?
-          $stderr.puts "Error: by-tag requires a tag name"
-          exit 1
+        if result["by_outcome"].is_a?(Hash)
+          puts ""
+          puts "By outcome:"
+          result["by_outcome"].each { |outcome, count| puts "  #{outcome}: #{count}" }
         end
-        limit = extract_limit(args)
-        result = @reviewer.get_by_tag(tag, limit: limit)
-        output(result)
       end
       def by_agent(args)
@@ -488,8 +541,8 @@ module Agentf
           [#{mem["type"]&.upcase}] #{mem["title"]}
           #{mem["created_at"]} by #{mem["agent"]}
           #{mem["description"]}
+          #{"Outcome: #{mem['outcome']}" unless mem["outcome"].to_s.empty?}
           #{format_code(mem["code_snippet"]) unless mem["code_snippet"].to_s.empty?}
-          Tags: #{mem["tags"]&.join(", ") || "none"}
         OUTPUT
       end
@@ -505,26 +558,22 @@ module Agentf
           Commands:
             recent, list              List recent memories (default: 10)
-            pitfalls                  List pitfalls (things that went wrong)
+            episodes                  List episode memories
             lessons                   List lessons learned
-            successes                 List successes
             intents [kind]            List intents (kind: business|feature)
             business-intents          List business intents
             feature-intents           List feature intents
             add-business-intent       Store business intent
             add-feature-intent        Store feature intent
+            add-playbook              Store playbook memory
             add-lesson                Store lesson memory
-            add-success               Store success memory
-            add-pitfall               Store pitfall memory
-            tags                      List all unique tags
-            search <query>            Search memories by keyword
+            search <query>            Search memories semantically
             delete id <memory_id>     Delete one memory and related edges
             delete last -n <count>    Delete most recent memories
             delete all                Delete memories and graph/task keys
             neighbors <id>            Traverse graph edges from a memory id
             subgraph <ids>            Build graph from comma-separated seed ids
             summary, stats            Show summary statistics
-            by-tag <tag>              Get memories with specific tag
             by-agent <agent>          Get memories from specific agent
             by-type <type>            Get memories by type (#{VALID_EPISODE_TYPES.join("|")})
@@ -534,18 +583,17 @@ module Agentf
           Examples:
             agentf memory recent -n 5
-            agentf memory pitfalls
+            agentf memory episodes --outcome=negative
             agentf memory intents business -n 5
-            agentf memory add-business-intent "Reliability" "Prioritize uptime" --tags=ops,platform --constraints="No downtime;No vendor lock-in"
+            agentf memory add-business-intent "Reliability" "Prioritize uptime" --constraints="No downtime;No vendor lock-in"
             agentf memory add-feature-intent "Agent handoff" "Improve orchestrator continuity" --acceptance="Keeps context;Preserves task state"
-            agentf memory add-lesson "Refactor strategy" "Extracted adapter seam" --agent=PLANNER --tags=architecture
-            agentf memory add-success "Provider install works" "Installed copilot + opencode manifests" --agent=ENGINEER
+            agentf memory add-playbook "Release rollout" "Safe deploy sequence" --steps="deploy canary;monitor;promote"
+            agentf memory add-lesson "Refactor strategy" "Extracted adapter seam" --agent=PLANNER
             agentf memory search "react"
             agentf memory delete id episode_abcd
             agentf memory delete last -n 10 --scope=project
             agentf memory delete all --scope=all --yes
             agentf memory neighbors episode_abcd --depth=2
-            agentf memory by-tag "performance"
             agentf memory summary
         HELP
       end

data/lib/agentf/cli/router.rb CHANGED Viewed

@@ -7,6 +7,7 @@ require_relative "install"
 require_relative "update"
 require_relative "metrics"
 require_relative "architecture"
+require_relative "eval"
 module Agentf
   module CLI
@@ -18,8 +19,8 @@ module Agentf
     #   agentf install --provider opencode,copilot
     #   agentf version
     #   agentf help
-    class Router
-      SUBCOMMANDS = %w[memory code metrics architecture install update mcp-server version help].freeze
+      class Router
+        SUBCOMMANDS = %w[memory code metrics architecture install update eval agent mcp-server version help].freeze
       def run(args)
         subcommand = args.shift || "help"
@@ -42,8 +43,14 @@ module Agentf
           Architecture.new.run(args)
         when "update"
           Update.new.run(args)
+        when "eval"
+          Eval.new.run(args)
         when "mcp-server"
           start_mcp_server
+        when "agent"
+          # agent <AGENT_NAME> [payload]
+          require_relative "agent"
+          Agent.new.run(args)
         when "version", "--version", "-v"
           puts "agentf #{Agentf::VERSION}"
         when "help", "--help", "-h"
@@ -68,12 +75,14 @@ module Agentf
           Usage: agentf <command> [subcommand] [options]
           Commands:
-            memory       Manage agent memory (lessons, pitfalls, successes, intents)
+            memory       Manage agent memory (episodes, lessons, playbooks, intents)
             code         Explore codebase (glob, grep, tree, related files)
             metrics      Show workflow success and provider parity metrics
             architecture Analyze architecture layers and violations
             install      Generate provider manifests (agents, commands, tools)
             update       Regenerate manifests when gem version changes
+            eval         Run black-box eval scenarios against `agentf agent`
+            agent        Run a single agent directly
             mcp-server   Start MCP server over stdio (for Copilot integration)
             version      Show version
@@ -87,7 +96,7 @@ module Agentf
             AGENTF_WORKFLOW_CONTRACT_MODE=advisory|enforcing|off   Contract behavior mode
             AGENTF_AGENT_CONTRACT_ENABLED=true|false   Enable/disable per-agent contract checks
             AGENTF_AGENT_CONTRACT_MODE=advisory|enforcing|off   Per-agent contract behavior mode
-            AGENTF_DEFAULT_PACK=generic|rails_standard|rails_37signals|rails_feature_spec
+  (AGENTF_DEFAULT_PACK no longer used — orchestrator uses internal profiles)
             AGENTF_GEM_PATH=/path/to/gem   Path to agentf gem (for OpenCode plugin binary resolution)
           Examples:
@@ -100,6 +109,9 @@ module Agentf
             agentf metrics parity --json
             agentf architecture analyze
             agentf architecture review --json
+            agentf eval list
+            agentf eval run all --json
+            agentf agent planner "Plan a refactor" --json
             agentf update
             agentf update --force --provider=opencode,copilot
             agentf mcp-server