agentf 0.4.7 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/agentf/agents/architect.rb +7 -3
- data/lib/agentf/agents/base.rb +31 -3
- data/lib/agentf/agents/debugger.rb +30 -8
- data/lib/agentf/agents/designer.rb +20 -8
- data/lib/agentf/agents/documenter.rb +8 -2
- data/lib/agentf/agents/explorer.rb +29 -11
- data/lib/agentf/agents/reviewer.rb +12 -7
- data/lib/agentf/agents/security.rb +27 -15
- data/lib/agentf/agents/specialist.rb +34 -18
- data/lib/agentf/agents/tester.rb +48 -8
- data/lib/agentf/cli/agent.rb +95 -0
- data/lib/agentf/cli/eval.rb +203 -0
- data/lib/agentf/cli/install.rb +7 -0
- data/lib/agentf/cli/memory.rb +138 -90
- data/lib/agentf/cli/router.rb +16 -4
- data/lib/agentf/cli/update.rb +9 -2
- data/lib/agentf/commands/memory_reviewer.rb +22 -48
- data/lib/agentf/commands/metrics.rb +18 -25
- data/lib/agentf/commands/registry.rb +28 -0
- data/lib/agentf/context_builder.rb +4 -14
- data/lib/agentf/embedding_provider.rb +35 -0
- data/lib/agentf/evals/report.rb +134 -0
- data/lib/agentf/evals/runner.rb +771 -0
- data/lib/agentf/evals/scenario.rb +211 -0
- data/lib/agentf/installer.rb +498 -365
- data/lib/agentf/mcp/server.rb +294 -114
- data/lib/agentf/memory.rb +354 -214
- data/lib/agentf/service/providers.rb +10 -62
- data/lib/agentf/version.rb +1 -1
- data/lib/agentf/workflow_engine.rb +205 -77
- data/lib/agentf.rb +10 -3
- metadata +9 -3
- data/lib/agentf/packs.rb +0 -74
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "arg_parser"
|
|
4
|
+
require_relative "../evals/runner"
|
|
5
|
+
|
|
6
|
+
module Agentf
|
|
7
|
+
module CLI
|
|
8
|
+
class Eval
|
|
9
|
+
include ArgParser
|
|
10
|
+
|
|
11
|
+
# Set up the eval CLI handler.
#
# @param runner [Object, nil] optional pre-built runner; when nil, a runner
#   is constructed later by build_runner
def initialize(runner: nil)
  # JSON output stays off until #run sees the --json flag.
  @json_output = false
  @runner = runner
end
|
|
15
|
+
|
|
16
|
+
# Entry point for `agentf eval`.
#
# Removes the --json flag from args (recording whether it was present),
# then dispatches on the first remaining argument. With no argument the
# help text is shown; an unrecognised command prints an error plus the help
# text and exits with status 1.
#
# @param args [Array<String>] raw CLI arguments; mutated in place
# @return [Object] whatever the selected handler returns
def run(args)
  json_flag = args.delete("--json")
  @json_output = !json_flag.nil?

  subcommand = args.shift || "help"
  handlers = {
    "list" => :list_scenarios,
    "run" => :run_scenarios,
    "report" => :report_results
  }

  if handlers.key?(subcommand)
    send(handlers[subcommand], args)
  elsif %w[help --help -h].include?(subcommand)
    show_help
  else
    $stderr.puts "Unknown eval command: #{subcommand}"
    $stderr.puts
    show_help
    exit 1
  end
end
|
|
36
|
+
|
|
37
|
+
private
|
|
38
|
+
|
|
39
|
+
# Print the scenarios known to the runner.
#
# In JSON mode a single object with "count" and "scenarios" is emitted.
# Otherwise each scenario is printed on its own line with its execution
# target (mcp tool, provider name, or agent) and optional description.
#
# @param args [Array<String>] remaining CLI arguments, passed to build_runner
def list_scenarios(args)
  runner = build_runner(args)
  found = runner.list

  if @json_output
    payload = { "count" => found.length, "scenarios" => found.map(&:to_h) }
    puts JSON.generate(payload)
    return
  end

  if found.empty?
    puts "No eval scenarios found under #{runner.root}"
    return
  end

  puts "Eval scenarios (#{found.length}):"
  found.each do |scenario|
    suffix = scenario.description.empty? ? "" : " - #{scenario.description}"
    target =
      case scenario.execution_mode
      when "mcp" then "mcp: #{scenario.mcp_tool}"
      when "provider" then "provider: #{scenario.provider_name}"
      else "agent: #{scenario.agent}"
      end
    puts " - #{scenario.name} (#{target})#{suffix}"
  end
end
|
|
66
|
+
|
|
67
|
+
# Run one scenario (or all of them) and print a pass/fail summary.
#
# Options are read before the positional scenario name, and the name is the
# first argument that does not start with "-". Previously the first argument
# was taken blindly, so `agentf eval run --keep-workspace` or
# `agentf eval run --timeout=5 smoke` treated an option as the scenario name
# and silently dropped that option.
#
# @param args [Array<String>] remaining CLI arguments; mutated in place
# @return [void] exits the process with status 1 when any scenario failed
#   (text mode only)
def run_scenarios(args)
  keep_workspace = !args.delete("--keep-workspace").nil?
  timeout_seconds = parse_integer_option(args, "--timeout=", default: 0)
  runner = build_runner(args)

  # Skipping dash-prefixed entries keeps this correct whether or not the
  # option parsers remove what they consume from args.
  name_index = args.index { |arg| !arg.to_s.start_with?("-") }
  name = name_index ? args.delete_at(name_index) : "all"

  result = runner.run(
    name: name,
    keep_workspace: keep_workspace,
    # A non-positive timeout means "no override".
    timeout_seconds: timeout_seconds.positive? ? timeout_seconds : nil
  )

  if @json_output
    # NOTE(review): JSON mode returns before the failure exit below, so the
    # process exits 0 even when scenarios failed — confirm this is intended.
    puts JSON.pretty_generate(result)
    return
  end

  puts "Evals complete: #{result['passed']}/#{result['count']} passed"
  result["results"].each do |scenario_result|
    status = scenario_result["status"] == "passed" ? "PASS" : "FAIL"
    detail = scenario_result["failure_step"] ? " (failed at #{scenario_result['failure_step']})" : ""
    puts " - [#{status}] #{scenario_result['scenario']}#{detail}"
    puts " artifacts: #{scenario_result['artifact_dir']}"
  end

  print_matrix_summary(result["matrix"])

  exit 1 if result["failed"].positive?
end
|
|
91
|
+
|
|
92
|
+
# Resolve the runner used by the list/run subcommands.
#
# The --root= and --output-dir= options are always read from args, even when
# a runner was injected through the constructor; the injected runner takes
# precedence over building a new one.
#
# @param args [Array<String>] remaining CLI arguments
# @return [Object] the injected runner, or a new Agentf::Evals::Runner
def build_runner(args)
  scenario_root = parse_single_option(args, "--root=")
  artifact_root = parse_single_option(args, "--output-dir=")
  return @runner if @runner

  Agentf::Evals::Runner.new(root: scenario_root, output_root: artifact_root)
end
|
|
97
|
+
|
|
98
|
+
# Print usage for `agentf eval`.
#
# The report-only flags (--limit=, --since=, --scenario=) are listed here
# because report_results parses them; they were previously undocumented.
def show_help
  puts <<~HELP
    Usage: agentf eval <command> [options]

    Commands:
      list                    List available eval scenarios
      run <scenario|all>      Run one scenario or all scenarios
      report                  Summarize eval history

    Options:
      --root=<path>           Scenario root directory (default: ./evals)
      --output-dir=<path>     Artifact output directory (default: tmp/evals)
      --timeout=<seconds>     Override per-scenario timeout
      --keep-workspace        Keep temp workspace after run
      --json                  Output structured JSON

    Report options:
      --limit=<count>         Limit the eval history that is summarized
      --since=<value>         Only summarize eval history since the given value
      --scenario=<name>       Only summarize eval history for one scenario

    Examples:
      agentf eval list
      agentf eval run engineer_episode_positive
      agentf eval report
      agentf eval run all --json
  HELP
end
|
|
121
|
+
|
|
122
|
+
# Summarize stored eval history.
#
# Reads --output-dir=, --limit=, --since= and --scenario= from args, asks
# Agentf::Evals::Report to generate a result, and prints it either as pretty
# JSON or as a text summary followed by the comparison tables, scenario
# trends and matrix summary.
#
# @param args [Array<String>] remaining CLI arguments
def report_results(args)
  output_root = parse_single_option(args, "--output-dir=")
  limit = parse_integer_option(args, "--limit=", default: 0)
  since = parse_single_option(args, "--since=")
  scenario = parse_single_option(args, "--scenario=")

  history_root = output_root || Agentf::Evals::Runner::DEFAULT_OUTPUT_ROOT
  report = Agentf::Evals::Report.new(output_root: history_root)
  # A non-positive limit means "no limit".
  effective_limit = limit.positive? ? limit : nil
  result = report.generate(limit: effective_limit, since: since, scenario: scenario)

  if @json_output
    puts JSON.pretty_generate(result)
    return
  end

  # NOTE(review): this reads "passes" while run_scenarios reads "passed" —
  # confirm the key against Agentf::Evals::Report#generate.
  puts "Eval history: #{result['passes']}/#{result['count']} passed"
  puts "Retries: #{result.dig('retry_summary', 'total_retries')} total, #{result.dig('retry_summary', 'flaky_runs')} flaky passes"

  if result["memory_effectiveness"]
    retrieved = result.dig("memory_effectiveness", "retrieved_expected_memory")
    tracked = result.dig("memory_effectiveness", "tracked_runs")
    puts "Memory retrieval: #{retrieved}/#{tracked} tracked runs retrieved expected memory"
  end

  providers = result["providers"]
  models = result["models"]
  print_comparison_table("Providers", providers)
  print_comparison_table("Models", models)
  print_scenario_trends(result["scenarios"])
  print_matrix_summary({ "providers" => providers, "models" => models })
end
|
|
145
|
+
|
|
146
|
+
# Print a pass/fail/retry/flaky table, one row per entry in rows.
#
# The header is rendered with the same column widths as the data rows so the
# columns line up, and rows are ordered by the string form of their name so
# nil or non-String keys cannot make the sort raise.
#
# @param title [String] heading printed above the table
# @param rows [Hash, nil] name => stats hash with "passed", "failed",
#   "retried" and "flaky" entries; nothing is printed when nil or empty
def print_comparison_table(title, rows)
  table = rows.to_h
  return if table.empty?

  puts "#{title}:"
  puts format(" %-20s %4s %4s %5s %5s", "Name", "Pass", "Fail", "Retry", "Flaky")
  table.sort_by { |name, _stats| name.to_s }.each do |name, stats|
    puts format(
      " %-20s %4d %4d %5d %5d",
      name,
      stats["passed"].to_i,
      stats["failed"].to_i,
      stats["retried"].to_i,
      stats["flaky"].to_i
    )
  end
end
|
|
162
|
+
|
|
163
|
+
# Print per-scenario pass/fail/retry/flaky counts plus a yes/no memory column.
#
# The header is rendered with the same column widths as the data rows so the
# columns line up, and rows are ordered by the string form of their name so
# nil or non-String keys cannot make the sort raise.
#
# @param rows [Hash, nil] scenario name => stats hash; "memory_retrieved" is
#   shown as "yes" when its integer value is positive, otherwise "no".
#   Nothing is printed when rows is nil or empty.
def print_scenario_trends(rows)
  table = rows.to_h
  return if table.empty?

  puts "Scenario trends:"
  puts format(" %-20s %4s %4s %5s %5s %3s", "Scenario", "Pass", "Fail", "Retry", "Flaky", "Mem")
  table.sort_by { |name, _stats| name.to_s }.each do |name, stats|
    puts format(
      " %-20s %4d %4d %5d %5d %3s",
      name,
      stats["passed"].to_i,
      stats["failed"].to_i,
      stats["retried"].to_i,
      stats["flaky"].to_i,
      stats.fetch("memory_retrieved", 0).to_i.positive? ? "yes" : "no"
    )
  end
end
|
|
180
|
+
|
|
181
|
+
# Print "passed/total" lines for the provider and model sections of a matrix.
#
# Sections are read with `matrix[key].to_h` rather than `fetch(key, {})`:
# fetch only applies its default when the key is absent, so a key stored
# with a nil value (report_results passes result["providers"] and
# result["models"] straight through) used to raise NoMethodError on
# `nil.empty?`.
#
# @param matrix [Hash, nil] may contain "providers" and "models", each a
#   label => stats hash with "passed" and "total"; ignored unless a Hash
def print_matrix_summary(matrix)
  return unless matrix.is_a?(Hash)

  { "Provider matrix:" => "providers", "Model matrix:" => "models" }.each do |heading, key|
    entries = matrix[key].to_h
    next if entries.empty?

    puts heading
    entries.each do |label, stats|
      puts " - #{label}: #{stats['passed']}/#{stats['total']} passed"
    end
  end
end
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
end
|
data/lib/agentf/cli/install.rb
CHANGED
|
@@ -17,6 +17,7 @@ module Agentf
|
|
|
17
17
|
local_root: Dir.pwd,
|
|
18
18
|
dry_run: false,
|
|
19
19
|
install_deps: true,
|
|
20
|
+
opencode_runtime: "mcp",
|
|
20
21
|
only_agents: nil,
|
|
21
22
|
only_commands: nil
|
|
22
23
|
}
|
|
@@ -35,6 +36,7 @@ module Agentf
|
|
|
35
36
|
local_root: @options[:local_root],
|
|
36
37
|
dry_run: @options[:dry_run],
|
|
37
38
|
install_deps: @options[:install_deps],
|
|
39
|
+
opencode_runtime: @options[:opencode_runtime],
|
|
38
40
|
verbose: @options.fetch(:verbose, false)
|
|
39
41
|
)
|
|
40
42
|
|
|
@@ -72,6 +74,9 @@ module Agentf
|
|
|
72
74
|
# Extract --install-deps flag
|
|
73
75
|
@options[:install_deps] = !args.delete("--install-deps").nil?
|
|
74
76
|
|
|
77
|
+
opencode_runtime = parse_single_option(args, "--opencode-runtime=")
|
|
78
|
+
@options[:opencode_runtime] = opencode_runtime if opencode_runtime
|
|
79
|
+
|
|
75
80
|
# Extract --global-root and --local-root
|
|
76
81
|
global_root = parse_single_option(args, "--global-root=")
|
|
77
82
|
@options[:global_root] = File.expand_path(global_root) if global_root
|
|
@@ -107,6 +112,7 @@ module Agentf
|
|
|
107
112
|
--local-root=PATH Root for local installs (default: current directory)
|
|
108
113
|
--agent=LIST Only install specific agents (comma-separated)
|
|
109
114
|
--command=LIST Only install specific commands (comma-separated)
|
|
115
|
+
--opencode-runtime=MODE Opencode runtime: mcp|plugin (default: mcp)
|
|
110
116
|
--dry-run Show planned writes without writing files
|
|
111
117
|
|
|
112
118
|
Examples:
|
|
@@ -114,6 +120,7 @@ module Agentf
|
|
|
114
120
|
agentf install --provider=opencode,copilot --scope=local
|
|
115
121
|
agentf install --provider=copilot --dry-run
|
|
116
122
|
agentf install --agent=architect,specialist
|
|
123
|
+
agentf install --provider=opencode --opencode-runtime=plugin
|
|
117
124
|
HELP
|
|
118
125
|
end
|
|
119
126
|
end
|
data/lib/agentf/cli/memory.rb
CHANGED
|
@@ -13,7 +13,7 @@ module Agentf
|
|
|
13
13
|
class Memory
|
|
14
14
|
include ArgParser
|
|
15
15
|
|
|
16
|
-
VALID_EPISODE_TYPES = %w[
|
|
16
|
+
VALID_EPISODE_TYPES = %w[episode lesson playbook business_intent feature_intent incident].freeze
|
|
17
17
|
|
|
18
18
|
def initialize(reviewer: nil, memory: nil)
|
|
19
19
|
@reviewer = reviewer || Commands::MemoryReviewer.new
|
|
@@ -28,12 +28,10 @@ module Agentf
|
|
|
28
28
|
case command
|
|
29
29
|
when "recent", "list"
|
|
30
30
|
list_memories(args)
|
|
31
|
-
when "
|
|
32
|
-
|
|
31
|
+
when "episodes"
|
|
32
|
+
list_episodes(args)
|
|
33
33
|
when "lessons"
|
|
34
34
|
list_lessons(args)
|
|
35
|
-
when "successes"
|
|
36
|
-
list_successes(args)
|
|
37
35
|
when "intents"
|
|
38
36
|
list_intents(args)
|
|
39
37
|
when "business-intents"
|
|
@@ -44,14 +42,10 @@ module Agentf
|
|
|
44
42
|
add_business_intent(args)
|
|
45
43
|
when "add-feature-intent"
|
|
46
44
|
add_feature_intent(args)
|
|
45
|
+
when "add-playbook"
|
|
46
|
+
add_playbook(args)
|
|
47
47
|
when "add-lesson"
|
|
48
48
|
add_episode("lesson", args)
|
|
49
|
-
when "add-success"
|
|
50
|
-
add_episode("success", args)
|
|
51
|
-
when "add-pitfall"
|
|
52
|
-
add_episode("pitfall", args)
|
|
53
|
-
when "tags"
|
|
54
|
-
list_tags
|
|
55
49
|
when "search"
|
|
56
50
|
search_memories(args)
|
|
57
51
|
when "delete"
|
|
@@ -62,8 +56,6 @@ module Agentf
|
|
|
62
56
|
subgraph(args)
|
|
63
57
|
when "summary", "stats"
|
|
64
58
|
show_summary
|
|
65
|
-
when "by-tag"
|
|
66
|
-
by_tag(args)
|
|
67
59
|
when "by-agent"
|
|
68
60
|
by_agent(args)
|
|
69
61
|
when "by-type"
|
|
@@ -86,9 +78,10 @@ module Agentf
|
|
|
86
78
|
output(result)
|
|
87
79
|
end
|
|
88
80
|
|
|
89
|
-
def
|
|
81
|
+
# List episode memories, optionally filtered by outcome.
#
# Reads the limit via extract_limit and the --outcome= option from args,
# queries the reviewer, and hands the result to output.
#
# @param args [Array<String>] remaining CLI arguments
def list_episodes(args)
  limit = extract_limit(args)
  outcome = parse_single_option(args, "--outcome=")
  output(@reviewer.get_episodes(limit: limit, outcome: outcome))
end
|
|
94
87
|
|
|
@@ -98,12 +91,6 @@ module Agentf
|
|
|
98
91
|
output(result)
|
|
99
92
|
end
|
|
100
93
|
|
|
101
|
-
def list_successes(args)
|
|
102
|
-
limit = extract_limit(args)
|
|
103
|
-
result = @reviewer.get_successes(limit: limit)
|
|
104
|
-
output(result)
|
|
105
|
-
end
|
|
106
|
-
|
|
107
94
|
def list_intents(args)
|
|
108
95
|
limit = extract_limit(args)
|
|
109
96
|
kind = args.shift
|
|
@@ -141,22 +128,32 @@ module Agentf
|
|
|
141
128
|
exit 1
|
|
142
129
|
end
|
|
143
130
|
|
|
144
|
-
tags = parse_list_option(args, "--tags=")
|
|
145
131
|
constraints = parse_list_option(args, "--constraints=")
|
|
146
132
|
priority = parse_integer_option(args, "--priority=", default: 1)
|
|
147
133
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
134
|
+
id = nil
|
|
135
|
+
res = safe_cli_memory_write(@memory, attempted: { command: "add-business-intent", args: { title: title, description: description, constraints: constraints, priority: priority } }) do
|
|
136
|
+
id = @memory.store_business_intent(
|
|
137
|
+
title: title,
|
|
138
|
+
description: description,
|
|
139
|
+
constraints: constraints,
|
|
140
|
+
priority: priority
|
|
141
|
+
)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
145
|
+
if @json_output
|
|
146
|
+
puts JSON.generate(res)
|
|
147
|
+
else
|
|
148
|
+
$stderr.puts "Confirmation required to store business intent: #{res['confirmation_details'].inspect}"
|
|
149
|
+
end
|
|
150
|
+
return
|
|
151
|
+
end
|
|
155
152
|
|
|
156
153
|
if @json_output
|
|
157
|
-
puts JSON.generate({ "id" =>
|
|
154
|
+
puts JSON.generate({ "id" => id, "type" => "business_intent", "status" => "stored" })
|
|
158
155
|
else
|
|
159
|
-
puts "Stored business intent: #{
|
|
156
|
+
puts "Stored business intent: #{id}"
|
|
160
157
|
end
|
|
161
158
|
end
|
|
162
159
|
|
|
@@ -169,24 +166,74 @@ module Agentf
|
|
|
169
166
|
exit 1
|
|
170
167
|
end
|
|
171
168
|
|
|
172
|
-
tags = parse_list_option(args, "--tags=")
|
|
173
169
|
acceptance_criteria = parse_list_option(args, "--acceptance=")
|
|
174
170
|
non_goals = parse_list_option(args, "--non-goals=")
|
|
175
171
|
related_task_id = parse_single_option(args, "--task=")
|
|
176
172
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
173
|
+
id = nil
|
|
174
|
+
res = safe_cli_memory_write(@memory, attempted: { command: "add-feature-intent", args: { title: title, description: description, acceptance: acceptance_criteria, non_goals: non_goals, related_task_id: related_task_id } }) do
|
|
175
|
+
id = @memory.store_feature_intent(
|
|
176
|
+
title: title,
|
|
177
|
+
description: description,
|
|
178
|
+
acceptance_criteria: acceptance_criteria,
|
|
179
|
+
non_goals: non_goals,
|
|
180
|
+
related_task_id: related_task_id
|
|
181
|
+
)
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
185
|
+
if @json_output
|
|
186
|
+
puts JSON.generate(res)
|
|
187
|
+
else
|
|
188
|
+
$stderr.puts "Confirmation required to store feature intent: #{res['confirmation_details'].inspect}"
|
|
189
|
+
end
|
|
190
|
+
return
|
|
191
|
+
end
|
|
185
192
|
|
|
186
193
|
if @json_output
|
|
187
|
-
puts JSON.generate({ "id" =>
|
|
194
|
+
puts JSON.generate({ "id" => id, "type" => "feature_intent", "status" => "stored" })
|
|
188
195
|
else
|
|
189
|
-
puts "Stored feature intent: #{
|
|
196
|
+
puts "Stored feature intent: #{id}"
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Store a playbook memory from `add-playbook <title> <description>`.
#
# Exits with status 1 when title or description is missing. The write goes
# through safe_cli_memory_write; when that returns a confirmation-required
# hash it is reported (JSON on stdout, or a message on stderr) and nothing
# further is printed. Otherwise the stored id is reported.
#
# @param args [Array<String>] remaining CLI arguments; the first two entries
#   are consumed as title and description, then --steps=, --feature-area=
#   and --agent= are read
def add_playbook(args)
  title, description = args.shift(2)

  if title.to_s.empty? || description.to_s.empty?
    $stderr.puts "Error: add-playbook requires <title> <description>"
    exit 1
  end

  steps = parse_list_option(args, "--steps=")
  feature_area = parse_single_option(args, "--feature-area=")
  agent = parse_single_option(args, "--agent=") || Agentf::AgentRoles::PLANNER

  # The same field set is reported as "attempted" and passed to the store.
  fields = {
    title: title,
    description: description,
    steps: steps,
    feature_area: feature_area,
    agent: agent
  }

  id = nil
  res = safe_cli_memory_write(@memory, attempted: { command: "add-playbook", args: fields }) do
    id = @memory.store_playbook(**fields)
  end

  if res.is_a?(Hash) && res["confirmation_required"]
    if @json_output
      puts JSON.generate(res)
    else
      $stderr.puts "Confirmation required to store playbook: #{res['confirmation_details'].inspect}"
    end
    return
  end

  if @json_output
    puts JSON.generate({ "id" => id, "type" => "playbook", "status" => "stored" })
  else
    puts "Stored playbook: #{id}"
  end
end
|
|
192
239
|
|
|
@@ -199,40 +246,53 @@ module Agentf
|
|
|
199
246
|
exit 1
|
|
200
247
|
end
|
|
201
248
|
|
|
202
|
-
tags = parse_list_option(args, "--tags=")
|
|
203
249
|
context = parse_single_option(args, "--context=").to_s
|
|
204
250
|
agent = parse_single_option(args, "--agent=") || Agentf::AgentRoles::ENGINEER
|
|
205
251
|
code_snippet = parse_single_option(args, "--code=").to_s
|
|
252
|
+
outcome = parse_single_option(args, "--outcome=")
|
|
253
|
+
|
|
254
|
+
id = nil
|
|
255
|
+
res = safe_cli_memory_write(@memory, attempted: { command: "add-#{type}", args: { title: title, description: description, context: context, agent: agent, code: code_snippet, outcome: outcome } }) do
|
|
256
|
+
id = @memory.store_episode(
|
|
257
|
+
type: type,
|
|
258
|
+
title: title,
|
|
259
|
+
description: description,
|
|
260
|
+
context: context,
|
|
261
|
+
agent: agent,
|
|
262
|
+
code_snippet: code_snippet,
|
|
263
|
+
outcome: outcome
|
|
264
|
+
)
|
|
265
|
+
end
|
|
206
266
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
)
|
|
267
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
268
|
+
if @json_output
|
|
269
|
+
puts JSON.generate(res)
|
|
270
|
+
else
|
|
271
|
+
$stderr.puts "Confirmation required to store #{type}: #{res['confirmation_details'].inspect}"
|
|
272
|
+
end
|
|
273
|
+
return
|
|
274
|
+
end
|
|
216
275
|
|
|
217
276
|
if @json_output
|
|
218
|
-
puts JSON.generate({ "id" =>
|
|
277
|
+
puts JSON.generate({ "id" => id, "type" => type, "status" => "stored" })
|
|
219
278
|
else
|
|
220
|
-
puts "Stored #{type}: #{
|
|
279
|
+
puts "Stored #{type}: #{id}"
|
|
221
280
|
end
|
|
222
281
|
end
|
|
223
282
|
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
283
|
+
# Helper to standardize CLI memory write confirmation handling.
#
# Runs the given block. Returns nil when the block completes, or a
# confirmation payload hash when the block raises
# Agentf::Memory::RedisMemory::ConfirmationRequired.
#
# @param memory [Object] accepted for callers' convenience; not referenced
#   in this method
# @param attempted [Hash] description of the attempted write, echoed back in
#   the confirmation payload
# @return [Hash, nil]
def safe_cli_memory_write(memory, attempted: {})
  yield
  nil
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
  payload = { "confirmation_required" => true }
  payload["confirmation_details"] = e.details
  payload["attempted"] = attempted
  payload["confirmed_write_token"] = "confirmed"
  payload["confirmation_prompt"] = "Ask the user whether to save this memory. If they approve, rerun the same command with confirmation enabled. If they decline, do not retry."
  payload
end
|
|
238
298
|
|
|
@@ -266,19 +326,12 @@ module Agentf
|
|
|
266
326
|
puts ""
|
|
267
327
|
puts "By agent:"
|
|
268
328
|
result["by_agent"].each { |agent, count| puts " #{agent}: #{count}" }
|
|
269
|
-
puts ""
|
|
270
|
-
puts "Unique tags: #{result["unique_tags"]}"
|
|
271
|
-
end
|
|
272
329
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
exit 1
|
|
330
|
+
if result["by_outcome"].is_a?(Hash)
|
|
331
|
+
puts ""
|
|
332
|
+
puts "By outcome:"
|
|
333
|
+
result["by_outcome"].each { |outcome, count| puts " #{outcome}: #{count}" }
|
|
278
334
|
end
|
|
279
|
-
limit = extract_limit(args)
|
|
280
|
-
result = @reviewer.get_by_tag(tag, limit: limit)
|
|
281
|
-
output(result)
|
|
282
335
|
end
|
|
283
336
|
|
|
284
337
|
def by_agent(args)
|
|
@@ -488,8 +541,8 @@ module Agentf
|
|
|
488
541
|
[#{mem["type"]&.upcase}] #{mem["title"]}
|
|
489
542
|
#{mem["created_at"]} by #{mem["agent"]}
|
|
490
543
|
#{mem["description"]}
|
|
544
|
+
#{"Outcome: #{mem['outcome']}" unless mem["outcome"].to_s.empty?}
|
|
491
545
|
#{format_code(mem["code_snippet"]) unless mem["code_snippet"].to_s.empty?}
|
|
492
|
-
Tags: #{mem["tags"]&.join(", ") || "none"}
|
|
493
546
|
OUTPUT
|
|
494
547
|
end
|
|
495
548
|
|
|
@@ -505,26 +558,22 @@ module Agentf
|
|
|
505
558
|
|
|
506
559
|
Commands:
|
|
507
560
|
recent, list List recent memories (default: 10)
|
|
508
|
-
|
|
561
|
+
episodes List episode memories
|
|
509
562
|
lessons List lessons learned
|
|
510
|
-
successes List successes
|
|
511
563
|
intents [kind] List intents (kind: business|feature)
|
|
512
564
|
business-intents List business intents
|
|
513
565
|
feature-intents List feature intents
|
|
514
566
|
add-business-intent Store business intent
|
|
515
567
|
add-feature-intent Store feature intent
|
|
568
|
+
add-playbook Store playbook memory
|
|
516
569
|
add-lesson Store lesson memory
|
|
517
|
-
|
|
518
|
-
add-pitfall Store pitfall memory
|
|
519
|
-
tags List all unique tags
|
|
520
|
-
search <query> Search memories by keyword
|
|
570
|
+
search <query> Search memories semantically
|
|
521
571
|
delete id <memory_id> Delete one memory and related edges
|
|
522
572
|
delete last -n <count> Delete most recent memories
|
|
523
573
|
delete all Delete memories and graph/task keys
|
|
524
574
|
neighbors <id> Traverse graph edges from a memory id
|
|
525
575
|
subgraph <ids> Build graph from comma-separated seed ids
|
|
526
576
|
summary, stats Show summary statistics
|
|
527
|
-
by-tag <tag> Get memories with specific tag
|
|
528
577
|
by-agent <agent> Get memories from specific agent
|
|
529
578
|
by-type <type> Get memories by type (#{VALID_EPISODE_TYPES.join("|")})
|
|
530
579
|
|
|
@@ -534,18 +583,17 @@ module Agentf
|
|
|
534
583
|
|
|
535
584
|
Examples:
|
|
536
585
|
agentf memory recent -n 5
|
|
537
|
-
agentf memory
|
|
586
|
+
agentf memory episodes --outcome=negative
|
|
538
587
|
agentf memory intents business -n 5
|
|
539
|
-
agentf memory add-business-intent "Reliability" "Prioritize uptime" --
|
|
588
|
+
agentf memory add-business-intent "Reliability" "Prioritize uptime" --constraints="No downtime;No vendor lock-in"
|
|
540
589
|
agentf memory add-feature-intent "Agent handoff" "Improve orchestrator continuity" --acceptance="Keeps context;Preserves task state"
|
|
541
|
-
agentf memory add-
|
|
542
|
-
agentf memory add-
|
|
590
|
+
agentf memory add-playbook "Release rollout" "Safe deploy sequence" --steps="deploy canary;monitor;promote"
|
|
591
|
+
agentf memory add-lesson "Refactor strategy" "Extracted adapter seam" --agent=PLANNER
|
|
543
592
|
agentf memory search "react"
|
|
544
593
|
agentf memory delete id episode_abcd
|
|
545
594
|
agentf memory delete last -n 10 --scope=project
|
|
546
595
|
agentf memory delete all --scope=all --yes
|
|
547
596
|
agentf memory neighbors episode_abcd --depth=2
|
|
548
|
-
agentf memory by-tag "performance"
|
|
549
597
|
agentf memory summary
|
|
550
598
|
HELP
|
|
551
599
|
end
|
data/lib/agentf/cli/router.rb
CHANGED
|
@@ -7,6 +7,7 @@ require_relative "install"
|
|
|
7
7
|
require_relative "update"
|
|
8
8
|
require_relative "metrics"
|
|
9
9
|
require_relative "architecture"
|
|
10
|
+
require_relative "eval"
|
|
10
11
|
|
|
11
12
|
module Agentf
|
|
12
13
|
module CLI
|
|
@@ -18,8 +19,8 @@ module Agentf
|
|
|
18
19
|
# agentf install --provider opencode,copilot
|
|
19
20
|
# agentf version
|
|
20
21
|
# agentf help
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
class Router
|
|
23
|
+
SUBCOMMANDS = %w[memory code metrics architecture install update eval agent mcp-server version help].freeze
|
|
23
24
|
|
|
24
25
|
def run(args)
|
|
25
26
|
subcommand = args.shift || "help"
|
|
@@ -42,8 +43,14 @@ module Agentf
|
|
|
42
43
|
Architecture.new.run(args)
|
|
43
44
|
when "update"
|
|
44
45
|
Update.new.run(args)
|
|
46
|
+
when "eval"
|
|
47
|
+
Eval.new.run(args)
|
|
45
48
|
when "mcp-server"
|
|
46
49
|
start_mcp_server
|
|
50
|
+
when "agent"
|
|
51
|
+
# agent <AGENT_NAME> [payload]
|
|
52
|
+
require_relative "agent"
|
|
53
|
+
Agent.new.run(args)
|
|
47
54
|
when "version", "--version", "-v"
|
|
48
55
|
puts "agentf #{Agentf::VERSION}"
|
|
49
56
|
when "help", "--help", "-h"
|
|
@@ -68,12 +75,14 @@ module Agentf
|
|
|
68
75
|
Usage: agentf <command> [subcommand] [options]
|
|
69
76
|
|
|
70
77
|
Commands:
|
|
71
|
-
memory Manage agent memory (
|
|
78
|
+
memory Manage agent memory (episodes, lessons, playbooks, intents)
|
|
72
79
|
code Explore codebase (glob, grep, tree, related files)
|
|
73
80
|
metrics Show workflow success and provider parity metrics
|
|
74
81
|
architecture Analyze architecture layers and violations
|
|
75
82
|
install Generate provider manifests (agents, commands, tools)
|
|
76
83
|
update Regenerate manifests when gem version changes
|
|
84
|
+
eval Run black-box eval scenarios against `agentf agent`
|
|
85
|
+
agent Run a single agent directly
|
|
77
86
|
mcp-server Start MCP server over stdio (for Copilot integration)
|
|
78
87
|
version Show version
|
|
79
88
|
|
|
@@ -87,7 +96,7 @@ module Agentf
|
|
|
87
96
|
AGENTF_WORKFLOW_CONTRACT_MODE=advisory|enforcing|off Contract behavior mode
|
|
88
97
|
AGENTF_AGENT_CONTRACT_ENABLED=true|false Enable/disable per-agent contract checks
|
|
89
98
|
AGENTF_AGENT_CONTRACT_MODE=advisory|enforcing|off Per-agent contract behavior mode
|
|
90
|
-
|
|
99
|
+
(AGENTF_DEFAULT_PACK no longer used — orchestrator uses internal profiles)
|
|
91
100
|
AGENTF_GEM_PATH=/path/to/gem Path to agentf gem (for OpenCode plugin binary resolution)
|
|
92
101
|
|
|
93
102
|
Examples:
|
|
@@ -100,6 +109,9 @@ module Agentf
|
|
|
100
109
|
agentf metrics parity --json
|
|
101
110
|
agentf architecture analyze
|
|
102
111
|
agentf architecture review --json
|
|
112
|
+
agentf eval list
|
|
113
|
+
agentf eval run all --json
|
|
114
|
+
agentf agent planner "Plan a refactor" --json
|
|
103
115
|
agentf update
|
|
104
116
|
agentf update --force --provider=opencode,copilot
|
|
105
117
|
agentf mcp-server
|