agentf 0.4.7 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/agentf/agents/architect.rb +7 -3
- data/lib/agentf/agents/base.rb +31 -3
- data/lib/agentf/agents/debugger.rb +30 -8
- data/lib/agentf/agents/designer.rb +20 -8
- data/lib/agentf/agents/documenter.rb +8 -2
- data/lib/agentf/agents/explorer.rb +29 -11
- data/lib/agentf/agents/reviewer.rb +12 -7
- data/lib/agentf/agents/security.rb +27 -15
- data/lib/agentf/agents/specialist.rb +34 -18
- data/lib/agentf/agents/tester.rb +48 -8
- data/lib/agentf/cli/agent.rb +95 -0
- data/lib/agentf/cli/eval.rb +203 -0
- data/lib/agentf/cli/install.rb +7 -0
- data/lib/agentf/cli/memory.rb +138 -90
- data/lib/agentf/cli/router.rb +16 -4
- data/lib/agentf/cli/update.rb +9 -2
- data/lib/agentf/commands/memory_reviewer.rb +22 -48
- data/lib/agentf/commands/metrics.rb +18 -25
- data/lib/agentf/commands/registry.rb +28 -0
- data/lib/agentf/context_builder.rb +4 -14
- data/lib/agentf/embedding_provider.rb +35 -0
- data/lib/agentf/evals/report.rb +134 -0
- data/lib/agentf/evals/runner.rb +771 -0
- data/lib/agentf/evals/scenario.rb +211 -0
- data/lib/agentf/installer.rb +498 -365
- data/lib/agentf/mcp/server.rb +294 -114
- data/lib/agentf/memory.rb +354 -214
- data/lib/agentf/service/providers.rb +10 -62
- data/lib/agentf/version.rb +1 -1
- data/lib/agentf/workflow_engine.rb +205 -77
- data/lib/agentf.rb +10 -3
- metadata +9 -3
- data/lib/agentf/packs.rb +0 -74
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ace5a58ed6bfb8389d1e7a68349d7cc9f8d80b4093131a8cf2013388b001a08d
|
|
4
|
+
data.tar.gz: ba9a86b1c4b9e7e7edf62bed089d5cb3a1dddb9c5b4282c16e9be27438fec088
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 35767581d0b7561c1800464909dbf00524724527148b20f4f1ab911d846301d1a60333e8703d7b3db04e7d3d34e1a977d522fef236f8899692ae8a8c2bac65ad
|
|
7
|
+
data.tar.gz: 8d61e62517723d4bc4d39905dfb87b792d27ba1bfbe2b1eb6c445409b138091d072413ad21bffbe2cd584887d9a75f798df64f79601594ccb1029e9db54439f1
|
data/lib/agentf/agents/architect.rb
CHANGED
|
@@ -9,7 +9,7 @@ module Agentf
|
|
|
9
9
|
DESCRIPTION = "Strategy, task decomposition, and memory retrieval."
|
|
10
10
|
COMMANDS = %w[glob read_file memory].freeze
|
|
11
11
|
MEMORY_CONCEPTS = {
|
|
12
|
-
"reads" => ["get_recent_memories", "
|
|
12
|
+
"reads" => ["get_recent_memories", "get_episodes"],
|
|
13
13
|
"writes" => [],
|
|
14
14
|
"policy" => "Retrieve relevant memories before planning; do not duplicate runtime memory into static markdown."
|
|
15
15
|
}.freeze
|
|
@@ -44,7 +44,7 @@ module Agentf
|
|
|
44
44
|
|
|
45
45
|
def self.policy_boundaries
|
|
46
46
|
{
|
|
47
|
-
"always" => ["Capture constraints before decomposition", "Use recent memories and
|
|
47
|
+
"always" => ["Capture constraints before decomposition", "Use recent memories and negative episodes in planning"],
|
|
48
48
|
"ask_first" => ["Changing architectural style from project defaults"],
|
|
49
49
|
"never" => ["Skip task decomposition for non-trivial workflows"],
|
|
50
50
|
"required_inputs" => [],
|
|
@@ -57,7 +57,7 @@ module Agentf
|
|
|
57
57
|
|
|
58
58
|
# Retrieve relevant memories before planning
|
|
59
59
|
recent = memory.get_recent_memories(limit: 5)
|
|
60
|
-
pitfalls = memory.
|
|
60
|
+
pitfalls = memory.get_episodes(limit: 3, outcome: "negative")
|
|
61
61
|
|
|
62
62
|
context = {
|
|
63
63
|
"task" => task,
|
|
@@ -78,6 +78,10 @@ module Agentf
|
|
|
78
78
|
|
|
79
79
|
{ "subtasks" => subtasks, "context" => context }
|
|
80
80
|
end
|
|
81
|
+
|
|
82
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
83
|
+
plan_task(task)
|
|
84
|
+
end
|
|
81
85
|
end
|
|
82
86
|
end
|
|
83
87
|
end
|
data/lib/agentf/agents/base.rb
CHANGED
|
@@ -32,8 +32,8 @@ module Agentf
|
|
|
32
32
|
|
|
33
33
|
def self.memory_concepts
|
|
34
34
|
{
|
|
35
|
-
"reads" => ["RedisMemory#get_recent_memories", "RedisMemory#
|
|
36
|
-
"writes" => ["RedisMemory#store_lesson", "RedisMemory#
|
|
35
|
+
"reads" => ["RedisMemory#get_recent_memories", "RedisMemory#get_episodes"],
|
|
36
|
+
"writes" => ["RedisMemory#store_lesson", "RedisMemory#store_episode", "RedisMemory#store_playbook"],
|
|
37
37
|
"policy" => "Memory is runtime state in Redis and should not be embedded as raw data in manifest markdown."
|
|
38
38
|
}
|
|
39
39
|
end
|
|
@@ -61,7 +61,15 @@ module Agentf
|
|
|
61
61
|
)
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
+
# Unified execution entrypoint for all agents. Concrete agents must
|
|
65
|
+
# implement `execute(task:, context:, agents:, commands:, logger:)`.
|
|
66
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
67
|
+
raise NotImplementedError, "#{self.class} must implement #execute"
|
|
68
|
+
end
|
|
69
|
+
|
|
64
70
|
def log(message)
|
|
71
|
+
return if ENV["AGENTF_SUPPRESS_AGENT_LOGS"] == "true"
|
|
72
|
+
|
|
65
73
|
puts "\n[#{@name}] #{message}"
|
|
66
74
|
end
|
|
67
75
|
|
|
@@ -83,8 +91,28 @@ module Agentf
|
|
|
83
91
|
result: result
|
|
84
92
|
)
|
|
85
93
|
|
|
86
|
-
|
|
94
|
+
result
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Helper to centralize memory write confirmation handling.
|
|
98
|
+
# Yields a block that performs the memory write. If the memory layer
|
|
99
|
+
# requires confirmation (ask_first policy) a structured hash is
|
|
100
|
+
# returned with confirmation details so agents can merge that into
|
|
101
|
+
# their own return payloads or let the orchestrator handle prompting.
|
|
102
|
+
def safe_memory_write(attempted: {})
|
|
103
|
+
begin
|
|
104
|
+
yield
|
|
105
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
106
|
+
log "[MEMORY] Confirmation required: #{e.message} -- details=#{e.details.inspect}"
|
|
107
|
+
{
|
|
108
|
+
"confirmation_required" => true,
|
|
109
|
+
"confirmation_details" => e.details,
|
|
110
|
+
"attempted" => attempted,
|
|
111
|
+
"confirmed_write_token" => "confirmed",
|
|
112
|
+
"confirmation_prompt" => "Ask the user whether to save this memory. If they approve, rerun the same tool with confirmedWrite=confirmed. If they decline, do not retry."
|
|
113
|
+
}
|
|
87
114
|
end
|
|
88
115
|
end
|
|
116
|
+
end
|
|
89
117
|
end
|
|
90
118
|
end
|
data/lib/agentf/agents/debugger.rb
CHANGED
|
@@ -66,14 +66,31 @@ module Agentf
|
|
|
66
66
|
|
|
67
67
|
analysis = @commands.parse_error(error)
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
69
|
+
res = safe_memory_write(attempted: { action: "store_lesson", title: "Debugged: #{error[0..50]}...", agent: name }) do
|
|
70
|
+
memory.store_episode(
|
|
71
|
+
type: "lesson",
|
|
72
|
+
title: "Debugged: #{error[0..50]}...",
|
|
73
|
+
description: "Root cause: #{analysis.possible_causes.first}. Fix: #{analysis.suggested_fix}",
|
|
74
|
+
context: context.to_s,
|
|
75
|
+
agent: name
|
|
76
|
+
)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
80
|
+
log "Root cause: #{analysis.possible_causes.first}"
|
|
81
|
+
log "Suggested fix: #{analysis.suggested_fix}"
|
|
82
|
+
return {
|
|
83
|
+
"success" => false,
|
|
84
|
+
"confirmation_required" => true,
|
|
85
|
+
"confirmation_details" => res["confirmation_details"],
|
|
86
|
+
"analysis" => {
|
|
87
|
+
"error_type" => analysis.error_type,
|
|
88
|
+
"possible_causes" => analysis.possible_causes,
|
|
89
|
+
"suggested_fix" => analysis.suggested_fix,
|
|
90
|
+
"stack_trace" => analysis.stack_trace
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
end
|
|
77
94
|
|
|
78
95
|
log "Root cause: #{analysis.possible_causes.first}"
|
|
79
96
|
log "Suggested fix: #{analysis.suggested_fix}"
|
|
@@ -90,6 +107,11 @@ module Agentf
|
|
|
90
107
|
}
|
|
91
108
|
end
|
|
92
109
|
end
|
|
110
|
+
|
|
111
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
112
|
+
error_text = task.is_a?(String) ? task : context["error"]
|
|
113
|
+
diagnose(error_text, context: context)
|
|
114
|
+
end
|
|
93
115
|
end
|
|
94
116
|
end
|
|
95
117
|
end
|
data/lib/agentf/agents/designer.rb
CHANGED
|
@@ -11,7 +11,7 @@ module Agentf
|
|
|
11
11
|
COMMANDS = %w[generate_component validate_design_system].freeze
|
|
12
12
|
MEMORY_CONCEPTS = {
|
|
13
13
|
"reads" => [],
|
|
14
|
-
"writes" => ["
|
|
14
|
+
"writes" => ["store_episode"],
|
|
15
15
|
"policy" => "Capture successful design implementation patterns."
|
|
16
16
|
}.freeze
|
|
17
17
|
|
|
@@ -64,13 +64,20 @@ module Agentf
|
|
|
64
64
|
|
|
65
65
|
spec = @commands.generate_component("GeneratedComponent", design_spec)
|
|
66
66
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
67
|
+
res = safe_memory_write(attempted: { action: "store_episode", title: "Implemented design: #{design_spec}", outcome: "positive", agent: name }) do
|
|
68
|
+
memory.store_episode(
|
|
69
|
+
type: "episode",
|
|
70
|
+
title: "Implemented design: #{design_spec}",
|
|
71
|
+
description: "Created #{spec.name} in #{spec.framework}",
|
|
72
|
+
context: "Framework: #{framework}",
|
|
73
|
+
agent: name,
|
|
74
|
+
outcome: "positive"
|
|
75
|
+
)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
79
|
+
return { "design_spec" => design_spec, "component" => spec.name, "framework" => framework, "generated_code" => spec.code, "success" => true }.merge(res)
|
|
80
|
+
end
|
|
74
81
|
|
|
75
82
|
log "Created component: #{spec.name}"
|
|
76
83
|
|
|
@@ -83,6 +90,11 @@ module Agentf
|
|
|
83
90
|
}
|
|
84
91
|
end
|
|
85
92
|
end
|
|
93
|
+
|
|
94
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
95
|
+
spec = task.is_a?(String) ? task : context["design_spec"]
|
|
96
|
+
implement_design(spec, framework: context["framework"] || "react")
|
|
97
|
+
end
|
|
86
98
|
end
|
|
87
99
|
end
|
|
88
100
|
end
|
data/lib/agentf/agents/documenter.rb
CHANGED
|
@@ -57,8 +57,8 @@ module Agentf
|
|
|
57
57
|
|
|
58
58
|
memories = memory.get_recent_memories(limit: 20)
|
|
59
59
|
|
|
60
|
-
successes = memories.select { |m| m["type"] == "
|
|
61
|
-
pitfalls = memories.select { |m| m["type"] == "
|
|
60
|
+
successes = memories.select { |m| m["type"] == "episode" && m["outcome"] == "positive" }
|
|
61
|
+
pitfalls = memories.select { |m| m["type"] == "episode" && m["outcome"] == "negative" }
|
|
62
62
|
|
|
63
63
|
log "Found #{successes.size} successes"
|
|
64
64
|
log "Found #{pitfalls.size} pitfalls"
|
|
@@ -69,6 +69,12 @@ module Agentf
|
|
|
69
69
|
"total_memories" => memories.size
|
|
70
70
|
}
|
|
71
71
|
end
|
|
72
|
+
|
|
73
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
74
|
+
project = task.is_a?(String) ? task : (context["project_name"] || "project")
|
|
75
|
+
sync_docs(project)
|
|
76
|
+
end
|
|
77
|
+
|
|
72
78
|
end
|
|
73
79
|
end
|
|
74
80
|
end
|
data/lib/agentf/agents/explorer.rb
CHANGED
|
@@ -11,8 +11,8 @@ module Agentf
|
|
|
11
11
|
COMMANDS = %w[glob grep read_file].freeze
|
|
12
12
|
MEMORY_CONCEPTS = {
|
|
13
13
|
"reads" => [],
|
|
14
|
-
"writes" => ["
|
|
15
|
-
"policy" => "Store
|
|
14
|
+
"writes" => ["store_lesson"],
|
|
15
|
+
"policy" => "Store research findings as lessons after user confirmation."
|
|
16
16
|
}.freeze
|
|
17
17
|
|
|
18
18
|
def self.description
|
|
@@ -46,7 +46,7 @@ module Agentf
|
|
|
46
46
|
def self.policy_boundaries
|
|
47
47
|
{
|
|
48
48
|
"always" => ["Return concrete file evidence"],
|
|
49
|
-
"ask_first" => ["Scanning outside configured base path", "Persisting
|
|
49
|
+
"ask_first" => ["Scanning outside configured base path", "Persisting research lessons to memory"],
|
|
50
50
|
"never" => ["Mutate project files during exploration"],
|
|
51
51
|
"required_inputs" => [],
|
|
52
52
|
"required_outputs" => ["files", "context_gathered"]
|
|
@@ -63,19 +63,37 @@ module Agentf
|
|
|
63
63
|
|
|
64
64
|
files = @commands.glob(query, file_types: nil)
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
66
|
+
res = safe_memory_write(attempted: { action: "store_lesson", title: "Research finding: #{query}", agent: name }) do
|
|
67
|
+
memory.store_lesson(
|
|
68
|
+
title: "Research finding: #{query}",
|
|
69
|
+
description: "Found #{files.size} relevant files during exploration",
|
|
70
|
+
context: "Search pattern: #{file_pattern || 'all files'}",
|
|
71
|
+
agent: name
|
|
72
|
+
)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
76
|
+
log "Memory confirmation required during exploration: #{res['confirmation_details'].inspect}"
|
|
77
|
+
return {
|
|
78
|
+
"files" => files,
|
|
79
|
+
"context_gathered" => true,
|
|
80
|
+
"confirmation_required" => true,
|
|
81
|
+
"confirmation_details" => res["confirmation_details"],
|
|
82
|
+
"attempted" => res["attempted"],
|
|
83
|
+
"confirmed_write_token" => res["confirmed_write_token"],
|
|
84
|
+
"confirmation_prompt" => res["confirmation_prompt"]
|
|
85
|
+
}
|
|
86
|
+
end
|
|
74
87
|
|
|
75
88
|
log "Found #{files.size} files"
|
|
76
89
|
|
|
77
90
|
{ "query" => query, "files" => files, "context_gathered" => true }
|
|
78
91
|
end
|
|
92
|
+
|
|
93
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
94
|
+
query = context["explore_query"] || task || "*.rb"
|
|
95
|
+
explore(query, file_pattern: context["file_pattern"])
|
|
96
|
+
end
|
|
79
97
|
end
|
|
80
98
|
end
|
|
81
99
|
end
|
data/lib/agentf/agents/reviewer.rb
CHANGED
|
@@ -9,9 +9,9 @@ module Agentf
|
|
|
9
9
|
DESCRIPTION = "Quality assurance and regression checking against memory."
|
|
10
10
|
COMMANDS = %w[read_file memory].freeze
|
|
11
11
|
MEMORY_CONCEPTS = {
|
|
12
|
-
"reads" => ["
|
|
12
|
+
"reads" => ["get_episodes", "get_recent_memories"],
|
|
13
13
|
"writes" => [],
|
|
14
|
-
"policy" => "Validate outputs against known
|
|
14
|
+
"policy" => "Validate outputs against known negative episodes before approval."
|
|
15
15
|
}.freeze
|
|
16
16
|
|
|
17
17
|
def self.description
|
|
@@ -56,14 +56,14 @@ module Agentf
|
|
|
56
56
|
execute_with_contract(context: { "execution" => subtask_result }) do
|
|
57
57
|
log "Reviewing subtask #{subtask_result['subtask_id']}"
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
pitfalls = memory.get_episodes(limit: 5, outcome: "negative")
|
|
60
|
+
memories = memory.get_recent_memories(limit: 5)
|
|
61
61
|
|
|
62
62
|
issues = []
|
|
63
63
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
pitfalls.each do |pitfall|
|
|
65
|
+
issues << "Warning: Known negative episode - #{pitfall['title']}" if pitfall["type"] == "episode"
|
|
66
|
+
end
|
|
67
67
|
|
|
68
68
|
approved = issues.empty?
|
|
69
69
|
|
|
@@ -77,6 +77,11 @@ module Agentf
|
|
|
77
77
|
{ "approved" => approved, "issues" => issues }
|
|
78
78
|
end
|
|
79
79
|
end
|
|
80
|
+
|
|
81
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
82
|
+
subtask = task.is_a?(Hash) ? task : context["execution"] || {}
|
|
83
|
+
review(subtask)
|
|
84
|
+
end
|
|
80
85
|
end
|
|
81
86
|
end
|
|
82
87
|
end
|
data/lib/agentf/agents/security.rb
CHANGED
|
@@ -11,7 +11,7 @@ module Agentf
|
|
|
11
11
|
COMMANDS = %w[scan best_practices].freeze
|
|
12
12
|
MEMORY_CONCEPTS = {
|
|
13
13
|
"reads" => [],
|
|
14
|
-
"writes" => ["
|
|
14
|
+
"writes" => ["store_episode"],
|
|
15
15
|
"policy" => "Record findings while redacting sensitive values."
|
|
16
16
|
}.freeze
|
|
17
17
|
|
|
@@ -66,27 +66,39 @@ module Agentf
|
|
|
66
66
|
summary = summarize_findings(findings)
|
|
67
67
|
|
|
68
68
|
if findings["issues"].empty?
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
69
|
+
res = safe_memory_write(attempted: { action: "store_episode", title: "Security review passed", outcome: "positive", agent: name }) do
|
|
70
|
+
memory.store_episode(
|
|
71
|
+
type: "episode",
|
|
72
|
+
title: "Security review passed",
|
|
73
|
+
description: summary,
|
|
74
|
+
context: task,
|
|
75
|
+
agent: name,
|
|
76
|
+
outcome: "positive"
|
|
77
|
+
)
|
|
78
|
+
end
|
|
79
|
+
return findings.merge(res) if res.is_a?(Hash) && res["confirmation_required"]
|
|
76
80
|
else
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
81
|
+
res = safe_memory_write(attempted: { action: "store_episode", title: "Security findings detected", outcome: "negative", agent: name }) do
|
|
82
|
+
memory.store_episode(
|
|
83
|
+
type: "episode",
|
|
84
|
+
title: "Security findings detected",
|
|
85
|
+
description: summary,
|
|
86
|
+
context: task,
|
|
87
|
+
agent: name,
|
|
88
|
+
outcome: "negative"
|
|
89
|
+
)
|
|
90
|
+
end
|
|
91
|
+
return findings.merge(res) if res.is_a?(Hash) && res["confirmation_required"]
|
|
84
92
|
end
|
|
85
93
|
|
|
86
94
|
findings.merge("best_practices" => @commands.best_practices)
|
|
87
95
|
end
|
|
88
96
|
end
|
|
89
97
|
|
|
98
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
99
|
+
assess(task: task, context: context)
|
|
100
|
+
end
|
|
101
|
+
|
|
90
102
|
private
|
|
91
103
|
|
|
92
104
|
def summarize_findings(findings)
|
data/lib/agentf/agents/specialist.rb
CHANGED
|
@@ -10,7 +10,7 @@ module Agentf
|
|
|
10
10
|
COMMANDS = %w[read_file write_file run_command].freeze
|
|
11
11
|
MEMORY_CONCEPTS = {
|
|
12
12
|
"reads" => [],
|
|
13
|
-
"writes" => ["
|
|
13
|
+
"writes" => ["store_episode"],
|
|
14
14
|
"policy" => "Persist execution outcomes as lessons for downstream agents."
|
|
15
15
|
}.freeze
|
|
16
16
|
|
|
@@ -52,7 +52,9 @@ module Agentf
|
|
|
52
52
|
}
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
def execute(
|
|
55
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
56
|
+
subtask = task.is_a?(Hash) ? task : (context["current_subtask"] || { "description" => task })
|
|
57
|
+
|
|
56
58
|
normalized_subtask = subtask.merge(
|
|
57
59
|
"id" => subtask["id"] || "ad-hoc",
|
|
58
60
|
"description" => subtask["description"] || "Execute implementation step"
|
|
@@ -64,23 +66,37 @@ module Agentf
|
|
|
64
66
|
success = normalized_subtask.fetch("success", true)
|
|
65
67
|
|
|
66
68
|
if success
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
69
|
+
res = safe_memory_write(attempted: { action: "store_episode", title: "Completed: #{normalized_subtask['description']}", outcome: "positive", agent: name }) do
|
|
70
|
+
memory.store_episode(
|
|
71
|
+
type: "episode",
|
|
72
|
+
title: "Completed: #{normalized_subtask['description']}",
|
|
73
|
+
description: "Successfully executed subtask #{normalized_subtask['id']}",
|
|
74
|
+
context: "Working on #{normalized_subtask.fetch('task', 'unknown task')}",
|
|
75
|
+
agent: name,
|
|
76
|
+
outcome: "positive"
|
|
77
|
+
)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
81
|
+
log "Memory confirmation required when storing success: #{res['confirmation_details'].inspect}"
|
|
82
|
+
return { "subtask_id" => normalized_subtask["id"], "success" => success, "result" => "Code executed", "confirmation_required" => true, "confirmation_details" => res["confirmation_details"], "attempted" => res["attempted"] }
|
|
83
|
+
end
|
|
75
84
|
else
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
85
|
+
res = safe_memory_write(attempted: { action: "store_episode", title: "Failed: #{normalized_subtask['description']}", outcome: "negative", agent: name }) do
|
|
86
|
+
memory.store_episode(
|
|
87
|
+
type: "episode",
|
|
88
|
+
title: "Failed: #{normalized_subtask['description']}",
|
|
89
|
+
description: "Subtask #{normalized_subtask['id']} failed",
|
|
90
|
+
context: "Working on #{normalized_subtask.fetch('task', 'unknown task')}",
|
|
91
|
+
agent: name,
|
|
92
|
+
outcome: "negative"
|
|
93
|
+
)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
97
|
+
log "Memory confirmation required when storing pitfall: #{res['confirmation_details'].inspect}"
|
|
98
|
+
return { "subtask_id" => normalized_subtask["id"], "success" => success, "result" => "Code executed", "confirmation_required" => true, "confirmation_details" => res["confirmation_details"], "attempted" => res["attempted"] }
|
|
99
|
+
end
|
|
84
100
|
end
|
|
85
101
|
|
|
86
102
|
{ "subtask_id" => normalized_subtask["id"], "success" => success, "result" => "Code executed" }
|
data/lib/agentf/agents/tester.rb
CHANGED
|
@@ -11,7 +11,7 @@ module Agentf
|
|
|
11
11
|
COMMANDS = %w[detect_framework generate_unit_tests run_tests].freeze
|
|
12
12
|
MEMORY_CONCEPTS = {
|
|
13
13
|
"reads" => [],
|
|
14
|
-
"writes" => ["
|
|
14
|
+
"writes" => ["store_episode"],
|
|
15
15
|
"policy" => "Persist test generation outcomes for future reuse."
|
|
16
16
|
}.freeze
|
|
17
17
|
|
|
@@ -63,13 +63,21 @@ module Agentf
|
|
|
63
63
|
|
|
64
64
|
template = @commands.generate_unit_tests(code_file)
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
66
|
+
res = safe_memory_write(attempted: { action: "store_episode", title: "Generated #{test_type} tests for #{code_file}", outcome: "positive", agent: name }) do
|
|
67
|
+
memory.store_episode(
|
|
68
|
+
type: "episode",
|
|
69
|
+
title: "Generated #{test_type} tests for #{code_file}",
|
|
70
|
+
description: "Created #{template.test_file} with #{test_type} tests",
|
|
71
|
+
context: "Test framework: #{template.framework}",
|
|
72
|
+
agent: name,
|
|
73
|
+
outcome: "positive"
|
|
74
|
+
)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
if res.is_a?(Hash) && res["confirmation_required"]
|
|
78
|
+
log "Memory confirmation required when storing generated tests: #{res['confirmation_details'].inspect}"
|
|
79
|
+
return { "test_file" => template.test_file, "test_type" => test_type, "generated_code" => template.test_code, "confirmation_required" => true, "confirmation_details" => res["confirmation_details"], "attempted" => res["attempted"] }
|
|
80
|
+
end
|
|
73
81
|
|
|
74
82
|
log "Created: #{template.test_file}"
|
|
75
83
|
|
|
@@ -90,6 +98,38 @@ module Agentf
|
|
|
90
98
|
|
|
91
99
|
{ "test_file" => test_file, "passed" => result["passed"] }
|
|
92
100
|
end
|
|
101
|
+
|
|
102
|
+
def execute(task:, context: {}, agents: {}, commands: {}, logger: nil)
|
|
103
|
+
# Support provider-driven TDD red-phase: when context signals a red phase,
|
|
104
|
+
# generate tests via the tester commands (if provided) and return a
|
|
105
|
+
# simulated failing test signature so orchestrator flows can short-circuit.
|
|
106
|
+
if context.to_h["tdd_phase"] == "red"
|
|
107
|
+
tester_commands = if commands.respond_to?(:fetch)
|
|
108
|
+
commands.fetch("tester", nil)
|
|
109
|
+
else
|
|
110
|
+
commands["tester"]
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
begin
|
|
114
|
+
tester_commands&.generate_unit_tests(context.to_h["source_file"]) if tester_commands&.respond_to?(:generate_unit_tests)
|
|
115
|
+
rescue StandardError
|
|
116
|
+
# ignore command errors for the simulated red phase
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
return { "tdd_phase" => "red", "passed" => false, "failure_signature" => "expected-failure-#{context.to_h["source_file"] || 'unspecified'}" }
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
action = context["action"] || (task.is_a?(String) ? "generate_tests" : context["action"])
|
|
123
|
+
case action
|
|
124
|
+
when "generate_tests"
|
|
125
|
+
code_file = task.is_a?(String) ? task : context["code_file"]
|
|
126
|
+
generate_tests(code_file, test_type: context["test_type"] || "unit")
|
|
127
|
+
when "run_tests"
|
|
128
|
+
run_tests(context["test_file"] || task)
|
|
129
|
+
else
|
|
130
|
+
{ "error" => "Unknown action for Tester: #{action}" }
|
|
131
|
+
end
|
|
132
|
+
end
|
|
93
133
|
end
|
|
94
134
|
end
|
|
95
135
|
end
|
data/lib/agentf/cli/agent.rb
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "arg_parser"
|
|
4
|
+
require_relative "../commands/registry"
|
|
5
|
+
require_relative "../commands"
|
|
6
|
+
require_relative "../agents"
|
|
7
|
+
require_relative "../memory"
|
|
8
|
+
|
|
9
|
+
module Agentf
|
|
10
|
+
module CLI
|
|
11
|
+
# CLI entry for running a single agent and returning JSON output.
|
|
12
|
+
class Agent
|
|
13
|
+
include ArgParser
|
|
14
|
+
|
|
15
|
+
def initialize
|
|
16
|
+
@memory = Agentf::Memory::RedisMemory.new
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def run(args)
|
|
20
|
+
if args.empty? || args.include?("--help") || args.include?("help")
|
|
21
|
+
show_help
|
|
22
|
+
return
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Allow callers (like the TypeScript plugin) to append `--json` to
|
|
26
|
+
# request machine-readable output. Strip it here so it's not treated as
|
|
27
|
+
# part of the agent payload.
|
|
28
|
+
args = args.dup
|
|
29
|
+
json_output = !args.delete("--json").nil?
|
|
30
|
+
confirmed_write = parse_single_option(args, "--confirmed-write=")
|
|
31
|
+
|
|
32
|
+
agent_name = args.shift
|
|
33
|
+
payload = args.join(" ")
|
|
34
|
+
|
|
35
|
+
# Build command registry with default implementations
|
|
36
|
+
registry = Agentf::Commands::Registry.new
|
|
37
|
+
# Register known command providers
|
|
38
|
+
registry.register("explorer", Agentf::Commands::Explorer.new)
|
|
39
|
+
registry.register("tester", Agentf::Commands::Tester.new)
|
|
40
|
+
registry.register("debugger", Agentf::Commands::Debugger.new)
|
|
41
|
+
registry.register("designer", Agentf::Commands::Designer.new)
|
|
42
|
+
registry.register("security", Agentf::Commands::SecurityScanner.new)
|
|
43
|
+
registry.register("architecture", Agentf::Commands::Architecture.new)
|
|
44
|
+
|
|
45
|
+
# Load agents (classes already required via lib/agentf)
|
|
46
|
+
agents = {}
|
|
47
|
+
Agentf::Agents.constants.each do |const|
|
|
48
|
+
klass = Agentf::Agents.const_get(const)
|
|
49
|
+
next unless klass.is_a?(Class) && klass < Agentf::Agents::Base
|
|
50
|
+
agents[klass.typed_name] = klass.new(@memory)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
agent = agents[agent_name.upcase]
|
|
54
|
+
unless agent
|
|
55
|
+
$stderr.puts JSON.generate({ ok: false, error: "Agent not found: #{agent_name}" })
|
|
56
|
+
exit 1
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Parse possible JSON payload
|
|
60
|
+
parsed = nil
|
|
61
|
+
begin
|
|
62
|
+
parsed = JSON.parse(payload) unless payload.strip.empty?
|
|
63
|
+
rescue StandardError
|
|
64
|
+
parsed = payload
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
previous = ENV["AGENTF_SUPPRESS_AGENT_LOGS"]
|
|
68
|
+
previous_auto_confirm = ENV["AGENTF_AUTO_CONFIRM_MEMORIES"]
|
|
69
|
+
ENV["AGENTF_SUPPRESS_AGENT_LOGS"] = "true" if json_output
|
|
70
|
+
ENV["AGENTF_AUTO_CONFIRM_MEMORIES"] = "true" unless confirmed_write.to_s.empty?
|
|
71
|
+
|
|
72
|
+
result = agent.execute(
|
|
73
|
+
task: parsed || payload,
|
|
74
|
+
context: { "confirmed_write" => confirmed_write },
|
|
75
|
+
agents: agents,
|
|
76
|
+
commands: registry,
|
|
77
|
+
logger: json_output ? nil : method(:puts)
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
puts JSON.generate(result)
|
|
81
|
+
ensure
|
|
82
|
+
ENV["AGENTF_SUPPRESS_AGENT_LOGS"] = previous if json_output
|
|
83
|
+
ENV["AGENTF_AUTO_CONFIRM_MEMORIES"] = previous_auto_confirm unless confirmed_write.to_s.empty?
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def show_help
|
|
87
|
+
puts <<~HELP
|
|
88
|
+
Usage: agentf agent <AGENT_NAME> [payload] [--json] [--confirmed-write=<token>]
|
|
89
|
+
|
|
90
|
+
Runs a single agent and prints JSON result.
|
|
91
|
+
HELP
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|