agentf 0.4.7 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/agentf/agents/architect.rb +4 -0
- data/lib/agentf/agents/base.rb +29 -1
- data/lib/agentf/agents/debugger.rb +31 -8
- data/lib/agentf/agents/designer.rb +18 -7
- data/lib/agentf/agents/documenter.rb +6 -0
- data/lib/agentf/agents/explorer.rb +30 -11
- data/lib/agentf/agents/reviewer.rb +5 -0
- data/lib/agentf/agents/security.rb +24 -14
- data/lib/agentf/agents/specialist.rb +31 -17
- data/lib/agentf/agents/tester.rb +46 -7
- data/lib/agentf/cli/agent.rb +95 -0
- data/lib/agentf/cli/eval.rb +203 -0
- data/lib/agentf/cli/install.rb +7 -0
- data/lib/agentf/cli/memory.rb +82 -30
- data/lib/agentf/cli/router.rb +15 -3
- data/lib/agentf/cli/update.rb +9 -2
- data/lib/agentf/commands/memory_reviewer.rb +10 -2
- data/lib/agentf/commands/metrics.rb +16 -14
- data/lib/agentf/commands/registry.rb +28 -0
- data/lib/agentf/evals/report.rb +134 -0
- data/lib/agentf/evals/runner.rb +771 -0
- data/lib/agentf/evals/scenario.rb +211 -0
- data/lib/agentf/installer.rb +486 -348
- data/lib/agentf/mcp/server.rb +291 -49
- data/lib/agentf/memory.rb +46 -53
- data/lib/agentf/service/providers.rb +10 -62
- data/lib/agentf/version.rb +1 -1
- data/lib/agentf/workflow_engine.rb +204 -73
- data/lib/agentf.rb +9 -3
- metadata +8 -3
- data/lib/agentf/packs.rb +0 -74
|
data/lib/agentf/service/providers.rb
CHANGED
|
@@ -51,7 +51,8 @@ module Agentf
|
|
|
51
51
|
end
|
|
52
52
|
|
|
53
53
|
def pack_workflow_templates
|
|
54
|
-
|
|
54
|
+
# Workflow templates are now provided by the orchestrator profiles
|
|
55
|
+
Agentf::WorkflowEngine::PROFILES.fetch(@pack, Agentf::WorkflowEngine::PROFILES["generic"]).fetch("workflow_templates")
|
|
55
56
|
end
|
|
56
57
|
|
|
57
58
|
def execute_agent(agent_name:, task:, context:, agents:, commands:, logger: nil)
|
|
@@ -60,67 +61,14 @@ module Agentf
|
|
|
60
61
|
agent = agents[agent_name]
|
|
61
62
|
return { "error" => "Agent #{agent_name} not found" } unless agent
|
|
62
63
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
response
|
|
72
|
-
when Agentf::AgentRoles::QA_TESTER
|
|
73
|
-
source_file = context["source_file"] || "app/models/application_record.rb"
|
|
74
|
-
tester_commands = commands.fetch("tester")
|
|
75
|
-
tdd_phase = context["tdd_phase"] || "normal"
|
|
76
|
-
|
|
77
|
-
if tdd_phase == "red"
|
|
78
|
-
failure_signature = "expected-failure:#{File.basename(source_file)}:#{Time.now.to_i}"
|
|
79
|
-
{
|
|
80
|
-
"source_file" => source_file,
|
|
81
|
-
"test_file" => source_file.sub(/\.rb$/, "_spec.rb"),
|
|
82
|
-
"tdd_phase" => "red",
|
|
83
|
-
"passed" => false,
|
|
84
|
-
"failure_signature" => failure_signature,
|
|
85
|
-
"stdout" => "Intentional TDD red failure captured"
|
|
86
|
-
}
|
|
87
|
-
else
|
|
88
|
-
template = tester_commands.generate_unit_tests(source_file)
|
|
89
|
-
response = agent.generate_tests(source_file)
|
|
90
|
-
response["generated_code"] = template.test_code
|
|
91
|
-
response["tdd_phase"] = tdd_phase
|
|
92
|
-
response["failure_signature"] = context["tdd_failure_signature"]
|
|
93
|
-
response
|
|
94
|
-
end
|
|
95
|
-
when Agentf::AgentRoles::INCIDENT_RESPONDER
|
|
96
|
-
error = context["error"] || "No error provided"
|
|
97
|
-
analysis = commands.fetch("debugger").parse_error(error)
|
|
98
|
-
response = agent.diagnose(error, context: context["error_context"])
|
|
99
|
-
response["analysis"] = {
|
|
100
|
-
"error_type" => analysis.error_type,
|
|
101
|
-
"root_cause" => analysis.possible_causes,
|
|
102
|
-
"suggested_fix" => analysis.suggested_fix
|
|
103
|
-
}
|
|
104
|
-
response
|
|
105
|
-
when Agentf::AgentRoles::UI_ENGINEER
|
|
106
|
-
design_spec = context["design_spec"] || "Create a card component"
|
|
107
|
-
spec = commands.fetch("designer").generate_component("GeneratedComponent", design_spec)
|
|
108
|
-
response = agent.implement_design(design_spec)
|
|
109
|
-
response["generated_code"] = spec.code
|
|
110
|
-
response
|
|
111
|
-
when Agentf::AgentRoles::ENGINEER
|
|
112
|
-
subtask = context["current_subtask"] || { "description" => task }
|
|
113
|
-
agent.execute(subtask)
|
|
114
|
-
when Agentf::AgentRoles::SECURITY_REVIEWER
|
|
115
|
-
agent.assess(task: task, context: context)
|
|
116
|
-
when Agentf::AgentRoles::REVIEWER
|
|
117
|
-
last_result = context["execution"] || {}
|
|
118
|
-
agent.review(last_result)
|
|
119
|
-
when Agentf::AgentRoles::KNOWLEDGE_MANAGER
|
|
120
|
-
agent.sync_docs("project")
|
|
121
|
-
else
|
|
122
|
-
{ "status" => "not_implemented" }
|
|
123
|
-
end
|
|
64
|
+
# Provider no longer simulates TDD red-phase; delegate to Tester agent.
|
|
65
|
+
|
|
66
|
+
unless agent.respond_to?(:execute)
|
|
67
|
+
raise "Agent #{agent_name} does not implement execute"
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Delegate execution to the agent's unified entrypoint.
|
|
71
|
+
result = agent.execute(task: task, context: context || {}, agents: agents, commands: commands, logger: logger)
|
|
124
72
|
|
|
125
73
|
logger&.call("→ #{agent_name} Complete")
|
|
126
74
|
result
|
data/lib/agentf/version.rb
CHANGED
data/lib/agentf/workflow_engine.rb
CHANGED
|
@@ -8,6 +8,54 @@ require_relative "agent_policy"
|
|
|
8
8
|
|
|
9
9
|
module Agentf
|
|
10
10
|
class WorkflowEngine
|
|
11
|
+
# Profiles previously lived in Agentf::Packs. They are now embedded in the
|
|
12
|
+
# orchestrator so there's a single source of truth for workflow templates
|
|
13
|
+
# and keyword-based inference used by both runtime orchestration and any
|
|
14
|
+
# installer/CLI functionality.
|
|
15
|
+
PROFILES = {
|
|
16
|
+
"generic" => {
|
|
17
|
+
"name" => "Generic",
|
|
18
|
+
"description" => "Default provider workflows without domain specialization.",
|
|
19
|
+
"keywords" => [],
|
|
20
|
+
"workflow_templates" => {}
|
|
21
|
+
},
|
|
22
|
+
"rails_standard" => {
|
|
23
|
+
"name" => "Rails Standard",
|
|
24
|
+
"description" => "Thin models/controllers with services, queries, presenters, and policy reviews.",
|
|
25
|
+
"keywords" => %w[rails activerecord rspec pundit viewcomponent hotwire turbo stimulus],
|
|
26
|
+
"workflow_templates" => {
|
|
27
|
+
"feature" => %w[PLANNER RESEARCHER ENGINEER QA_TESTER SECURITY_REVIEWER REVIEWER KNOWLEDGE_MANAGER],
|
|
28
|
+
"bugfix" => %w[PLANNER INCIDENT_RESPONDER ENGINEER QA_TESTER SECURITY_REVIEWER REVIEWER],
|
|
29
|
+
"refactor" => %w[PLANNER RESEARCHER ENGINEER QA_TESTER REVIEWER],
|
|
30
|
+
"quick_fix" => %w[ENGINEER QA_TESTER REVIEWER],
|
|
31
|
+
"exploration" => %w[RESEARCHER]
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"rails_37signals" => {
|
|
35
|
+
"name" => "Rails 37signals",
|
|
36
|
+
"description" => "Resource-centric workflows favoring concerns, CRUD and model-rich patterns.",
|
|
37
|
+
"keywords" => %w[rails concern crud closure model minitest hotwire],
|
|
38
|
+
"workflow_templates" => {
|
|
39
|
+
"feature" => %w[PLANNER RESEARCHER ENGINEER QA_TESTER REVIEWER KNOWLEDGE_MANAGER],
|
|
40
|
+
"bugfix" => %w[PLANNER INCIDENT_RESPONDER ENGINEER QA_TESTER REVIEWER],
|
|
41
|
+
"refactor" => %w[PLANNER ENGINEER QA_TESTER REVIEWER],
|
|
42
|
+
"quick_fix" => %w[ENGINEER REVIEWER],
|
|
43
|
+
"exploration" => %w[RESEARCHER]
|
|
44
|
+
}
|
|
45
|
+
},
|
|
46
|
+
"rails_feature_spec" => {
|
|
47
|
+
"name" => "Rails Feature Spec",
|
|
48
|
+
"description" => "Feature-spec-first orchestration with planning and review emphasis.",
|
|
49
|
+
"keywords" => %w[rails feature specification acceptance criteria],
|
|
50
|
+
"workflow_templates" => {
|
|
51
|
+
"feature" => %w[PLANNER RESEARCHER UI_ENGINEER ENGINEER QA_TESTER REVIEWER KNOWLEDGE_MANAGER],
|
|
52
|
+
"bugfix" => %w[PLANNER INCIDENT_RESPONDER ENGINEER QA_TESTER REVIEWER],
|
|
53
|
+
"refactor" => %w[PLANNER RESEARCHER ENGINEER QA_TESTER REVIEWER],
|
|
54
|
+
"quick_fix" => %w[ENGINEER REVIEWER],
|
|
55
|
+
"exploration" => %w[RESEARCHER]
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}.freeze
|
|
11
59
|
PROVIDERS = {
|
|
12
60
|
opencode: Agentf::Service::Providers::OpenCode,
|
|
13
61
|
copilot: Agentf::Service::Providers::Copilot
|
|
@@ -20,7 +68,8 @@ module Agentf
|
|
|
20
68
|
@base_path = base_path || Agentf.config.base_path
|
|
21
69
|
@name = Agentf::AgentRoles::ORCHESTRATOR
|
|
22
70
|
@provider_ref = provider
|
|
23
|
-
|
|
71
|
+
# Initialize provider using the orchestrator's default profile ("generic").
|
|
72
|
+
@provider = build_provider(@provider_ref, pack: "generic")
|
|
24
73
|
|
|
25
74
|
@explorer_commands = Commands::Explorer.new(base_path: @base_path)
|
|
26
75
|
@tester_commands = Commands::Tester.new(base_path: @base_path)
|
|
@@ -51,7 +100,9 @@ module Agentf
|
|
|
51
100
|
@workflow_state = {}
|
|
52
101
|
end
|
|
53
102
|
|
|
54
|
-
|
|
103
|
+
# Unified execute entrypoint for the workflow engine. Accepts keyword
|
|
104
|
+
# `task:` for consistency with agent `execute` contracts.
|
|
105
|
+
def execute(task:, context: nil)
|
|
55
106
|
log "=" * 60
|
|
56
107
|
log "EXECUTING #{provider.name} WORKFLOW"
|
|
57
108
|
log "=" * 60
|
|
@@ -136,10 +187,22 @@ module Agentf
|
|
|
136
187
|
requested = context["pack"].to_s.strip
|
|
137
188
|
return requested.downcase unless requested.empty?
|
|
138
189
|
|
|
139
|
-
|
|
140
|
-
|
|
190
|
+
# No config-based default profile is kept; rely on orchestrator inference.
|
|
191
|
+
infer_profile(context.merge("task" => task))
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
def infer_profile(context = {})
|
|
195
|
+
text = [context["task"], context["design_spec"], context["stack"], context["framework"]]
|
|
196
|
+
.compact.join(" ").downcase
|
|
197
|
+
return "generic" if text.empty?
|
|
198
|
+
|
|
199
|
+
return "rails_standard" if includes_any_keyword?(text, PROFILES["rails_standard"]["keywords"])
|
|
200
|
+
|
|
201
|
+
"generic"
|
|
202
|
+
end
|
|
141
203
|
|
|
142
|
-
|
|
204
|
+
def includes_any_keyword?(text, keywords)
|
|
205
|
+
keywords.any? { |keyword| text.include?(keyword) }
|
|
143
206
|
end
|
|
144
207
|
|
|
145
208
|
def log(message)
|
|
@@ -161,8 +224,12 @@ module Agentf
|
|
|
161
224
|
enriched_context["tdd_failure_signature"] = @workflow_state.dig("tdd", "failure_signature")
|
|
162
225
|
end
|
|
163
226
|
|
|
227
|
+
# For ENGINEER, provide the current TDD phase and the expected failing
|
|
228
|
+
# test signature so the engineer can attempt a repair. Do NOT change the
|
|
229
|
+
# orchestrator-wide TDD state here; phase transitions must be driven by
|
|
230
|
+
# QA TESTER results (to ensure tests actually pass/fail).
|
|
164
231
|
if agent_name == Agentf::AgentRoles::ENGINEER
|
|
165
|
-
enriched_context["tdd_phase"] = "
|
|
232
|
+
enriched_context["tdd_phase"] = @workflow_state.dig("tdd", "phase")
|
|
166
233
|
enriched_context["expected_test_fix"] = @workflow_state.dig("tdd", "failure_signature")
|
|
167
234
|
end
|
|
168
235
|
|
|
@@ -170,14 +237,23 @@ module Agentf
|
|
|
170
237
|
enriched_context["execution"] = @workflow_state["results"].last&.fetch("result", {}) || {}
|
|
171
238
|
end
|
|
172
239
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
240
|
+
begin
|
|
241
|
+
result = @provider.execute_agent(
|
|
242
|
+
agent_name: agent_name,
|
|
243
|
+
task: @workflow_state["task"],
|
|
244
|
+
context: enriched_context,
|
|
245
|
+
agents: @agents,
|
|
246
|
+
commands: command_registry,
|
|
247
|
+
logger: method(:log)
|
|
248
|
+
)
|
|
249
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
250
|
+
# An agent attempted to persist memory but policy requires confirmation.
|
|
251
|
+
# Record the event and return a structured result that signals the
|
|
252
|
+
# orchestrator/UI to prompt the user. Do NOT set an "error" key so
|
|
253
|
+
# agent execution contract does not treat this as a failure.
|
|
254
|
+
handle_memory_confirmation(e, attempted: { action: "agent_persist", agent: agent_name })
|
|
255
|
+
return { "success" => false, "confirmation_required" => true, "confirmation_details" => e.details }
|
|
256
|
+
end
|
|
181
257
|
|
|
182
258
|
policy_violations = @agent_policy.validate(
|
|
183
259
|
agent_name: agent_name,
|
|
@@ -227,6 +303,8 @@ module Agentf
|
|
|
227
303
|
tags: tags,
|
|
228
304
|
agent: @name
|
|
229
305
|
)
|
|
306
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
307
|
+
handle_memory_confirmation(e, attempted: { action: "store_feature_intent", title: task, tags: tags })
|
|
230
308
|
rescue StandardError => e
|
|
231
309
|
log "Intent capture skipped: #{e.message}"
|
|
232
310
|
end
|
|
@@ -235,51 +313,67 @@ module Agentf
|
|
|
235
313
|
return unless result.is_a?(Hash)
|
|
236
314
|
|
|
237
315
|
if result["error"]
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
316
|
+
begin
|
|
317
|
+
@memory.store_pitfall(
|
|
318
|
+
title: "#{agent_name} execution failure",
|
|
319
|
+
description: result["error"],
|
|
320
|
+
context: @workflow_state["task"],
|
|
321
|
+
tags: [@workflow_state["workflow_type"], "workflow_error"],
|
|
322
|
+
agent: agent_name,
|
|
323
|
+
code_snippet: ""
|
|
324
|
+
)
|
|
325
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
326
|
+
handle_memory_confirmation(e, attempted: { action: "store_pitfall", agent: agent_name, error: result["error"] })
|
|
327
|
+
end
|
|
246
328
|
return
|
|
247
329
|
end
|
|
248
330
|
|
|
249
331
|
if agent_name == Agentf::AgentRoles::QA_TESTER && result["tdd_phase"] == "red" && result["passed"] == false
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
332
|
+
begin
|
|
333
|
+
@memory.store_pitfall(
|
|
334
|
+
title: "TDD red phase captured",
|
|
335
|
+
description: result["failure_signature"] || "Intentional failing test captured",
|
|
336
|
+
context: @workflow_state["task"],
|
|
337
|
+
tags: [@workflow_state["workflow_type"], "tdd_red"],
|
|
338
|
+
agent: agent_name,
|
|
339
|
+
code_snippet: ""
|
|
340
|
+
)
|
|
341
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
342
|
+
handle_memory_confirmation(e, attempted: { action: "store_pitfall", agent: agent_name, tdd: true })
|
|
343
|
+
end
|
|
258
344
|
return
|
|
259
345
|
end
|
|
260
346
|
|
|
261
347
|
if agent_name == Agentf::AgentRoles::QA_TESTER && result["tdd_phase"] == "green" && result["passed"] == true
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
348
|
+
begin
|
|
349
|
+
@memory.store_success(
|
|
350
|
+
title: "TDD green phase passed",
|
|
351
|
+
description: "Resolved failing test signature: #{result['failure_signature']}",
|
|
352
|
+
context: @workflow_state["task"],
|
|
353
|
+
tags: [@workflow_state["workflow_type"], "tdd_green"],
|
|
354
|
+
agent: agent_name,
|
|
355
|
+
code_snippet: ""
|
|
356
|
+
)
|
|
357
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
358
|
+
handle_memory_confirmation(e, attempted: { action: "store_success", agent: agent_name, tdd: true })
|
|
359
|
+
end
|
|
360
|
+
return
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
begin
|
|
364
|
+
@memory.store_lesson(
|
|
365
|
+
title: "#{agent_name} completed workflow step",
|
|
366
|
+
description: "Agent step completed for #{@workflow_state['workflow_type']} workflow",
|
|
265
367
|
context: @workflow_state["task"],
|
|
266
|
-
tags: [@workflow_state["workflow_type"], "tdd_green"],
|
|
368
|
+
tags: [@workflow_state["workflow_type"], "workflow_step"],
|
|
267
369
|
agent: agent_name,
|
|
268
370
|
code_snippet: ""
|
|
269
371
|
)
|
|
270
|
-
|
|
372
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
373
|
+
handle_memory_confirmation(e, attempted: { action: "store_lesson", agent: agent_name })
|
|
271
374
|
end
|
|
272
|
-
|
|
273
|
-
@memory.store_lesson(
|
|
274
|
-
title: "#{agent_name} completed workflow step",
|
|
275
|
-
description: "Agent step completed for #{@workflow_state['workflow_type']} workflow",
|
|
276
|
-
context: @workflow_state["task"],
|
|
277
|
-
tags: [@workflow_state["workflow_type"], "workflow_step"],
|
|
278
|
-
agent: agent_name,
|
|
279
|
-
code_snippet: ""
|
|
280
|
-
)
|
|
281
375
|
rescue StandardError => e
|
|
282
|
-
log "Learning persistence skipped: #{e.message}"
|
|
376
|
+
log "Learning persistence skipped: #{e.class}: #{e.message}\n #{Array(e.backtrace).first(6).join("\n ")}"
|
|
283
377
|
end
|
|
284
378
|
|
|
285
379
|
def summarize_workflow
|
|
@@ -334,22 +428,27 @@ module Agentf
|
|
|
334
428
|
@workflow_state["results"] << { "agent" => "QA_TESTER_TDD_RED", "result" => red_result }
|
|
335
429
|
persist_agent_learning(agent_name: Agentf::AgentRoles::QA_TESTER, result: red_result)
|
|
336
430
|
rescue StandardError => e
|
|
337
|
-
log "TDD red phase skipped: #{e.message}"
|
|
431
|
+
log "TDD red phase skipped: #{e.class}: #{e.message}\n #{Array(e.backtrace).first(6).join("\n ")}"
|
|
338
432
|
end
|
|
339
433
|
|
|
340
434
|
def transition_tdd_phase(agent_name:, result:)
|
|
341
435
|
tdd = @workflow_state["tdd"]
|
|
342
436
|
return unless tdd["enabled"]
|
|
343
437
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
438
|
+
# Phase transitions should be decided by QA_TESTER outcomes. When the
|
|
439
|
+
# QA tester reports a green phase and passing tests, mark the workflow
|
|
440
|
+
# as green. We avoid changing phase when ENGINEER executes to prevent
|
|
441
|
+
# optimistic transitions.
|
|
442
|
+
if agent_name == Agentf::AgentRoles::QA_TESTER
|
|
443
|
+
if result["tdd_phase"] == "green" && result["passed"] == true
|
|
444
|
+
tdd["phase"] = "green"
|
|
445
|
+
tdd["green_executed"] = true
|
|
446
|
+
tdd["failure_signature"] ||= result["failure_signature"]
|
|
447
|
+
elsif result["tdd_phase"] == "green"
|
|
448
|
+
# Tester indicated green but didn't confirm passing — keep guarded.
|
|
449
|
+
tdd["failure_signature"] ||= result["failure_signature"]
|
|
450
|
+
end
|
|
348
451
|
end
|
|
349
|
-
|
|
350
|
-
return unless agent_name == Agentf::AgentRoles::QA_TESTER && result["tdd_phase"] == "green"
|
|
351
|
-
|
|
352
|
-
tdd["failure_signature"] ||= result["failure_signature"]
|
|
353
452
|
end
|
|
354
453
|
|
|
355
454
|
def record_workflow_metrics
|
|
@@ -360,18 +459,22 @@ module Agentf
|
|
|
360
459
|
|
|
361
460
|
log "Metrics capture skipped: #{result['error']}"
|
|
362
461
|
rescue StandardError => e
|
|
363
|
-
log "Metrics capture skipped: #{e.message}"
|
|
462
|
+
log "Metrics capture skipped: #{e.class}: #{e.message}\n #{Array(e.backtrace).first(6).join("\n ")}"
|
|
364
463
|
end
|
|
365
464
|
|
|
366
465
|
def perform_architecture_review
|
|
367
466
|
result = @architecture_commands.review_layer_violations
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
467
|
+
begin
|
|
468
|
+
@memory.store_lesson(
|
|
469
|
+
title: "Architecture review completed",
|
|
470
|
+
description: "Layer violations: #{Array(result['violations']).length}",
|
|
471
|
+
context: @workflow_state["task"],
|
|
472
|
+
tags: [@workflow_state["workflow_type"], "architecture_review"],
|
|
473
|
+
agent: @name
|
|
474
|
+
)
|
|
475
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
476
|
+
handle_memory_confirmation(e, attempted: { action: "store_lesson", agent: @name, context: @workflow_state["task"] })
|
|
477
|
+
end
|
|
375
478
|
result
|
|
376
479
|
rescue StandardError => e
|
|
377
480
|
{ "error" => e.message, "violations" => [] }
|
|
@@ -402,8 +505,10 @@ module Agentf
|
|
|
402
505
|
agent: @name,
|
|
403
506
|
metadata: { "workflow_contract_event" => true }
|
|
404
507
|
)
|
|
508
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
509
|
+
handle_memory_confirmation(e, attempted: { action: "store_episode", title: "Workflow contract #{evaluation['stage']}", agent: @name })
|
|
405
510
|
rescue StandardError => e
|
|
406
|
-
log "Contract event persistence skipped: #{e.message}"
|
|
511
|
+
log "Contract event persistence skipped: #{e.class}: #{e.message}\n #{Array(e.backtrace).first(6).join("\n ")}"
|
|
407
512
|
end
|
|
408
513
|
|
|
409
514
|
def append_policy_violations(policy_violations)
|
|
@@ -412,18 +517,44 @@ module Agentf
|
|
|
412
517
|
@workflow_state["policy_violations"] ||= []
|
|
413
518
|
@workflow_state["policy_violations"].concat(policy_violations)
|
|
414
519
|
policy_violations.each do |violation|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
520
|
+
begin
|
|
521
|
+
@memory.store_episode(
|
|
522
|
+
type: "pitfall",
|
|
523
|
+
title: "Agent policy violation: #{violation['code']}",
|
|
524
|
+
description: violation["message"],
|
|
525
|
+
context: @workflow_state["task"],
|
|
526
|
+
tags: ["agent_policy", violation["agent"].to_s.downcase],
|
|
527
|
+
agent: @name,
|
|
528
|
+
metadata: { "policy_violation" => true, "severity" => violation["severity"] }
|
|
529
|
+
)
|
|
530
|
+
rescue Agentf::Memory::RedisMemory::ConfirmationRequired => e
|
|
531
|
+
handle_memory_confirmation(e, attempted: { action: "store_policy_violation", violation: violation, agent: @name })
|
|
532
|
+
end
|
|
424
533
|
end
|
|
425
534
|
rescue StandardError => e
|
|
426
|
-
log "Policy violation persistence skipped: #{e.message}"
|
|
535
|
+
log "Policy violation persistence skipped: #{e.class}: #{e.message}\n #{Array(e.backtrace).first(6).join("\n ")}"
|
|
536
|
+
end
|
|
537
|
+
|
|
538
|
+
# Helper to format exceptions for concise logs. Exposed here so multiple
|
|
539
|
+
# rescue handlers can produce consistent output if desired in the future.
|
|
540
|
+
def format_exception(e)
|
|
541
|
+
"#{e.class}: #{e.message}\n #{Array(e.backtrace).first(6).join("\n ")}"
|
|
542
|
+
end
|
|
543
|
+
|
|
544
|
+
# Handle a memory confirmation exception by recording an event in the
|
|
545
|
+
# workflow_state and emitting a log. This allows the orchestrator or UI to
|
|
546
|
+
# surface a prompt to the user, and optionally retry the attempted action
|
|
547
|
+
# with explicit confirmation.
|
|
548
|
+
def handle_memory_confirmation(exception, attempted: {})
|
|
549
|
+
@workflow_state["memory_confirmation_required"] ||= []
|
|
550
|
+
entry = {
|
|
551
|
+
"timestamp" => Time.now.to_i,
|
|
552
|
+
"confirmation_required" => true,
|
|
553
|
+
"confirmation_details" => exception.details,
|
|
554
|
+
"attempted" => attempted
|
|
555
|
+
}
|
|
556
|
+
@workflow_state["memory_confirmation_required"] << entry
|
|
557
|
+
log "Memory confirmation required: #{exception.message} -- attempted=#{attempted.inspect}"
|
|
427
558
|
end
|
|
428
559
|
end
|
|
429
560
|
end
|
data/lib/agentf.rb
CHANGED
|
@@ -16,7 +16,7 @@ module Agentf
|
|
|
16
16
|
attr_reader :redis_url
|
|
17
17
|
attr_accessor :project_name, :base_path, :metrics_enabled, :workflow_contract_enabled,
|
|
18
18
|
:workflow_contract_mode, :agent_contract_enabled, :agent_contract_mode,
|
|
19
|
-
:
|
|
19
|
+
:gem_path
|
|
20
20
|
|
|
21
21
|
def initialize
|
|
22
22
|
@redis_url = normalize_redis_url(ENV.fetch("REDIS_URL", "redis://localhost:6379"))
|
|
@@ -37,7 +37,7 @@ module Agentf
|
|
|
37
37
|
@agent_contract_mode = normalize_contract_mode(
|
|
38
38
|
ENV.fetch("AGENTF_AGENT_CONTRACT_MODE", "enforcing")
|
|
39
39
|
)
|
|
40
|
-
|
|
40
|
+
# Default profile removed; orchestrator defaults to "generic" internally.
|
|
41
41
|
@gem_path = ENV.fetch("AGENTF_GEM_PATH", nil)
|
|
42
42
|
end
|
|
43
43
|
|
|
@@ -85,9 +85,15 @@ end
|
|
|
85
85
|
require_relative "agentf/memory"
|
|
86
86
|
require_relative "agentf/tools"
|
|
87
87
|
require_relative "agentf/commands"
|
|
88
|
+
require_relative "agentf/commands/registry"
|
|
88
89
|
require_relative "agentf/service/providers"
|
|
89
90
|
require_relative "agentf/context_builder"
|
|
90
|
-
require_relative "agentf/packs"
|
|
91
|
+
require_relative "agentf/evals/scenario"
|
|
92
|
+
require_relative "agentf/evals/runner"
|
|
93
|
+
require_relative "agentf/evals/report"
|
|
94
|
+
# Profiles previously lived in lib/agentf/packs.rb; the profile data is now
|
|
95
|
+
# embedded in the orchestrator (WorkflowEngine::PROFILES). The old file was
|
|
96
|
+
# removed as part of simplifying the profile surface.
|
|
91
97
|
require_relative "agentf/agent_policy"
|
|
92
98
|
require_relative "agentf/agent_execution_contract"
|
|
93
99
|
require_relative "agentf/workflow_contract"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: agentf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.7
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Neal Deters
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: redis
|
|
@@ -108,9 +108,11 @@ files:
|
|
|
108
108
|
- lib/agentf/agents/security.rb
|
|
109
109
|
- lib/agentf/agents/specialist.rb
|
|
110
110
|
- lib/agentf/agents/tester.rb
|
|
111
|
+
- lib/agentf/cli/agent.rb
|
|
111
112
|
- lib/agentf/cli/architecture.rb
|
|
112
113
|
- lib/agentf/cli/arg_parser.rb
|
|
113
114
|
- lib/agentf/cli/code.rb
|
|
115
|
+
- lib/agentf/cli/eval.rb
|
|
114
116
|
- lib/agentf/cli/install.rb
|
|
115
117
|
- lib/agentf/cli/memory.rb
|
|
116
118
|
- lib/agentf/cli/metrics.rb
|
|
@@ -123,14 +125,17 @@ files:
|
|
|
123
125
|
- lib/agentf/commands/explorer.rb
|
|
124
126
|
- lib/agentf/commands/memory_reviewer.rb
|
|
125
127
|
- lib/agentf/commands/metrics.rb
|
|
128
|
+
- lib/agentf/commands/registry.rb
|
|
126
129
|
- lib/agentf/commands/security_scanner.rb
|
|
127
130
|
- lib/agentf/commands/tester.rb
|
|
128
131
|
- lib/agentf/context_builder.rb
|
|
132
|
+
- lib/agentf/evals/report.rb
|
|
133
|
+
- lib/agentf/evals/runner.rb
|
|
134
|
+
- lib/agentf/evals/scenario.rb
|
|
129
135
|
- lib/agentf/installer.rb
|
|
130
136
|
- lib/agentf/mcp/server.rb
|
|
131
137
|
- lib/agentf/mcp/stub.rb
|
|
132
138
|
- lib/agentf/memory.rb
|
|
133
|
-
- lib/agentf/packs.rb
|
|
134
139
|
- lib/agentf/service/providers.rb
|
|
135
140
|
- lib/agentf/tools.rb
|
|
136
141
|
- lib/agentf/tools/component_spec.rb
|
data/lib/agentf/packs.rb
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Agentf
|
|
4
|
-
module Packs
|
|
5
|
-
PROFILES = {
|
|
6
|
-
"generic" => {
|
|
7
|
-
"name" => "Generic",
|
|
8
|
-
"description" => "Default provider workflows without domain specialization.",
|
|
9
|
-
"keywords" => [],
|
|
10
|
-
"workflow_templates" => {}
|
|
11
|
-
},
|
|
12
|
-
"rails_standard" => {
|
|
13
|
-
"name" => "Rails Standard",
|
|
14
|
-
"description" => "Thin models/controllers with services, queries, presenters, and policy reviews.",
|
|
15
|
-
"keywords" => %w[rails activerecord rspec pundit viewcomponent hotwire turbo stimulus],
|
|
16
|
-
"workflow_templates" => {
|
|
17
|
-
"feature" => %w[PLANNER RESEARCHER ENGINEER QA_TESTER SECURITY_REVIEWER REVIEWER KNOWLEDGE_MANAGER],
|
|
18
|
-
"bugfix" => %w[PLANNER INCIDENT_RESPONDER ENGINEER QA_TESTER SECURITY_REVIEWER REVIEWER],
|
|
19
|
-
"refactor" => %w[PLANNER RESEARCHER ENGINEER QA_TESTER REVIEWER],
|
|
20
|
-
"quick_fix" => %w[ENGINEER QA_TESTER REVIEWER],
|
|
21
|
-
"exploration" => %w[RESEARCHER]
|
|
22
|
-
}
|
|
23
|
-
},
|
|
24
|
-
"rails_37signals" => {
|
|
25
|
-
"name" => "Rails 37signals",
|
|
26
|
-
"description" => "Resource-centric workflows favoring concerns, CRUD and model-rich patterns.",
|
|
27
|
-
"keywords" => %w[rails concern crud closure model minitest hotwire],
|
|
28
|
-
"workflow_templates" => {
|
|
29
|
-
"feature" => %w[PLANNER RESEARCHER ENGINEER QA_TESTER REVIEWER KNOWLEDGE_MANAGER],
|
|
30
|
-
"bugfix" => %w[PLANNER INCIDENT_RESPONDER ENGINEER QA_TESTER REVIEWER],
|
|
31
|
-
"refactor" => %w[PLANNER ENGINEER QA_TESTER REVIEWER],
|
|
32
|
-
"quick_fix" => %w[ENGINEER REVIEWER],
|
|
33
|
-
"exploration" => %w[RESEARCHER]
|
|
34
|
-
}
|
|
35
|
-
},
|
|
36
|
-
"rails_feature_spec" => {
|
|
37
|
-
"name" => "Rails Feature Spec",
|
|
38
|
-
"description" => "Feature-spec-first orchestration with planning and review emphasis.",
|
|
39
|
-
"keywords" => %w[rails feature specification acceptance criteria],
|
|
40
|
-
"workflow_templates" => {
|
|
41
|
-
"feature" => %w[PLANNER RESEARCHER UI_ENGINEER ENGINEER QA_TESTER REVIEWER KNOWLEDGE_MANAGER],
|
|
42
|
-
"bugfix" => %w[PLANNER INCIDENT_RESPONDER ENGINEER QA_TESTER REVIEWER],
|
|
43
|
-
"refactor" => %w[PLANNER RESEARCHER ENGINEER QA_TESTER REVIEWER],
|
|
44
|
-
"quick_fix" => %w[ENGINEER REVIEWER],
|
|
45
|
-
"exploration" => %w[RESEARCHER]
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
}.freeze
|
|
49
|
-
|
|
50
|
-
module_function
|
|
51
|
-
|
|
52
|
-
def all
|
|
53
|
-
PROFILES
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
def fetch(name)
|
|
57
|
-
PROFILES[name.to_s.downcase] || PROFILES["generic"]
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
def infer(context = {})
|
|
61
|
-
text = [context["task"], context["design_spec"], context["stack"], context["framework"]]
|
|
62
|
-
.compact.join(" ").downcase
|
|
63
|
-
return "generic" if text.empty?
|
|
64
|
-
|
|
65
|
-
return "rails_standard" if includes_any_keyword?(text, PROFILES["rails_standard"]["keywords"])
|
|
66
|
-
|
|
67
|
-
"generic"
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
def includes_any_keyword?(text, keywords)
|
|
71
|
-
keywords.any? { |keyword| text.include?(keyword) }
|
|
72
|
-
end
|
|
73
|
-
end
|
|
74
|
-
end
|