agentf 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/agentf +8 -0
- data/lib/agentf/agent_policy.rb +54 -0
- data/lib/agentf/agents/architect.rb +67 -0
- data/lib/agentf/agents/base.rb +53 -0
- data/lib/agentf/agents/debugger.rb +75 -0
- data/lib/agentf/agents/designer.rb +69 -0
- data/lib/agentf/agents/documenter.rb +58 -0
- data/lib/agentf/agents/explorer.rb +65 -0
- data/lib/agentf/agents/reviewer.rb +64 -0
- data/lib/agentf/agents/security.rb +84 -0
- data/lib/agentf/agents/specialist.rb +68 -0
- data/lib/agentf/agents/tester.rb +79 -0
- data/lib/agentf/agents.rb +19 -0
- data/lib/agentf/cli/architecture.rb +83 -0
- data/lib/agentf/cli/arg_parser.rb +50 -0
- data/lib/agentf/cli/code.rb +165 -0
- data/lib/agentf/cli/install.rb +112 -0
- data/lib/agentf/cli/memory.rb +393 -0
- data/lib/agentf/cli/metrics.rb +103 -0
- data/lib/agentf/cli/router.rb +111 -0
- data/lib/agentf/cli/update.rb +204 -0
- data/lib/agentf/commands/architecture.rb +183 -0
- data/lib/agentf/commands/debugger.rb +238 -0
- data/lib/agentf/commands/designer.rb +179 -0
- data/lib/agentf/commands/explorer.rb +208 -0
- data/lib/agentf/commands/memory_reviewer.rb +186 -0
- data/lib/agentf/commands/metrics.rb +272 -0
- data/lib/agentf/commands/security_scanner.rb +98 -0
- data/lib/agentf/commands/tester.rb +232 -0
- data/lib/agentf/commands.rb +17 -0
- data/lib/agentf/context_builder.rb +35 -0
- data/lib/agentf/installer.rb +580 -0
- data/lib/agentf/mcp/server.rb +310 -0
- data/lib/agentf/memory.rb +530 -0
- data/lib/agentf/packs.rb +74 -0
- data/lib/agentf/service/providers.rb +158 -0
- data/lib/agentf/tools/component_spec.rb +28 -0
- data/lib/agentf/tools/error_analysis.rb +19 -0
- data/lib/agentf/tools/file_match.rb +21 -0
- data/lib/agentf/tools/test_template.rb +17 -0
- data/lib/agentf/tools.rb +12 -0
- data/lib/agentf/version.rb +5 -0
- data/lib/agentf/workflow_contract.rb +158 -0
- data/lib/agentf/workflow_engine.rb +424 -0
- data/lib/agentf.rb +87 -0
- metadata +164 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "json"

module Agentf
  module Commands
    # Records workflow quality metrics as tagged episodic memories and
    # aggregates them into summaries and provider-parity reports.
    #
    # Metric records are persisted via +store_episode+ with the context field
    # holding a JSON-encoded metrics hash, and later recovered by filtering
    # recent memories on WORKFLOW_METRICS_TAG.
    class Metrics
      NAME = "metrics"

      # Tag used to mark (and later find) metric episodes in memory.
      WORKFLOW_METRICS_TAG = "workflow_metric"

      # Command manifest consumed by the command router.
      def self.manifest
        {
          "name" => NAME,
          "description" => "Record and summarize workflow quality metrics for provider parity and success tracking.",
          "commands" => [
            { "name" => "record_workflow", "type" => "function" },
            { "name" => "summary", "type" => "function" },
            { "name" => "provider_parity", "type" => "function" }
          ]
        }
      end

      # @param memory [#store_episode, #get_recent_memories, nil] memory backend;
      #   defaults to a RedisMemory for the project
      # @param project [String, nil] project name; defaults to the configured one
      def initialize(memory: nil, project: nil)
        @project = project || Agentf.config.project_name
        @memory = memory || Agentf::Memory::RedisMemory.new(project: @project)
      end

      # Extracts metrics from a finished workflow state hash and persists them
      # as a tagged episode. Returns a status hash; never raises.
      def record_workflow(workflow_state)
        metrics = extract_metrics(workflow_state)

        @memory.store_episode(
          type: "success",
          title: metric_title(metrics),
          description: metric_description(metrics),
          context: metric_context(metrics),
          tags: metric_tags(metrics),
          agent: "WORKFLOW_ENGINE",
          code_snippet: ""
        )

        { "status" => "recorded", "metrics" => metrics }
      rescue StandardError => e
        { "status" => "error", "error" => e.message }
      end

      # Aggregates the most recent metric records into success/failure rates
      # plus per-provider and per-workflow-type breakdowns.
      def summary(limit: 100)
        records = metric_records(limit: limit)
        return empty_summary if records.empty?

        total = records.length
        completed = records.count { |m| %w[completed approved].include?(m["status"]) }
        approved = records.count { |m| m["approved"] == true }
        failed = records.count { |m| m["status"] == "failed" }
        security_issue_runs = records.count { |m| m["security_issues"].to_i > 0 }

        {
          "project" => @project,
          "total_runs" => total,
          "completion_rate" => ratio(completed, total),
          "approval_rate" => ratio(approved, total),
          "failure_rate" => ratio(failed, total),
          "security_issue_rate" => ratio(security_issue_runs, total),
          "avg_agents_executed" => average(records.map { |m| m["agents_executed"].to_i }),
          "contract_adherence_rate" => ratio(records.count { |m| m["contract_blocked"] != true }, total),
          "contract_blocked_runs" => records.count { |m| m["contract_blocked"] == true },
          "policy_violation_rate" => ratio(records.count { |m| m["policy_violation_count"].to_i > 0 }, total),
          "providers" => provider_breakdown(records),
          "workflow_types" => workflow_breakdown(records),
          "top_contract_violations" => top_contract_violations(records)
        }
      rescue StandardError => e
        { "error" => e.message }
      end

      # Compares OPENCODE vs COPILOT runs on completion, approval, security,
      # and agent-count metrics. Gaps are (opencode - copilot).
      def provider_parity(limit: 100)
        records = metric_records(limit: limit)
        grouped = records.group_by { |m| m["provider"].to_s.upcase }

        opencode = grouped.fetch("OPENCODE", [])
        copilot = grouped.fetch("COPILOT", [])

        {
          "project" => @project,
          "providers_present" => grouped.keys.sort,
          "opencode_runs" => opencode.length,
          "copilot_runs" => copilot.length,
          "completion_rate_gap" => (completion_rate(opencode) - completion_rate(copilot)).round(4),
          "approval_rate_gap" => metric_gap(opencode, copilot, "approved", expected: true),
          "security_issue_rate_gap" => security_issue_gap(opencode, copilot),
          # Rounded to 4 places like every other gap figure (was previously raw).
          "avg_agents_gap" => (average(opencode.map { |m| m["agents_executed"].to_i }) -
                               average(copilot.map { |m| m["agents_executed"].to_i })).round(4)
        }
      rescue StandardError => e
        { "error" => e.message }
      end

      private

      # Flattens a workflow state hash into the flat metric record we persist.
      def extract_metrics(workflow_state)
        results = Array(workflow_state["results"])
        status = infer_status(results)

        {
          "provider" => workflow_state["provider"],
          "pack" => workflow_state["pack"],
          "workflow_type" => workflow_state["workflow_type"],
          "status" => status,
          "approved" => reviewer_approved?(results),
          "agents_executed" => Array(workflow_state["completed_agents"]).length,
          "error_count" => results.count { |entry| entry.dig("result", "error") },
          "security_issues" => security_issue_count(results),
          "contract_blocked" => workflow_state.dig("workflow_contract", "blocked") == true,
          "contract_violations" => collect_contract_violations(workflow_state),
          "policy_violation_count" => Array(workflow_state["policy_violations"]).length,
          "task" => workflow_state["task"].to_s
        }
      end

      # Violation codes from every contract event in the workflow state.
      def collect_contract_violations(workflow_state)
        Array(workflow_state.dig("workflow_contract", "events"))
          .flat_map { |event| Array(event["violations"]).map { |v| v["code"] } }
      end

      # "failed" when any agent errored; otherwise "approved"/"completed"
      # depending on the reviewer verdict.
      def infer_status(results)
        return "failed" if results.any? { |entry| entry.dig("result", "error") }

        reviewer_approved?(results) ? "approved" : "completed"
      end

      def reviewer_approved?(results)
        review = results.find { |entry| entry["agent"] == "REVIEWER" }
        review&.dig("result", "approved") == true
      end

      def security_issue_count(results)
        security_result = results.find { |entry| entry["agent"] == "SECURITY" }
        Array(security_result&.dig("result", "issues")).length
      end

      def metric_title(metrics)
        "Workflow metrics: #{metrics['provider']} #{metrics['workflow_type']}"
      end

      # Compact single-line key=value description for the episode.
      def metric_description(metrics)
        [
          "status=#{metrics['status']}",
          "approved=#{metrics['approved']}",
          "errors=#{metrics['error_count']}",
          "security_issues=#{metrics['security_issues']}",
          "agents=#{metrics['agents_executed']}"
        ].join(" ")
      end

      # JSON payload stored in the episode's context field; this is what
      # metric_records parses back out.
      def metric_context(metrics)
        {
          "provider" => metrics["provider"],
          "pack" => metrics["pack"],
          "workflow_type" => metrics["workflow_type"],
          "status" => metrics["status"],
          "approved" => metrics["approved"],
          "agents_executed" => metrics["agents_executed"],
          "error_count" => metrics["error_count"],
          "security_issues" => metrics["security_issues"],
          "contract_blocked" => metrics["contract_blocked"],
          "contract_violations" => metrics["contract_violations"],
          "policy_violation_count" => metrics["policy_violation_count"],
          "task" => metrics["task"]
        }.to_json
      end

      def metric_tags(metrics)
        [
          WORKFLOW_METRICS_TAG,
          "provider:#{metrics['provider'].to_s.downcase}",
          "workflow:#{metrics['workflow_type']}"
        ]
      end

      # Five most frequent contract-violation codes across the records.
      def top_contract_violations(records)
        counts = Hash.new(0)
        records.each do |record|
          Array(record["contract_violations"]).each { |code| counts[code] += 1 }
        end
        counts.sort_by { |(_code, count)| -count }.first(5).to_h
      end

      # Recent memories that carry the metrics tag, parsed back into metric
      # hashes; entries with missing/invalid JSON context are dropped.
      def metric_records(limit: 100)
        @memory.get_recent_memories(limit: limit)
               .select { |m| Array(m["tags"]).include?(WORKFLOW_METRICS_TAG) }
               .filter_map { |m| parse_context_json(m["context"]) }
      end

      def parse_context_json(value)
        return nil if value.to_s.strip.empty?

        JSON.parse(value)
      rescue JSON::ParserError
        nil
      end

      # Zeroed summary returned when no metric records exist yet.
      def empty_summary
        {
          "project" => @project,
          "total_runs" => 0,
          "completion_rate" => 0.0,
          "approval_rate" => 0.0,
          "failure_rate" => 0.0,
          "security_issue_rate" => 0.0,
          "avg_agents_executed" => 0.0,
          "contract_adherence_rate" => 0.0,
          "contract_blocked_runs" => 0,
          "policy_violation_rate" => 0.0,
          "providers" => {},
          "workflow_types" => {},
          "top_contract_violations" => {}
        }
      end

      # part/total as a float rounded to 4 places; 0.0 for an empty total.
      def ratio(part, total)
        return 0.0 if total.to_i <= 0

        (part.to_f / total.to_f).round(4)
      end

      def average(values)
        return 0.0 if values.empty?

        (values.sum.to_f / values.length.to_f).round(4)
      end

      # Per-provider run counts and rates.
      def provider_breakdown(records)
        records.group_by { |m| m["provider"].to_s.upcase }.transform_values do |items|
          total = items.length
          {
            "runs" => total,
            "completion_rate" => completion_rate(items),
            "approval_rate" => ratio(items.count { |m| m["approved"] == true }, total),
            "security_issue_rate" => ratio(items.count { |m| m["security_issues"].to_i > 0 }, total)
          }
        end
      end

      def completion_rate(records)
        ratio(records.count { |m| %w[completed approved].include?(m["status"]) }, records.length)
      end

      def workflow_breakdown(records)
        records.group_by { |m| m["workflow_type"].to_s }.transform_values(&:length)
      end

      # Rate difference for a boolean field between two record sets.
      def metric_gap(a_records, b_records, field, expected:)
        a_rate = ratio(a_records.count { |m| m[field] == expected }, a_records.length)
        b_rate = ratio(b_records.count { |m| m[field] == expected }, b_records.length)
        (a_rate - b_rate).round(4)
      end

      def security_issue_gap(a_records, b_records)
        a_rate = ratio(a_records.count { |m| m["security_issues"].to_i > 0 }, a_records.length)
        b_rate = ratio(b_records.count { |m| m["security_issues"].to_i > 0 }, b_records.length)
        (a_rate - b_rate).round(4)
      end
    end
  end
end
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Agentf
  module Commands
    # Performs lightweight security scanning for workflows: flags strings that
    # look like leaked secrets and instructions that look like prompt injection.
    class SecurityScanner
      NAME = "security"

      # Command manifest consumed by the command router.
      def self.manifest
        {
          "name" => NAME,
          "description" => "Scan for secrets, detect prompt injection, and provide recommendations.",
          "commands" => [
            { "name" => "scan", "type" => "function" },
            { "name" => "best_practices", "type" => "function" }
          ]
        }
      end

      # Label => regex pairs for common secret formats.
      DEFAULT_PATTERNS = {
        "AWS Access Key" => /AKIA[0-9A-Z]{16}/,
        "Generic API Key" => /(api|secret|token)_?(key|token)?\s*[:=]\s*[A-Za-z0-9_-]{16,}/i,
        "Private Key" => /-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----/,
        "Password Assignment" => /password\s*[:=]\s*['\"][^'\"]+['\"]/i
      }.freeze

      # Phrases commonly seen in prompt-injection attempts.
      PROMPT_INJECTION_PATTERNS = [
        /print\s+(?:all\s+)?env/i,
        /show\s+(?:me\s+)?(?:your|the)\s+environment/i,
        /exfiltrate/i,
        /ignore\s+previous\s+instructions/i
      ].freeze

      # Hardening recommendations returned whenever a scan finds issues.
      BEST_PRACTICES = [
        "Use secret scanning tools such as Gitleaks or TruffleHog before committing.",
        "Enable GitHub Secret Scanning Push Protection to block accidental leaks.",
        "Strip sensitive headers/body content from agent logs before persisting.",
        "Sandbox agent file-system access and avoid storing raw secrets in episodic memory.",
        "Harden prompts against injection by refusing to reveal environment variables or credentials."
      ].freeze

      # @param patterns [Hash{String => Regexp}] secret patterns to match against
      def initialize(patterns: DEFAULT_PATTERNS)
        @patterns = patterns
      end

      # Scans the task string plus all strings reachable in +context+.
      # Returns a hash with the findings, a count-based score, and (when
      # anything was found) the best-practice recommendations.
      def scan(task:, context: {})
        corpus = [task, *flatten_context(context)].compact.join("\n")

        findings = detect_secret_patterns(corpus)
        findings += detect_prompt_injection(corpus)

        {
          "issues" => findings,
          "score" => findings.size,
          "recommendations" => findings.empty? ? [] : BEST_PRACTICES
        }
      end

      def best_practices
        BEST_PRACTICES
      end

      private

      # Recursively collects every key and leaf value in +value+ as strings.
      def flatten_context(value)
        if value.is_a?(Hash)
          value.flat_map { |key, nested| [key.to_s] + flatten_context(nested) }
        elsif value.is_a?(Array)
          value.flat_map { |nested| flatten_context(nested) }
        else
          [value.to_s]
        end
      end

      # One finding per secret pattern that matches anywhere in +text+.
      def detect_secret_patterns(text)
        @patterns.filter_map do |label, pattern|
          next unless text.match?(pattern)

          {
            "issue" => "Potential Secret: #{label}",
            "detail" => "Input matched sensitive pattern #{pattern.source}."
          }
        end
      end

      # One finding per injection pattern that matches anywhere in +text+.
      def detect_prompt_injection(text)
        PROMPT_INJECTION_PATTERNS
          .select { |pattern| text.match?(pattern) }
          .map do |pattern|
            {
              "issue" => "Possible Prompt Injection",
              "detail" => "Detected instruction matching #{pattern.source}"
            }
          end
      end
    end
  end
end
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# frozen_string_literal: true

require "open3"
require "json"
require "pathname"

module Agentf
  module Commands
    # Detects the project's test framework, generates skeleton unit tests,
    # and shells out to run test suites.
    class Tester
      NAME = "tester"

      # Command manifest consumed by the command router.
      def self.manifest
        {
          "name" => NAME,
          "description" => "Generate and run tests for project files.",
          "commands" => [
            { "name" => "detect_framework", "type" => "function" },
            { "name" => "generate_unit_tests", "type" => "function" },
            { "name" => "run_tests", "type" => "function" }
          ]
        }
      end

      # Skeleton pytest file; filled in via Kernel#format %<...>s fields.
      PYTEST_TEMPLATE = <<~PYTHON
        import pytest
        from %<module>s import %<import_name>s


        class Test%<class_name>s:
            """Tests for %<class_name>s"""

            def test_%<method_name>s_success(self):
                """Test %<method_name>s with valid input"""
                # Arrange
                %<arrange_code>s

                # Act
                result = %<act_code>s

                # Assert
                assert result is not None
                %<assertions>s

            def test_%<method_name>s_invalid_input(self):
                """Test %<method_name>s with invalid input"""
                # Arrange
                %<arrange_code_invalid>s

                # Act & Assert
                with pytest.raises(%<exception_type>s):
                    %<act_code_invalid>s
      PYTHON

      # Skeleton RSpec file; filled in via Kernel#format %<...>s fields.
      RSPEC_TEMPLATE = <<~RUBY
        require_relative '%<module>s'

        describe %<class_name>s do
          describe '#%<method_name>s' do
            it 'works with valid input' do
              # Arrange
              %<arrange_code>s

              # Act
              result = subject.%<method_name>s(input)

              # Assert
              expect(result).not_to be_nil
              %<assertions>s
            end

            it 'raises on invalid input' do
              # Arrange
              %<arrange_code_invalid>s

              # Act & Assert
              expect { subject.%<method_name>s(invalid_input) }.to raise_error(%<exception_type>s)
            end
          end
        end
      RUBY

      # @param base_path [String, nil] project root; defaults to configured path
      def initialize(base_path: nil)
        @base_path = base_path || Agentf.config.base_path
      end

      # Detect the testing framework from project config files, falling back
      # to the source file's extension, then to "rspec".
      def detect_framework(file_path: nil)
        base = Pathname.new(@base_path)

        # RSpec (Ruby): listed in the Gemfile.
        gemfile = base / "Gemfile"
        return "rspec" if gemfile.exist? && gemfile.read.include?("rspec")

        # pytest (Python): a pytest.ini is definitive on its own (previously
        # it was ignored unless pyproject.toml also mentioned pytest).
        return "pytest" if (base / "pytest.ini").exist?

        pyproject = base / "pyproject.toml"
        return "pytest" if pyproject.exist? && pyproject.read.include?("pytest")

        # Jest/Vitest (JS/TS): declared in package.json dependencies.
        package_json = base / "package.json"
        if package_json.exist?
          pkg = JSON.parse(package_json.read)
          deps = pkg.fetch("dependencies", {}).merge(pkg.fetch("devDependencies", {}))
          return "vitest" if deps.key?("vitest")
          return "jest" if deps.key?("jest")
        end

        # Fall back to the file extension.
        return "rspec" if file_path&.end_with?(".rb")
        return "pytest" if file_path&.end_with?(".py")
        return "jest" if file_path&.end_with?(".js", ".ts", ".jsx", ".tsx")

        "rspec"
      end

      # Generate a skeleton unit-test file for +source_file+.
      # Returns an Agentf::Tools::TestTemplate (empty when the source is missing).
      def generate_unit_tests(source_file, test_class: nil)
        framework = detect_framework(file_path: source_file)
        source_path = Pathname.new(@base_path) + source_file

        unless source_path.exist?
          return Agentf::Tools::TestTemplate.new(
            test_file: "",
            test_code: "# Source file not found",
            framework: framework
          )
        end

        Agentf::Tools::TestTemplate.new(
          test_file: test_file_for(source_file, framework),
          test_code: test_code_for(source_file, framework, test_class),
          framework: framework,
          dependencies: framework_dependencies(framework)
        )
      end

      # Execute the test suite (or a single test file) and capture output.
      # Never raises; failures are reported in the returned hash.
      def run_tests(test_path: nil, test_file: nil, verbose: true)
        path = test_path || test_file
        framework = detect_framework(file_path: path)

        cmd = case framework
              when "rspec"
                ["bundle", "exec", "rspec", *(["-f", "documentation"] if verbose), path].compact
              when "pytest"
                ["pytest", *(["-v"] if verbose), path].compact
              when "jest"
                ["npx", "jest", *(["--verbose"] if verbose), path].compact
              when "vitest"
                ["npx", "vitest", *(["--verbose"] if verbose), path].compact
              end

        stdout, stderr, status = Open3.capture3(*cmd, chdir: @base_path)

        {
          "passed" => status.success?,
          "returncode" => status.exitstatus,
          "stdout" => stdout,
          "stderr" => stderr,
          "framework" => framework
        }
      rescue StandardError => e
        { "passed" => false, "error" => e.message, "framework" => framework }
      end

      private

      # Map a source path to the conventional test-file path for +framework+.
      def test_file_for(source_file, framework)
        case framework
        when "rspec"
          source_file.gsub(%r{^app/(.+)\.rb$}, "spec/\\1_spec.rb")
        when "pytest"
          # Anchored to the extension; the old gsub rewrote every ".py"
          # occurrence anywhere in the path.
          source_file.sub(/\.py$/, "_test.py")
        when "jest", "vitest"
          source_file.sub(/\.(js|ts|jsx|tsx)$/, ".test.\\1")
        else
          "#{source_file}.test"
        end
      end

      # Framework-specific skeleton test code.
      def test_code_for(source_file, framework, test_class)
        case framework
        when "rspec" then generate_rspec(source_file, test_class)
        when "pytest" then generate_pytest(source_file, test_class)
        else "# Tests for #{source_file}\n"
        end
      end

      # Gems/packages the generated test file depends on.
      def framework_dependencies(framework)
        case framework
        when "rspec" then ["rspec"]
        when "pytest" then ["pytest"]
        else []
        end
      end

      def generate_rspec(source_file, test_class)
        module_name = source_file.gsub(%r{^app/(.+)\.rb$}, "\\1").gsub("/", "::").chomp("_controller")
        class_name = test_class || module_name.split("::").last

        format(RSPEC_TEMPLATE,
               # delete_suffix only strips a trailing ".rb" (gsub clobbered
               # ".rb" anywhere in the path).
               module: source_file.delete_suffix(".rb"),
               class_name: class_name,
               method_name: "my_method",
               arrange_code: "input = 'valid'",
               act_code: "described_class.new.method(input)",
               assertions: "expect(result).to be_truthy",
               arrange_code_invalid: "invalid_input = nil",
               act_code_invalid: "described_class.new.method(invalid_input)",
               exception_type: "StandardError")
      end

      def generate_pytest(source_file, test_class)
        module_name = source_file.gsub("/", ".").chomp(".py")
        class_name = test_class || "TestClass"

        format(PYTEST_TEMPLATE,
               module: module_name,
               import_name: class_name,
               class_name: class_name,
               method_name: "my_method",
               arrange_code: "input = something",
               act_code: "#{class_name}().my_method(input)",
               assertions: "# assert expected behavior",
               arrange_code_invalid: "invalid_input = None",
               act_code_invalid: "#{class_name}().my_method(invalid_input)",
               exception_type: "ValueError")
      end
    end
  end
end
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true

# Load command implementations. Each file defines one class under
# Agentf::Commands (Explorer, Tester, Debugger, Designer, SecurityScanner,
# MemoryReviewer, Metrics, Architecture).
require_relative "commands/explorer"
require_relative "commands/tester"
require_relative "commands/debugger"
require_relative "commands/designer"
require_relative "commands/security_scanner"
require_relative "commands/memory_reviewer"
require_relative "commands/metrics"
require_relative "commands/architecture"

module Agentf
  module Commands
    # Namespace only; all command classes are loaded by the requires above.
  end
end
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Agentf
  # Assembles per-agent memory context for a workflow step by querying the
  # memory backend with a tiny keyword-derived embedding of the task text.
  class ContextBuilder
    # @param memory [#get_agent_context] memory backend to query
    def initialize(memory:)
      @memory = memory
    end

    # Fetches relevant memories for +agent+ given the current workflow state.
    # Falls back to an empty context when the memory backend raises.
    def build(agent:, workflow_state:, limit: 8)
      @memory.get_agent_context(
        agent: agent,
        task_type: workflow_state["workflow_type"],
        query_embedding: simple_embedding(workflow_state["task"]),
        limit: limit
      )
    rescue StandardError
      { "agent" => agent, "intent" => [], "memories" => [], "similar_tasks" => [] }
    end

    private

    # Four-dimensional keyword embedding: bugfix flag, feature flag,
    # security flag, and a scaled text-length feature.
    def simple_embedding(text)
      words = text.to_s.downcase
      bugfix = words.include?("fix") || words.include?("bug")
      feature = words.include?("feature") || words.include?("add")
      security = words.include?("security")

      [
        bugfix ? 1.0 : 0.0,
        feature ? 1.0 : 0.0,
        security ? 1.0 : 0.0,
        words.length / 100.0
      ]
    end
  end
end