site-agent-pro 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +689 -0
- package/dist/auth/credentialStore.js +62 -0
- package/dist/auth/inbox.js +193 -0
- package/dist/auth/profile.js +379 -0
- package/dist/auth/runner.js +1124 -0
- package/dist/backend/dashboardData.js +194 -0
- package/dist/backend/runArtifacts.js +48 -0
- package/dist/backend/runRepository.js +93 -0
- package/dist/bin.js +2 -0
- package/dist/cli/backfillSiteChecks.js +143 -0
- package/dist/cli/run.js +309 -0
- package/dist/cli/trade.js +69 -0
- package/dist/config.js +199 -0
- package/dist/core/agentProfiles.js +55 -0
- package/dist/core/aggregateReport.js +382 -0
- package/dist/core/audit.js +30 -0
- package/dist/core/customTaskSuite.js +148 -0
- package/dist/core/evaluator.js +217 -0
- package/dist/core/executor.js +788 -0
- package/dist/core/fallbackReport.js +335 -0
- package/dist/core/formHeuristics.js +411 -0
- package/dist/core/gameplaySummary.js +164 -0
- package/dist/core/interaction.js +202 -0
- package/dist/core/pageState.js +201 -0
- package/dist/core/planner.js +1669 -0
- package/dist/core/processSubmissionBatch.js +204 -0
- package/dist/core/runAuditJob.js +170 -0
- package/dist/core/runner.js +2352 -0
- package/dist/core/siteBrief.js +107 -0
- package/dist/core/siteChecks.js +1526 -0
- package/dist/core/taskDirectives.js +279 -0
- package/dist/core/taskHeuristics.js +263 -0
- package/dist/dashboard/client.js +1256 -0
- package/dist/dashboard/contracts.js +95 -0
- package/dist/dashboard/narrative.js +277 -0
- package/dist/dashboard/server.js +458 -0
- package/dist/dashboard/theme.js +888 -0
- package/dist/index.js +84 -0
- package/dist/llm/client.js +188 -0
- package/dist/paystack/account.js +123 -0
- package/dist/paystack/client.js +100 -0
- package/dist/paystack/index.js +13 -0
- package/dist/paystack/test-paystack.js +83 -0
- package/dist/paystack/transfer.js +138 -0
- package/dist/paystack/types.js +74 -0
- package/dist/paystack/webhook.js +121 -0
- package/dist/prompts/browserAgent.js +124 -0
- package/dist/prompts/reviewer.js +71 -0
- package/dist/reporting/clickReplay.js +290 -0
- package/dist/reporting/html.js +930 -0
- package/dist/reporting/markdown.js +238 -0
- package/dist/reporting/template.js +1141 -0
- package/dist/schemas/types.js +361 -0
- package/dist/submissions/customTasks.js +196 -0
- package/dist/submissions/html.js +770 -0
- package/dist/submissions/model.js +56 -0
- package/dist/submissions/publicUrl.js +76 -0
- package/dist/submissions/service.js +74 -0
- package/dist/submissions/store.js +37 -0
- package/dist/submissions/types.js +65 -0
- package/dist/trade/engine.js +241 -0
- package/dist/trade/evm/erc20.js +44 -0
- package/dist/trade/extractor.js +148 -0
- package/dist/trade/policy.js +35 -0
- package/dist/trade/session.js +31 -0
- package/dist/trade/types.js +107 -0
- package/dist/trade/validator.js +148 -0
- package/dist/utils/files.js +59 -0
- package/dist/utils/log.js +24 -0
- package/dist/utils/playwrightCompat.js +14 -0
- package/dist/utils/time.js +3 -0
- package/dist/wallet/provider.js +345 -0
- package/dist/wallet/relay.js +129 -0
- package/dist/wallet/wallet.js +178 -0
- package/docs/01-installation.md +134 -0
- package/docs/02-running-your-first-audit.md +136 -0
- package/docs/03-configuration.md +233 -0
- package/docs/04-how-the-agent-thinks.md +41 -0
- package/docs/05-extending-personas-and-tasks.md +42 -0
- package/docs/06-hardening-for-production.md +92 -0
- package/package.json +60 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { config } from "../config.js";
|
|
2
|
+
import { generateStructured } from "../llm/client.js";
|
|
3
|
+
import { TASK_OUTCOME_ANALYST_PROMPT } from "../prompts/reviewer.js";
|
|
4
|
+
import { buildFallbackReport } from "./fallbackReport.js";
|
|
5
|
+
import { deriveGameplaySummary } from "./gameplaySummary.js";
|
|
6
|
+
import { FinalReportSchema } from "../schemas/types.js";
|
|
7
|
+
/**
 * Turns any thrown value into a single-line, colour-free message.
 *
 * @param {unknown} error - The caught value. `Error` instances contribute
 *   their `message`; anything else is converted with `String()`.
 * @returns {string} The message with ANSI colour sequences removed and
 *   whitespace collapsed, or "Unknown evaluation error" when nothing remains.
 */
function cleanErrorMessage(error) {
    const rawText = error instanceof Error ? error.message : String(error);
    // Strip terminal colour codes before collapsing whitespace so they cannot
    // leave stray characters in the final text.
    const withoutColour = rawText.replace(/\u001b\[[0-9;]*m/g, "");
    const singleLine = withoutColour.replace(/\s+/g, " ").trim();
    if (singleLine === "") {
        return "Unknown evaluation error";
    }
    return singleLine;
}
|
|
11
|
+
/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {unknown} value - Text to normalise. `null` and `undefined` are
 *   treated as empty text and other non-string values are stringified, so a
 *   missing field yields "" instead of raising a TypeError.
 * @returns {string} The normalised text (possibly empty).
 */
function normalizeText(value) {
    if (value == null) {
        return "";
    }
    const text = typeof value === "string" ? value : String(value);
    return text.replace(/\s+/g, " ").trim();
}
|
|
14
|
+
/**
 * Normalises each item, drops the ones that end up empty, removes duplicates
 * (first occurrence wins) and keeps at most `limit` entries.
 *
 * @param {string[]} items - Candidate strings.
 * @param {number} limit - Maximum number of entries to return.
 * @returns {string[]} Distinct, normalised, non-empty strings in first-seen order.
 */
function uniqueItems(items, limit) {
    const seen = new Set();
    for (const item of items) {
        const cleaned = normalizeText(item);
        if (cleaned) {
            // A Set keeps insertion order, so the first occurrence fixes the position.
            seen.add(cleaned);
        }
    }
    return Array.from(seen).slice(0, limit);
}
|
|
17
|
+
/**
 * Maps a task status code to the wording used in summaries.
 *
 * @param {string} status - Status code of a task result.
 * @returns {string} "succeeded" for "success", "partially succeeded" for
 *   "partial_success", and "failed" for "failed" or any other value.
 */
function formatTaskStatus(status) {
    if (status === "success") {
        return "succeeded";
    }
    if (status === "partial_success") {
        return "partially succeeded";
    }
    // "failed" and every unrecognised status share the same wording.
    return "failed";
}
|
|
28
|
+
/**
 * Builds the one-line "Accepted task outcomes" sentence for a run.
 *
 * The sentence has two parts: headline counts, then a per-task list covering
 * at most the first five tasks. Any status other than "success" or
 * "partial_success" is counted as failed. This matches how `formatTaskStatus`
 * labels tasks in the per-task list, so the headline counts always add up to
 * the number of tasks and never contradict the list.
 *
 * @param {Array<{name: string, status: string}>} taskResults - Task results to summarise.
 * @returns {string} The summary sentence, or "" when there are no task results.
 */
function buildTaskOutcomeSummary(taskResults) {
    if (taskResults.length === 0) {
        return "";
    }
    let successCount = 0;
    let partialCount = 0;
    for (const task of taskResults) {
        if (task.status === "success") {
            successCount += 1;
        }
        else if (task.status === "partial_success") {
            partialCount += 1;
        }
    }
    // Everything that is not a (partial) success is reported as failed.
    const failedCount = taskResults.length - successCount - partialCount;
    const perTask = taskResults
        .slice(0, 5)
        .map((task) => `${task.name} ${formatTaskStatus(task.status)}`)
        .join("; ");
    return normalizeText(`Accepted task outcomes: ${successCount} succeeded, ${partialCount} partially succeeded, and ${failedCount} failed.${perTask ? ` Per task: ${perTask}.` : ""}`);
}
|
|
41
|
+
/**
 * Prefixes a report summary with the task-outcome sentence.
 *
 * @param {string} summary - Existing summary text.
 * @param {Array<{name: string, status: string}>} taskResults - Task results to describe.
 * @returns {string} The outcome sentence followed by the summary (whitespace
 *   normalised), or the summary untouched when there is no outcome sentence.
 */
function mergeTaskOutcomesIntoSummary(summary, taskResults) {
    const outcomePrefix = buildTaskOutcomeSummary(taskResults);
    if (!outcomePrefix) {
        return summary;
    }
    return normalizeText(`${outcomePrefix} ${summary}`);
}
|
|
45
|
+
/**
 * Appends the gameplay summary sentence to a report summary unless the summary
 * already talks about wins, losses, draws or rounds.
 *
 * @param {string} summary - Existing summary text.
 * @param {{summary: string} | null | undefined} gameplaySummary - Derived gameplay summary, if any.
 * @returns {string} The original summary when there is nothing to add,
 *   otherwise the whitespace-normalised combination of both texts.
 */
function mergeGameplayIntoSummary(summary, gameplaySummary) {
    if (!gameplaySummary) {
        return summary;
    }
    const alreadyMentionsGameplay = /\b(?:wins?|loss(?:es)?|draws?|rounds?)\b/i.test(summary);
    if (alreadyMentionsGameplay) {
        return summary;
    }
    return normalizeText(`${summary} ${gameplaySummary.summary}`);
}
|
|
53
|
+
/**
 * Renders one history entry as a single evidence sentence.
 *
 * @param {object} entry - History entry with `step`, `title`, `url`,
 *   `decision` (`action`, optional `target`) and `result` (`note`, optional
 *   `visibleTextSnippet`).
 * @returns {string} Text of the form `Step N on "<title or url>": <action> -> <note>`,
 *   followed by up to 180 characters of visible text when a snippet exists.
 */
function summarizeHistoryEntry(entry) {
    const { decision, result } = entry;
    let actionLabel;
    if (decision.action === "click") {
        actionLabel = decision.target ? `click "${decision.target}"` : "click";
    }
    else if (decision.action === "type") {
        actionLabel = `type into "${decision.target || "field"}"`;
    }
    else {
        actionLabel = decision.action;
    }
    // Prefer the page title; fall back to the URL when the title is empty.
    const locationLabel = entry.title || entry.url;
    let visibleSnippet = "";
    if (result.visibleTextSnippet) {
        visibleSnippet = ` Visible text: ${result.visibleTextSnippet.slice(0, 180)}.`;
    }
    return `Step ${entry.step} on "${locationLabel}": ${actionLabel} -> ${result.note}${visibleSnippet}`;
}
|
|
63
|
+
/**
 * Scores a history entry so the most informative steps can be surfaced first.
 *
 * Weights: failed step +100, high friction +50, state changed +20,
 * elapsed time above 2500 ms +10, "extract" or "back" action +5.
 *
 * @param {object} entry - History entry with `decision` (`action`, `friction`)
 *   and `result` (`success`, `stateChanged`, `elapsedMs`).
 * @returns {number} The sum of the weights whose condition holds.
 */
function rankHistoryEntry(entry) {
    const { decision, result } = entry;
    const weightedSignals = [
        [!result.success, 100],
        [decision.friction === "high", 50],
        [result.stateChanged, 20],
        [result.elapsedMs && result.elapsedMs > 2500, 10],
        [decision.action === "extract" || decision.action === "back", 5]
    ];
    return weightedSignals.reduce((score, [applies, weight]) => (applies ? score + weight : score), 0);
}
|
|
82
|
+
/**
 * Condenses a task result into the compact record used for evaluation.
 *
 * History entries are ordered by descending `rankHistoryEntry` score, with
 * ties broken by ascending step number. The six best are rendered with
 * `summarizeHistoryEntry`. The evidence list is the task's reason followed by
 * those sentences, de-duplicated and capped at seven items. The input history
 * array is not modified.
 *
 * @param {object} taskResult - Task result with `name`, `status`, `finalUrl`,
 *   `finalTitle`, `reason` and `history`.
 * @returns {{name: *, status: *, finalUrl: *, finalTitle: *, reason: *, evidence: string[]}}
 */
function distillTaskResult(taskResult) {
    const byImportanceThenStep = (left, right) => {
        const rankGap = rankHistoryEntry(right) - rankHistoryEntry(left);
        return rankGap || left.step - right.step;
    };
    // Sort a copy so the caller's history keeps its chronological order.
    const rankedHistory = Array.from(taskResult.history).sort(byImportanceThenStep);
    const prioritizedEvidence = rankedHistory
        .slice(0, 6)
        .map((entry) => summarizeHistoryEntry(entry));
    const { name, status, finalUrl, finalTitle, reason } = taskResult;
    const evidence = uniqueItems([reason, ...prioritizedEvidence], 7);
    return { name, status, finalUrl, finalTitle, reason, evidence };
}
|
|
96
|
+
// Console text containing any of these words is kept even when the console
// level is not an error or warning.
const CONSOLE_MONITORING_SIGNAL_PATTERN = /\b(?:event|analytics|log|monitor|amount|wallet|bank|account|copy|transfer|crypto|naira|payout|payment|displayed|initiated|triggered)\b/i;
// Console levels that are always kept.
const CONSOLE_PROBLEM_LEVEL_PATTERN = /error|warn/i;
// Event types whose `note` field is surfaced verbatim (after normalisation).
const NOTE_BACKED_EVENT_TYPES = new Set([
    "navigation_error",
    "session_timeout",
    "planner_fallback",
    "runner_error",
    "storage_state_load",
    "storage_state_save_error"
]);

/**
 * Converts one raw run event into a short signal sentence, or `null` when the
 * event carries nothing worth reporting.
 *
 * Handled event types:
 * - "requestfailed": "<method> <url> failed with <failure>". Missing parts
 *   fall back to "request" / "unknown failure", and an absent URL is omitted
 *   rather than leaving a double space.
 * - "pageerror": "Page error: <text>" when text is present.
 * - "console": kept when the level looks like an error/warning or the text
 *   matches the monitoring keyword list. The label is "Console <level>: ..."
 *   or plain "Console: ..." when the event has no level.
 * - note-backed types (see NOTE_BACKED_EVENT_TYPES): the normalised note.
 *
 * @param {unknown} event - Raw event record; non-objects are ignored.
 * @returns {string | null} The signal sentence or `null`.
 */
function summarizeRawEvent(event) {
    if (!event || typeof event !== "object") {
        return null;
    }
    const record = event;
    const type = typeof record.type === "string" ? record.type : "";
    if (type === "requestfailed") {
        const method = typeof record.method === "string" ? record.method : "request";
        const url = typeof record.url === "string" ? record.url : "";
        const failure = typeof record.failure === "string" ? record.failure : "unknown failure";
        const subject = url ? `${method} ${url}` : method;
        return `${subject} failed with ${failure}`;
    }
    if (type === "pageerror") {
        const text = typeof record.text === "string" ? normalizeText(record.text) : "";
        return text ? `Page error: ${text}` : null;
    }
    if (type === "console") {
        const level = typeof record.level === "string" ? record.level : "";
        const text = typeof record.text === "string" ? normalizeText(record.text) : "";
        if (!text) {
            return null;
        }
        const isRelevant = CONSOLE_PROBLEM_LEVEL_PATTERN.test(level) || CONSOLE_MONITORING_SIGNAL_PATTERN.test(text);
        if (!isRelevant) {
            return null;
        }
        // Avoid the malformed "Console : text" label when the level is missing.
        const label = level ? `Console ${level}` : "Console";
        return `${label}: ${text}`;
    }
    if (NOTE_BACKED_EVENT_TYPES.has(type)) {
        const note = typeof record.note === "string" ? normalizeText(record.note) : "";
        return note || null;
    }
    return null;
}
|
|
127
|
+
/**
 * Assembles the JSON payload handed to the model evaluator.
 *
 * @param {object} args - The arguments passed to `evaluateRun`.
 * @param {object[]} distilledTaskResults - Output of `distillTaskResult` for each task result.
 * @param {object | null | undefined} gameplaySummary - Derived gameplay summary, if any.
 * @returns {object} The evaluator payload.
 */
function buildEvaluatorPayload(args, distilledTaskResults, gameplaySummary) {
    const countByStatus = (status) => args.taskResults.filter((task) => task.status === status).length;
    const runSignals = uniqueItems(args.rawEvents.map((event) => summarizeRawEvent(event)).filter((value) => Boolean(value)), 40);
    return {
        website: args.baseUrl,
        visitMode: args.mobile ? "mobile" : "desktop",
        persona: {
            name: args.suite.persona.name,
            intent: args.suite.persona.intent,
            constraints: args.suite.persona.constraints.slice(0, 8)
        },
        ...(args.siteBrief
            ? {
                siteUnderstanding: {
                    sitePurpose: args.siteBrief.sitePurpose,
                    summary: args.siteBrief.summary,
                    intendedUserActions: args.siteBrief.intendedUserActions,
                    evidence: args.siteBrief.evidence
                }
            }
            : {}),
        tasks: args.suite.tasks.map((task) => ({
            name: task.name,
            goal: task.goal,
            success_condition: task.success_condition,
            ...(task.gameplay ? { gameplay: task.gameplay } : {})
        })),
        runOverview: {
            taskCount: args.taskResults.length,
            successCount: countByStatus("success"),
            partialCount: countByStatus("partial_success"),
            failedCount: countByStatus("failed")
        },
        // NOTE(review): goals are paired with results by position, which
        // assumes taskResults is in the same order as suite.tasks; confirm
        // against the runner.
        acceptedTaskOutcomes: distilledTaskResults.map((taskResult, index) => ({
            name: taskResult.name,
            goal: args.suite.tasks[index]?.goal ?? taskResult.name,
            status: taskResult.status,
            reason: taskResult.reason
        })),
        taskResults: distilledTaskResults,
        runSignals,
        accessibility: args.accessibility,
        ...(gameplaySummary ? { gameplaySummary } : {})
    };
}

/**
 * Produces the final report for a run.
 *
 * When no evaluation budget remains (`timeoutMs` missing or <= 0) the fallback
 * report is returned immediately. This check runs before any distillation or
 * payload building, so an exhausted run does no wasted work and cannot fail
 * while preparing a payload that would never be sent. Otherwise the distilled
 * run data is sent through `generateStructured`, and the resulting summary is
 * enriched with the task-outcome sentence and, when available, the gameplay
 * summary. Any error raised by the model call or by schema parsing is
 * converted into a fallback report carrying the cleaned error message.
 *
 * @param {object} args - Run data: `baseUrl`, `suite`, `taskResults`,
 *   `rawEvents`, `accessibility`, plus optional `mobile`, `siteBrief`, `llm`,
 *   `timeoutMs` and `totalRunDurationMs`.
 * @returns {Promise<object>} The parsed final report, or the fallback report.
 */
export async function evaluateRun(args) {
    const fallbackWith = (fallbackReason) => buildFallbackReport({
        baseUrl: args.baseUrl,
        suite: args.suite,
        taskResults: args.taskResults,
        accessibility: args.accessibility,
        ...(args.mobile !== undefined ? { mobile: args.mobile } : {}),
        fallbackReason
    });
    if ((args.timeoutMs ?? 0) <= 0) {
        const totalRunDurationSeconds = Math.round((args.totalRunDurationMs ?? config.maxSessionDurationMs) / 1000);
        return fallbackWith(`The run exhausted its ${totalRunDurationSeconds}-second wall-clock budget before the model evaluator could start.`);
    }
    const distilledTaskResults = args.taskResults.map((taskResult) => distillTaskResult(taskResult));
    const gameplaySummary = deriveGameplaySummary({
        suite: args.suite,
        taskResults: args.taskResults
    });
    const payload = buildEvaluatorPayload(args, distilledTaskResults, gameplaySummary);
    try {
        const report = await generateStructured({
            ...(args.llm ?? {}),
            systemPrompt: TASK_OUTCOME_ANALYST_PROMPT,
            userPayload: payload,
            schemaName: "final_report",
            schema: FinalReportSchema,
            ...(args.timeoutMs !== undefined ? { timeoutMs: args.timeoutMs } : {}),
            maxRetries: 0
        });
        const baseSummary = gameplaySummary
            ? mergeGameplayIntoSummary(report.summary, gameplaySummary)
            : report.summary;
        return FinalReportSchema.parse({
            ...report,
            summary: mergeTaskOutcomesIntoSummary(baseSummary, distilledTaskResults),
            ...(gameplaySummary ? { gameplay_summary: gameplaySummary } : {})
        });
    }
    catch (error) {
        return fallbackWith(`The model evaluator did not finish cleanly within the remaining run budget: ${cleanErrorMessage(error)}`);
    }
}
|