site-agent-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/README.md +689 -0
  2. package/dist/auth/credentialStore.js +62 -0
  3. package/dist/auth/inbox.js +193 -0
  4. package/dist/auth/profile.js +379 -0
  5. package/dist/auth/runner.js +1124 -0
  6. package/dist/backend/dashboardData.js +194 -0
  7. package/dist/backend/runArtifacts.js +48 -0
  8. package/dist/backend/runRepository.js +93 -0
  9. package/dist/bin.js +2 -0
  10. package/dist/cli/backfillSiteChecks.js +143 -0
  11. package/dist/cli/run.js +309 -0
  12. package/dist/cli/trade.js +69 -0
  13. package/dist/config.js +199 -0
  14. package/dist/core/agentProfiles.js +55 -0
  15. package/dist/core/aggregateReport.js +382 -0
  16. package/dist/core/audit.js +30 -0
  17. package/dist/core/customTaskSuite.js +148 -0
  18. package/dist/core/evaluator.js +217 -0
  19. package/dist/core/executor.js +788 -0
  20. package/dist/core/fallbackReport.js +335 -0
  21. package/dist/core/formHeuristics.js +411 -0
  22. package/dist/core/gameplaySummary.js +164 -0
  23. package/dist/core/interaction.js +202 -0
  24. package/dist/core/pageState.js +201 -0
  25. package/dist/core/planner.js +1669 -0
  26. package/dist/core/processSubmissionBatch.js +204 -0
  27. package/dist/core/runAuditJob.js +170 -0
  28. package/dist/core/runner.js +2352 -0
  29. package/dist/core/siteBrief.js +107 -0
  30. package/dist/core/siteChecks.js +1526 -0
  31. package/dist/core/taskDirectives.js +279 -0
  32. package/dist/core/taskHeuristics.js +263 -0
  33. package/dist/dashboard/client.js +1256 -0
  34. package/dist/dashboard/contracts.js +95 -0
  35. package/dist/dashboard/narrative.js +277 -0
  36. package/dist/dashboard/server.js +458 -0
  37. package/dist/dashboard/theme.js +888 -0
  38. package/dist/index.js +84 -0
  39. package/dist/llm/client.js +188 -0
  40. package/dist/paystack/account.js +123 -0
  41. package/dist/paystack/client.js +100 -0
  42. package/dist/paystack/index.js +13 -0
  43. package/dist/paystack/test-paystack.js +83 -0
  44. package/dist/paystack/transfer.js +138 -0
  45. package/dist/paystack/types.js +74 -0
  46. package/dist/paystack/webhook.js +121 -0
  47. package/dist/prompts/browserAgent.js +124 -0
  48. package/dist/prompts/reviewer.js +71 -0
  49. package/dist/reporting/clickReplay.js +290 -0
  50. package/dist/reporting/html.js +930 -0
  51. package/dist/reporting/markdown.js +238 -0
  52. package/dist/reporting/template.js +1141 -0
  53. package/dist/schemas/types.js +361 -0
  54. package/dist/submissions/customTasks.js +196 -0
  55. package/dist/submissions/html.js +770 -0
  56. package/dist/submissions/model.js +56 -0
  57. package/dist/submissions/publicUrl.js +76 -0
  58. package/dist/submissions/service.js +74 -0
  59. package/dist/submissions/store.js +37 -0
  60. package/dist/submissions/types.js +65 -0
  61. package/dist/trade/engine.js +241 -0
  62. package/dist/trade/evm/erc20.js +44 -0
  63. package/dist/trade/extractor.js +148 -0
  64. package/dist/trade/policy.js +35 -0
  65. package/dist/trade/session.js +31 -0
  66. package/dist/trade/types.js +107 -0
  67. package/dist/trade/validator.js +148 -0
  68. package/dist/utils/files.js +59 -0
  69. package/dist/utils/log.js +24 -0
  70. package/dist/utils/playwrightCompat.js +14 -0
  71. package/dist/utils/time.js +3 -0
  72. package/dist/wallet/provider.js +345 -0
  73. package/dist/wallet/relay.js +129 -0
  74. package/dist/wallet/wallet.js +178 -0
  75. package/docs/01-installation.md +134 -0
  76. package/docs/02-running-your-first-audit.md +136 -0
  77. package/docs/03-configuration.md +233 -0
  78. package/docs/04-how-the-agent-thinks.md +41 -0
  79. package/docs/05-extending-personas-and-tasks.md +42 -0
  80. package/docs/06-hardening-for-production.md +92 -0
  81. package/package.json +60 -0
@@ -0,0 +1,382 @@
1
+ import path from "node:path";
2
+ import { config } from "../config.js";
3
+ import { renderHtmlReport } from "../reporting/html.js";
4
+ import { renderMarkdownReport } from "../reporting/markdown.js";
5
+ import { resolveRunDir, writeJson, writeText } from "../utils/files.js";
6
/**
 * Accessibility impact labels ordered from least to most severe.
 * The array index is used as the severity rank when two impacts are compared.
 * Frozen so the shared ranking cannot be mutated by accident.
 */
const IMPACT_ORDER = Object.freeze(["minor", "moderate", "serious", "critical"]);
/**
 * Site-check coverage statuses ordered from weakest to strongest.
 * The array index is used as the per-category score when coverage is totalled.
 * Frozen so the shared ranking cannot be mutated by accident.
 */
const COVERAGE_ORDER = Object.freeze(["blocked", "inferred", "verified"]);
8
/**
 * Rounds a score to the nearest integer and clamps it to the 1-10 range.
 *
 * @param {number} value - Raw score.
 * @returns {number} Integer between 1 and 10. Input that rounds to NaN
 *   (NaN, undefined, non-numeric strings) yields 1 instead of leaking NaN.
 */
function clampScore(value) {
    const rounded = Math.round(value);
    // Math.min/Math.max propagate NaN, so guard before clamping.
    if (Number.isNaN(rounded)) {
        return 1;
    }
    return Math.min(10, Math.max(1, rounded));
}
11
/**
 * Computes the arithmetic mean of the given scores and passes it through clampScore.
 *
 * @param {number[]} values - Scores to average.
 * @returns {number} The clamped mean, or 1 when the list is empty.
 */
function average(values) {
    const count = values.length;
    if (count === 0) {
        return 1;
    }
    let total = 0;
    values.forEach((entry) => {
        total += entry;
    });
    return clampScore(total / count);
}
17
/**
 * Builds a comparison key from free text: lower-cased, with every run of
 * characters that are neither letters nor digits collapsed to a single space,
 * and surrounding whitespace removed.
 *
 * Unicode letter/number classes are used so non-Latin text keeps its content
 * instead of collapsing to an empty key.
 *
 * @param {string} value - Text to normalise.
 * @returns {string} The normalised key (may be empty).
 */
function normalizeKey(value) {
    return value.toLowerCase().replace(/[^\p{L}\p{N}]+/gu, " ").trim();
}
20
/**
 * Trims the text and makes sure it ends with sentence punctuation.
 *
 * @param {string} value - Text to finish.
 * @returns {string} "" for blank input; otherwise the trimmed text, with a
 *   period appended unless it already ends in ".", "!" or "?".
 */
function ensureSentence(value) {
    const text = value.trim();
    if (text.length === 0) {
        return "";
    }
    const alreadyTerminated = [".", "!", "?"].some((mark) => text.endsWith(mark));
    if (alreadyTerminated) {
        return text;
    }
    return `${text}.`;
}
27
/**
 * Maps an overall score to the short phrase used in the report summary.
 *
 * @param {number} score - Overall score.
 * @returns {string} "smooth" (>= 8), "mostly okay" (>= 6), "mixed" (>= 4),
 *   otherwise "frustrating".
 */
function overallFeeling(score) {
    // Bands are checked from the highest threshold down; the first hit wins.
    const bands = [
        [8, "smooth"],
        [6, "mostly okay"],
        [4, "mixed"]
    ];
    const band = bands.find(([minimum]) => score >= minimum);
    return band ? band[1] : "frustrating";
}
39
/**
 * Tells whether a weakness line matches one of the evaluator / timeout /
 * quota / HTTP 429 / run-budget phrases, as opposed to other weakness text.
 *
 * @param {string} value - Weakness text.
 * @returns {boolean} True when the text matches one of those phrases.
 */
function isInternalWeakness(value) {
    // "429" is bounded on both sides so numbers such as "1429" do not count as rate-limit errors.
    return /model evaluator did not finish|request timed out|current quota|\b429\b|run budget/i.test(value);
}
42
/**
 * Formats the attribution label for an agent run.
 *
 * @param {{label: string, profileLabel?: string}} agentRun - Agent run descriptor.
 * @returns {string} "label (profileLabel)" when a profile label is set, otherwise just the label.
 */
function formatAgentRunSource(agentRun) {
    const { label, profileLabel } = agentRun;
    if (!profileLabel) {
        return label;
    }
    return `${label} (${profileLabel})`;
}
45
/**
 * Converts a task status code into the verb phrase used in summaries.
 *
 * @param {string} status - Task status code.
 * @returns {string} "succeeded", "partially succeeded", or "failed" for
 *   "failed" and any other value.
 */
function formatTaskStatus(status) {
    if (status === "success") {
        return "succeeded";
    }
    if (status === "partial_success") {
        return "partially succeeded";
    }
    // "failed" and every unrecognised status read as a failure.
    return "failed";
}
56
/**
 * Writes the sentence(s) summarising accepted task outcomes across the panel.
 *
 * @param {Array<{name: string, status: string}>} taskResults - Aggregated task results.
 * @returns {string} "" when there are no tasks; otherwise the totals followed
 *   by a "Per task" list covering at most the first five tasks.
 */
function buildTaskOutcomeSummary(taskResults) {
    if (taskResults.length === 0) {
        return "";
    }
    let successCount = 0;
    let partialCount = 0;
    for (const task of taskResults) {
        if (task.status === "success") {
            successCount += 1;
        }
        else if (task.status === "partial_success") {
            partialCount += 1;
        }
    }
    // formatTaskStatus labels every other status as "failed", so the failed
    // total is everything that is left; this keeps totals and the per-task list in agreement.
    const failedCount = taskResults.length - successCount - partialCount;
    const perTask = taskResults
        .slice(0, 5)
        .map((task) => `${task.name} ${formatTaskStatus(task.status)}`)
        .join("; ");
    return ensureSentence(`Accepted task outcomes across the panel: ${successCount} succeeded, ${partialCount} partially succeeded, and ${failedCount} failed.${perTask ? ` Per task: ${perTask}.` : ""}`);
}
69
/**
 * De-duplicates text items by their normalised key and returns the most
 * frequently repeated ones.
 *
 * @param {string[]} items - Raw items; those whose key is empty are ignored.
 * @param {number} limit - Maximum number of items to return.
 * @returns {string[]} First-seen wording of each distinct item, ordered by
 *   occurrence count (descending) and then by first appearance.
 */
function summarizeRankedItems(items, limit) {
    const tally = new Map();
    for (const [position, text] of items.entries()) {
        const key = normalizeKey(text);
        if (!key) {
            continue;
        }
        const seen = tally.get(key);
        if (seen) {
            seen.count += 1;
        }
        else {
            tally.set(key, { count: 1, sample: text, firstSeen: position });
        }
    }
    const ranked = [...tally.values()];
    ranked.sort((a, b) => {
        if (a.count !== b.count) {
            return b.count - a.count;
        }
        return a.firstSeen - b.firstSeen;
    });
    return ranked.slice(0, limit).map(({ sample }) => sample);
}
88
/**
 * Returns whichever of two impact labels ranks higher in IMPACT_ORDER.
 *
 * @param {string|null|undefined} left - Current impact.
 * @param {string|null|undefined} right - Candidate impact.
 * @returns {string|null|undefined} `right` only when it ranks strictly higher;
 *   otherwise `left`. Falsy or unknown labels rank below every known label.
 */
function chooseImpact(left, right) {
    const rankOf = (impact) => (impact ? IMPACT_ORDER.indexOf(impact) : -1);
    if (rankOf(right) > rankOf(left)) {
        return right;
    }
    return left;
}
93
/**
 * Totals the coverage rank of every category in a site-checks record.
 *
 * @param {{coverage: Record<string, {status: string}>}} siteChecks - Site-check record.
 * @returns {number} Sum of each category's index in COVERAGE_ORDER
 *   (a status not in the list contributes -1).
 */
function coverageScore(siteChecks) {
    let total = 0;
    for (const category of Object.values(siteChecks.coverage)) {
        total += COVERAGE_ORDER.indexOf(category.status);
    }
    return total;
}
96
/**
 * Picks the site-check record with the highest coverage score among the agent
 * results; on a tie the earliest result wins.
 *
 * Results that carry no `siteChecks` are skipped rather than causing a
 * TypeError. When no result has site checks, a placeholder record is returned
 * in which every measurement is empty and each coverage category is marked
 * "blocked" or "inferred".
 *
 * @param {Array<{siteChecks?: object}>} results - Completed agent results.
 * @returns {object} The chosen site-check record or the placeholder.
 */
function pickBestSiteChecks(results) {
    const candidates = results.filter((result) => Boolean(result?.siteChecks));
    const best = [...candidates].sort((left, right) => coverageScore(right.siteChecks) - coverageScore(left.siteChecks))[0];
    if (best) {
        return best.siteChecks;
    }
    return {
        generatedAt: new Date().toISOString(),
        // Optional chaining on siteChecks: reaching this point means it may be missing.
        baseUrl: results[0]?.siteChecks?.baseUrl ?? "",
        finalResolvedUrl: null,
        coverage: {
            performance: { status: "blocked", summary: "Performance checks were unavailable in the aggregate run.", evidence: [], blockers: [] },
            seo: { status: "blocked", summary: "SEO checks were unavailable in the aggregate run.", evidence: [], blockers: [] },
            uiux: { status: "inferred", summary: "UI and UX findings rely on the aggregate interaction evidence.", evidence: [], blockers: [] },
            security: { status: "blocked", summary: "Security checks were unavailable in the aggregate run.", evidence: [], blockers: [] },
            technicalHealth: { status: "inferred", summary: "Technical health relies on the aggregate runtime evidence.", evidence: [], blockers: [] },
            mobileOptimization: { status: "blocked", summary: "Mobile checks were unavailable in the aggregate run.", evidence: [], blockers: [] },
            contentQuality: { status: "blocked", summary: "Content checks were unavailable in the aggregate run.", evidence: [], blockers: [] },
            cro: { status: "inferred", summary: "CRO findings rely on the aggregate interaction evidence.", evidence: [], blockers: [] }
        },
        performance: { desktop: null, mobile: null, failedRequestCount: 0, imageFailureCount: 0, apiFailureCount: 0, navigationErrorCount: 0, stalledInteractionCount: 0, evidence: [] },
        seo: {
            robotsTxt: { url: "", ok: false, statusCode: null, note: "Unavailable." },
            sitemap: { url: "", ok: false, statusCode: null, note: "Unavailable." },
            brokenLinkCount: 0,
            checkedLinkCount: 0,
            brokenLinks: [],
            evidence: []
        },
        security: { https: false, secureTransportVerified: false, initialStatusCode: null, securityHeaders: [], missingHeaders: [], evidence: [] },
        technicalHealth: { framework: null, consoleErrorCount: 0, consoleWarningCount: 0, pageErrorCount: 0, apiFailureCount: 0, evidence: [] },
        mobileOptimization: { desktop: null, mobile: null, responsiveVerdict: "blocked", evidence: [] },
        contentQuality: { readabilityScore: null, readabilityLabel: "Blocked", wordCount: 0, longParagraphCount: 0, mediaCount: 0, evidence: [] },
        cro: { ctaCount: 0, primaryCtas: [], formCount: 0, submitControlCount: 0, trustSignalCount: 0, evidence: [] }
    };
}
128
/**
 * Merges the accessibility findings of several agent runs.
 *
 * Violations sharing an id are combined: node counts are added together and
 * the higher impact (per chooseImpact) is kept. The merged list is ordered by
 * node count (descending), then by id.
 *
 * @param {Array<{accessibility: {error?: string, violations: object[]}}>} results - Completed agent results.
 * @returns {{violations: object[], error?: string}} Merged violations, plus
 *   an `error` note when at least one run's audit reported an error.
 */
function mergeAccessibility(results) {
    const byId = new Map();
    let failedAudits = 0;
    results.forEach(({ accessibility }) => {
        if (accessibility.error) {
            failedAudits += 1;
        }
        for (const violation of accessibility.violations) {
            const merged = byId.get(violation.id);
            if (!merged) {
                // Copy so the caller's violation objects are never mutated.
                byId.set(violation.id, { ...violation });
                continue;
            }
            merged.nodes += violation.nodes;
            merged.impact = chooseImpact(merged.impact ?? null, violation.impact ?? null) ?? undefined;
        }
    });
    const violations = [...byId.values()];
    violations.sort((a, b) => b.nodes - a.nodes || a.id.localeCompare(b.id));
    const summary = { violations };
    if (failedAudits > 0) {
        summary.error = `Accessibility auditing reported issues in ${failedAudits} of ${results.length} completed agent runs.`;
    }
    return summary;
}
150
/**
 * Combines per-agent task outcomes into one aggregated entry per task name.
 *
 * Task names are taken from each result's `report.task_results`. For every
 * name, the status reported by each agent (report status first, raw task
 * status as fallback) is tallied and reduced to a panel status:
 *   - "success" when at least one agent succeeded and none failed or partially succeeded;
 *   - "failed" when every perspective failed;
 *   - "partial_success" otherwise.
 *
 * @param {object[]} results - Completed agent results (`agentRun`, `report`, `taskResults`).
 * @returns {{taskResults: object[], syntheticRuns: object[]}} Report-facing
 *   task entries and matching synthetic run records with empty history.
 */
function aggregateTaskResults(results) {
    const uniqueNames = new Set();
    for (const result of results) {
        for (const task of result.report.task_results) {
            uniqueNames.add(task.name);
        }
    }
    const taskResults = [];
    const syntheticRuns = [];
    for (const taskName of uniqueNames) {
        // One entry per agent that knows this task, from its report or its raw run data.
        const perspectives = [];
        for (const result of results) {
            const reportTask = result.report.task_results.find((task) => task.name === taskName) ?? null;
            const rawTask = result.taskResults.find((task) => task.name === taskName) ?? null;
            if (reportTask || rawTask) {
                perspectives.push({ agentRun: result.agentRun, reportTask, rawTask });
            }
        }
        const tally = { success: 0, partial_success: 0, failed: 0 };
        for (const { reportTask, rawTask } of perspectives) {
            const observed = reportTask?.status ?? rawTask?.status;
            if (observed === "success") {
                tally.success += 1;
            }
            else if (observed === "partial_success") {
                tally.partial_success += 1;
            }
            else if (observed === "failed") {
                tally.failed += 1;
            }
        }
        let status;
        if (tally.success > 0 && tally.failed === 0 && tally.partial_success === 0) {
            status = "success";
        }
        else if (tally.failed === perspectives.length) {
            status = "failed";
        }
        else {
            status = "partial_success";
        }
        // Prefer the report's evidence lines; fall back to a single reason line per agent.
        const attributed = [];
        for (const { agentRun, reportTask, rawTask } of perspectives) {
            const reportEvidence = reportTask?.evidence ?? [];
            if (reportEvidence.length > 0) {
                attributed.push(...reportEvidence.map((item) => `${formatAgentRunSource(agentRun)}: ${item}`));
                continue;
            }
            const fallbackReason = reportTask?.reason ?? rawTask?.reason;
            if (fallbackReason) {
                attributed.push(`${formatAgentRunSource(agentRun)}: ${fallbackReason}`);
            }
        }
        const evidence = attributed.slice(0, 8);
        const withUrl = perspectives.find((entry) => entry.rawTask?.finalUrl);
        const withTitle = perspectives.find((entry) => entry.rawTask?.finalTitle);
        const finalUrl = withUrl?.rawTask?.finalUrl ?? "";
        const finalTitle = withTitle?.rawTask?.finalTitle ?? "";
        const reason = `${tally.success} success, ${tally.partial_success} partial, and ${tally.failed} failed across ${perspectives.length} agent perspectives.`;
        taskResults.push({ name: taskName, status, reason, evidence });
        syntheticRuns.push({ name: taskName, status, finalUrl, finalTitle, history: [], reason });
    }
    return { taskResults, syntheticRuns };
}
204
/**
 * Adds up the gameplay summaries reported by the individual agent runs.
 *
 * @param {Array<{report: {gameplay_summary?: object}}>} results - Completed agent results.
 * @returns {object|undefined} Combined round counters, "confirmed by any run"
 *   flags, a one-sentence summary and up to eight evidence lines; undefined
 *   when no run reported a gameplay summary.
 */
function aggregateGameplaySummary(results) {
    const reported = [];
    for (const result of results) {
        const summary = result.report.gameplay_summary;
        if (summary) {
            reported.push(summary);
        }
    }
    if (reported.length === 0) {
        return undefined;
    }
    const sumOf = (field) => reported.reduce((running, summary) => running + summary[field], 0);
    const roundsRequested = sumOf("roundsRequested");
    const roundsRecorded = sumOf("roundsRecorded");
    const wins = sumOf("wins");
    const losses = sumOf("losses");
    const draws = sumOf("draws");
    const inconclusiveRounds = sumOf("inconclusiveRounds");
    // A flag counts as confirmed when any single perspective confirmed it.
    const howToPlayConfirmed = reported.some((summary) => summary.howToPlayConfirmed);
    const replayConfirmed = reported.some((summary) => summary.replayConfirmed);
    const evidence = reported.flatMap((summary) => summary.evidence).slice(0, 8);
    const summary = `Across ${reported.length} gameplay perspective(s), the run recorded ${roundsRecorded}/${roundsRequested} requested rounds: ${wins} wins, ${losses} losses, ${draws} draws, and ${inconclusiveRounds} inconclusive round(s).`;
    return {
        roundsRequested,
        roundsRecorded,
        wins,
        losses,
        draws,
        inconclusiveRounds,
        howToPlayConfirmed,
        replayConfirmed,
        summary,
        evidence
    };
}
233
/**
 * Builds the panel-level report and its supporting artifacts from the
 * completed agent results of one submission.
 *
 * @param {object} submission - Submission record; `url` and `failedAgentCount` are read here.
 * @param {object[]} results - Completed agent results.
 * @returns {{report: object, taskRuns: object[], accessibility: object, siteChecks: object, rawEvents: object[]}}
 *   The aggregate report, synthetic task runs, merged accessibility findings,
 *   chosen site checks, and a raw event list (one batch summary event followed
 *   by one event per agent result).
 */
function buildAggregateReport(submission, results) {
    const completedAgentCount = results.length;
    const { failedAgentCount } = submission;
    // Top six most repeated items across all perspectives.
    const rankAcross = (pick) => summarizeRankedItems(results.flatMap(pick), 6);
    const meanOf = (pick) => average(results.map(pick));
    const strengths = rankAcross((result) => result.report.strengths);
    const weaknesses = rankAcross((result) => result.report.weaknesses);
    const topFixes = rankAcross((result) => result.report.top_fixes);
    const accessibility = mergeAccessibility(results);
    const siteChecks = pickBestSiteChecks(results);
    const { taskResults, syntheticRuns } = aggregateTaskResults(results);
    const gameplaySummary = aggregateGameplaySummary(results);
    const overallScore = meanOf((result) => result.report.overall_score);
    // The summary quotes the first weakness that is not flagged by isInternalWeakness.
    const primaryWeakness = weaknesses.find((item) => !isInternalWeakness(item));
    const taskOutcomeSummary = buildTaskOutcomeSummary(taskResults);
    const perspectiveWord = `perspective${completedAgentCount === 1 ? "" : "s"}`;
    const summaryParts = [
        `I checked ${submission.url} from ${completedAgentCount} visitor ${perspectiveWord}, and overall the experience felt ${overallFeeling(overallScore)} at ${overallScore}/10.`,
        taskOutcomeSummary
    ];
    if (gameplaySummary) {
        summaryParts.push(ensureSentence(gameplaySummary.summary));
    }
    if (primaryWeakness) {
        summaryParts.push(ensureSentence(primaryWeakness));
    }
    if (failedAgentCount > 0) {
        summaryParts.push(`${failedAgentCount} perspective${failedAgentCount === 1 ? "" : "s"} failed before the visit could fully finish.`);
    }
    const report = {
        overall_score: overallScore,
        summary: summaryParts.filter(Boolean).join(" "),
        scores: {
            clarity: meanOf((result) => result.report.scores.clarity),
            navigation: meanOf((result) => result.report.scores.navigation),
            trust: meanOf((result) => result.report.scores.trust),
            friction: meanOf((result) => result.report.scores.friction),
            conversion_readiness: meanOf((result) => result.report.scores.conversion_readiness),
            accessibility_basics: meanOf((result) => result.report.scores.accessibility_basics)
        },
        strengths,
        weaknesses,
        task_results: taskResults,
        top_fixes: topFixes
    };
    if (gameplaySummary) {
        report.gameplay_summary = gameplaySummary;
    }
    const batchEvent = {
        type: "batch_summary",
        time: new Date().toISOString(),
        completedAgentCount,
        failedAgentCount,
        note: `Batch task panel finished with ${completedAgentCount} completed agent runs and ${failedAgentCount} failed agent runs.`
    };
    const agentEvents = results.map(({ agentRun, runId, report: agentReport }) => ({
        type: "agent_batch_result",
        time: agentRun.completedAt ?? new Date().toISOString(),
        agentId: agentRun.id,
        agentLabel: agentRun.label,
        profileLabel: agentRun.profileLabel,
        personaName: agentRun.personaName,
        runId,
        overallScore: agentReport.overall_score,
        note: agentReport.summary
    }));
    return {
        report,
        taskRuns: syntheticRuns,
        accessibility,
        siteChecks,
        rawEvents: [batchEvent, ...agentEvents]
    };
}
300
/**
 * Creates the aggregate ("panel") run for a submission: resolves a run
 * directory, builds the aggregate report, and writes the JSON, HTML and
 * Markdown artifacts into that directory.
 *
 * Files written: inputs.json, raw-events.json, task-results.json,
 * accessibility.json, site-checks.json, report.json, report.html, report.md.
 *
 * @param {object} submission - Submission record (url, tasks, agent counts, agent runs, ...).
 * @param {object[]} results - Completed agent results to aggregate.
 * @returns {{runDir: string, runId: string, report: object, taskResults: object[], accessibility: object, siteChecks: object}}
 *   Location and contents of the aggregate run.
 */
export function createAggregateRun(submission, results) {
    const aggregateStartedAt = submission.startedAt ?? new Date().toISOString();
    const runDir = resolveRunDir(submission.url);
    // The run id is the run directory's own name.
    const runId = path.basename(runDir);
    const { customTasks } = submission;
    let aggregatePersona;
    if (customTasks.length > 0) {
        const remainder = customTasks.length > 1 ? ` + ${customTasks.length - 1} more` : "";
        aggregatePersona = `Task panel: ${customTasks[0]}${remainder}`;
    }
    else {
        aggregatePersona = `${submission.agentCount}-agent task panel`;
    }
    const { report, taskRuns, accessibility, siteChecks, rawEvents } = buildAggregateReport(submission, results);
    const timeZone = config.deviceTimezone;
    const inputs = {
        baseUrl: submission.url,
        persona: aggregatePersona,
        instructionText: submission.instructionText,
        instructionFileName: submission.instructionFileName,
        headed: submission.headed,
        mobile: submission.mobile,
        ignoreHttpsErrors: submission.ignoreHttpsErrors,
        llmProvider: config.llmProvider,
        model: config.model,
        startedAt: aggregateStartedAt,
        maxRunDurationMs: config.maxSessionDurationMs,
        maxRunDurationSeconds: Math.round(config.maxSessionDurationMs / 1000),
        browserExecutionBudgetMs: config.browserExecutionBudgetMs,
        reportingReserveMs: config.reportingReserveMs,
        maxRunDurationClamped: false,
        deviceTimezone: config.deviceTimezone,
        synchronizedTimezone: timeZone,
        batchRole: "aggregate",
        parentSubmissionId: submission.id,
        agentCount: submission.agentCount,
        completedAgentCount: submission.completedAgentCount,
        failedAgentCount: submission.failedAgentCount,
        customTasks: submission.customTasks,
        aggregatedFromRunIds: results.map(({ runId: sourceRunId }) => sourceRunId),
        // Each agent run is recorded with its runDir set to null.
        agentRuns: submission.agentRuns.map((agentRun) => ({ ...agentRun, runDir: null }))
    };
    const jsonArtifacts = [
        ["inputs.json", inputs],
        ["raw-events.json", rawEvents],
        ["task-results.json", taskRuns],
        ["accessibility.json", accessibility],
        ["site-checks.json", siteChecks],
        ["report.json", report]
    ];
    for (const [fileName, payload] of jsonArtifacts) {
        writeJson(path.join(runDir, fileName), payload);
    }
    // Fields shared by both renderers; only the HTML report also receives runId.
    const sharedHead = {
        website: submission.url,
        persona: aggregatePersona,
        acceptedTasks: submission.customTasks,
        instructionText: submission.instructionText,
        report,
        taskResults: taskRuns,
        accessibility,
        siteChecks,
        rawEvents
    };
    const sharedTail = {
        startedAt: aggregateStartedAt,
        mobile: submission.mobile,
        timeZone
    };
    writeText(path.join(runDir, "report.html"), renderHtmlReport({ ...sharedHead, runId, ...sharedTail }));
    writeText(path.join(runDir, "report.md"), renderMarkdownReport({ ...sharedHead, ...sharedTail }));
    return {
        runDir,
        runId,
        report,
        taskResults: taskRuns,
        accessibility,
        siteChecks
    };
}
@@ -0,0 +1,30 @@
1
+ import axeCore from "axe-core";
2
+ import { AccessibilityResultSchema } from "../schemas/types.js";
3
/**
 * Injects axe-core into the page, runs it against the document, and returns
 * the violations in the schema's compact form.
 *
 * Never rejects: any failure (script injection, evaluation, schema mapping)
 * is reported through the result's `error` field with an empty violation list.
 *
 * @param {object} page - Browser page exposing `addScriptTag` and `evaluate`.
 * @returns {Promise<object>} Result parsed by AccessibilityResultSchema.
 */
export async function runAccessibilityAudit(page) {
    // Keep only the fields the schema needs; `nodes` becomes a count.
    const summarizeViolation = ({ id, impact, description, help, nodes }) => ({
        id,
        impact: impact ?? null,
        description,
        help,
        nodes: nodes.length
    });
    try {
        await page.addScriptTag({ content: axeCore.source });
        const results = await page.evaluate(`(async () => {
            const axe = globalThis.axe;
            if (!axe) {
                throw new Error("axe-core did not load into the page context.");
            }

            return axe.run(document);
        })()`);
        const violations = results.violations.map((violation) => summarizeViolation(violation));
        return AccessibilityResultSchema.parse({ violations });
    }
    catch (error) {
        const message = error instanceof Error ? error.message : "Unknown accessibility audit error";
        return AccessibilityResultSchema.parse({
            error: message,
            violations: []
        });
    }
}
@@ -0,0 +1,148 @@
1
+ import { TaskSuiteSchema } from "../schemas/types.js";
2
+ import { classifyTaskText, inferGameplayConfigFromTask } from "./taskHeuristics.js";
3
+ import { parseTaskDirectives } from "./taskDirectives.js";
4
/**
 * Patterns that mark a submitted line as a run-wide constraint rather than an
 * action task. A line is a constraint when any one pattern matches it.
 * The list is frozen so the shared configuration cannot be mutated by accident;
 * none of the patterns use the `g`/`y` flags, so `.test()` keeps no state between calls.
 */
const RUN_WIDE_CONSTRAINT_PATTERNS = Object.freeze([
    // Prohibitions: the line starts with "do not", "don't", "never", "avoid" or "without".
    /^(?:do not|don't|never|avoid|without)\b/i,
    // Stop conditions: the line starts with "stop before".
    /^stop before\b/i,
    // Upper limits: the line starts with "no more than" or "at most".
    /^(?:no more than|at most)\b/i,
    // Reuse requirements: "use the same ...", "keep using the same ...", "reuse same ...".
    /^(?:use|keep using|reuse)\s+(?:the\s+)?same\b/i,
    // Creation limits: the line starts with "only create" or "create only".
    /^(?:only create|create only)\b/i,
    // Identity limits anywhere in the line: "single/same/one profile/account/identity".
    /\b(?:single|same|one)\s+(?:profile|account|identity)\b/i
]);
12
/**
 * Builds a short display name of the form "Task N: <label>" for a task.
 *
 * The label is the task's first sentence, limited to eight words and 72
 * characters (an ellipsis marks any truncation). A sentence ends at ".", "!"
 * or "?" only when followed by whitespace or the end of the text, so dots
 * inside URLs, file names and decimals ("example.com", "v1.2") do not cut the
 * label short.
 *
 * @param {string} task - Full task text.
 * @param {number} index - 1-based task position used in the prefix.
 * @returns {string} The display name.
 */
function buildTaskName(task, index) {
    // Fall back to the whole task when the first clause is empty (e.g. the task starts with punctuation).
    const firstClause = task.split(/[.!?]+(?=\s|$)/, 1)[0]?.trim() || task;
    const words = firstClause.split(/\s+/).filter(Boolean);
    const compactLabel = words.length <= 8 ? firstClause : `${words.slice(0, 8).join(" ").trimEnd()}...`;
    // 69 characters plus the three-character ellipsis keeps the label within 72.
    const shortened = compactLabel.length > 72 ? `${compactLabel.slice(0, 69).trimEnd()}...` : compactLabel;
    return `Task ${index}: ${shortened}`;
}
19
/**
 * Removes a leading "Task N:" prefix (and the whitespace after it) from a task name.
 *
 * @param {string} value - Task name, with or without the prefix.
 * @returns {string} The remaining text, trimmed.
 */
function stripTaskPrefix(value) {
    const prefix = /^Task \d+:\s*/.exec(value);
    const remainder = prefix ? value.slice(prefix[0].length) : value;
    return remainder.trim();
}
22
/**
 * Builds the persona name for a task-driven visit from the first task's label.
 *
 * @param {string[]} tasks - Tasks the persona will attempt.
 * @returns {string} "Task-focused visitor: <first task>", followed by
 *   " + N more" when there are additional tasks. An empty list uses the
 *   placeholder label "submitted task" with no "more" suffix.
 */
function buildPersonaName(tasks) {
    const firstTask = stripTaskPrefix(buildTaskName(tasks[0] ?? "submitted task", 1));
    // Only count extra tasks when there are any; an empty list must not produce "+ -1 more".
    const extraCount = tasks.length - 1;
    if (extraCount <= 0) {
        return `Task-focused visitor: ${firstTask}`;
    }
    return `Task-focused visitor: ${firstTask} + ${extraCount} more`;
}
29
/**
 * Tells whether a submitted line is a run-wide constraint, i.e. whether its
 * trimmed text matches any pattern in RUN_WIDE_CONSTRAINT_PATTERNS.
 *
 * @param {string} task - Submitted task line.
 * @returns {boolean} True when at least one pattern matches.
 */
function isRunWideConstraint(task) {
    const text = task.trim();
    for (const pattern of RUN_WIDE_CONSTRAINT_PATTERNS) {
        if (pattern.test(text)) {
            return true;
        }
    }
    return false;
}
32
/**
 * Splits submitted lines into action tasks and run-wide constraints,
 * preserving the original order within each group.
 *
 * @param {string[]} tasks - Submitted task lines.
 * @returns {{actionTasks: string[], runWideConstraints: string[]}} The two groups.
 */
function partitionTaskDirectives(tasks) {
    const actionTasks = [];
    const runWideConstraints = [];
    tasks.forEach((line) => {
        const bucket = isRunWideConstraint(line) ? runWideConstraints : actionTasks;
        bucket.push(line);
    });
    return { actionTasks, runWideConstraints };
}
44
/**
 * Merges a short list of tasks into a single "; "-joined task when together
 * they describe a form flow.
 *
 * The tasks are merged only when all of the following hold:
 *   - there are between two and six tasks;
 *   - parseTaskDirectives returns at least one directive for every task;
 *   - at least one directive fills a visible form, types into a field, or submits.
 * Otherwise the input array is returned as-is.
 *
 * @param {string[]} tasks - Action tasks in submission order.
 * @returns {string[]} Either the original array or a one-element array holding the joined task.
 */
function collapseSequentialFormFlowTasks(tasks) {
    const formFlowActions = ["fill_visible_form", "type_field", "submit"];
    const withinRange = tasks.length >= 2 && tasks.length <= 6;
    if (!withinRange) {
        return tasks;
    }
    const parsed = tasks.map((task) => parseTaskDirectives(task));
    const everyTaskParsed = parsed.every((directives) => directives.length !== 0);
    if (!everyTaskParsed) {
        return tasks;
    }
    const touchesForm = parsed
        .flat()
        .some(({ action }) => formFlowActions.includes(action));
    if (!touchesForm) {
        return tasks;
    }
    return [tasks.join("; ")];
}
59
/**
 * Tells whether a task describes a Naira/crypto exchange flow: it must mention
 * a "buy flow" or "sell flow", the Naira currency ("naira"/"NGN"), and a
 * crypto term ("crypto...", "token(s)" or "wallet(s)").
 *
 * All alternatives are grouped so the word boundaries apply to each of them;
 * this prevents matches inside unrelated words (e.g. "ngn" at the end of a
 * longer word, or "token" inside "tokenizer").
 *
 * @param {string} task - Task text.
 * @returns {boolean} True when all three parts are present.
 */
function isNairaCryptoExchangeTask(task) {
    const mentionsFlow = /\b(?:buy|sell)\s+flow\b/i.test(task);
    const mentionsNaira = /\b(?:nairas?|ngn)\b/i.test(task);
    // "crypto\w*" keeps compound words such as "cryptocurrency" matching.
    const mentionsCrypto = /\b(?:crypto\w*|tokens?|wallets?)\b/i.test(task);
    return mentionsFlow && mentionsNaira && mentionsCrypto;
}
62
/**
 * Tells whether a task asks for exchange-flow monitoring evidence: it must
 * mention "exchange-flow monitoring" or "monitoring evidence" and also the
 * word "event(s)" or "log(s)".
 *
 * The event/log alternatives are grouped so both word boundaries apply to
 * each, which stops words such as "catalog", "dialog" or "eventual" from matching.
 *
 * @param {string} task - Task text.
 * @returns {boolean} True when both parts are present.
 */
function isExchangeMonitoringTask(task) {
    const mentionsMonitoring = /\bexchange-flow monitoring\b|\bmonitoring evidence\b/i.test(task);
    const mentionsSignals = /\b(?:events?|logs?)\b/i.test(task);
    return mentionsMonitoring && mentionsSignals;
}
65
/**
 * Builds a task suite (persona plus task definitions) from user-submitted task lines.
 *
 * Lines recognised as run-wide constraints are separated from action tasks and
 * attached to the persona as extra constraints; consecutive form-flow tasks may
 * be merged into one. When no action task remains, the original lines are all
 * used as tasks and no run-wide constraint notes are added.
 *
 * Each task receives a success condition and a list of failure signals chosen
 * from the task's detected characteristics (exchange flow, exchange monitoring,
 * gameplay rounds, engagement, instruction focus).
 *
 * @param {string[]} tasks - Submitted task lines.
 * @returns {object} The suite as parsed by TaskSuiteSchema.
 */
export function buildCustomTaskSuite(tasks) {
    const { actionTasks, runWideConstraints } = partitionTaskDirectives(tasks);
    const collapsedActionTasks = collapseSequentialFormFlowTasks(actionTasks);
    const effectiveTasks = collapsedActionTasks.length > 0 ? collapsedActionTasks : tasks;
    // Constraint notes only make sense when there is at least one action task to constrain.
    const globalConstraintNotes = actionTasks.length > 0
        ? runWideConstraints.map((constraint) => `Run-wide user constraint: ${constraint}`)
        : [];
    const hasGlobalConstraints = globalConstraintNotes.length > 0;
    const constraintSuffix = hasGlobalConstraints ? ` Run-wide constraints: ${runWideConstraints.join(" | ")}` : "";
    const intent = `Visit the supplied website like a realistic, attentive human who first understands what the site appears to be for, then completes only the submitted tasks. Let the submitted task list set your priorities instead of any predefined agent profile. Requested tasks: ${effectiveTasks.join(" | ")}${constraintSuffix}`;
    const baseConstraints = [
        "First understand what the supplied site appears to help users do before attempting the accepted tasks.",
        "Use the provided task list as the primary navigation plan for the visit.",
        "Treat any run-wide user constraint as a hard guardrail that cannot be violated to satisfy a later task.",
        "Do not assume a predefined agent personality or profile beyond what the submitted tasks require.",
        "Use the site understanding only to interpret the accepted tasks, not to invent new ones.",
        "Use only visible page information and honest interaction evidence.",
        "Behave like a realistic first-time visitor rather than a rigid script runner.",
        "When a task contains explicit named controls or ordered action verbs, follow those literally in order. Only choose a reasonable visible path when the user did not specify the next step.",
        "Confirm whether the requested destination, content, or state actually appears before claiming success.",
        "If a task stalls, dead-ends, loops, or becomes misleading, verify that before moving on.",
        "Do not enter personal, financial, or secret information unless the accepted task explicitly requires harmless test wallet, bank, or amount values for a flow QA check.",
        "For exchange-flow QA tasks, stop before making any real Naira payment, crypto transfer, purchase, or irreversible payout.",
        "Use harmless test input only when typing is necessary to evaluate a public interaction safely.",
        "Record blockers honestly when a task requires login, payment, invite-only access, or other gated access.",
        "Give a direct, evidence-based account of which requested tasks worked, partially worked, or failed."
    ];
    // The first matching characteristic decides the success condition.
    const describeSuccess = ({ isExchange, isMonitoring, gameplay, taskProfile }) => {
        if (isExchange) {
            return "The agent can safely exercise the requested exchange direction with harmless test values, verify the amount preview, required destination details, payment/address display card, copy behavior, and stop before any real money or crypto is transferred.";
        }
        if (isMonitoring) {
            return "The agent can report whether relevant console logs, debug messages, analytics events, or visible emitted-event evidence appeared for the important exchange-flow stages.";
        }
        if (gameplay?.rounds) {
            return `The agent can reach a fair playable state, record ${gameplay.rounds} visible round outcome(s), and honestly report the wins, losses, or draws that actually appeared.`;
        }
        if (taskProfile.engagement) {
            return "The agent can follow the visible path, meaningfully use the live controls it reaches, and honestly report what visibly happened.";
        }
        if (taskProfile.instructionFocus) {
            return "The agent can confirm the visible rules or instructions, honestly report what they said, and verify whether the site reached a playable state.";
        }
        return "The agent can attempt this requested task on the live site, describe the visible outcome honestly, and confirm whether the expected destination, content, or state appeared.";
    };
    // Generic signals first, then one group per detected characteristic, in a fixed order.
    const collectFailureSignals = ({ isExchange, isMonitoring, gameplay, taskProfile }) => {
        const signals = [
            "the site does not provide a clear visible path to complete the requested task",
            "the journey stalls, loops, errors, or becomes misleading before the task can be evaluated",
            "the task requires login, payment, or private information before a safe stopping point",
            "the expected page, content, or success state never clearly appears",
            "the final output cannot clearly explain what happened when attempting the task"
        ];
        if (hasGlobalConstraints) {
            signals.push("the run violates a run-wide user constraint while attempting this task");
        }
        if (taskProfile.engagement) {
            signals.push("the run never produces clear evidence of meaningful interaction with the live controls");
        }
        if (gameplay?.rounds) {
            signals.push("the gameplay path never reaches a clearly playable state", "the requested wins, losses, draws, or round outcomes cannot be visibly confirmed");
        }
        if (taskProfile.instructionFocus) {
            signals.push("the visible rules, instructions, or how-to-play guidance cannot be clearly confirmed");
        }
        if (isExchange) {
            signals.push("the flow does not request the required wallet or bank destination before showing payment details", "the quoted conversion preview, account card, business wallet address, or copy control cannot be confirmed", "the flow attempts to require or trigger a real payment or crypto transfer during the test");
        }
        if (isMonitoring) {
            signals.push("no relevant monitoring log, emitted event, debug message, or console evidence can be observed for the requested exchange stages");
        }
        return signals;
    };
    const suiteTasks = effectiveTasks.map((task, position) => {
        const taskProfile = classifyTaskText(task);
        const gameplay = inferGameplayConfigFromTask(task);
        const traits = {
            taskProfile,
            gameplay,
            isExchange: isNairaCryptoExchangeTask(task),
            isMonitoring: isExchangeMonitoringTask(task)
        };
        const definition = {
            name: buildTaskName(task, position + 1),
            goal: task,
            success_condition: describeSuccess(traits),
            failure_signals: collectFailureSignals(traits)
        };
        if (gameplay) {
            definition.gameplay = gameplay;
        }
        return definition;
    });
    return TaskSuiteSchema.parse({
        persona: {
            name: buildPersonaName(effectiveTasks),
            intent,
            constraints: [...baseConstraints, ...globalConstraintNotes]
        },
        tasks: suiteTasks
    });
}