sentinelayer-cli 0.8.11 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -5
- package/src/agents/devtestbot/config/definition.js +100 -0
- package/src/agents/devtestbot/config/system-prompt.js +92 -0
- package/src/agents/devtestbot/index.js +9 -0
- package/src/agents/devtestbot/runner.js +769 -0
- package/src/agents/devtestbot/tool.js +707 -0
- package/src/agents/jules/stream.js +2 -12
- package/src/audit/orchestrator.js +471 -114
- package/src/audit/persona-loop.js +1342 -0
- package/src/audit/registry.js +58 -2
- package/src/commands/audit.js +42 -1
- package/src/commands/legacy-args.js +32 -1
- package/src/commands/omargate.js +4 -0
- package/src/commands/session.js +417 -89
- package/src/commands/swarm.js +11 -2
- package/src/cost/history.js +41 -21
- package/src/events/schema.js +27 -1
- package/src/guide/generator.js +14 -0
- package/src/legacy-cli.js +110 -18
- package/src/prompt/generator.js +4 -16
- package/src/review/ai-review.js +95 -6
- package/src/review/dd-report-email-client.js +148 -0
- package/src/review/investor-dd-devtestbot.js +599 -0
- package/src/review/investor-dd-orchestrator.js +135 -3
- package/src/review/omargate-cache.js +285 -0
- package/src/review/omargate-orchestrator.js +605 -4
- package/src/review/persona-prompts.js +34 -1
- package/src/review/report.js +189 -4
- package/src/session/coordination-guidance.js +48 -0
- package/src/session/daemon.js +3 -2
- package/src/session/listener.js +236 -0
- package/src/session/senti-naming.js +36 -0
- package/src/session/setup-guides.js +3 -15
- package/src/session/store.js +54 -5
- package/src/session/sync.js +23 -0
- package/src/spec/generator.js +8 -10
- package/src/swarm/registry.js +20 -0
- package/src/swarm/runtime.js +139 -1
|
@@ -129,6 +129,42 @@ export function isAnonymousAgent(agent = {}) {
|
|
|
129
129
|
return idAnonymous || modelAnonymous;
|
|
130
130
|
}
|
|
131
131
|
|
|
132
|
+
/**
 * Derive a deterministic session title from a workspace path + clock.
 *
 * Carter's complaint: every CLI invocation minted an unnamed session, so the
 * web sidebar filled with hundreds of "<null>" rows that all looked like the
 * same chat re-created. The fix: when the caller doesn't pass `--title`, give
 * the session a stable label based on the codebase basename + today's date in
 * UTC, e.g. `create-sentinelayer-2026-04-28`.
 *
 * - Basename only (we never leak the absolute path).
 * - Sanitized to `[a-z0-9-]` so the title is URL-safe + dashboard-friendly.
 * - Slug is capped at 60 characters and never ends in `-`, so the title never
 *   contains a doubled dash before the date.
 * - Date is UTC ISO short form (YYYY-MM-DD) for reproducibility regardless of
 *   the host timezone.
 * - Falls back to `session-<date>` if the path has no usable basename.
 *
 * @param {string} targetPath Workspace path; only its last segment is used.
 * @param {{now?: Date}} [options] Clock override; anything that is not a valid
 *   Date falls back to the current time.
 * @returns {string} `<slug>-<YYYY-MM-DD>` or `session-<YYYY-MM-DD>`.
 */
export function deriveSessionTitle(targetPath, { now = new Date() } = {}) {
  const raw = String(targetPath || "").trim();
  // Split on both separators — Windows paths come through with backslashes
  // from path.resolve. The `path` module is deliberately not imported so the
  // function stays pure + cheap to test.
  const last = raw.split(/[/\\]+/).filter(Boolean).pop() || "";
  const slug = last
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+/, "")
    .slice(0, 60)
    // Trim the tail AFTER truncating: the 60-char cut can land right on a
    // separator, which would otherwise leave a dangling dash.
    .replace(/-+$/, "");
  const clock = now instanceof Date && !Number.isNaN(now.getTime()) ? now : new Date();
  const stamp = clock.toISOString().slice(0, 10);
  return slug ? `${slug}-${stamp}` : `session-${stamp}`;
}
|
|
167
|
+
|
|
132
168
|
/**
|
|
133
169
|
* Build the payload Senti emits as `agent_identified` when it has
|
|
134
170
|
* stepped in to name a participant. Consumers (CLI / web) render it
|
|
@@ -3,6 +3,7 @@ import path from "node:path";
|
|
|
3
3
|
import process from "node:process";
|
|
4
4
|
|
|
5
5
|
import { buildAgentsSessionGuideContent } from "../legacy-cli.js";
|
|
6
|
+
import { renderCoordinationBulletList } from "./coordination-guidance.js";
|
|
6
7
|
import { getSession } from "./store.js";
|
|
7
8
|
|
|
8
9
|
const SECTION_START_MARKER = "<!-- SENTINELAYER_SESSION_COORDINATION:START -->";
|
|
@@ -26,21 +27,8 @@ function buildSessionCoordinationSection() {
|
|
|
26
27
|
return `${SECTION_START_MARKER}
|
|
27
28
|
${SECTION_HEADING}
|
|
28
29
|
|
|
29
|
-
###
|
|
30
|
-
|
|
31
|
-
- Join if one exists: \`sl session join <id> --name <your-name>\`
|
|
32
|
-
- Read recent context: \`sl session read <id> --tail 20\`
|
|
33
|
-
|
|
34
|
-
### While Working
|
|
35
|
-
- Emit status every 5 min: \`sl session say <id> "status: <what you're doing>"\`
|
|
36
|
-
- Lock files before editing: \`sl session say <id> "lock: <file> - <intent>"\`
|
|
37
|
-
- Unlock when done: \`sl session say <id> "unlock: <file> - done"\`
|
|
38
|
-
- Post findings: \`sl session say <id> "finding: [P2] <title> in <file>:<line>"\`
|
|
39
|
-
|
|
40
|
-
### On Problems
|
|
41
|
-
- Unexpected file change? Ask: \`sl session say <id> "help: unexpected change in <file>"\`
|
|
42
|
-
- Need another agent's work? Request: \`sl session say <id> "handoff: @<agent> <description>"\`
|
|
43
|
-
- Found issues for others? Assign: \`sl session say <id> "assign: @<agent> <task>"\`
|
|
30
|
+
### Required Etiquette
|
|
31
|
+
${renderCoordinationBulletList()}
|
|
44
32
|
|
|
45
33
|
### What Not To Do
|
|
46
34
|
- Do not break your autonomous loop on unexpected file changes; ask in session first.
|
package/src/session/store.js
CHANGED
|
@@ -43,6 +43,21 @@ function normalizeNonNegativeInteger(value, fallbackValue = 0) {
|
|
|
43
43
|
return Math.floor(normalized);
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
+
/**
 * Resolve the session id to use when creating a session.
 *
 * An empty id (after `normalizeString`) yields a fresh `randomUUID()`.
 * A caller-supplied id is returned as normalized, unless it could act as a
 * path segment escape: exactly ".", containing "..", or containing a forward
 * or backward slash.
 *
 * @param {unknown} value Requested session id, if any.
 * @returns {string} The normalized id, or a newly generated UUID.
 * @throws {Error} When the id contains path traversal segments.
 */
function normalizeCreateSessionId(value) {
  const candidate = normalizeString(value);
  if (!candidate) {
    return randomUUID();
  }

  const hasSeparator = /[\\/]/.test(candidate);
  // A bare ".." is already covered by the substring check.
  const hasDotSegment = candidate === "." || candidate.includes("..");
  if (hasSeparator || hasDotSegment) {
    throw new Error("sessionId must not contain path traversal segments.");
  }

  return candidate;
}
|
|
60
|
+
|
|
46
61
|
function normalizeIsoTimestamp(value, fallbackIso = new Date().toISOString()) {
|
|
47
62
|
const normalized = normalizeString(value);
|
|
48
63
|
if (!normalized) {
|
|
@@ -148,7 +163,7 @@ function toRelativePosix(baseDir, absolutePath) {
|
|
|
148
163
|
|
|
149
164
|
function normalizeDateKeyFromCloseoutPath(closeoutPath = "", fallbackIso = new Date().toISOString()) {
|
|
150
165
|
const normalized = toPosixPath(closeoutPath);
|
|
151
|
-
const match = /\/observability\/(\d{4}-\d{2}-\d{2})\//.exec(normalized);
|
|
166
|
+
const match = /\/observability\/(\d{4}-\d{2}-\d{2})\//.exec("/" + normalized);
|
|
152
167
|
if (match) {
|
|
153
168
|
return match[1];
|
|
154
169
|
}
|
|
@@ -330,6 +345,7 @@ function normalizeMetadata(raw = {}, { sessionId, targetPath, nowIso } = {}) {
|
|
|
330
345
|
createdAt,
|
|
331
346
|
updatedAt: normalizeIsoTimestamp(raw.updatedAt, nowIso),
|
|
332
347
|
expiresAt,
|
|
348
|
+
title: normalizeString(raw.title) || null,
|
|
333
349
|
ttlSeconds,
|
|
334
350
|
renewalCount: Math.max(0, Number(raw.renewalCount || 0)),
|
|
335
351
|
maxLifetimeSeconds: normalizePositiveInteger(raw.maxLifetimeSeconds, MAX_SESSION_LIFETIME_SECONDS),
|
|
@@ -364,7 +380,10 @@ function buildSessionPayload(metadata, paths, nowIso = new Date().toISOString())
|
|
|
364
380
|
metadataPath: paths.metadataPath,
|
|
365
381
|
streamPath: paths.streamPath,
|
|
366
382
|
createdAt: metadata.createdAt,
|
|
383
|
+
updatedAt: metadata.updatedAt,
|
|
367
384
|
expiresAt: metadata.expiresAt,
|
|
385
|
+
lastInteractionAt: metadata.lastInteractionAt,
|
|
386
|
+
title: metadata.title,
|
|
368
387
|
elapsedTimer: buildElapsedTimer(metadata.createdAt, nowIso),
|
|
369
388
|
renewalCount: metadata.renewalCount,
|
|
370
389
|
status: metadata.status,
|
|
@@ -406,11 +425,22 @@ export async function createSession({
|
|
|
406
425
|
targetPath = process.cwd(),
|
|
407
426
|
ttlSeconds = DEFAULT_TTL_SECONDS,
|
|
408
427
|
template = null,
|
|
428
|
+
sessionId: requestedSessionId = "",
|
|
429
|
+
title = "",
|
|
430
|
+
createdAt = "",
|
|
431
|
+
expiresAt = "",
|
|
432
|
+
lastInteractionAt = "",
|
|
409
433
|
} = {}) {
|
|
410
434
|
const resolvedTargetPath = path.resolve(String(targetPath || "."));
|
|
411
435
|
const normalizedTtlSeconds = normalizePositiveInteger(ttlSeconds, DEFAULT_TTL_SECONDS);
|
|
412
|
-
const sessionId =
|
|
436
|
+
const sessionId = normalizeCreateSessionId(requestedSessionId);
|
|
413
437
|
const nowIso = new Date().toISOString();
|
|
438
|
+
const createdIso = normalizeIsoTimestamp(createdAt, nowIso);
|
|
439
|
+
const expiresIso = normalizeIsoTimestamp(
|
|
440
|
+
expiresAt,
|
|
441
|
+
toIsoAfterSeconds(createdIso, normalizedTtlSeconds)
|
|
442
|
+
);
|
|
443
|
+
const interactionIso = normalizeIsoTimestamp(lastInteractionAt, createdIso);
|
|
414
444
|
const paths = resolveSessionPaths(sessionId, { targetPath: resolvedTargetPath });
|
|
415
445
|
const codebaseContext = await collectSessionCodebaseContext(resolvedTargetPath);
|
|
416
446
|
|
|
@@ -419,14 +449,15 @@ export async function createSession({
|
|
|
419
449
|
schemaVersion: SESSION_SCHEMA_VERSION,
|
|
420
450
|
sessionId,
|
|
421
451
|
targetPath: resolvedTargetPath,
|
|
422
|
-
createdAt:
|
|
452
|
+
createdAt: createdIso,
|
|
423
453
|
updatedAt: nowIso,
|
|
424
|
-
expiresAt:
|
|
454
|
+
expiresAt: expiresIso,
|
|
455
|
+
title: normalizeString(title) || null,
|
|
425
456
|
ttlSeconds: normalizedTtlSeconds,
|
|
426
457
|
renewalCount: 0,
|
|
427
458
|
maxLifetimeSeconds: MAX_SESSION_LIFETIME_SECONDS,
|
|
428
459
|
status: SESSION_STATUS_ACTIVE,
|
|
429
|
-
lastInteractionAt:
|
|
460
|
+
lastInteractionAt: interactionIso,
|
|
430
461
|
expiredAt: null,
|
|
431
462
|
archivedAt: null,
|
|
432
463
|
s3Path: null,
|
|
@@ -449,6 +480,24 @@ export async function createSession({
|
|
|
449
480
|
return buildSessionPayload(metadata, paths, nowIso);
|
|
450
481
|
}
|
|
451
482
|
|
|
483
|
+
/**
 * Replace the stored title of an existing session.
 *
 * Loads the session metadata, overwrites `title` (blank titles are stored as
 * `null`) and `updatedAt`, saves it, and returns the payload built from the
 * saved metadata.
 *
 * @param {string} sessionId Session whose title should change.
 * @param {{targetPath?: string, title?: string}} [options]
 * @returns {Promise<object|null>} The session payload, or `null` when
 *   `loadMetadata` finds nothing for this id.
 */
export async function updateSessionTitle(
  sessionId,
  { targetPath = process.cwd(), title = "" } = {}
) {
  const existing = await loadMetadata(sessionId, { targetPath });
  if (!existing) return null;

  const { metadata: previous, paths } = existing;
  const nowIso = new Date().toISOString();
  const persisted = await saveMetadata(
    { ...previous, title: normalizeString(title) || null, updatedAt: nowIso },
    paths
  );
  return buildSessionPayload(persisted, paths, nowIso);
}
|
|
500
|
+
|
|
452
501
|
export async function getSession(sessionId, { targetPath = process.cwd() } = {}) {
|
|
453
502
|
const loaded = await loadMetadata(sessionId, { targetPath });
|
|
454
503
|
if (!loaded) {
|
package/src/session/sync.js
CHANGED
|
@@ -407,6 +407,17 @@ export async function syncSessionEventToApi(
|
|
|
407
407
|
return { synced: false, reason: "invalid_input" };
|
|
408
408
|
}
|
|
409
409
|
|
|
410
|
+
// Test-fixture leak guard. Tests in this repo (and downstream consumers)
|
|
411
|
+
// create + tear down sessions using a temp workspace; on a developer
|
|
412
|
+
// machine those calls inherit the user's stored auth and silently posted
|
|
413
|
+
// hundreds of orphan rooms to prod (Carter saw ~200 "<null>" sessions).
|
|
414
|
+
// Honoring SENTINELAYER_SKIP_REMOTE_SYNC=1 keeps everything local while
|
|
415
|
+
// still exercising the appendToStream + agent_join code paths the tests
|
|
416
|
+
// care about. Local NDJSON durability is unaffected.
|
|
417
|
+
if (String(process.env.SENTINELAYER_SKIP_REMOTE_SYNC || "").trim() === "1") {
|
|
418
|
+
return { synced: false, reason: "remote_sync_disabled_env" };
|
|
419
|
+
}
|
|
420
|
+
|
|
410
421
|
const normalizedNowMs = Number(nowMs()) || Date.now();
|
|
411
422
|
if (isCircuitOpen(outboundCircuit, normalizedNowMs)) {
|
|
412
423
|
return { synced: false, reason: "circuit_breaker_open" };
|
|
@@ -501,6 +512,13 @@ async function syncSessionAuxPayload(
|
|
|
501
512
|
return { synced: false, reason: "invalid_input" };
|
|
502
513
|
}
|
|
503
514
|
|
|
515
|
+
// Same test-fixture leak guard as syncSessionEventToApi — keep parity
|
|
516
|
+
// so neither the event channel nor the metadata/error channels can
|
|
517
|
+
// exfiltrate a test session into prod when the env flag is set.
|
|
518
|
+
if (String(process.env.SENTINELAYER_SKIP_REMOTE_SYNC || "").trim() === "1") {
|
|
519
|
+
return { synced: false, reason: "remote_sync_disabled_env" };
|
|
520
|
+
}
|
|
521
|
+
|
|
504
522
|
const normalizedNowMs = Number(nowMs()) || Date.now();
|
|
505
523
|
if (isCircuitOpen(outboundCircuit, normalizedNowMs)) {
|
|
506
524
|
return { synced: false, reason: "circuit_breaker_open" };
|
|
@@ -1034,6 +1052,11 @@ export function resetSessionSyncStateForTests() {
|
|
|
1034
1052
|
inboundCircuit.openedAtMs = 0;
|
|
1035
1053
|
sessionIngestWindowBySessionId.clear();
|
|
1036
1054
|
humanRelayWindowBySessionId.clear();
|
|
1055
|
+
// Tests that exercise the network path explicitly need the
|
|
1056
|
+
// SENTINELAYER_SKIP_REMOTE_SYNC guard off — otherwise the function
|
|
1057
|
+
// short-circuits before the mocked fetchImpl is ever called. Tests that
|
|
1058
|
+
// want the guard on can re-set the env after resetting.
|
|
1059
|
+
delete process.env.SENTINELAYER_SKIP_REMOTE_SYNC;
|
|
1037
1060
|
}
|
|
1038
1061
|
|
|
1039
1062
|
export {
|
package/src/spec/generator.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
|
|
3
|
+
import { getCoordinationEtiquetteItems } from "../session/coordination-guidance.js";
|
|
3
4
|
import { getDefaultTemplate, getTemplateById } from "./templates.js";
|
|
4
5
|
|
|
5
6
|
const VALID_PROJECT_TYPES = new Set(["greenfield", "add_feature", "bugfix"]);
|
|
@@ -494,7 +495,11 @@ function shouldIncludeCoordinationPhase({
|
|
|
494
495
|
description = "",
|
|
495
496
|
agentsMarkdown = "",
|
|
496
497
|
sessionActive = false,
|
|
498
|
+
sessionToolsAvailable = true,
|
|
497
499
|
} = {}) {
|
|
500
|
+
if (sessionToolsAvailable === true) {
|
|
501
|
+
return true;
|
|
502
|
+
}
|
|
498
503
|
if (sessionActive === true) {
|
|
499
504
|
return true;
|
|
500
505
|
}
|
|
@@ -507,16 +512,7 @@ function shouldIncludeCoordinationPhase({
|
|
|
507
512
|
function buildCoordinationPhase(phaseNumber, previousPhaseTitle = "") {
|
|
508
513
|
return {
|
|
509
514
|
title: `Phase ${phaseNumber}: Multi-Agent Coordination Protocol`,
|
|
510
|
-
items: [
|
|
511
|
-
"Check for active sessions: `sl session list`.",
|
|
512
|
-
"If a session exists, join it: `sl session join <id> --name <your-name> --role coder`.",
|
|
513
|
-
"Emit status updates every 5 minutes: `sl session say <id> \"status: <what you're doing>\"`.",
|
|
514
|
-
"Before modifying a shared file, check recent session activity for that file.",
|
|
515
|
-
"On unexpected file changes, ask in-session instead of stopping: `sl session say <id> \"help: <question>\"`.",
|
|
516
|
-
"Post findings in-session: `sl session say <id> \"finding: [P2] <title> in <file>:<line>\"`.",
|
|
517
|
-
"On completion, update `tasks/todo.md` and emit completion status in-session.",
|
|
518
|
-
"Leave the session when done: `sl session leave <id>`.",
|
|
519
|
-
],
|
|
515
|
+
items: getCoordinationEtiquetteItems(),
|
|
520
516
|
dependencies: previousPhaseTitle ? [previousPhaseTitle] : [],
|
|
521
517
|
effort: "4-8 hours",
|
|
522
518
|
acceptanceCriteria: [
|
|
@@ -535,6 +531,7 @@ export function generateSpecMarkdown({
|
|
|
535
531
|
projectType,
|
|
536
532
|
agentsMarkdown = "",
|
|
537
533
|
sessionActive = false,
|
|
534
|
+
sessionToolsAvailable = true,
|
|
538
535
|
generatedAt = new Date().toISOString(),
|
|
539
536
|
} = {}) {
|
|
540
537
|
const resolvedTemplate = template || getDefaultTemplate();
|
|
@@ -566,6 +563,7 @@ export function generateSpecMarkdown({
|
|
|
566
563
|
description,
|
|
567
564
|
agentsMarkdown,
|
|
568
565
|
sessionActive,
|
|
566
|
+
sessionToolsAvailable,
|
|
569
567
|
})
|
|
570
568
|
) {
|
|
571
569
|
phases.push(buildCoordinationPhase(phases.length + 1, phases[phases.length - 1]?.title || ""));
|
package/src/swarm/registry.js
CHANGED
|
@@ -204,6 +204,26 @@ const BUILTIN_SWARM_AGENTS = Object.freeze([
|
|
|
204
204
|
evidenceRequirements: ["dependency_refs", "version_risks"],
|
|
205
205
|
escalationTargets: ["security", "release"],
|
|
206
206
|
},
|
|
207
|
+
{
|
|
208
|
+
id: "devtestbot",
|
|
209
|
+
persona: "AIdenID devTestBot",
|
|
210
|
+
role: "specialist",
|
|
211
|
+
domain: "Browser/System E2E",
|
|
212
|
+
tools: ["devtestbot.run_session"],
|
|
213
|
+
permissionMode: "runtime-readonly",
|
|
214
|
+
maxTurns: 8,
|
|
215
|
+
confidenceFloor: 0.8,
|
|
216
|
+
allowedPaths: ["."],
|
|
217
|
+
networkMode: "enabled",
|
|
218
|
+
defaultBudget: {
|
|
219
|
+
maxCostUsd: 1.5,
|
|
220
|
+
maxOutputTokens: 6000,
|
|
221
|
+
maxRuntimeMs: 600000,
|
|
222
|
+
maxToolCalls: 40,
|
|
223
|
+
},
|
|
224
|
+
evidenceRequirements: ["artifact_path", "runtime_evidence", "reproduction", "confidence"],
|
|
225
|
+
escalationTargets: ["testing", "frontend", "reliability"],
|
|
226
|
+
},
|
|
207
227
|
{
|
|
208
228
|
id: "frontend",
|
|
209
229
|
persona: "Jules Tanaka",
|
package/src/swarm/runtime.js
CHANGED
|
@@ -10,6 +10,13 @@ function normalizeString(value) {
|
|
|
10
10
|
return String(value || "").trim();
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
+
/**
 * Turn a thrown value into a log-safe message with credential-looking
 * `key: value` / `key=value` pairs replaced by `key=[REDACTED]`.
 *
 * Keys matched (case-insensitive): authorization, cookie, token, secret,
 * password, otp, reset — including when prefixed with `_` or `-`, as in
 * `client_secret` or `access_token`.
 *
 * Values redacted:
 * - a fully quoted value, spaces included (`password="two words"`);
 * - an optional auth scheme plus the credential
 *   (`Authorization: Bearer abc` is redacted as a whole);
 * - otherwise the run of characters up to the next quote, whitespace or `&`.
 *
 * @param {unknown} error Error instance, string, or any thrown value.
 * @returns {string} Sanitized message; "Runtime failed." when nothing usable
 *   was thrown.
 */
function sanitizeRuntimeError(error) {
  const text = String(error?.message || error || "Runtime failed.");
  // The lookbehind (instead of \b) lets keys follow `_`, since `_` is a word
  // character and \b would never fire inside `client_secret`.
  const credentialPattern =
    /(?<![a-z0-9])(authorization|cookie|token|secret|password|otp|reset)\s*[:=]\s*(?:"[^"]*"|'[^']*'|["']?(?:(?:bearer|basic|digest|negotiate|token)\s+)?[^"'\s&]+)/gi;
  return text.replace(credentialPattern, (_match, key) => `${key}=[REDACTED]`);
}
|
|
19
|
+
|
|
13
20
|
function formatTimestampToken() {
|
|
14
21
|
const now = new Date();
|
|
15
22
|
const pad = (value) => String(value).padStart(2, "0");
|
|
@@ -298,6 +305,9 @@ export async function runSwarmRuntime({
|
|
|
298
305
|
execute = false,
|
|
299
306
|
maxSteps = 20,
|
|
300
307
|
startUrl = "about:blank",
|
|
308
|
+
identityId = "",
|
|
309
|
+
devTestBotScope = "",
|
|
310
|
+
devTestBotRunSession = null,
|
|
301
311
|
playbookActions = [],
|
|
302
312
|
outputDir = "",
|
|
303
313
|
env,
|
|
@@ -321,6 +331,9 @@ export async function runSwarmRuntime({
|
|
|
321
331
|
const runtimeRunDirectory = path.join(resolvedOutputRoot, "swarms", runId);
|
|
322
332
|
const runStartedAt = Date.now();
|
|
323
333
|
const events = [];
|
|
334
|
+
const findings = [];
|
|
335
|
+
const artifactBundles = [];
|
|
336
|
+
const devTestBotRuns = [];
|
|
324
337
|
let step = 0;
|
|
325
338
|
|
|
326
339
|
const usage = {
|
|
@@ -409,7 +422,128 @@ export async function runSwarmRuntime({
|
|
|
409
422
|
})
|
|
410
423
|
);
|
|
411
424
|
|
|
412
|
-
if (normalizedEngine === "mock" || !execute) {
|
|
425
|
+
if (assignment.agentId === "devtestbot") {
|
|
426
|
+
const scope = normalizeString(devTestBotScope || plan.scenario || "smoke") || "smoke";
|
|
427
|
+
const toolInput = {
|
|
428
|
+
scope,
|
|
429
|
+
identityId: normalizeString(identityId),
|
|
430
|
+
baseUrl: normalizeString(startUrl),
|
|
431
|
+
recordVideo: Boolean(execute),
|
|
432
|
+
execute: Boolean(execute),
|
|
433
|
+
targetPath: normalizedTargetPath,
|
|
434
|
+
outputRoot: resolvedOutputRoot,
|
|
435
|
+
outputDir: path.join(runtimeRunDirectory, "devtestbot", assignment.assignmentId),
|
|
436
|
+
runId: `${runId}-${assignment.assignmentId}`,
|
|
437
|
+
};
|
|
438
|
+
|
|
439
|
+
usage.toolCalls += 1;
|
|
440
|
+
usage.outputTokens += estimateTokens(`devtestbot.run_session:${scope}:${Boolean(execute)}`);
|
|
441
|
+
step += 1;
|
|
442
|
+
events.push(
|
|
443
|
+
createEvent({
|
|
444
|
+
runId,
|
|
445
|
+
step,
|
|
446
|
+
eventType: "tool_call",
|
|
447
|
+
agentId: assignment.agentId,
|
|
448
|
+
message: "devtestbot.run_session started",
|
|
449
|
+
metadata: {
|
|
450
|
+
tool: "devtestbot.run_session",
|
|
451
|
+
scope,
|
|
452
|
+
identityId: toolInput.identityId || null,
|
|
453
|
+
baseUrl: toolInput.baseUrl,
|
|
454
|
+
execute: toolInput.execute,
|
|
455
|
+
recordVideo: toolInput.recordVideo,
|
|
456
|
+
},
|
|
457
|
+
usage,
|
|
458
|
+
})
|
|
459
|
+
);
|
|
460
|
+
|
|
461
|
+
try {
|
|
462
|
+
const runner = devTestBotRunSession || (await import("../agents/devtestbot/tool.js")).runDevTestBotSession;
|
|
463
|
+
const result = await runner(toolInput, {
|
|
464
|
+
targetPath: normalizedTargetPath,
|
|
465
|
+
outputRoot: resolvedOutputRoot,
|
|
466
|
+
runId: toolInput.runId,
|
|
467
|
+
execute: Boolean(execute),
|
|
468
|
+
env,
|
|
469
|
+
});
|
|
470
|
+
const resultFindings = Array.isArray(result.findings) ? result.findings : [];
|
|
471
|
+
findings.push(...resultFindings);
|
|
472
|
+
if (result.artifactBundle) {
|
|
473
|
+
artifactBundles.push(result.artifactBundle);
|
|
474
|
+
}
|
|
475
|
+
devTestBotRuns.push({
|
|
476
|
+
assignmentId: assignment.assignmentId,
|
|
477
|
+
runId: result.runId || toolInput.runId,
|
|
478
|
+
completed: Boolean(result.completed),
|
|
479
|
+
dryRun: Boolean(result.dryRun),
|
|
480
|
+
findingCount: resultFindings.length,
|
|
481
|
+
artifactBundle: result.artifactBundle || null,
|
|
482
|
+
});
|
|
483
|
+
usage.outputTokens += estimateTokens(
|
|
484
|
+
JSON.stringify({
|
|
485
|
+
findingCount: resultFindings.length,
|
|
486
|
+
artifactBundle: result.artifactBundle ? "present" : "missing",
|
|
487
|
+
})
|
|
488
|
+
);
|
|
489
|
+
step += 1;
|
|
490
|
+
events.push(
|
|
491
|
+
createEvent({
|
|
492
|
+
runId,
|
|
493
|
+
step,
|
|
494
|
+
eventType: "tool_result",
|
|
495
|
+
agentId: assignment.agentId,
|
|
496
|
+
message: "devtestbot.run_session completed",
|
|
497
|
+
metadata: {
|
|
498
|
+
tool: "devtestbot.run_session",
|
|
499
|
+
success: true,
|
|
500
|
+
dryRun: Boolean(result.dryRun),
|
|
501
|
+
findingCount: resultFindings.length,
|
|
502
|
+
artifactBundle: result.artifactBundle || null,
|
|
503
|
+
},
|
|
504
|
+
usage,
|
|
505
|
+
})
|
|
506
|
+
);
|
|
507
|
+
for (const finding of resultFindings) {
|
|
508
|
+
step += 1;
|
|
509
|
+
events.push(
|
|
510
|
+
createEvent({
|
|
511
|
+
runId,
|
|
512
|
+
step,
|
|
513
|
+
eventType: "finding",
|
|
514
|
+
agentId: assignment.agentId,
|
|
515
|
+
message: normalizeString(finding.title || "devTestBot finding"),
|
|
516
|
+
metadata: {
|
|
517
|
+
finding,
|
|
518
|
+
},
|
|
519
|
+
usage,
|
|
520
|
+
})
|
|
521
|
+
);
|
|
522
|
+
}
|
|
523
|
+
} catch (error) {
|
|
524
|
+
stop = {
|
|
525
|
+
stopClass: error?.code || "DEVTESTBOT_RUN_FAILED",
|
|
526
|
+
reason: sanitizeRuntimeError(error),
|
|
527
|
+
blocking: true,
|
|
528
|
+
};
|
|
529
|
+
step += 1;
|
|
530
|
+
events.push(
|
|
531
|
+
createEvent({
|
|
532
|
+
runId,
|
|
533
|
+
step,
|
|
534
|
+
eventType: "agent_error",
|
|
535
|
+
agentId: assignment.agentId,
|
|
536
|
+
message: stop.reason,
|
|
537
|
+
metadata: {
|
|
538
|
+
tool: "devtestbot.run_session",
|
|
539
|
+
stopClass: stop.stopClass,
|
|
540
|
+
},
|
|
541
|
+
usage,
|
|
542
|
+
})
|
|
543
|
+
);
|
|
544
|
+
break;
|
|
545
|
+
}
|
|
546
|
+
} else if (normalizedEngine === "mock" || !execute) {
|
|
413
547
|
usage.toolCalls += 1;
|
|
414
548
|
usage.outputTokens += estimateTokens(`mock:${assignment.agentId}`);
|
|
415
549
|
step += 1;
|
|
@@ -558,6 +692,10 @@ export async function runSwarmRuntime({
|
|
|
558
692
|
usage,
|
|
559
693
|
eventCount: events.length,
|
|
560
694
|
selectedAgents: Array.isArray(plan.selectedAgents) ? [...plan.selectedAgents] : [],
|
|
695
|
+
findingCount: findings.length,
|
|
696
|
+
findings,
|
|
697
|
+
artifactBundles,
|
|
698
|
+
devTestBotRuns,
|
|
561
699
|
};
|
|
562
700
|
|
|
563
701
|
return writeRuntimeArtifacts({
|