sentinelayer-cli 0.8.11 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -5
- package/src/agents/devtestbot/config/definition.js +100 -0
- package/src/agents/devtestbot/config/system-prompt.js +92 -0
- package/src/agents/devtestbot/index.js +9 -0
- package/src/agents/devtestbot/runner.js +769 -0
- package/src/agents/devtestbot/tool.js +707 -0
- package/src/agents/jules/stream.js +2 -12
- package/src/audit/orchestrator.js +471 -114
- package/src/audit/persona-loop.js +1342 -0
- package/src/audit/registry.js +58 -2
- package/src/commands/audit.js +42 -1
- package/src/commands/legacy-args.js +32 -1
- package/src/commands/omargate.js +4 -0
- package/src/commands/session.js +417 -89
- package/src/commands/swarm.js +11 -2
- package/src/cost/history.js +41 -21
- package/src/events/schema.js +27 -1
- package/src/guide/generator.js +14 -0
- package/src/legacy-cli.js +110 -18
- package/src/prompt/generator.js +4 -16
- package/src/review/ai-review.js +95 -6
- package/src/review/dd-report-email-client.js +148 -0
- package/src/review/investor-dd-devtestbot.js +599 -0
- package/src/review/investor-dd-orchestrator.js +135 -3
- package/src/review/omargate-cache.js +285 -0
- package/src/review/omargate-orchestrator.js +605 -4
- package/src/review/persona-prompts.js +34 -1
- package/src/review/report.js +189 -4
- package/src/session/coordination-guidance.js +48 -0
- package/src/session/daemon.js +3 -2
- package/src/session/listener.js +236 -0
- package/src/session/senti-naming.js +36 -0
- package/src/session/setup-guides.js +3 -15
- package/src/session/store.js +54 -5
- package/src/session/sync.js +23 -0
- package/src/spec/generator.js +8 -10
- package/src/swarm/registry.js +20 -0
- package/src/swarm/runtime.js +139 -1
package/package.json
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sentinelayer-cli",
|
|
3
|
-
"version": "0.8.11",
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "Scaffold Sentinelayer spec/prompt/guide artifacts with secure browser auth and token bootstrap.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"scripts": {
|
|
7
7
|
"check": "node scripts/check.mjs",
|
|
8
|
+
"devtestbot:install-browsers": "playwright install chromium ffmpeg",
|
|
8
9
|
"docs:build": "node scripts/docs-build.mjs",
|
|
9
10
|
"test": "npm run test:unit && npm run test:e2e",
|
|
10
|
-
"test:unit": "node --test tests/unit*.test.mjs",
|
|
11
|
-
"test:e2e": "node --test tests/e2e.test.mjs",
|
|
12
|
-
"test:coverage": "c8 node --test tests/unit*.test.mjs",
|
|
11
|
+
"test:unit": "node --import ./tests/setup-env.mjs --test tests/unit*.test.mjs",
|
|
12
|
+
"test:e2e": "node --import ./tests/setup-env.mjs --test tests/e2e.test.mjs",
|
|
13
|
+
"test:coverage": "c8 node --import ./tests/setup-env.mjs --test tests/unit*.test.mjs",
|
|
13
14
|
"verify": "npm run check && npm run docs:build && npm run test:e2e && npm run test:coverage && npm pack --dry-run"
|
|
14
15
|
},
|
|
15
16
|
"bin": {
|
|
@@ -46,17 +47,21 @@
|
|
|
46
47
|
"url": "https://github.com/mrrCarter/create-sentinelayer/issues"
|
|
47
48
|
},
|
|
48
49
|
"dependencies": {
|
|
50
|
+
"@axe-core/playwright": "4.11.2",
|
|
49
51
|
"@babel/parser": "7.29.2",
|
|
52
|
+
"axe-core": "4.11.3",
|
|
50
53
|
"cli-highlight": "2.1.11",
|
|
51
54
|
"commander": "14.0.1",
|
|
52
55
|
"ignore": "7.0.5",
|
|
56
|
+
"lighthouse": "12.8.2",
|
|
57
|
+
"mp4-muxer": "5.2.2",
|
|
53
58
|
"open": "10.1.2",
|
|
54
59
|
"picocolors": "1.1.1",
|
|
60
|
+
"playwright": "1.59.1",
|
|
55
61
|
"prompts": "2.4.2",
|
|
56
62
|
"yaml": "2.8.3",
|
|
57
63
|
"zod": "4.1.12"
|
|
58
64
|
},
|
|
59
|
-
"optionalDependencies": {},
|
|
60
65
|
"devDependencies": {
|
|
61
66
|
"c8": "10.1.3",
|
|
62
67
|
"license-checker-rseidelsohn": "4.4.2"
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* devTestBot agent definition.
|
|
3
|
+
*
|
|
4
|
+
* Declarative configuration for the AIdenID-backed browser/system test persona.
|
|
5
|
+
* This persona is intentionally scan-only: it collects runtime evidence and
|
|
6
|
+
* returns redacted findings/artifact paths, never raw user or credential data.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Identifiers of the lanes devTestBot reports on, in presentation order.
 * The exported array is frozen, so consumers that need a mutable list must
 * copy it first.
 */
const devTestBotLaneIds = [
  "console_errors",
  "network_errors",
  "a11y",
  "lighthouse",
  "click_coverage",
  "password_reset_e2e",
];

export const DEVTESTBOT_LANES = Object.freeze(devTestBotLaneIds);
|
|
17
|
+
|
|
18
|
+
// Single source for the confidence floor, which the definition exposes both
// at the top level and under `thresholds`.
const devTestBotConfidenceFloor = 0.8;

// Budget ceilings declared for the persona.
const devTestBotBudget = {
  maxCostUsd: 1.5,
  maxOutputTokens: 6000,
  maxRuntimeMs: 600000,
  maxToolCalls: 40,
  warningThresholdPercent: 70,
};

// Scan-only mandate, data policy, and the state changes the persona may make.
const devTestBotScope = {
  mandate: "scan_only",
  systemTestScope: "full_system_test",
  dataPolicy: "no_data_extraction",
  allowedStateChanges: [
    "explicit test-flow actions against approved targets",
    "ephemeral AIdenID identity flows",
  ],
};

// Fields each finding is required to carry as evidence.
const devTestBotEvidenceRequirements = [
  "artifact_path",
  "runtime_evidence",
  "reproduction",
  "user_impact",
  "confidence",
];

// Example findings for each severity level, P0 through P3.
const devTestBotSeverityExamples = {
  P0: [
    "critical user journey cannot load or complete",
    "password reset exposes credential material",
    "browser execution proves sensitive data disclosure",
  ],
  P1: [
    "password reset cannot complete",
    "runtime exception blocks core flow",
    "server error on critical interaction",
    "critical accessibility blocker on core flow",
  ],
  P2: [
    "non-critical 4xx/5xx response during smoke path",
    "material Lighthouse regression",
    "moderate accessibility violation",
    "uncovered expected click target in configured scope",
  ],
  P3: [
    "runtime evidence gap",
    "non-blocking capture warning",
  ],
};

/**
 * Declarative definition of the devTestBot persona: identity and display
 * fields, permission modes, turn/sub-agent limits, budget, tool allow/deny
 * lists, scope, lanes, evidence requirements, severity examples, and
 * thresholds.
 *
 * Only the top-level object is frozen; nested objects and arrays are plain
 * values.
 */
export const DEVTESTBOT_DEFINITION = Object.freeze({
  // Identity
  id: "devtestbot",
  persona: "AIdenID devTestBot",
  fullTitle: "SentinelLayer System Test Bot",
  domain: "system_test_runtime",
  signature: "- devTestBot, SentinelLayer System Test Bot",

  // Display
  color: "green",
  avatar: "DT",
  shortName: "devTestBot",

  // Permissions and limits
  permissionMode: "runtime-readonly",
  fixPermissionMode: "none",
  maxTurns: 8,
  maxSubAgents: 4,

  budget: devTestBotBudget,

  // Tools: one audit tool, no fix tools, file edits and shell disallowed.
  auditTools: ["devtestbot.run_session"],
  fixTools: [],
  disallowedTools: ["FileEdit", "Shell"],

  scope: devTestBotScope,

  lanes: DEVTESTBOT_LANES,

  evidenceRequirements: devTestBotEvidenceRequirements,
  confidenceFloor: devTestBotConfidenceFloor,

  severityExamples: devTestBotSeverityExamples,

  thresholds: {
    lighthousePoorScore: 0.5,
    lighthouseNeedsWorkScore: 0.9,
    confidenceFloor: devTestBotConfidenceFloor,
  },
});
|
|
97
|
+
|
|
98
|
+
/**
 * List the devTestBot lane identifiers.
 *
 * @returns {string[]} A new array holding the entries of DEVTESTBOT_LANES in
 *   the same order; callers may modify it without affecting the constant.
 */
export function listDevTestBotLanes() {
  const lanesCopy = Array.from(DEVTESTBOT_LANES);
  return lanesCopy;
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { DEVTESTBOT_DEFINITION, DEVTESTBOT_LANES } from "./definition.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Build the production system prompt for devTestBot.
 *
 * The prompt is assembled from the persona definition (header, confidence
 * floor, signature), the lane list, and the caller-supplied run context.
 * The `scope` and `baseUrl` values shown inside the TOOL CONTRACT JSON example
 * are JSON-escaped, so a value containing quotes, backslashes, or newlines
 * still produces a well-formed example instead of breaking out of the string.
 * Values without such characters render exactly as they did before.
 *
 * @param {object} [context]
 * @param {string} [context.scope="smoke"] - Runtime scope such as smoke, auth, full, or password-reset
 * @param {string} [context.baseUrl="unknown"] - Approved target URL
 * @param {string} [context.runId="unknown"] - Runtime run id
 * @returns {string} The full prompt text, ending with the persona signature.
 */
export function buildDevTestBotProductionPrompt(context = {}) {
  const {
    scope = "smoke",
    baseUrl = "unknown",
    runId = "unknown",
  } = context;
  const def = DEVTESTBOT_DEFINITION;

  // JSON.stringify supplies the surrounding double quotes and escapes any
  // character that would otherwise terminate or corrupt the JSON string.
  const scopeJson = JSON.stringify(`${scope}`);
  const baseUrlJson = JSON.stringify(`${baseUrl}`);

  // Numbered lane list, one lane per line.
  const laneList = DEVTESTBOT_LANES
    .map((lane, index) => `${index + 1}. ${lane}`)
    .join("\n");

  // The template is kept flush-left because every character inside it,
  // including leading whitespace, becomes part of the prompt.
  return `SYSTEM PROMPT - SENTINELAYER PERSONA
${def.persona} | ${def.domain} | 2026

ROLE
You are devTestBot, the SentinelLayer browser/system-test runtime persona.

MANDATE
Run scan-only browser and system tests against the approved target. Do not extract user data, scrape application data, alter production state beyond explicit test-flow actions, or print secrets.

CODEBASE AND TARGET CONTEXT
Base URL: ${baseUrl}
Scope: ${scope}
Run ID: ${runId}

AVAILABLE TOOLS
- devtestbot.run_session

TOOL CONTRACT
Call devtestbot.run_session with:
{
"scope": ${scopeJson},
"identityId": "<AIdenID identity id>",
"baseUrl": ${baseUrlJson},
"recordVideo": true
}

SECURITY RULES
- Pass identityId only. Never request, reveal, log, summarize, or return raw passwords, API keys, OTPs, session cookies, bearer tokens, reset links, or credential material.
- Treat identity credentials as tool-internal secrets.
- Return artifact paths, counts, statuses, and redacted summaries only.
- For password reset E2E, verify the flow outcome without exposing OTP, reset-link contents, session cookies, or request headers.
- Do not copy raw console, network, DOM, email, or identity payloads into findings.

LANES
${laneList}

WORKFLOW ORDER
1. Confirm the target URL is approved and the scope is explicit.
2. Run devtestbot.run_session with recordVideo=true unless the caller explicitly disabled video.
3. Review only redacted lane summaries and artifact paths.
4. Emit findings for user-visible runtime failures, not for harmless noise.
5. Include reproduction steps that point back to devtestbot.run_session and the artifact bundle.

SEVERITY MODEL
P0 - stop-ship: password reset leaks credential material, app discloses sensitive user data, or critical journey cannot load.
P1 - launch blocker: password reset or another core flow cannot complete, runtime crash blocks use, or server errors affect critical flow.
P2 - fix soon: moderate accessibility, Lighthouse, network, or coverage failures on non-critical surfaces.
P3 - evidence gap or non-blocking capture warning.

OUTPUT CONTRACT
Return findings as JSON:
[{
"severity": "P1",
"file": "runtime://browser",
"line": 1,
"title": "Password reset fails after OTP submission",
"evidence": "devTestBot artifact console.json shows a redacted runtime error; video artifact records the failed flow",
"rootCause": "Runtime error blocks completion",
"recommendedFix": "Inspect the failing handler and add regression coverage",
"trafficLight": "yellow",
"reproduction": { "type": "runtime_probe", "steps": ["Run devtestbot.run_session with the same scope and identityId"] },
"user_impact": "Users cannot complete password reset.",
"confidence": 0.9,
"artifacts": {}
}]

confidence: required. Number 0.0-1.0. Below ${def.confidenceFloor} = report an evidence gap, not a confirmed defect.

VOICE
Concrete, skeptical, evidence-first, and privacy-preserving.

${def.signature}`;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export { DEVTESTBOT_DEFINITION, DEVTESTBOT_LANES, listDevTestBotLanes } from "./config/definition.js";
|
|
2
|
+
export { buildDevTestBotProductionPrompt } from "./config/system-prompt.js";
|
|
3
|
+
export {
|
|
4
|
+
DEVTESTBOT_RUN_SESSION_TOOL,
|
|
5
|
+
DevTestBotToolError,
|
|
6
|
+
executeDevTestBotRunSessionTool,
|
|
7
|
+
runDevTestBotSession,
|
|
8
|
+
} from "./tool.js";
|
|
9
|
+
export { launch } from "./runner.js";
|