sentinelayer-cli 0.8.11 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/package.json +10 -5
  2. package/src/agents/devtestbot/config/definition.js +100 -0
  3. package/src/agents/devtestbot/config/system-prompt.js +92 -0
  4. package/src/agents/devtestbot/index.js +9 -0
  5. package/src/agents/devtestbot/runner.js +769 -0
  6. package/src/agents/devtestbot/tool.js +707 -0
  7. package/src/agents/jules/stream.js +2 -12
  8. package/src/audit/orchestrator.js +471 -114
  9. package/src/audit/persona-loop.js +1342 -0
  10. package/src/audit/registry.js +58 -2
  11. package/src/commands/audit.js +42 -1
  12. package/src/commands/legacy-args.js +32 -1
  13. package/src/commands/omargate.js +4 -0
  14. package/src/commands/session.js +417 -89
  15. package/src/commands/swarm.js +11 -2
  16. package/src/cost/history.js +41 -21
  17. package/src/events/schema.js +27 -1
  18. package/src/guide/generator.js +14 -0
  19. package/src/legacy-cli.js +110 -18
  20. package/src/prompt/generator.js +4 -16
  21. package/src/review/ai-review.js +95 -6
  22. package/src/review/dd-report-email-client.js +148 -0
  23. package/src/review/investor-dd-devtestbot.js +599 -0
  24. package/src/review/investor-dd-orchestrator.js +135 -3
  25. package/src/review/omargate-cache.js +285 -0
  26. package/src/review/omargate-orchestrator.js +605 -4
  27. package/src/review/persona-prompts.js +34 -1
  28. package/src/review/report.js +189 -4
  29. package/src/session/coordination-guidance.js +48 -0
  30. package/src/session/daemon.js +3 -2
  31. package/src/session/listener.js +236 -0
  32. package/src/session/senti-naming.js +36 -0
  33. package/src/session/setup-guides.js +3 -15
  34. package/src/session/store.js +54 -5
  35. package/src/session/sync.js +23 -0
  36. package/src/spec/generator.js +8 -10
  37. package/src/swarm/registry.js +20 -0
  38. package/src/swarm/runtime.js +139 -1
package/package.json CHANGED
@@ -1,15 +1,16 @@
1
1
  {
2
2
  "name": "sentinelayer-cli",
3
- "version": "0.8.11",
3
+ "version": "0.9.0",
4
4
  "description": "Scaffold Sentinelayer spec/prompt/guide artifacts with secure browser auth and token bootstrap.",
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "check": "node scripts/check.mjs",
8
+ "devtestbot:install-browsers": "playwright install chromium ffmpeg",
8
9
  "docs:build": "node scripts/docs-build.mjs",
9
10
  "test": "npm run test:unit && npm run test:e2e",
10
- "test:unit": "node --test tests/unit*.test.mjs",
11
- "test:e2e": "node --test tests/e2e.test.mjs",
12
- "test:coverage": "c8 node --test tests/unit*.test.mjs",
11
+ "test:unit": "node --import ./tests/setup-env.mjs --test tests/unit*.test.mjs",
12
+ "test:e2e": "node --import ./tests/setup-env.mjs --test tests/e2e.test.mjs",
13
+ "test:coverage": "c8 node --import ./tests/setup-env.mjs --test tests/unit*.test.mjs",
13
14
  "verify": "npm run check && npm run docs:build && npm run test:e2e && npm run test:coverage && npm pack --dry-run"
14
15
  },
15
16
  "bin": {
@@ -46,17 +47,21 @@
46
47
  "url": "https://github.com/mrrCarter/create-sentinelayer/issues"
47
48
  },
48
49
  "dependencies": {
50
+ "@axe-core/playwright": "4.11.2",
49
51
  "@babel/parser": "7.29.2",
52
+ "axe-core": "4.11.3",
50
53
  "cli-highlight": "2.1.11",
51
54
  "commander": "14.0.1",
52
55
  "ignore": "7.0.5",
56
+ "lighthouse": "12.8.2",
57
+ "mp4-muxer": "5.2.2",
53
58
  "open": "10.1.2",
54
59
  "picocolors": "1.1.1",
60
+ "playwright": "1.59.1",
55
61
  "prompts": "2.4.2",
56
62
  "yaml": "2.8.3",
57
63
  "zod": "4.1.12"
58
64
  },
59
- "optionalDependencies": {},
60
65
  "devDependencies": {
61
66
  "c8": "10.1.3",
62
67
  "license-checker-rseidelsohn": "4.4.2"
@@ -0,0 +1,100 @@
1
+ /**
2
+ * devTestBot agent definition.
3
+ *
4
+ * Declarative configuration for the AIdenID-backed browser/system test persona.
5
+ * This persona is intentionally scan-only: it collects runtime evidence and
6
+ * returns redacted findings/artifact paths, never raw user or credential data.
7
+ */
8
+
9
/**
 * Identifiers of the devTestBot lanes, in the order they are listed.
 *
 * The array is frozen so importers cannot reorder, extend, or truncate the
 * shared list in place.
 */
export const DEVTESTBOT_LANES = Object.freeze([
  "console_errors",
  "network_errors",
  "a11y",
  "lighthouse",
  "click_coverage",
  "password_reset_e2e",
]);
17
+
18
// Single source for the confidence floor so the top-level field and the
// thresholds entry cannot drift apart.
const DEVTESTBOT_CONFIDENCE_FLOOR = 0.8;

/**
 * Declarative definition of the devTestBot persona: identity and display
 * fields, permission modes, budget limits, tool allow/deny lists, scope
 * policy, lanes, evidence requirements, severity examples, and thresholds.
 *
 * `lanes` holds the same frozen array as DEVTESTBOT_LANES (no copy is made).
 * Object.freeze is shallow: the top-level properties cannot be reassigned,
 * but the nested objects and arrays declared inline here remain mutable.
 */
export const DEVTESTBOT_DEFINITION = Object.freeze({
  id: "devtestbot",
  persona: "AIdenID devTestBot",
  fullTitle: "SentinelLayer System Test Bot",
  domain: "system_test_runtime",
  signature: "- devTestBot, SentinelLayer System Test Bot",

  color: "green",
  avatar: "DT",
  shortName: "devTestBot",

  permissionMode: "runtime-readonly",
  fixPermissionMode: "none",
  maxTurns: 8,
  maxSubAgents: 4,

  budget: {
    maxCostUsd: 1.5,
    maxOutputTokens: 6000,
    maxRuntimeMs: 600000,
    maxToolCalls: 40,
    warningThresholdPercent: 70,
  },

  auditTools: ["devtestbot.run_session"],
  fixTools: [],
  disallowedTools: ["FileEdit", "Shell"],

  scope: {
    mandate: "scan_only",
    systemTestScope: "full_system_test",
    dataPolicy: "no_data_extraction",
    allowedStateChanges: [
      "explicit test-flow actions against approved targets",
      "ephemeral AIdenID identity flows",
    ],
  },

  lanes: DEVTESTBOT_LANES,

  evidenceRequirements: [
    "artifact_path",
    "runtime_evidence",
    "reproduction",
    "user_impact",
    "confidence",
  ],
  confidenceFloor: DEVTESTBOT_CONFIDENCE_FLOOR,

  severityExamples: {
    P0: [
      "critical user journey cannot load or complete",
      "password reset exposes credential material",
      "browser execution proves sensitive data disclosure",
    ],
    P1: [
      "password reset cannot complete",
      "runtime exception blocks core flow",
      "server error on critical interaction",
      "critical accessibility blocker on core flow",
    ],
    P2: [
      "non-critical 4xx/5xx response during smoke path",
      "material Lighthouse regression",
      "moderate accessibility violation",
      "uncovered expected click target in configured scope",
    ],
    P3: [
      "runtime evidence gap",
      "non-blocking capture warning",
    ],
  },

  thresholds: {
    lighthousePoorScore: 0.5,
    lighthouseNeedsWorkScore: 0.9,
    confidenceFloor: DEVTESTBOT_CONFIDENCE_FLOOR,
  },
});
97
+
98
/**
 * Return a new array containing the entries of DEVTESTBOT_LANES.
 *
 * The result is an unfrozen shallow copy, so callers may sort or modify it
 * without touching the shared frozen list.
 *
 * @returns {string[]} Copy of the lane identifiers, in declaration order.
 */
export function listDevTestBotLanes() {
  return [...DEVTESTBOT_LANES];
}
@@ -0,0 +1,92 @@
1
+ import { DEVTESTBOT_DEFINITION, DEVTESTBOT_LANES } from "./definition.js";
2
+
3
/**
 * Coerce a caller-supplied context value into single-line prompt text.
 *
 * null/undefined fall back to the given default. Runs of CR/LF are collapsed
 * to one space so a value cannot add extra lines to the prompt.
 *
 * @param {unknown} value - Raw context value
 * @param {string} fallback - Text used when value is null or undefined
 * @returns {string}
 */
function toDevTestBotPromptText(value, fallback) {
  return String(value ?? fallback).replace(/[\r\n]+/g, " ");
}

/**
 * Build the production system prompt for devTestBot.
 *
 * Persona, domain, confidence floor, and signature are read from
 * DEVTESTBOT_DEFINITION; the numbered lane list is rendered from
 * DEVTESTBOT_LANES.
 *
 * @param {object} [context] - Optional; null or undefined uses all defaults
 * @param {string} [context.scope] - Runtime scope such as smoke, auth, full, or password-reset (default "smoke")
 * @param {string} [context.baseUrl] - Approved target URL (default "unknown")
 * @param {string} [context.runId] - Runtime run id (default "unknown")
 * @returns {string} The complete prompt text
 */
export function buildDevTestBotProductionPrompt(context = {}) {
  // A default parameter only covers undefined; guard null explicitly.
  const source = context ?? {};
  const scope = toDevTestBotPromptText(source.scope, "smoke");
  const baseUrl = toDevTestBotPromptText(source.baseUrl, "unknown");
  const runId = toDevTestBotPromptText(source.runId, "unknown");
  const def = DEVTESTBOT_DEFINITION;

  // JSON.stringify emits the surrounding quotes and escapes embedded quotes
  // and backslashes, keeping the tool-contract example well-formed JSON.
  const scopeJson = JSON.stringify(scope);
  const baseUrlJson = JSON.stringify(baseUrl);

  return `SYSTEM PROMPT - SENTINELAYER PERSONA
${def.persona} | ${def.domain} | 2026

ROLE
You are devTestBot, the SentinelLayer browser/system-test runtime persona.

MANDATE
Run scan-only browser and system tests against the approved target. Do not extract user data, scrape application data, alter production state beyond explicit test-flow actions, or print secrets.

CODEBASE AND TARGET CONTEXT
Base URL: ${baseUrl}
Scope: ${scope}
Run ID: ${runId}

AVAILABLE TOOLS
- devtestbot.run_session

TOOL CONTRACT
Call devtestbot.run_session with:
{
"scope": ${scopeJson},
"identityId": "<AIdenID identity id>",
"baseUrl": ${baseUrlJson},
"recordVideo": true
}

SECURITY RULES
- Pass identityId only. Never request, reveal, log, summarize, or return raw passwords, API keys, OTPs, session cookies, bearer tokens, reset links, or credential material.
- Treat identity credentials as tool-internal secrets.
- Return artifact paths, counts, statuses, and redacted summaries only.
- For password reset E2E, verify the flow outcome without exposing OTP, reset-link contents, session cookies, or request headers.
- Do not copy raw console, network, DOM, email, or identity payloads into findings.

LANES
${DEVTESTBOT_LANES.map((lane, index) => `${index + 1}. ${lane}`).join("\n")}

WORKFLOW ORDER
1. Confirm the target URL is approved and the scope is explicit.
2. Run devtestbot.run_session with recordVideo=true unless the caller explicitly disabled video.
3. Review only redacted lane summaries and artifact paths.
4. Emit findings for user-visible runtime failures, not for harmless noise.
5. Include reproduction steps that point back to devtestbot.run_session and the artifact bundle.

SEVERITY MODEL
P0 - stop-ship: password reset leaks credential material, app discloses sensitive user data, or critical journey cannot load.
P1 - launch blocker: password reset or another core flow cannot complete, runtime crash blocks use, or server errors affect critical flow.
P2 - fix soon: moderate accessibility, Lighthouse, network, or coverage failures on non-critical surfaces.
P3 - evidence gap or non-blocking capture warning.

OUTPUT CONTRACT
Return findings as JSON:
[{
"severity": "P1",
"file": "runtime://browser",
"line": 1,
"title": "Password reset fails after OTP submission",
"evidence": "devTestBot artifact console.json shows a redacted runtime error; video artifact records the failed flow",
"rootCause": "Runtime error blocks completion",
"recommendedFix": "Inspect the failing handler and add regression coverage",
"trafficLight": "yellow",
"reproduction": { "type": "runtime_probe", "steps": ["Run devtestbot.run_session with the same scope and identityId"] },
"user_impact": "Users cannot complete password reset.",
"confidence": 0.9,
"artifacts": {}
}]

confidence: required. Number 0.0-1.0. Below ${def.confidenceFloor} = report an evidence gap, not a confirmed defect.

VOICE
Concrete, skeptical, evidence-first, and privacy-preserving.

${def.signature}`;
}
@@ -0,0 +1,9 @@
1
+ export { DEVTESTBOT_DEFINITION, DEVTESTBOT_LANES, listDevTestBotLanes } from "./config/definition.js";
2
+ export { buildDevTestBotProductionPrompt } from "./config/system-prompt.js";
3
+ export {
4
+ DEVTESTBOT_RUN_SESSION_TOOL,
5
+ DevTestBotToolError,
6
+ executeDevTestBotRunSessionTool,
7
+ runDevTestBotSession,
8
+ } from "./tool.js";
9
+ export { launch } from "./runner.js";