@ironbee-ai/cli 0.28.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/clients/claude/agents/ironbee-scenario.md +191 -0
- package/dist/clients/claude/agents/ironbee-verifier.md +22 -5
- package/dist/clients/claude/commands/ironbee-manage-scenario.md +36 -0
- package/dist/clients/claude/commands/ironbee-search-scenario.md +22 -0
- package/dist/clients/claude/commands/ironbee-sync-scenario.md +31 -0
- package/dist/clients/claude/commands/ironbee-verify.md +13 -12
- package/dist/clients/claude/hooks/require-verification.js +3 -3
- package/dist/clients/claude/hooks/track-action.js +1 -1
- package/dist/clients/claude/index.js +4 -4
- package/dist/clients/claude/platforms/scenario.android.md +31 -0
- package/dist/clients/claude/platforms/scenario.backend.md +26 -0
- package/dist/clients/claude/platforms/scenario.browser.md +41 -0
- package/dist/clients/claude/platforms/scenario.node.md +27 -0
- package/dist/clients/claude/trust.js +1 -0
- package/dist/clients/codex/agents/ironbee-scenario.md +179 -0
- package/dist/clients/codex/agents/ironbee-verifier.md +22 -5
- package/dist/clients/codex/commands/ironbee-manage-scenario/SKILL.main.md +102 -0
- package/dist/clients/codex/commands/ironbee-manage-scenario/SKILL.md +38 -0
- package/dist/clients/codex/commands/ironbee-search-scenario/SKILL.main.md +37 -0
- package/dist/clients/codex/commands/ironbee-search-scenario/SKILL.md +23 -0
- package/dist/clients/codex/commands/ironbee-sync-scenario/SKILL.main.md +55 -0
- package/dist/clients/codex/commands/ironbee-sync-scenario/SKILL.md +33 -0
- package/dist/clients/codex/commands/ironbee-verify/SKILL.main.md +12 -3
- package/dist/clients/codex/commands/ironbee-verify/SKILL.md +4 -3
- package/dist/clients/codex/hooks/require-verification.js +3 -3
- package/dist/clients/codex/hooks/track-action.js +1 -1
- package/dist/clients/codex/index.js +2 -2
- package/dist/clients/codex/platforms/scenario.android.md +31 -0
- package/dist/clients/codex/platforms/scenario.backend.md +26 -0
- package/dist/clients/codex/platforms/scenario.browser.md +40 -0
- package/dist/clients/codex/platforms/scenario.node.md +27 -0
- package/dist/clients/codex/util.js +32 -26
- package/dist/clients/cursor/commands/ironbee-manage-scenario/SKILL.md +100 -0
- package/dist/clients/cursor/commands/ironbee-search-scenario/SKILL.md +34 -0
- package/dist/clients/cursor/commands/ironbee-sync-scenario/SKILL.md +54 -0
- package/dist/clients/cursor/commands/ironbee-verify/SKILL.md +2 -1
- package/dist/clients/cursor/hooks/require-verification.js +3 -3
- package/dist/clients/cursor/hooks/track-action.js +1 -1
- package/dist/clients/cursor/index.js +1 -1
- package/dist/clients/cursor/platforms/scenario.android.md +31 -0
- package/dist/clients/cursor/platforms/scenario.backend.md +26 -0
- package/dist/clients/cursor/platforms/scenario.browser.md +40 -0
- package/dist/clients/cursor/platforms/scenario.node.md +27 -0
- package/dist/commands/install.js +1 -1
- package/dist/commands/mode-select.js +2 -2
- package/dist/commands/scenario.js +1 -0
- package/dist/hooks/core/actions.js +7 -7
- package/dist/hooks/core/nested-tools.js +1 -1
- package/dist/hooks/core/scenario-tools.js +1 -0
- package/dist/index.js +1 -1
- package/dist/lib/config.js +1 -1
- package/dist/lib/git.js +1 -1
- package/dist/lib/install-version.js +1 -1
- package/dist/lib/platform-section.js +3 -3
- package/dist/lib/prompt.js +5 -4
- package/dist/lib/scenario-staleness.js +1 -0
- package/dist/tui/config/schema.js +1 -1
- package/dist/tui/projects/area.js +4 -4
- package/dist/tui/scenarios/area.js +2 -0
- package/dist/tui/shell/registry.js +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ironbee-sync-scenario
|
|
3
|
+
description: >
|
|
4
|
+
Re-validate saved IronBee verification scenarios against the current code and repair MECHANICAL
|
|
5
|
+
drift, by delegating to the ironbee-scenario custom agent (operation sync). Use when the user types
|
|
6
|
+
`$ironbee-sync-scenario`. A leading `check` token = dry-run (report drift, no repair).
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# IronBee — Sync scenario(s)
|
|
10
|
+
|
|
11
|
+
> **Delegate** — spawn the **`ironbee-scenario` custom agent** via `spawn_agent` with
|
|
12
|
+
> `agent_type="ironbee-scenario"` **and `fork_turns="none"`** (the default `fork_turns="all"` silently
|
|
13
|
+
> drops the agent_type → a generic toolless agent). The sub-agent owns the `scenario-*` tools.
|
|
14
|
+
|
|
15
|
+
Re-validate + repair saved verification **scenarios**. This is NOT a verification cycle.
|
|
16
|
+
|
|
17
|
+
## Steps
|
|
18
|
+
1. **Resolve the mode + target**: strip a leading `check` token (→ dry-run) and a leading `force` token
|
|
19
|
+
(→ sync ALL scenarios, not just stale); remainder = `all` (stale ones; `force` = every one) or a
|
|
20
|
+
name / description (one). Empty → `all`.
|
|
21
|
+
2. **Spawn** `spawn_agent` with `agent_type="ironbee-scenario"` and `fork_turns="none"`, passing in
|
|
22
|
+
`message`:
|
|
23
|
+
> Operation: sync
|
|
24
|
+
> Target: \<`all`, or the name / description>
|
|
25
|
+
> Force: \<include `Force: all` ONLY if the request began with `force`; otherwise OMIT>
|
|
26
|
+
> Mode: \<include `Mode: check` ONLY if the request began with `check`; otherwise OMIT>
|
|
27
|
+
The sub-agent runs each target against the live app, classifies (still-fresh / mechanical drift →
|
|
28
|
+
repair the SCRIPT only / real defect → STOP + report / expectation changed → ask), and on a
|
|
29
|
+
non-check run stamps repaired scenarios current. **It repairs MECHANICS, never what a scenario
|
|
30
|
+
verifies. Wait for the sub-agent in the same turn.**
|
|
31
|
+
3. **Relay** the summary (per scenario: repaired / still-fresh / defect-reported / needs decision).
|
|
32
|
+
|
|
33
|
+
(To just *detect* staleness without running anything, use `ironbee scenario status`.)
|
|
@@ -42,9 +42,18 @@ A custom verification scenario may be supplied — either **inline text** or a *
|
|
|
42
42
|
(read at run time). The scenario is whatever the user provided alongside the command, after
|
|
43
43
|
stripping a leading `fix` / `report` mode token.
|
|
44
44
|
|
|
45
|
-
- **If
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
- **If the scenario part starts with `scenario:`** (after the mode token), everything after `scenario:`
|
|
46
|
+
(to the end) is a **SAVED scenario reference** (exact name OR semantic description). Resolve it across
|
|
47
|
+
enabled platforms (`*_scenario-search` for the description + an exact-name `*_scenario-list` match),
|
|
48
|
+
pick the single strong match (ambiguous → ask; none → say so + fall back to the default flow), then
|
|
49
|
+
**run it in ONE `*_scenario-run` call** (no re-discovery) and **judge its result (functional) +
|
|
50
|
+
any returned visual evidence (e.g. screenshots)**. Its nested tool calls satisfy each active cycle's required tools.
|
|
51
|
+
No exact name needed — e.g. `scenario: the full purchase flow`.
|
|
52
|
+
**On PASS, keep it fresh:** `*_scenario-update` its `ironbee.commit` → HEAD (`git rev-parse HEAD`)
|
|
53
|
+
+ `liveValidated: true` (re-send the full metadata merged); on FAIL / defect, don't stamp.
|
|
54
|
+
- **If a scenario is supplied (free text), it is authoritative**: verify exactly what it describes,
|
|
55
|
+
exercising precisely the flows/states/endpoints it names — this **replaces** the default "exercise
|
|
56
|
+
the changed pages/endpoints" guidance.
|
|
48
57
|
- **If the scenario is (or points to) a file path**, read that file and treat its contents as the
|
|
49
58
|
scenario. Do not assume a fixed location or format.
|
|
50
59
|
- **If the path does not resolve**, stop and report `scenario file not found: <path>`, then ask how
|
|
@@ -29,18 +29,19 @@ A custom verification scenario may be supplied when this command is invoked —
|
|
|
29
29
|
|
|
30
30
|
> The scenario is whatever the user provided alongside `$ironbee-verify`, after stripping a leading `fix` / `report` mode token — the remainder is the scenario; empty remainder → the verifier uses its default flow.
|
|
31
31
|
|
|
32
|
-
- **If
|
|
32
|
+
- **If the scenario part starts with `scenario:`** (after the mode token), everything after `scenario:` (to the end) is a **SAVED scenario reference** (exact name OR semantic description). Do NOT read a file / treat as free text — relay it to the verifier verbatim as a `Saved scenario: <ref>` line. The verifier resolves it (`scenario-search` + exact-name), runs it in one `scenario-run` call (no re-discovery), and judges the result (functional + any visual evidence). No exact name needed — e.g. `scenario: the full purchase flow`.
|
|
33
|
+
- **If a scenario is supplied (free text), it is authoritative**: the verifier must verify exactly what it describes, exercising precisely the flows/states/endpoints it names — this **replaces** the default "exercise the changed pages/endpoints" guidance.
|
|
33
34
|
- **If the scenario is (or points to) a file path**, read that file with your file-read tool yourself and pass its **contents** into the verifier's prompt (the verifier has no file-read tool). Do not assume a fixed location or format — read whatever path was given.
|
|
34
35
|
- **If the path does not resolve to an existing file**, stop and report `scenario file not found: <path>`, then ask how to proceed — do not delegate with the literal path string or guess a target.
|
|
35
36
|
- **If no scenario is supplied**, the verifier falls back to exercising the changed pages/endpoints per the active cycles.
|
|
36
37
|
|
|
37
38
|
## Steps
|
|
38
39
|
|
|
39
|
-
1. **Resolve the mode and scenario**: strip a leading `fix` / `report` token (see **Mode**); then file path → read it now; inline text → use as-is; empty → none.
|
|
40
|
+
1. **Resolve the mode and scenario**: strip a leading `fix` / `report` token (see **Mode**); then on the remainder — starts with `scenario:` → SAVED scenario reference (the rest after `scenario:`); a file path → read it now; inline text → use as-is; empty → none.
|
|
40
41
|
2. **Spawn the `ironbee-verifier` custom agent** — call `spawn_agent` with **`agent_type="ironbee-verifier"`** AND **`fork_turns="none"`**. The `fork_turns="none"` is REQUIRED: the default `fork_turns="all"` is a full-history fork that silently DROPS the `agent_type` override, giving you a generic agent *without* the verification tools. (Do NOT "act as" the verifier or use a plain generic fork either.) Put the task, the mode, and the resolved scenario in the `message`, e.g.:
|
|
41
42
|
> Verify the current code changes.
|
|
42
43
|
> Mode: \<`fix` in fix mode — OMIT this line entirely in verify-only mode>
|
|
43
|
-
>
|
|
44
|
+
> \<ONE of: `Saved scenario: <ref>` (when `scenario:` was given — the verifier resolves + runs it) — OR — `Scenario: <resolved text>` (free text / file contents) — OR — `Scenario: none — exercise the changed pages/endpoints`>
|
|
44
45
|
The verifier runs `verification-start` (relaying the fix intent to IronBee's completion gate, which then enforces fix-until-pass on you) → drives every active cycle's tools → submits the single verdict, all in this shared session. It resolves the session id from the environment, so you don't pass one.
|
|
45
46
|
**Wait for the verifier in the same turn — do NOT background it.** Let it run to completion and read its verdict before responding; a backgrounded verifier can let your turn end (and the Stop gate fire) before its verdict is recorded.
|
|
46
47
|
3. **Relay the verifier's summary** — the verdict status and, on fail, the issues it found.
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
"use strict";var u=Object.defineProperty;var
|
|
1
|
+
"use strict";var u=Object.defineProperty;var K=Object.getOwnPropertyDescriptor;var q=Object.getOwnPropertyNames;var B=Object.prototype.hasOwnProperty;var C=(o,e)=>u(o,"name",{value:e,configurable:!0});var J=(o,e)=>{for(var s in e)u(o,s,{get:e[s],enumerable:!0})},L=(o,e,s,n)=>{if(e&&typeof e=="object"||typeof e=="function")for(let t of q(e))!B.call(o,t)&&t!==s&&u(o,t,{get:()=>e[t],enumerable:!(n=K(e,t))||n.enumerable});return o};var H=o=>L(u({},"__esModule",{value:!0}),o);var M={};J(M,{run:()=>z});module.exports=H(M);var $=require("crypto"),x=require("../../../hooks/core/activity"),i=require("../../../hooks/core/session-state"),N=require("../../../hooks/core/actions"),E=require("../../../hooks/core/verification-lifecycle"),O=require("../../../hooks/core/verification-context"),U=require("../../../lib/config"),m=require("../../../lib/logger"),A=require("../../../lib/recording-tools"),D=require("../../../hooks/core/scenario-tools"),V=require("../../../lib/stdin"),f=require("../util");async function z(o,e){const s=e?.soft===!0,n=(0,f.parseCodexHookStdin)((0,V.readStdin)()),t=n.session_id??"default",r=`${o}/.ironbee/sessions/${t}`,y=`${r}/actions.jsonl`;(0,m.setLogFile)(`${r}/session.log`);const g=(0,D.isScenarioTool)(n.tool_name),h=(0,i.getActiveVerificationId)(r);if(!h&&!s&&!g){const p=`BLOCKED: You must start a verification cycle before using devtools tools.
|
|
2
2
|
|
|
3
3
|
Start verification first:
|
|
4
4
|
echo '{"session_id":"${t}"}' | ironbee hook verification-start
|
|
5
5
|
|
|
6
|
-
Then use the verification tools for the active cycle(s) \u2014 mcp__browser-devtools__bdt_* for browser, mcp__node-devtools__ndt_* for node, mcp__backend-devtools__bedt_* for backend, mcp__android-devtools__adt_* for android.`;process.stdout.write(JSON.stringify({hookSpecificOutput:{hookEventName:"PreToolUse",permissionDecision:"deny",permissionDecisionReason:p}})),process.exit(0);return}const
|
|
6
|
+
Then use the verification tools for the active cycle(s) \u2014 mcp__browser-devtools__bdt_* for browser, mcp__node-devtools__ndt_* for node, mcp__backend-devtools__bedt_* for backend, mcp__android-devtools__adt_* for android.`;process.stdout.write(JSON.stringify({hookSpecificOutput:{hookEventName:"PreToolUse",permissionDecision:"deny",permissionDecisionReason:p}})),process.exit(0);return}const _=n.tool_name??"",S=(0,f.extractCodexMcpServer)(_),c=(0,A.recordingToolsForServer)(S),P=c!==null?(0,f.canonicalizeCodexToolName)(_.split("__").pop()??""):"";if(!s&&!g&&c!==null&&(0,i.isRecordingRequired)(r)&&!(0,i.isRecordingActive)(r)&&P!==c.startTool){const p=`BLOCKED: Recording is required but not started.
|
|
7
7
|
|
|
8
8
|
1. Start recording NOW:
|
|
9
9
|
Use mcp__${c.server}__${c.startTool}
|
|
@@ -12,4 +12,4 @@ Then use the verification tools for the active cycle(s) \u2014 mcp__browser-devt
|
|
|
12
12
|
|
|
13
13
|
3. **Stop recording BEFORE submitting verdict:**
|
|
14
14
|
Use mcp__${c.server}__${c.stopTool}
|
|
15
|
-
submit-verdict will reject with "recording is still active" if you skip this.`;process.stdout.write(JSON.stringify({hookSpecificOutput:{hookEventName:"PreToolUse",permissionDecision:"deny",permissionDecisionReason:p}})),process.exit(0);return}await(0
|
|
15
|
+
submit-verdict will reject with "recording is still active" if you skip this.`;process.stdout.write(JSON.stringify({hookSpecificOutput:{hookEventName:"PreToolUse",permissionDecision:"deny",permissionDecisionReason:p}})),process.exit(0);return}await(0,x.startActivity)({sessionDir:r,actionsFile:y,source:"pre_tool_use"});let d=h;s&&!d&&!g&&(d=(await(0,E.startVerification)({sessionId:t,sessionDir:r,actionsFile:y,recordingEnabled:!1})).verificationId);const j=(0,i.getActiveTraceId)(r),v=(0,i.getActiveActivityId)(r),k=(0,N.resolveProjectName)(o),b=[`prj:${k}`,`sid:${t}`];v&&b.push(`aid:${v}`),d&&b.push(`vid:${d}`);const F=`ironbee=${b.join(";")}`,a=(0,U.loadConfig)(o),T={...n.tool_input&&typeof n.tool_input=="object"?n.tool_input:{}},l={projectName:k,sessionId:t,activityId:v,verificationId:d,traceId:j,traceState:F,toolCallId:(0,$.randomUUID)()};n.tool_use_id&&(l.toolUseId=n.tool_use_id),l.mcpServer=S??"browser-devtools";const w=(0,i.getUserEmail)(r);w&&(l.userEmail=w),a.collector?.url&&(l.collectorUrl=a.collector.url),a.collector?.oauthToken?l.collectorOAuthToken=a.collector.oauthToken:a.collector?.apiKey&&(l.collectorApiKey=a.collector.apiKey),T._metadata=l;const I={hookEventName:"PreToolUse",permissionDecision:"allow",updatedInput:T},R=(0,O.buildVerificationContextOnceForCycle)({projectDir:o,sessionId:t,sessionDir:r,activeVerificationId:d,config:a});R.length>0&&(I.additionalContext=R),process.stdout.write(JSON.stringify({hookSpecificOutput:I})),m.logger.debug(`require-verification: allowed ${_} with _metadata`),process.exit(0)}C(z,"run");0&&(module.exports={run});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";var
|
|
1
|
+
"use strict";var N=Object.defineProperty;var G=Object.getOwnPropertyDescriptor;var K=Object.getOwnPropertyNames;var W=Object.prototype.hasOwnProperty;var b=(t,e)=>N(t,"name",{value:e,configurable:!0});var X=(t,e)=>{for(var o in e)N(t,o,{get:e[o],enumerable:!0})},Z=(t,e,o,n)=>{if(e&&typeof e=="object"||typeof e=="function")for(let i of K(e))!W.call(t,i)&&i!==o&&N(t,i,{get:()=>e[i],enumerable:!(n=G(e,i))||n.enumerable});return t};var ee=t=>Z(N({},"__esModule",{value:!0}),t);var ie={};X(ie,{run:()=>re});module.exports=ee(ie);var T=require("../../../hooks/core/actions"),v=require("../../../hooks/core/nested-tools"),R=require("../../../import/ids"),r=require("../../../hooks/core/session-state"),L=require("../../../hooks/core/tool-use-stash"),P=require("../../../lib/config"),a=require("../../../lib/logger"),h=require("../../../lib/output"),U=require("../../../lib/recording-tools"),q=require("../../../lib/stdin"),x=require("../../../queue"),d=require("../util");function A(t){if(t==null)return 0;if(typeof t=="string")try{return Buffer.byteLength(t,"utf8")}catch{return 0}try{return Buffer.byteLength(JSON.stringify(t),"utf8")}catch{return 0}}b(A,"safeStringifyBytes");function te(t){if(t==null)return{isError:!1,errorText:void 0};if(typeof t=="object"&&t!==null){const e=t;if(e.isError===!0||e.is_error===!0){const o=e.error??e.message??e.errorMessage;return{isError:!0,errorText:typeof o=="string"?o:JSON.stringify(e).slice(0,500)}}}if(typeof t=="string"){const e=t;if(/(?:^|\n)Process exited with code [1-9]/.test(e)||/^Exit code:\s*[1-9]/m.test(e)||/apply_patch verification failed/i.test(e)||/failed to find expected lines/i.test(e)||/^\s*Error\b/.test(e)||/(?:^|\n)\[Request interrupted by user\]/.test(e)||/modified since (?:last )?read|stale read/i.test(e)||/file (?:is )?too large|exceeds/i.test(e)||/file not found|No such file or directory|does not exist/i.test(e))return{isError:!0,errorText:e.slice(0,500)}}return{isError:!1,errorText:void 0}}b(te,"detectFailure");function 
oe(t){if(t===null||typeof t!="object")return;const e=t._metadata;if(e===null||typeof e!="object")return;const o=e.toolCallId;if(typeof o=="string"&&/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(o))return o}b(oe,"extractMetadataToolCallId");function ne(t,e){const o=(0,L.consumeToolUseData)(t,e);if(!o?.start_ns)return null;try{const n=process.hrtime.bigint()-BigInt(o.start_ns);return Number(n/1000000n)}catch(n){return a.logger.debug(`failed to derive duration from stash: ${n}`),null}}b(ne,"deriveDurationMs");async function re(t){const e=(0,d.parseCodexHookStdin)((0,q.readStdin)()),o=e.session_id??"default",n=`${t}/.ironbee/sessions/${o}`,i=`${n}/actions.jsonl`;(0,a.setLogFile)(`${n}/session.log`);const y=e.tool_name??"",s=e.tool_use_id??"",g=e.tool_input,$=g&&typeof g=="object"?{...g,_metadata:void 0}:void 0,C=e.tool_response,l=(0,d.extractCodexMcpServer)(y),z=l==="browser-devtools"||l==="node-devtools"||l==="backend-devtools"||l==="android-devtools",F=ne(o,s),c=(0,d.classifyCodexTool)(y),J=z&&(0,v.isNestedToolContainer)(c.tool_name,l),D=J?(0,v.extractNestedToolCallsFromResponse)(C,l):null,u=D!==null?{isError:!1,errorText:void 0}:te(C);if(z){const w=c.tool_name,f=(0,U.recordingToolsForServer)(l);f!==null&&(w===f.startTool?(0,r.setRecordingActive)(n,!0):w===f.stopTool&&(0,r.setRecordingActive)(n,!1));const E=(0,r.getActiveActivityId)(n),m={...(0,T.baseFields)(i),type:"tool_call",timestamp:Date.now(),tool_type:c.tool_type,tool_name:c.tool_name,mcp_server:c.mcp_server??l,tool_input:$,tool_input_size:A($),tool_response:u.isError?void 0:C,tool_response_size:u.isError?0:A(C),duration:F};E&&(m.activity_id=E);const B=oe(g);B!==void 0?m.id=B:s.length>0&&(m.id=(0,R.deriveToolCallEventIdFromToolUseId)(o,s)),s&&(m.tool_use_id=s);const k=(0,r.getActiveVerificationId)(n);k&&(m.verification_id=k);const I=(0,r.getActiveTraceId)(n);if(I&&(m.trace_id=I),u.isError&&(m.error=u.errorText),await(0,T.appendAction)(i,m),J&&!u.isError){const 
Y=D??(0,v.extractNestedToolCalls)($??g,l);for(const _ of Y){f!==null&&(_.name===f.startTool?((0,r.setRecordingActive)(n,!0),a.logger.debug(`track-action (nested): recording started (${f.cycle})`)):_.name===f.stopTool&&((0,r.setRecordingActive)(n,!1),a.logger.debug(`track-action (nested): recording stopped (${f.cycle})`)));const S={...(0,T.baseFields)(i),type:"tool_call",timestamp:_.startTime??Date.now(),tool_name:_.name,tool_type:"mcp",tool_input:_.args,duration:_.duration??null,mcp_server:l,nested:!0,...s?{parent_tool_use_id:s}:{}};E&&(S.activity_id=E),k&&(S.verification_id=k),I&&(S.trace_id=I),await(0,T.appendAction)(i,S),a.logger.debug(`track-action (nested): ${_.name}`)}}(0,h.writeAndExit)(JSON.stringify({}),0);return}if(!(0,P.isJobQueueEnabled)(t)){(0,h.writeAndExit)(JSON.stringify({}),0);return}const M=(0,r.getActiveActivityId)(n),H=(0,d.extractCodexToolInput)(y,g),V=A(g),Q=u.isError?0:A(C),p={...(0,T.baseFields)(i),type:"tool_call",timestamp:Date.now(),tool_type:c.tool_type,tool_name:c.tool_name||(0,d.normalizeCodexToolName)(y),mcp_server:c.mcp_server,tool_input:H,tool_input_size:V,tool_response_size:Q,duration:F};M&&(p.activity_id=M),s.length>0&&(p.id=(0,R.deriveToolCallEventIdFromToolUseId)(o,s)),s&&(p.tool_use_id=s);const O=(0,r.getActiveVerificationId)(n);O&&(p.verification_id=O);const j=(0,r.getActiveTraceId)(n);j&&(p.trace_id=j),u.isError&&(p.error=u.errorText);try{(0,x.submit)(t,o,x.SEND_EVENT_TYPE,p)}catch(w){w instanceof x.JobTooLargeError?a.logger.debug(`track-action: wire event too large for tool_call ${y}; dropping`):a.logger.debug(`queue submit failed for tool_call ${y}: ${w}`)}(0,h.writeAndExit)(JSON.stringify({}),0)}b(re,"run");0&&(module.exports={run});
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
"use strict";var _=Object.defineProperty;var Z=Object.getOwnPropertyDescriptor;var j=Object.getOwnPropertyNames;var ee=Object.prototype.hasOwnProperty;var y=(f,e)=>_(f,"name",{value:e,configurable:!0});var oe=(f,e)=>{for(var o in e)_(f,o,{get:e[o],enumerable:!0})},ne=(f,e,o,s)=>{if(e&&typeof e=="object"||typeof e=="function")for(let i of j(e))!ee.call(f,i)&&i!==o&&_(f,i,{get:()=>e[i],enumerable:!(s=Z(e,i))||s.enumerable});return f};var te=f=>ne(_({},"__esModule",{value:!0}),f);var le={};oe(le,{CodexClient:()=>se});module.exports=te(le);var r=require("fs"),g=require("path"),O=require("../../lib/gitignore"),b=require("../../lib/logger"),d=require("../../lib/output"),P=require("../../lib/fs-prune"),c=require("../../lib/config"),C=require("../../lib/platform-section"),n=require("./util"),M=require("./thread-map"),L=require("./hooks/verify-gate"),G=require("./hooks/activity-end"),J=require("./hooks/session-start"),F=require("./hooks/activity-start"),K=require("./hooks/require-verification"),U=require("./hooks/require-verdict"),q=require("./hooks/clear-verdict"),D=require("./hooks/track-action"),X=require("./hooks/track-action-monitor"),W=require("./hooks/track-action-pre"),Y=require("./hooks/subagent-start"),z=require("./hooks/subagent-stop");const x="browser-devtools",E="node-devtools",w="backend-devtools",T="android-devtools",ie="ironbee",k="ironbee-verifier",H=30,V="Verifies recent code changes through real browser/runtime/backend tools and submits the IronBee verdict. Spawn this custom agent (by agent_type) after editing code to run the verification cycle out-of-band \u2014 it drives the devtools tools, judges the result, and records the verdict in the shared session. 
It does NOT edit code.";function A(f){return(0,g.join)(__dirname,"..",f,"platforms")}y(A,"platformsDirFor");function h(f){return d.pc.dim(f)}y(h,"codexColor");function B(f){return f.hooks.some(e=>e.command.includes(ie))}y(B,"isIronBeeHookGroup");function re(f){const e=Object.keys(f);return e.length===0?!0:e.length===1&&e[0]==="hooks"?Object.keys(f.hooks??{}).length===0:!1}y(re,"isCodexHooksEmpty");class se{constructor(){this.name="codex";this.supportsVerifierModel=!0}static{y(this,"CodexClient")}detect(e){return(0,r.existsSync)((0,g.join)(e,".agents","skills","ironbee-verify"))}resolveProjectDir(){return process.env.CODEX_PROJECT_DIR??process.env.IRONBEE_PROJECT_DIR??process.cwd()}install(e,o){const s=o??(0,c.loadConfig)(e),i=(0,c.getVerificationMode)(s),t=i!=="monitor",a=(0,c.getCodexVerifierMode)(s);this.cleanupArtifacts(e);const l=(0,n.codexHooksJsonPath)(e);if(this.mergeHooksConfig(l,i,a),this.mergeConfigToml(e,s,t,a),t&&(i==="enforce"&&this.writeAgentsMdBlock(e,s,a),this.writeSkills(e,i==="enforce",s,a),(0,C.syncPlatformSectionsToConfig)(e,A)),(0,O.ensureIronBeeGitignored)(e),console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} hooks ${d.pc.dim("\u2192")} ${d.pc.dim(l)}`),console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} config ${d.pc.dim("\u2192")} ${d.pc.dim((0,n.codexConfigTomlPath)(e))}`),t){const p=a==="main-agent"?`${d.pc.yellow("main-agent")} (the main agent drives the devtools tools directly)`:`${d.pc.bold("sub-agent")} (delegated to the ironbee-verifier custom agent)`;console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} verify ${d.pc.dim("\u2192")} ${p}`)}i==="enforce"?(console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} agents ${d.pc.dim("\u2192")} ${d.pc.dim((0,g.join)(e,"AGENTS.md"))}`),console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} skill ${d.pc.dim("\u2192")} ${d.pc.dim((0,g.join)(e,".agents","skills","ironbee-verification","SKILL.md"))}`),console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} command ${d.pc.dim("\u2192")} 
${d.pc.dim((0,g.join)(e,".agents","skills","ironbee-verify","SKILL.md"))}`)):i==="assist"?(console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} ${d.pc.yellow("assist mode")} (verification.auto: false) \u2014 manual $ironbee-verify only, no enforcement`),console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} command ${d.pc.dim("\u2192")} ${d.pc.dim((0,g.join)(e,".agents","skills","ironbee-verify","SKILL.md"))}`)):console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} ${d.pc.yellow("monitoring-only mode")} (verification.enable: false)`),console.log(),console.log(` ${d.pc.yellow("\u26A0")} ${d.pc.yellow("Codex requires one-time TUI setup:")}`),console.log(` ${d.pc.yellow("1.")} Run ${d.pc.bold("/hooks")} in a fresh Codex session to review and trust IronBee hooks`),console.log(` ${d.pc.yellow("2.")} Restart any open Codex sessions to pick up new hook config`)}uninstall(e){this.cleanupArtifacts(e),(0,P.pruneEmptyDirs)((0,g.join)(e,".codex"));const o=(0,M.codexThreadMapPath)(e);if((0,r.existsSync)(o))try{(0,r.unlinkSync)(o)}catch(s){b.logger.debug(`failed to remove codex thread map: ${s}`)}console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} removed hooks, MCP entries, AGENTS.md block, and skills`)}cleanupArtifacts(e){this.migrateAwayFromUserLevel();const o=(0,n.codexHooksJsonPath)(e);this.removeIronBeeHooks(o),this.maybeDeleteEmptyHooks(o),this.removeIronBeeMcpServers(e),this.removeVerifierAgentToml(e);const s=(0,g.join)(e,"AGENTS.md");if((0,r.existsSync)(s))try{const t=(0,r.readFileSync)(s,"utf-8"),a=(0,n.stripAgentsMdBlock)(t);a===null?(0,r.unlinkSync)(s):a!==t&&(0,r.writeFileSync)(s,a)}catch(t){b.logger.debug(`failed to strip AGENTS.md block: ${t}`)}const i=(0,g.join)(e,".agents","skills");this.removeDir((0,g.join)(i,"ironbee-verification")),this.removeDir((0,g.join)(i,"ironbee-verify")),(0,P.pruneEmptyDirs)((0,g.join)(e,".agents"))}async runVerifyGate(e){await(0,L.run)(e)}async runActivityEnd(e){await(0,G.run)(e)}async runSessionStart(e){await(0,J.run)(e)}async 
runActivityStart(e){await(0,F.run)(e)}async runRequireVerification(e,o){await(0,K.run)(e,o)}async runRequireVerdict(e,o){await(0,U.run)(e,o)}async runClearVerdict(e){await(0,q.run)(e)}async runTrackAction(e){await(0,D.run)(e)}async runTrackActionMonitor(e){await(0,X.run)(e)}async runTrackActionPre(e){await(0,W.run)(e)}async runSubagentStart(e){await(0,Y.run)(e)}async runSubagentStop(e){await(0,z.run)(e)}resolveAgentSessionId(e,o){const s=process.env.CODEX_THREAD_ID;if(typeof s=="string"&&s.length>0&&o)return(0,M.lookupThreadSession)(o,s)}async runSessionEnd(e){b.logger.debug("session-end: no-op on Codex (no SessionEnd hook event)")}mergeHooksConfig(e,o,s){const i=o!=="monitor",t=o==="assist"?" --soft":"";(0,r.mkdirSync)((0,g.dirname)(e),{recursive:!0});let a={hooks:{}};if((0,r.existsSync)(e))try{a=JSON.parse((0,r.readFileSync)(e,"utf-8")),a.hooks||(a.hooks={})}catch(u){b.logger.debug(`failed to parse ${e}: ${u}`),a={hooks:{}}}for(const u of Object.keys(a.hooks)){const m=a.hooks[u].filter(S=>!B(S));m.length===0?delete a.hooks[u]:a.hooks[u]=m}const l=y((u,m,S)=>{a.hooks[u]||(a.hooks[u]=[]),a.hooks[u].push({matcher:m,hooks:[{type:"command",command:S}]})},"addGroup");l("SessionStart",".*","ironbee hook session-start --client codex"),l("UserPromptSubmit",".*","ironbee hook activity-start --client codex"),l("PreToolUse",".*","ironbee hook track-action-pre --client codex"),i&&(l("PreToolUse","^mcp__(browser|node|backend|android)[-_]devtools__.*",`ironbee hook require-verification --client codex${t}`),l("PreToolUse","^apply_patch$",`ironbee hook require-verdict --client codex${t}`),l("PostToolUse","^apply_patch$","ironbee hook clear-verdict --client codex"),s==="sub-agent"&&l("SubagentStart",".*","ironbee hook subagent-start --client codex")),l("SubagentStop",".*","ironbee hook subagent-stop --client codex"),l("PostToolUse",".*",i?"ironbee hook track-action --client codex":"ironbee hook track-action-monitor --client codex"),l("Stop",".*",o==="enforce"?"ironbee hook 
verify-gate --client codex":"ironbee hook activity-end --client codex"),(0,r.writeFileSync)(e,JSON.stringify(a,null,2))}removeIronBeeHooks(e){if((0,r.existsSync)(e))try{const o=(0,r.readFileSync)(e,"utf-8"),s=JSON.parse(o);if(!s.hooks)return;let i=!1;for(const t of Object.keys(s.hooks)){const a=s.hooks[t].filter(l=>!B(l));a.length!==s.hooks[t].length&&(i=!0),a.length===0?delete s.hooks[t]:s.hooks[t]=a}i&&(0,r.writeFileSync)(e,JSON.stringify(s,null,2))}catch(o){b.logger.debug(`failed to strip IronBee hooks from ${e}: ${o}`)}}maybeDeleteEmptyHooks(e){if((0,r.existsSync)(e))try{const o=JSON.parse((0,r.readFileSync)(e,"utf-8"));re(o)&&(0,r.unlinkSync)(e)}catch(o){b.logger.debug(`failed to inspect ${e} for emptiness: ${o}`)}}mergeConfigToml(e,o,s,i){(0,r.mkdirSync)((0,g.join)(e,".codex"),{recursive:!0});let t=(0,n.readCodexConfigToml)(e);if(t=(0,n.ensureFeaturesHooksTrue)(t),t=(0,n.removeMcpServer)(t,x),t=(0,n.removeMcpServer)(t,E),t=(0,n.removeMcpServer)(t,w),t=(0,n.removeMcpServer)(t,T),s&&i==="main-agent"){t=this.upsertSessionMcpServers(t,e,o),t=(0,n.removeAgentsTable)(t,k),t=(0,n.removeMultiAgentV2SpawnMetadata)(t),this.removeVerifierAgentToml(e),(0,n.writeCodexConfigToml)(e,t);return}if(s){const a=(0,c.getVerificationModel)(o,"codex"),l=(0,r.existsSync)((0,n.userCodexConfigTomlPath)())?(0,r.readFileSync)((0,n.userCodexConfigTomlPath)(),"utf-8"):"",p=(0,n.extractTomlTopLevelModel)(t)===null&&(0,n.extractTomlTopLevelModel)(l)===null;a===void 0&&p&&console.log(` ${d.pc.dim("\u2192")} ${h("[codex]")} ${d.pc.yellow("\u26A0 no model for the verifier")} \u2014 the ${d.pc.bold("ironbee-verifier")} sub-agent inherits the session model, but neither this project's .codex/config.toml nor ~/.codex/config.toml has a top-level ${d.pc.bold("model")}, so it may fail to spawn ("could not resolve the child model"). 
Fix: set ${d.pc.bold("model")} in ~/.codex/config.toml, or set ${d.pc.bold("verification.model")} in your ironbee config.`),this.writeVerifierAgentToml(e,o,a),t=(0,n.upsertAgentsTable)(t,k,[`description = ${JSON.stringify(V)}`,`config_file = ${JSON.stringify(`agents/${k}.toml`)}`]),t=(0,n.ensureMultiAgentV2SpawnMetadataExposed)(t)}else t=(0,n.removeAgentsTable)(t,k),t=(0,n.removeMultiAgentV2SpawnMetadata)(t),this.removeVerifierAgentToml(e);(0,n.writeCodexConfigToml)(e,t)}writeVerifierAgentToml(e,o,s){const i=(0,g.join)(__dirname,"agents",`${k}.md`);let t;try{t=(0,r.readFileSync)(i,"utf-8")}catch(u){b.logger.debug(`failed to read verifier agent source ${i}: ${u}`);return}const a=A("codex");for(const u of c.ALL_CYCLES){const S=(0,c.isCycleEnabled)(o,u)?I=>{const $=(0,g.join)(a,(0,C.fragmentFilename)("skill",u,I));return(0,r.existsSync)($)?(0,r.readFileSync)($,"utf-8").trimEnd():null}:null;t=(0,C.applyPlatformSection)(t,u,S,`${k}.toml`)}const l=[];l.push(`name = ${JSON.stringify(k)}`),l.push(`description = ${JSON.stringify(V)}`),l.push('sandbox_mode = "read-only"'),s&&l.push(`model = ${JSON.stringify(s)}`),l.push("developer_instructions = '''"),l.push(t.replace(/'''/g,"```").trimEnd()),l.push("'''");const p=y((u,m,S)=>{u&&(l.push(""),l.push(`[mcp_servers.${m}]`),l.push(...N(S)),l.push(`startup_timeout_sec = ${H}`),l.push("required = true"),l.push('default_tools_approval_mode = "approve"'))},"addCycle");p((0,c.isCycleEnabled)(o,"browser"),x,(0,c.getMcpServerEntry)(e)),p((0,c.isCycleEnabled)(o,"node"),E,(0,c.getNodeDevToolsMcpEntry)(e)),p((0,c.isCycleEnabled)(o,"backend"),w,(0,c.getBackendDevToolsMcpEntry)(e)),p((0,c.isCycleEnabled)(o,"android"),T,(0,c.getAndroidDevToolsMcpEntry)(e));const v=(0,n.codexAgentTomlPath)(e,k);(0,r.mkdirSync)((0,g.dirname)(v),{recursive:!0}),(0,r.writeFileSync)(v,l.join(`
|
|
1
|
+
"use strict";var R=Object.defineProperty;var te=Object.getOwnPropertyDescriptor;var ie=Object.getOwnPropertyNames;var re=Object.prototype.hasOwnProperty;var S=(f,e)=>R(f,"name",{value:e,configurable:!0});var se=(f,e)=>{for(var o in e)R(f,o,{get:e[o],enumerable:!0})},ae=(f,e,o,r)=>{if(e&&typeof e=="object"||typeof e=="function")for(let i of ie(e))!re.call(f,i)&&i!==o&&R(f,i,{get:()=>e[i],enumerable:!(r=te(e,i))||r.enumerable});return f};var le=f=>ae(R({},"__esModule",{value:!0}),f);var me={};se(me,{CodexClient:()=>ge});module.exports=le(me);var s=require("fs"),g=require("path"),K=require("../../lib/gitignore"),p=require("../../lib/logger"),l=require("../../lib/output"),B=require("../../lib/fs-prune"),d=require("../../lib/config"),C=require("../../lib/platform-section"),n=require("./util"),H=require("./thread-map"),U=require("./hooks/verify-gate"),q=require("./hooks/activity-end"),D=require("./hooks/session-start"),X=require("./hooks/activity-start"),W=require("./hooks/require-verification"),Y=require("./hooks/require-verdict"),z=require("./hooks/clear-verdict"),Q=require("./hooks/track-action"),Z=require("./hooks/track-action-monitor"),j=require("./hooks/track-action-pre"),ee=require("./hooks/subagent-start"),oe=require("./hooks/subagent-stop");const E="browser-devtools",A="node-devtools",_="backend-devtools",I="android-devtools",ce="ironbee",$="ironbee-verifier",V=30,N="Verifies recent code changes through real browser/runtime/backend tools and submits the IronBee verdict. Spawn this custom agent (by agent_type) after editing code to run the verification cycle out-of-band \u2014 it drives the devtools tools, judges the result, and records the verdict in the shared session. It does NOT edit code.",x="ironbee-scenario",L=["ironbee-manage-scenario","ironbee-search-scenario","ironbee-sync-scenario"],J="Manages and searches reusable IronBee verification scenarios via the devtools scenario tools. 
Spawn this custom agent (by agent_type) from the scenario slash commands to author/update/delete saved scenarios and find them by name/description/metadata. NOT a verification cycle (running a saved scenario to verify is done via $ironbee-verify scenario:<name>).";function P(f){return(0,g.join)(__dirname,"..",f,"platforms")}S(P,"platformsDirFor");function y(f){return l.pc.dim(f)}S(y,"codexColor");function F(f){return f.hooks.some(e=>e.command.includes(ce))}S(F,"isIronBeeHookGroup");function de(f){const e=Object.keys(f);return e.length===0?!0:e.length===1&&e[0]==="hooks"?Object.keys(f.hooks??{}).length===0:!1}S(de,"isCodexHooksEmpty");class ge{constructor(){this.name="codex";this.supportsVerifierModel=!0}static{S(this,"CodexClient")}detect(e){return(0,s.existsSync)((0,g.join)(e,".agents","skills","ironbee-verify"))}resolveProjectDir(){return process.env.CODEX_PROJECT_DIR??process.env.IRONBEE_PROJECT_DIR??process.cwd()}install(e,o){const r=o??(0,d.loadConfig)(e),i=(0,d.getVerificationMode)(r),t=i!=="monitor",a=(0,d.getCodexVerifierMode)(r);this.cleanupArtifacts(e);const m=(0,n.codexHooksJsonPath)(e);if(this.mergeHooksConfig(m,i,a),this.mergeConfigToml(e,r,t,a),t&&(i==="enforce"&&this.writeAgentsMdBlock(e,r,a),this.writeSkills(e,i==="enforce",r,a),(0,C.syncPlatformSectionsToConfig)(e,P)),(0,K.ensureIronBeeGitignored)(e),console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} hooks ${l.pc.dim("\u2192")} ${l.pc.dim(m)}`),console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} config ${l.pc.dim("\u2192")} ${l.pc.dim((0,n.codexConfigTomlPath)(e))}`),t){const b=a==="main-agent"?`${l.pc.yellow("main-agent")} (the main agent drives the devtools tools directly)`:`${l.pc.bold("sub-agent")} (delegated to the ironbee-verifier custom agent)`;console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} verify ${l.pc.dim("\u2192")} ${b}`)}i==="enforce"?(console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} agents ${l.pc.dim("\u2192")} ${l.pc.dim((0,g.join)(e,"AGENTS.md"))}`),console.log(` 
${l.pc.dim("\u2192")} ${y("[codex]")} skill ${l.pc.dim("\u2192")} ${l.pc.dim((0,g.join)(e,".agents","skills","ironbee-verification","SKILL.md"))}`),console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} command ${l.pc.dim("\u2192")} ${l.pc.dim((0,g.join)(e,".agents","skills","ironbee-verify","SKILL.md"))}`)):i==="assist"?(console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} ${l.pc.yellow("assist mode")} (verification.auto: false) \u2014 manual $ironbee-verify only, no enforcement`),console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} command ${l.pc.dim("\u2192")} ${l.pc.dim((0,g.join)(e,".agents","skills","ironbee-verify","SKILL.md"))}`)):console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} ${l.pc.yellow("monitoring-only mode")} (verification.enable: false)`),console.log(),console.log(` ${l.pc.yellow("\u26A0")} ${l.pc.yellow("Codex requires one-time TUI setup:")}`),console.log(` ${l.pc.yellow("1.")} Run ${l.pc.bold("/hooks")} in a fresh Codex session to review and trust IronBee hooks`),console.log(` ${l.pc.yellow("2.")} Restart any open Codex sessions to pick up new hook config`)}uninstall(e){this.cleanupArtifacts(e),(0,s.existsSync)((0,n.codexHooksJsonPath)(e))||this.removeFeaturesHooksFlag(e),(0,B.pruneEmptyDirs)((0,g.join)(e,".codex"));const o=(0,H.codexThreadMapPath)(e);if((0,s.existsSync)(o))try{(0,s.unlinkSync)(o)}catch(r){p.logger.debug(`failed to remove codex thread map: ${r}`)}console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} removed hooks, MCP entries, AGENTS.md block, and skills`)}removeFeaturesHooksFlag(e){const o=(0,n.codexConfigTomlPath)(e);if((0,s.existsSync)(o))try{const r=(0,s.readFileSync)(o,"utf-8"),i=(0,n.removeFeaturesHooks)(r);i.trim().length===0?(0,s.unlinkSync)(o):i!==r&&(0,s.writeFileSync)(o,i)}catch(r){p.logger.debug(`failed to strip [features] hooks from config.toml: ${r}`)}}cleanupArtifacts(e){this.migrateAwayFromUserLevel();const 
o=(0,n.codexHooksJsonPath)(e);this.removeIronBeeHooks(o),this.maybeDeleteEmptyHooks(o),this.removeIronBeeMcpServers(e),this.removeVerifierAgentToml(e),this.removeScenarioAgentToml(e);const r=(0,g.join)(e,"AGENTS.md");if((0,s.existsSync)(r))try{const t=(0,s.readFileSync)(r,"utf-8"),a=(0,n.stripAgentsMdBlock)(t);a===null?(0,s.unlinkSync)(r):a!==t&&(0,s.writeFileSync)(r,a)}catch(t){p.logger.debug(`failed to strip AGENTS.md block: ${t}`)}const i=(0,g.join)(e,".agents","skills");this.removeDir((0,g.join)(i,"ironbee-verification")),this.removeDir((0,g.join)(i,"ironbee-verify"));for(const t of L)this.removeDir((0,g.join)(i,t));this.removeDir((0,g.join)(i,"ironbee-run-scenario")),(0,B.pruneEmptyDirs)((0,g.join)(e,".agents"))}async runVerifyGate(e){await(0,U.run)(e)}async runActivityEnd(e){await(0,q.run)(e)}async runSessionStart(e){await(0,D.run)(e)}async runActivityStart(e){await(0,X.run)(e)}async runRequireVerification(e,o){await(0,W.run)(e,o)}async runRequireVerdict(e,o){await(0,Y.run)(e,o)}async runClearVerdict(e){await(0,z.run)(e)}async runTrackAction(e){await(0,Q.run)(e)}async runTrackActionMonitor(e){await(0,Z.run)(e)}async runTrackActionPre(e){await(0,j.run)(e)}async runSubagentStart(e){await(0,ee.run)(e)}async runSubagentStop(e){await(0,oe.run)(e)}resolveAgentSessionId(e,o){const r=process.env.CODEX_THREAD_ID;if(typeof r=="string"&&r.length>0&&o)return(0,H.lookupThreadSession)(o,r)}async runSessionEnd(e){p.logger.debug("session-end: no-op on Codex (no SessionEnd hook event)")}mergeHooksConfig(e,o,r){const i=o!=="monitor",t=o==="assist"?" 
--soft":"";(0,s.mkdirSync)((0,g.dirname)(e),{recursive:!0});let a={hooks:{}};if((0,s.existsSync)(e))try{a=JSON.parse((0,s.readFileSync)(e,"utf-8")),a.hooks||(a.hooks={})}catch(v){p.logger.debug(`failed to parse ${e}: ${v}`),a={hooks:{}}}for(const v of Object.keys(a.hooks)){const c=a.hooks[v].filter(h=>!F(h));c.length===0?delete a.hooks[v]:a.hooks[v]=c}const m=S((v,c,h)=>{a.hooks[v]||(a.hooks[v]=[]),a.hooks[v].push({matcher:c,hooks:[{type:"command",command:h}]})},"addGroup");m("SessionStart",".*","ironbee hook session-start --client codex"),m("UserPromptSubmit",".*","ironbee hook activity-start --client codex"),m("PreToolUse",".*","ironbee hook track-action-pre --client codex"),i&&(m("PreToolUse","^mcp__(browser|node|backend|android)[-_]devtools__.*",`ironbee hook require-verification --client codex${t}`),m("PreToolUse","^apply_patch$",`ironbee hook require-verdict --client codex${t}`),m("PostToolUse","^apply_patch$","ironbee hook clear-verdict --client codex"),r==="sub-agent"&&m("SubagentStart",".*","ironbee hook subagent-start --client codex")),m("SubagentStop",".*","ironbee hook subagent-stop --client codex"),m("PostToolUse",".*",i?"ironbee hook track-action --client codex":"ironbee hook track-action-monitor --client codex"),m("Stop",".*",o==="enforce"?"ironbee hook verify-gate --client codex":"ironbee hook activity-end --client codex"),(0,s.writeFileSync)(e,JSON.stringify(a,null,2))}removeIronBeeHooks(e){if((0,s.existsSync)(e))try{const o=(0,s.readFileSync)(e,"utf-8"),r=JSON.parse(o);if(!r.hooks)return;let i=!1;for(const t of Object.keys(r.hooks)){const a=r.hooks[t].filter(m=>!F(m));a.length!==r.hooks[t].length&&(i=!0),a.length===0?delete r.hooks[t]:r.hooks[t]=a}i&&(0,s.writeFileSync)(e,JSON.stringify(r,null,2))}catch(o){p.logger.debug(`failed to strip IronBee hooks from ${e}: ${o}`)}}maybeDeleteEmptyHooks(e){if((0,s.existsSync)(e))try{const o=JSON.parse((0,s.readFileSync)(e,"utf-8"));de(o)&&(0,s.unlinkSync)(e)}catch(o){p.logger.debug(`failed to inspect ${e} for 
emptiness: ${o}`)}}mergeConfigToml(e,o,r,i){(0,s.mkdirSync)((0,g.join)(e,".codex"),{recursive:!0});let t=(0,n.readCodexConfigToml)(e);if(t=(0,n.ensureFeaturesHooksTrue)(t),t=(0,n.removeMcpServer)(t,E),t=(0,n.removeMcpServer)(t,A),t=(0,n.removeMcpServer)(t,_),t=(0,n.removeMcpServer)(t,I),r&&i==="main-agent"){t=this.upsertSessionMcpServers(t,e,o),t=(0,n.removeAgentsTable)(t,$),t=(0,n.removeAgentsTable)(t,x),t=(0,n.removeMultiAgentV2SpawnMetadata)(t),this.removeVerifierAgentToml(e),this.removeScenarioAgentToml(e),(0,n.writeCodexConfigToml)(e,t);return}if(r){const a=(0,d.getVerificationModel)(o,"codex"),m=(0,s.existsSync)((0,n.userCodexConfigTomlPath)())?(0,s.readFileSync)((0,n.userCodexConfigTomlPath)(),"utf-8"):"",b=(0,n.extractTomlTopLevelModel)(t)===null&&(0,n.extractTomlTopLevelModel)(m)===null;a===void 0&&b&&console.log(` ${l.pc.dim("\u2192")} ${y("[codex]")} ${l.pc.yellow("\u26A0 no model for the verifier")} \u2014 the ${l.pc.bold("ironbee-verifier")} sub-agent inherits the session model, but neither this project's .codex/config.toml nor ~/.codex/config.toml has a top-level ${l.pc.bold("model")}, so it may fail to spawn ("could not resolve the child model"). 
Fix: set ${l.pc.bold("model")} in ~/.codex/config.toml, or set ${l.pc.bold("verification.model")} in your ironbee config.`),this.writeVerifierAgentToml(e,o,a),t=(0,n.upsertAgentsTable)(t,$,[`description = ${JSON.stringify(N)}`,`config_file = ${JSON.stringify(`agents/${$}.toml`)}`]),t=(0,n.ensureMultiAgentV2SpawnMetadataExposed)(t),this.writeScenarioAgentToml(e,o,a),t=(0,n.upsertAgentsTable)(t,x,[`description = ${JSON.stringify(J)}`,`config_file = ${JSON.stringify(`agents/${x}.toml`)}`])}else t=(0,n.removeAgentsTable)(t,$),t=(0,n.removeAgentsTable)(t,x),t=(0,n.removeMultiAgentV2SpawnMetadata)(t),this.removeVerifierAgentToml(e),this.removeScenarioAgentToml(e);(0,n.writeCodexConfigToml)(e,t)}writeVerifierAgentToml(e,o,r){this.writeCustomAgentToml(e,o,r,$,N,"skill","read-only")}writeScenarioAgentToml(e,o,r){this.writeCustomAgentToml(e,o,r,x,J,"scenario","read-only")}writeCustomAgentToml(e,o,r,i,t,a,m){const b=(0,g.join)(__dirname,"agents",`${i}.md`);let u;try{u=(0,s.readFileSync)(b,"utf-8")}catch(k){p.logger.debug(`failed to read agent source ${b}: ${k}`);return}const v=P("codex");for(const k of d.ALL_CYCLES){const w=(0,d.isCycleEnabled)(o,k)?ne=>{const O=(0,g.join)(v,(0,C.fragmentFilename)(a,k,ne));return(0,s.existsSync)(O)?(0,s.readFileSync)(O,"utf-8").trimEnd():null}:null;u=(0,C.applyPlatformSection)(u,k,w,`${i}.toml`)}const c=[];c.push(`name = ${JSON.stringify(i)}`),c.push(`description = ${JSON.stringify(t)}`),c.push(`sandbox_mode = ${JSON.stringify(m)}`),r&&c.push(`model = ${JSON.stringify(r)}`),c.push("developer_instructions = '''"),c.push(u.replace(/'''/g,"```").trimEnd()),c.push("'''");const h=S((k,T,w)=>{k&&(c.push(""),c.push(`[mcp_servers.${T}]`),c.push(...G(w)),c.push(`startup_timeout_sec = ${V}`),c.push("required = true"),c.push('default_tools_approval_mode = 
"approve"'))},"addCycle");h((0,d.isCycleEnabled)(o,"browser"),E,(0,d.getMcpServerEntry)(e)),h((0,d.isCycleEnabled)(o,"node"),A,(0,d.getNodeDevToolsMcpEntry)(e)),h((0,d.isCycleEnabled)(o,"backend"),_,(0,d.getBackendDevToolsMcpEntry)(e)),h((0,d.isCycleEnabled)(o,"android"),I,(0,d.getAndroidDevToolsMcpEntry)(e));const M=(0,n.codexAgentTomlPath)(e,i);(0,s.mkdirSync)((0,g.dirname)(M),{recursive:!0}),(0,s.writeFileSync)(M,c.join(`
|
|
2
2
|
`)+`
|
|
3
|
-
`)}upsertSessionMcpServers(e,o,
|
|
3
|
+
`)}upsertSessionMcpServers(e,o,r){let i=e;const t=S((a,m,b)=>{if(!a)return;const u=[...G(b),`startup_timeout_sec = ${V}`,'default_tools_approval_mode = "approve"'];i=(0,n.upsertMcpServer)(i,m,u)},"addCycle");return t((0,d.isCycleEnabled)(r,"browser"),E,(0,d.getMcpServerEntry)(o)),t((0,d.isCycleEnabled)(r,"node"),A,(0,d.getNodeDevToolsMcpEntry)(o)),t((0,d.isCycleEnabled)(r,"backend"),_,(0,d.getBackendDevToolsMcpEntry)(o)),t((0,d.isCycleEnabled)(r,"android"),I,(0,d.getAndroidDevToolsMcpEntry)(o)),i}removeVerifierAgentToml(e){const o=(0,n.codexAgentTomlPath)(e,$);if((0,s.existsSync)(o))try{(0,s.unlinkSync)(o)}catch(r){p.logger.debug(`failed to remove verifier agent toml: ${r}`)}}removeScenarioAgentToml(e){const o=(0,n.codexAgentTomlPath)(e,x);if((0,s.existsSync)(o))try{(0,s.unlinkSync)(o)}catch(r){p.logger.debug(`failed to remove scenario agent toml: ${r}`)}}removeIronBeeMcpServers(e){let o=(0,n.readCodexConfigToml)(e);o&&(o=(0,n.removeMcpServer)(o,E),o=(0,n.removeMcpServer)(o,A),o=(0,n.removeMcpServer)(o,_),o=(0,n.removeMcpServer)(o,I),o=(0,n.removeAgentsTable)(o,$),o=(0,n.removeAgentsTable)(o,x),o=(0,n.removeMultiAgentV2SpawnMetadata)(o),(0,n.writeCodexConfigToml)(e,o))}migrateAwayFromUserLevel(){const e=(0,n.userCodexHooksJsonPath)();this.removeIronBeeHooks(e),this.maybeDeleteEmptyHooks(e);const o=(0,n.userCodexConfigTomlPath)();if((0,s.existsSync)(o))try{let i=(0,s.readFileSync)(o,"utf-8");const t=i;i=(0,n.removeMcpServer)(i,E),i=(0,n.removeMcpServer)(i,A),i=(0,n.removeMcpServer)(i,_),i=(0,n.removeMcpServer)(i,I),i=(0,n.removeAgentsTable)(i,$),i=(0,n.removeMultiAgentV2SpawnMetadata)(i),i!==t&&(0,s.writeFileSync)(o,i)}catch(i){p.logger.debug(`migrate: failed to clean user-level config.toml: ${i}`)}const r=(0,n.userCodexAgentTomlPath)($);if((0,s.existsSync)(r))try{(0,s.unlinkSync)(r)}catch(i){p.logger.debug(`migrate: failed to remove user-level verifier toml: ${i}`)}}writeAgentsMdBlock(e,o,r){const 
i=(0,g.join)(e,"AGENTS.md"),t=r==="main-agent"?"ironbee-verification.main.md":"ironbee-verification.md",a=(0,g.join)(__dirname,"rules",t);let m;try{m=(0,s.readFileSync)(a,"utf-8")}catch(c){p.logger.debug(`failed to read rule source ${a}: ${c}`);return}const b=P("codex");for(const c of d.ALL_CYCLES){const M=(0,d.isCycleEnabled)(o,c)?k=>{const T=(0,g.join)(b,(0,C.fragmentFilename)("rule",c,k));if(!(0,s.existsSync)(T)){const w=k.length>0?`${c}:${k}`:c;return p.logger.debug(`AGENTS.md platform-section ${w}: missing fragment ${T}, using placeholder`),null}return(0,s.readFileSync)(T,"utf-8").trimEnd()}:null;m=(0,C.applyPlatformSection)(m,c,M,"AGENTS.md")}const u=(0,s.existsSync)(i)?(0,s.readFileSync)(i,"utf-8"):"",v=(0,n.upsertAgentsMdBlock)(u,m);(0,s.writeFileSync)(i,v)}writeSkills(e,o,r,i){const t=(0,g.join)(e,".agents","skills"),a=i==="main-agent";if(o){const u=(0,g.join)(t,"ironbee-verification");(0,s.mkdirSync)(u,{recursive:!0});const v=(0,g.join)(__dirname,"skills",a?"ironbee-verification.main.md":"ironbee-verification.md");try{let c=(0,s.readFileSync)(v,"utf-8");a&&(c=this.spliceCycleFragments(c,"skill",r,"ironbee-verification/SKILL.md")),(0,s.writeFileSync)((0,g.join)(u,"SKILL.md"),c)}catch(c){p.logger.debug(`failed to copy skill ${v}: ${c}`)}}const m=(0,g.join)(t,"ironbee-verify");(0,s.mkdirSync)(m,{recursive:!0});const b=(0,g.join)(__dirname,"commands","ironbee-verify",a?"SKILL.main.md":"SKILL.md");try{let u=(0,s.readFileSync)(b,"utf-8");a&&(u=this.spliceCycleFragments(u,"command-verify",r,"ironbee-verify/SKILL.md")),(0,s.writeFileSync)((0,g.join)(m,"SKILL.md"),u)}catch(u){p.logger.debug(`failed to copy verify command ${b}: ${u}`)}for(const u of L){const v=(0,g.join)(t,u);(0,s.mkdirSync)(v,{recursive:!0});const c=(0,g.join)(__dirname,"commands",u,a?"SKILL.main.md":"SKILL.md");try{let h=(0,s.readFileSync)(c,"utf-8");a&&(h=this.spliceCycleFragments(h,"scenario",r,`${u}/SKILL.md`)),(0,s.writeFileSync)((0,g.join)(v,"SKILL.md"),h)}catch(h){p.logger.debug(`failed to 
copy scenario command ${c}: ${h}`)}}}spliceCycleFragments(e,o,r,i){const t=P("codex");let a=e;for(const m of d.ALL_CYCLES){const u=(0,d.isCycleEnabled)(r,m)?v=>{const c=(0,g.join)(t,(0,C.fragmentFilename)(o,m,v));return(0,s.existsSync)(c)?(0,s.readFileSync)(c,"utf-8").trimEnd():null}:null;a=(0,C.applyPlatformSection)(a,m,u,i)}return a}removeDir(e){if((0,s.existsSync)(e))try{(0,s.rmSync)(e,{recursive:!0,force:!0})}catch(o){p.logger.debug(`failed to remove ${e}: ${o}`)}}}function G(f){return(0,n.tomlBodyFromRecord)(f)}S(G,"mcpEntryToTomlBody");0&&(module.exports={CodexClient});
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
### android platform (enabled)
|
|
2
|
+
- **Use for**: Android app scenarios on a real device / emulator.
|
|
3
|
+
- **Server**: `android-devtools` · **scenario tools**: the `adt_scenario-*` tools
|
|
4
|
+
(`adt_scenario-add` / `-update` / `-delete` / `-list` / `-search` / `-run`).
|
|
5
|
+
- **Store**: project → `.ironbee/scenarios/adt`, global → `~/.ironbee/scenarios/adt` (the
|
|
6
|
+
server's `SCENARIOS_DIR`; you pass `scope`, the server resolves the path).
|
|
7
|
+
- Scenario **scripts** call this platform's tools via `callTool('<bare-tool>', {...})` — discover
|
|
8
|
+
the available `adt_*` tool names from your connected MCP tool schemas; don't guess.
|
|
9
|
+
|
|
10
|
+
**What to test & how — capture the SAME evidence the verifier would** (a scenario runs FOR
|
|
11
|
+
verification, so its script must collect what the android cycle collects). In the script:
|
|
12
|
+
1. **Connect + launch** — `adt_device_connect` (list targets with `adt_device_list-targets`; an
|
|
13
|
+
emulator is usually `emulator-5554`), then `adt_device_launch-app` with the package name.
|
|
14
|
+
2. Pick an **evidence path** for the changed code area:
|
|
15
|
+
- **Device-evidence path** — drive the UI to exercise the change (`adt_interaction_tap` /
|
|
16
|
+
`adt_interaction_input-text` / `adt_interaction_swipe` / `adt_interaction_scroll`; locate elements
|
|
17
|
+
with `adt_a11y_find-element` / the UI-snapshot's element refs — do NOT hand-parse the snapshot
|
|
18
|
+
TEXT with regex), then capture **BOTH**: a screenshot (`adt_content_take-screenshot`
|
|
19
|
+
**with `returnOutput: true`** — put the returned `filePath` in your result; the verifier `Read`s
|
|
20
|
+
that file to judge the pixels. **Do NOT set `includeBase64`** — a nested scenario screenshot isn't
|
|
21
|
+
surfaced as an inline image and base64 only bloats the result) **AND** a UI snapshot
|
|
22
|
+
(`adt_a11y_take-ui-snapshot`, `returnOutput: true` — its TEXT view hierarchy / labels are what the
|
|
23
|
+
verifier reads). Both are MANDATORY (visual + structural, like the browser screenshot + aria pair).
|
|
24
|
+
- **Log-evidence path** — `adt_o11y_log-read` / `adt_o11y_log-follow` (with `returnOutput: true`)
|
|
25
|
+
for the tag(s) relevant to the change; confirm expected lines appear AND no FATAL / crash (E/
|
|
26
|
+
entries) are logged for the app package.
|
|
27
|
+
|
|
28
|
+
`return` the evidence — UI-snapshot text, log lines, the screenshot `filePath`s — **plus explicit
|
|
29
|
+
pass/fail assertions**. That returned result is what `$ironbee-verify scenario:<name>` reads to judge
|
|
30
|
+
functional + structural correctness (from the text) and **visual** correctness (by `Read`ing the returned screenshot files).
|
|
31
|
+
**`android-devtools` is Android-only.**
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
### backend platform (enabled)
|
|
2
|
+
- **Use for**: backend protocol scenarios (HTTP / gRPC / GraphQL / WebSocket / DB).
|
|
3
|
+
- **Server**: `backend-devtools` · **scenario tools**: the `bedt_scenario-*` tools
|
|
4
|
+
(`bedt_scenario-add` / `-update` / `-delete` / `-list` / `-search` / `-run`).
|
|
5
|
+
- **Store**: project → `.ironbee/scenarios/bedt`, global → `~/.ironbee/scenarios/bedt` (the
|
|
6
|
+
server's `SCENARIOS_DIR`; you pass `scope`, the server resolves the path).
|
|
7
|
+
- Scenario **scripts** call this platform's tools via `callTool('<bare-tool>', {...})` — discover
|
|
8
|
+
the available `bedt_*` tool names from your connected MCP tool schemas; don't guess.
|
|
9
|
+
|
|
10
|
+
**What to test & how — capture the SAME evidence the verifier would** (a scenario runs FOR
|
|
11
|
+
verification, so its script must collect what the backend cycle collects). At least ONE evidence path
|
|
12
|
+
is required — in the script, exercise one or more:
|
|
13
|
+
- **Protocol-call** — `bedt_request_http` / `bedt_request_grpc` / `bedt_request_graphql` /
|
|
14
|
+
`bedt_request_websocket-open…` / `bedt_request_replay`; inspect the response `status` / body /
|
|
15
|
+
headers (4xx/5xx and gRPC non-OK are NORMAL results, not transport errors — decide pass/fail by what
|
|
16
|
+
the task requires). Chain POST→GET to confirm side effects.
|
|
17
|
+
- **Log-evidence** — `bedt_log_register-source` then `bedt_log_read` / `bedt_log_read-multi` /
|
|
18
|
+
`bedt_log_follow` (filter by level / pattern / trace-id) when an external driver hits the endpoint.
|
|
19
|
+
- **DB-evidence** — `bedt_db_connect` (read-only by default) then `bedt_db_query` /
|
|
20
|
+
`bedt_db_describe-table` / `bedt_db_snapshot` + `bedt_db_diff` to inspect state after a migration /
|
|
21
|
+
write.
|
|
22
|
+
|
|
23
|
+
`return` the responses / log lines / rows (capture each read with `returnOutput: true` so the data
|
|
24
|
+
reaches the script's `return`) **plus explicit pass/fail assertions** so a later verify run can judge
|
|
25
|
+
them. Runtime-agnostic —
|
|
26
|
+
works for any backend language (Node, Java, Python, Go, Rust, Ruby, .NET, …).
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
### browser platform (enabled)
|
|
2
|
+
- **Use for**: UI / frontend scenarios driven through a real browser.
|
|
3
|
+
- **Server**: `browser-devtools` · **scenario tools**: the `bdt_scenario-*` tools
|
|
4
|
+
(`bdt_scenario-add` / `-update` / `-delete` / `-list` / `-search` / `-run`).
|
|
5
|
+
- **Store**: project → `.ironbee/scenarios/bdt`, global → `~/.ironbee/scenarios/bdt` (the
|
|
6
|
+
server's `SCENARIOS_DIR`; you pass `scope`, the server resolves the path).
|
|
7
|
+
- Scenario **scripts** call this platform's tools via `callTool('<bare-tool>', {...})` — discover
|
|
8
|
+
the available `bdt_*` tool names from your connected MCP tool schemas; don't guess.
|
|
9
|
+
|
|
10
|
+
**What to test & how — capture the SAME evidence the verifier would** (a scenario runs FOR
|
|
11
|
+
verification, so its script must collect what the browser cycle collects). In the script:
|
|
12
|
+
1. **Navigate** — `bdt_navigation_go-to` to the affected page(s), then **actually interact** (click
|
|
13
|
+
buttons, fill forms, submit data, trigger the workflow that changed). A click-through that asserts
|
|
14
|
+
nothing verifies nothing — the interaction is what makes the evidence meaningful. **Target elements
|
|
15
|
+
with the `selector`/`ref` the aria-snapshot returns for each** (e.g. `getByRole(...)` or `@e12`) —
|
|
16
|
+
do NOT hand-parse the snapshot TEXT with regex/string-matching: embedded quotes or special chars in
|
|
17
|
+
labels make that brittle (it silently misses elements). This includes deriving a positional
|
|
18
|
+
**`.nth(i)`** index by parsing the snapshot — a quote or special char in any earlier label shifts
|
|
19
|
+
every index, so the click lands on the wrong element (or none). Pick each element by its own
|
|
20
|
+
`getByRole(...)`/`ref`, or scope it to the matching card/row with a CSS `:has()` selector (e.g.
|
|
21
|
+
`.product-card:has(h4:has-text('Widget')) button:has-text('Add to cart')`). NOTE: the
|
|
22
|
+
browser-devtools resolver accepts only a flat `getByXYZ(...)` expression OR a CSS string — Playwright
|
|
23
|
+
locator chaining like `.filter({ hasText })` does NOT parse. Never compute element positions from
|
|
24
|
+
snapshot text.
|
|
25
|
+
2. **Screenshot** — `bdt_content_take-screenshot` (or `includeScreenshot: true` on a nav/interaction
|
|
26
|
+
call) **with `returnOutput: true`, and put the returned `filePath` (absolute path to the saved PNG)
|
|
27
|
+
in your result**. The later verifier opens that file with its `Read` tool to judge the pixels
|
|
28
|
+
(readability, layout, cut-off content, expected render). **Do NOT set `includeBase64`** — a nested
|
|
29
|
+
scenario screenshot is NOT surfaced as an inline MCP image (`scenario-run` strips nested image data)
|
|
30
|
+
and base64 only bloats the result; the returned `filePath` is how visual judging works.
|
|
31
|
+
3. **Accessibility** — `bdt_a11y_take-aria-snapshot` (or `includeSnapshot: true`), called with
|
|
32
|
+
`returnOutput: true` — the snapshot TEXT is what the verifier reads to judge page structure.
|
|
33
|
+
4. **Console** — `bdt_o11y_get-console-messages` with `returnOutput: true` to surface errors.
|
|
34
|
+
|
|
35
|
+
`return` the evidence — aria-snapshot text, page text (`bdt_content_get-as-text`), console errors, the
|
|
36
|
+
screenshot `filePath`s — **plus explicit pass/fail assertions**. That returned result is what
|
|
37
|
+
`$ironbee-verify scenario:<name>` reads to judge the run: functional + structural from the text, and
|
|
38
|
+
**visual by `Read`ing the returned screenshot files**. Capture the evidence AFTER the interactions
|
|
39
|
+
whose state you want to assert; for an intermediate state (a modal that opens then closes) capture at
|
|
40
|
+
that point too.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
### node platform (enabled)
|
|
2
|
+
- **Use for**: Node.js runtime-debug scenarios (V8 inspector probes / logs).
|
|
3
|
+
- **Server**: `node-devtools` · **scenario tools**: the `ndt_scenario-*` tools
|
|
4
|
+
(`ndt_scenario-add` / `-update` / `-delete` / `-list` / `-search` / `-run`).
|
|
5
|
+
- **Store**: project → `.ironbee/scenarios/ndt`, global → `~/.ironbee/scenarios/ndt` (the
|
|
6
|
+
server's `SCENARIOS_DIR`; you pass `scope`, the server resolves the path).
|
|
7
|
+
- Scenario **scripts** call this platform's tools via `callTool('<bare-tool>', {...})` — discover
|
|
8
|
+
the available `ndt_*` tool names from your connected MCP tool schemas; don't guess.
|
|
9
|
+
|
|
10
|
+
**What to test & how — capture the SAME evidence the verifier would** (a scenario runs FOR
|
|
11
|
+
verification, so its script must collect what the node cycle collects). In the script:
|
|
12
|
+
1. **Connect** — `ndt_debug_connect` (one of `pid` / `processName` / `containerName` /
|
|
13
|
+
`inspectorPort` / `wsUrl`).
|
|
14
|
+
2. Pick an **evidence path** for the changed code path:
|
|
15
|
+
- **Probe path** (proves the code path executed) — set a probe at the changed location
|
|
16
|
+
(`ndt_debug_put-tracepoint` / `ndt_debug_put-logpoint` / `ndt_debug_put-exceptionpoint`),
|
|
17
|
+
**exercise the path** (drive it via a request / CLI / another platform's call — without this the
|
|
18
|
+
probe never fires), then read `ndt_debug_get-probe-snapshots`; at least one probe must come back
|
|
19
|
+
`triggered: true`.
|
|
20
|
+
- **Log path** (proves no errors during execution) — exercise the path, then `ndt_debug_get-logs`
|
|
21
|
+
filtered to the error level (no ERROR-level entries = pass).
|
|
22
|
+
|
|
23
|
+
`return` the probe snapshots / logs (read them with `returnOutput: true` so their data reaches the
|
|
24
|
+
script's `return`) **plus explicit pass/fail assertions** so a later verify run can judge them.
|
|
25
|
+
**`node-devtools` is
|
|
26
|
+
Node.js ONLY** — never author `ndt_*` scenarios for Java / Python / Go / Rust / Ruby / .NET / PHP
|
|
27
|
+
backends; use the **backend** platform for those.
|
|
@@ -1,48 +1,54 @@
|
|
|
1
|
-
"use strict";var k=Object.defineProperty;var E=Object.getOwnPropertyDescriptor;var L=Object.getOwnPropertyNames;var W=Object.prototype.hasOwnProperty;var o=(n,t)=>k(n,"name",{value:t,configurable:!0});var M=(n,t)=>{for(var e in t)k(n,e,{get:t[e],enumerable:!0})},P=(n,t,e,s)=>{if(t&&typeof t=="object"||typeof t=="function")for(let r of L(t))!W.call(n,r)&&r!==e&&k(n,r,{get:()=>t[r],enumerable:!(s=E(t,r))||s.enumerable});return n};var J=n=>P(k({},"__esModule",{value:!0}),n);var
|
|
1
|
+
"use strict";var k=Object.defineProperty;var E=Object.getOwnPropertyDescriptor;var L=Object.getOwnPropertyNames;var W=Object.prototype.hasOwnProperty;var o=(n,t)=>k(n,"name",{value:t,configurable:!0});var M=(n,t)=>{for(var e in t)k(n,e,{get:t[e],enumerable:!0})},P=(n,t,e,s)=>{if(t&&typeof t=="object"||typeof t=="function")for(let r of L(t))!W.call(n,r)&&r!==e&&k(n,r,{get:()=>t[r],enumerable:!(s=E(t,r))||s.enumerable});return n};var J=n=>P(k({},"__esModule",{value:!0}),n);var mn={};M(mn,{AGENTS_MD_END_MARKER:()=>x,AGENTS_MD_START_MARKER:()=>I,canonicalizeCodexServerName:()=>v,canonicalizeCodexToolName:()=>C,classifyCodexTool:()=>V,codexAgentTomlPath:()=>rn,codexConfigTomlPath:()=>T,codexHooksJsonPath:()=>cn,decodeJwtPayload:()=>A,ensureFeaturesHooksTrue:()=>Z,ensureMultiAgentV2SpawnMetadataExposed:()=>Q,extractBashBinary:()=>O,extractCodexMcpServer:()=>S,extractCodexToolInput:()=>D,extractTomlTopLevelModel:()=>sn,findTomlSection:()=>_,normalizeCodexToolName:()=>R,parseCodexHookStdin:()=>B,readCodexConfigToml:()=>ln,removeAgentsTable:()=>en,removeFeaturesHooks:()=>q,removeMcpServer:()=>nn,removeMultiAgentV2SpawnMetadata:()=>Y,resolveCodexUsage:()=>U,stripAgentsMdBlock:()=>an,tomlBodyFromRecord:()=>on,upsertAgentsMdBlock:()=>un,upsertAgentsTable:()=>tn,upsertMcpServer:()=>N,userCodexAgentTomlPath:()=>pn,userCodexConfigTomlPath:()=>gn,userCodexHooksJsonPath:()=>fn,writeCodexConfigToml:()=>dn});module.exports=J(mn);var m=require("fs"),b=require("os"),p=require("path"),y=require("../../lib/logger");function B(n){try{return JSON.parse(n)}catch(t){return y.logger.debug(`failed to parse Codex hook stdin: ${t}`),{}}}o(B,"parseCodexHookStdin");const h="mcp__",z={browser_devtools:"browser-devtools",node_devtools:"node-devtools",backend_devtools:"backend-devtools",android_devtools:"android-devtools"},H=["bdt_","ndt_","bedt_","adt_"];function v(n){return z[n]??n}o(v,"canonicalizeCodexServerName");function C(n){if(!H.some(e=>n.startsWith(e)))return n;const t=n.split("_");return 
t.length>=3&&t[1]==="scenario"?`${t[0]}_scenario-${t.slice(2).join("-")}`:t.length<=3?n:`${t[0]}_${t[1]}_${t.slice(2).join("-")}`}o(C,"canonicalizeCodexToolName");const F=[["bdt_","browser-devtools"],["ndt_","node-devtools"],["bedt_","backend-devtools"],["adt_","android-devtools"]];function S(n){if(!n)return null;if(n.startsWith(h)){const t=n.slice(h.length),e=t.indexOf("__");return e<0?null:v(t.slice(0,e))}for(const[t,e]of F)if(n.startsWith(t))return e;return null}o(S,"extractCodexMcpServer");function R(n){return n==="exec_command"?"Bash":n==="apply_patch"?"Edit":n==="update_plan"?"TodoWrite":n==="read_file"?"Read":n==="web_search"?"WebSearch":n==="web_fetch"?"WebFetch":n}o(R,"normalizeCodexToolName");function V(n){if(!n)return{tool_type:null,tool_name:"",mcp_server:null};if(n.startsWith(h)){const s=n.slice(h.length),r=s.indexOf("__");if(r>=0){const i=s.slice(0,r),u=v(i),a=s.slice(r+2);return{tool_type:"mcp",tool_name:C(a),mcp_server:u}}}const t=S(n);if(t!==null&&!n.startsWith(h))return{tool_type:"mcp",tool_name:C(n),mcp_server:t};const e=R(n);return n==="spawn_agent"||n==="wait_agent"||n==="close_agent"?{tool_type:"sub_agent",tool_name:e,mcp_server:null}:{tool_type:null,tool_name:e,mcp_server:null}}o(V,"classifyCodexTool");function D(n,t){if(!n||t===void 0)return;if(n==="apply_patch"){if(typeof t=="string")return{input_size:t.length};if(typeof t=="object"&&t!==null){const r=t,i=r.command??r.input;if(typeof i=="string")return{input_size:i.length}}return{input_size:void 0}}if(typeof t!="object"||t===null)return;const e=t;if(R(n)==="Bash"){const r=e.cmd??e.command,i=typeof r=="string"?O(r):void 0;return{workdir:e.workdir,binary:i}}if(n==="update_plan"){const r=e.explanation,i=e.plan;return{explanation:typeof r=="string"?r:void 0,plan_step_count:Array.isArray(i)?i.length:void 0}}if(n==="spawn_agent"){const r=e.agent_type,i=e.message,u=e.fork_context;return{agent_type:typeof r=="string"?r:void 0,message_size:typeof i=="string"?i.length:void 0,fork_context:typeof 
u=="boolean"?u:void 0}}if(n==="wait_agent"){const r=e.targets,i=e.timeout_ms;return{target_count:Array.isArray(r)?r.length:void 0,timeout_ms:typeof i=="number"?i:void 0}}if(n==="close_agent"){const r=e.target;return{target:typeof r=="string"?r:void 0}}if(n==="view_image"){const r=e.path,i=e.detail;return{path:typeof r=="string"?r:void 0,detail:typeof i=="string"?i:void 0}}if(n==="write_stdin"){const r=e.session_id,i=e.chars,u=e.yield_time_ms,a=e.max_output_tokens;return{session_id:typeof r=="number"?r:void 0,chars_size:typeof i=="string"?i.length:void 0,yield_time_ms:typeof u=="number"?u:void 0,max_output_tokens:typeof a=="number"?a:void 0}}if(n.startsWith(h)||S(n)!==null){if("_metadata"in e){const{_metadata:r,...i}=e;return i}return e}}o(D,"extractCodexToolInput");function O(n){const t=n.trim();if(!t)return;const e=t.split(/\s+/);for(const s of e)if(!/^[A-Za-z_][A-Za-z0-9_]*=/.test(s)&&s.length>0)return s.split(/[\\/]/).pop()??s}o(O,"extractBashBinary");function A(n){const t=n.split(".");if(t.length!==3)return null;try{const e=Buffer.from(t[1],"base64url").toString("utf-8"),s=JSON.parse(e);return typeof s!="object"||s===null?null:s}catch{return null}}o(A,"decodeJwtPayload");function K(n){if(typeof n=="string"){const t=A(n);return t?{email:t.email,planType:t["https://api.openai.com/auth"]?.chatgpt_plan_type}:{}}if(typeof n=="object"&&n!==null){const t=n;return{email:t.email,planType:t.chatgpt_plan_type}}return{}}o(K,"extractIdTokenFields");function U(n){const t=n??(0,p.join)((0,b.homedir)(),".codex","auth.json");if(!(0,m.existsSync)(t))return{};try{const e=JSON.parse((0,m.readFileSync)(t,"utf-8")),s=e.auth_mode==="chatgpt"||e.auth_mode==="swic"?"subscription":e.auth_mode==="api"?"api":void 0,{email:r,planType:i}=K(e.tokens?.id_token);return{usageType:s,usagePlan:i?.toLowerCase(),userEmail:r}}catch(e){return y.logger.debug(`failed to parse ${t}: ${e}`),{}}}o(U,"resolveCodexUsage");function X(n,t){return n.trim()===`[${t}]`}o(X,"tableHeaderLineExact");function 
G(n){const t=n.trim();return/^\[\[?[^\]]+\]\]?$/.test(t)}o(G,"isAnyTableHeader");function j(n){const e=n.trim().match(/^\[([^[\]]+)\]$/);return e===null?null:e[1]}o(j,"tableHeaderName");function _(n,t){let e=-1;for(let r=0;r<n.length;r+=1)if(X(n[r],t)){e=r;break}if(e<0)return null;let s=n.length;for(let r=e+1;r<n.length;r+=1)if(G(n[r])){s=r;break}return{startIdx:e,endIdx:s}}o(_,"findTomlSection");function $(n){const t=[...n];for(;t.length>0&&t[t.length-1].trim()==="";)t.pop();return t}o($,"trimTrailingBlanks");function w(n,t){return n.length===0?t.join(`
|
|
2
2
|
`)+`
|
|
3
3
|
`:n.replace(/\n+$/,"")+`
|
|
4
4
|
|
|
5
5
|
`+t.join(`
|
|
6
6
|
`)+`
|
|
7
7
|
`}o(w,"appendBlockWithSeparator");function Z(n){const t=n.split(`
|
|
8
|
-
`),e=
|
|
9
|
-
`);return
|
|
10
|
-
`)?
|
|
8
|
+
`),e=_(t,"features");if(e===null)return w(n,["[features]","hooks = true"]);const s=t.slice(e.startIdx+1,e.endIdx),r=/^\s*hooks\s*=/;let i=!1;for(let l=0;l<s.length;l+=1)if(r.test(s[l])){s[l]="hooks = true",i=!0;break}i||s.unshift("hooks = true");const u=$(s),d=[...t.slice(0,e.startIdx),t[e.startIdx],...u,...e.endIdx<t.length?[""]:[],...t.slice(e.endIdx)].join(`
|
|
9
|
+
`);return d.endsWith(`
|
|
10
|
+
`)?d:d+`
|
|
11
11
|
`}o(Z,"ensureFeaturesHooksTrue");function q(n){const t=n.split(`
|
|
12
|
-
`),e=
|
|
13
|
-
`)
|
|
14
|
-
|
|
15
|
-
`
|
|
16
|
-
`)
|
|
12
|
+
`),e=_(t,"features");if(e===null)return n;const s=t.slice(e.startIdx+1,e.endIdx),r=/^\s*hooks\s*=\s*true\s*$/,i=s.filter(l=>!r.test(l));if(i.length===s.length)return n;const u=i.some(l=>l.trim().length>0);let a;if(u){const l=$(i);a=[...t.slice(0,e.startIdx),t[e.startIdx],...l,...e.endIdx<t.length?[""]:[],...t.slice(e.endIdx)]}else a=[...t.slice(0,e.startIdx),...t.slice(e.endIdx)];const d=a.join(`
|
|
13
|
+
`).replace(/\n{3,}/g,`
|
|
14
|
+
|
|
15
|
+
`);return d.endsWith(`
|
|
16
|
+
`)?d:d+`
|
|
17
|
+
`}o(q,"removeFeaturesHooks");function Q(n){const t=n.split(`
|
|
18
|
+
`),e=_(t,"features.multi_agent_v2");if(e===null)return w(n,["[features.multi_agent_v2]","hide_spawn_agent_metadata = false"]);const s=t.slice(e.startIdx+1,e.endIdx),r=/^\s*hide_spawn_agent_metadata\s*=/;let i=!1;for(let l=0;l<s.length;l+=1)if(r.test(s[l])){s[l]="hide_spawn_agent_metadata = false",i=!0;break}i||s.unshift("hide_spawn_agent_metadata = false");const u=$(s),d=[...t.slice(0,e.startIdx),t[e.startIdx],...u,...e.endIdx<t.length?[""]:[],...t.slice(e.endIdx)].join(`
|
|
19
|
+
`);return d.endsWith(`
|
|
20
|
+
`)?d:d+`
|
|
21
|
+
`}o(Q,"ensureMultiAgentV2SpawnMetadataExposed");function Y(n){const t=n.split(`
|
|
22
|
+
`),e=_(t,"features.multi_agent_v2");if(e===null)return n;const s=t.slice(e.startIdx+1,e.endIdx).filter(a=>a.trim().length>0);if(!(s.length===1&&/^\s*hide_spawn_agent_metadata\s*=\s*false\s*$/.test(s[0])))return n;const u=[...t.slice(0,e.startIdx),...t.slice(e.endIdx)].join(`
|
|
17
23
|
`).replace(/\n{3,}/g,`
|
|
18
24
|
|
|
19
25
|
`);return u.endsWith(`
|
|
20
26
|
`)?u:u+`
|
|
21
|
-
`}o(
|
|
22
|
-
`),i=
|
|
23
|
-
`);return
|
|
24
|
-
`)?
|
|
25
|
-
`}o(
|
|
26
|
-
`),i=[];let u=!1,a=!1;for(const
|
|
27
|
+
`}o(Y,"removeMultiAgentV2SpawnMetadata");function N(n,t,e){const s=`mcp_servers.${t}`,r=n.split(`
|
|
28
|
+
`),i=_(r,s),a=[`[${s}]`,...e];if(i===null)return w(n,a);const d=r.slice(0,i.startIdx),l=r.slice(i.endIdx),c=[...d,...a,...l.length>0?[""]:[],...l].join(`
|
|
29
|
+
`);return c.endsWith(`
|
|
30
|
+
`)?c:c+`
|
|
31
|
+
`}o(N,"upsertMcpServer");function nn(n,t){const e=`mcp_servers.${t}`,s=`${e}.`,r=n.split(`
|
|
32
|
+
`),i=[];let u=!1,a=!1;for(const c of r){const g=j(c);if(g!==null&&(u=g===e||g.startsWith(s),u)){a=!0;continue}u||i.push(c)}if(!a)return n;const d=[];let l=!1;for(const c of i){const g=c.trim().length===0;g&&l||(d.push(c),l=g)}const f=d.join(`
|
|
27
33
|
`);return f.endsWith(`
|
|
28
34
|
`)||f.length===0?f:f+`
|
|
29
|
-
`}o(
|
|
30
|
-
`),i=
|
|
31
|
-
`);return
|
|
32
|
-
`)?
|
|
33
|
-
`}o(
|
|
34
|
-
`),i=[];let u=!1,a=!1;for(const
|
|
35
|
+
`}o(nn,"removeMcpServer");function tn(n,t,e){const s=`agents.${t}`,r=n.split(`
|
|
36
|
+
`),i=_(r,s),a=[`[${s}]`,...e];if(i===null)return w(n,a);const d=r.slice(0,i.startIdx),l=r.slice(i.endIdx),c=[...d,...a,...l.length>0?[""]:[],...l].join(`
|
|
37
|
+
`);return c.endsWith(`
|
|
38
|
+
`)?c:c+`
|
|
39
|
+
`}o(tn,"upsertAgentsTable");function en(n,t){const e=`agents.${t}`,s=`${e}.`,r=n.split(`
|
|
40
|
+
`),i=[];let u=!1,a=!1;for(const c of r){const g=j(c);if(g!==null&&(u=g===e||g.startsWith(s),u)){a=!0;continue}u||i.push(c)}if(!a)return n;const d=[];let l=!1;for(const c of i){const g=c.trim().length===0;g&&l||(d.push(c),l=g)}const f=d.join(`
|
|
35
41
|
`);return f.endsWith(`
|
|
36
42
|
`)||f.length===0?f:f+`
|
|
37
|
-
`}o(
|
|
38
|
-
`)){const e=t.trim();if(e.startsWith("["))break;const s=e.match(/^model\s*=\s*"([^"]*)"/);if(s&&s[1].length>0)return s[1]}return null}o(
|
|
43
|
+
`}o(en,"removeAgentsTable");function rn(n,t){return(0,p.join)(n,".codex","agents",`${t}.toml`)}o(rn,"codexAgentTomlPath");function sn(n){for(const t of n.split(`
|
|
44
|
+
`)){const e=t.trim();if(e.startsWith("["))break;const s=e.match(/^model\s*=\s*"([^"]*)"/);if(s&&s[1].length>0)return s[1]}return null}o(sn,"extractTomlTopLevelModel");function on(n){const t=[];for(const[e,s]of Object.entries(n))if(s!=null){if(typeof s=="string")t.push(`${e} = ${JSON.stringify(s)}`);else if(typeof s=="number"||typeof s=="boolean")t.push(`${e} = ${s}`);else if(Array.isArray(s)){const r=s.map(i=>typeof i=="string"?JSON.stringify(i):typeof i=="number"||typeof i=="boolean"?String(i):JSON.stringify(i));t.push(`${e} = [${r.join(", ")}]`)}else if(typeof s=="object"){const r=s,i=[];for(const[u,a]of Object.entries(r))a!=null&&(typeof a=="string"?i.push(`${u} = ${JSON.stringify(a)}`):typeof a=="number"||typeof a=="boolean"?i.push(`${u} = ${a}`):i.push(`${u} = ${JSON.stringify(a)}`));t.push(`${e} = { ${i.join(", ")} }`)}}return t}o(on,"tomlBodyFromRecord");const I="<!-- ironbee:start -->",x="<!-- ironbee:end -->";function un(n,t){const e=`${I}
|
|
39
45
|
${t.trimEnd()}
|
|
40
|
-
${x}`,s=n.indexOf(
|
|
46
|
+
${x}`,s=n.indexOf(I),r=n.indexOf(x);if(s>=0&&r>s){const i=n.slice(0,s),u=n.slice(r+x.length);return i+e+u}return n.trim().length===0?e+`
|
|
41
47
|
`:n.trimEnd()+`
|
|
42
48
|
|
|
43
49
|
`+e+`
|
|
44
|
-
`}o(
|
|
50
|
+
`}o(un,"upsertAgentsMdBlock");function an(n){const t=n.indexOf(I),e=n.indexOf(x);if(t<0||e<t)return n.trim().length===0?null:n;const s=n.slice(0,t).trimEnd(),r=n.slice(e+x.length).trimStart(),i=s+(s.length>0&&r.length>0?`
|
|
45
51
|
|
|
46
52
|
`:"")+r;return i.trim().length===0?null:i.endsWith(`
|
|
47
53
|
`)?i:i+`
|
|
48
|
-
`}o(
|
|
54
|
+
`}o(an,"stripAgentsMdBlock");function ln(n){const t=T(n);if(!(0,m.existsSync)(t))return"";try{return(0,m.readFileSync)(t,"utf-8")}catch(e){return y.logger.debug(`failed to read ${t}: ${e}`),""}}o(ln,"readCodexConfigToml");function dn(n,t){const e=T(n);try{(0,m.writeFileSync)(e,t)}catch(s){y.logger.debug(`failed to write ${e}: ${s}`)}}o(dn,"writeCodexConfigToml");function T(n){return(0,p.join)(n,".codex","config.toml")}o(T,"codexConfigTomlPath");function cn(n){return(0,p.join)(n,".codex","hooks.json")}o(cn,"codexHooksJsonPath");function gn(){return(0,p.join)((0,b.homedir)(),".codex","config.toml")}o(gn,"userCodexConfigTomlPath");function fn(){return(0,p.join)((0,b.homedir)(),".codex","hooks.json")}o(fn,"userCodexHooksJsonPath");function pn(n){return(0,p.join)((0,b.homedir)(),".codex","agents",`${n}.toml`)}o(pn,"userCodexAgentTomlPath");0&&(module.exports={AGENTS_MD_END_MARKER,AGENTS_MD_START_MARKER,canonicalizeCodexServerName,canonicalizeCodexToolName,classifyCodexTool,codexAgentTomlPath,codexConfigTomlPath,codexHooksJsonPath,decodeJwtPayload,ensureFeaturesHooksTrue,ensureMultiAgentV2SpawnMetadataExposed,extractBashBinary,extractCodexMcpServer,extractCodexToolInput,extractTomlTopLevelModel,findTomlSection,normalizeCodexToolName,parseCodexHookStdin,readCodexConfigToml,removeAgentsTable,removeFeaturesHooks,removeMcpServer,removeMultiAgentV2SpawnMetadata,resolveCodexUsage,stripAgentsMdBlock,tomlBodyFromRecord,upsertAgentsMdBlock,upsertAgentsTable,upsertMcpServer,userCodexAgentTomlPath,userCodexConfigTomlPath,userCodexHooksJsonPath,writeCodexConfigToml});
|