@ironbee-ai/cli 0.29.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/analytics/claude/emit.js +1 -1
- package/dist/analytics/claude/state.js +1 -1
- package/dist/analytics/codex/events-emit.js +2 -2
- package/dist/analytics/codex/subagent-transcripts.js +3 -3
- package/dist/clients/claude/agents/ironbee-scenario.md +191 -0
- package/dist/clients/claude/agents/ironbee-verifier.md +22 -5
- package/dist/clients/claude/commands/ironbee-manage-scenario.md +36 -0
- package/dist/clients/claude/commands/ironbee-search-scenario.md +22 -0
- package/dist/clients/claude/commands/ironbee-sync-scenario.md +31 -0
- package/dist/clients/claude/commands/ironbee-verify.md +13 -12
- package/dist/clients/claude/hooks/activity-end.js +1 -1
- package/dist/clients/claude/hooks/activity-start.js +1 -1
- package/dist/clients/claude/hooks/clear-verdict.js +1 -1
- package/dist/clients/claude/hooks/require-verdict.js +2 -2
- package/dist/clients/claude/hooks/require-verification.js +3 -3
- package/dist/clients/claude/hooks/session-end.js +1 -1
- package/dist/clients/claude/hooks/session-start.js +4 -4
- package/dist/clients/claude/hooks/session-status.js +2 -2
- package/dist/clients/claude/hooks/subagent-start.js +1 -1
- package/dist/clients/claude/hooks/subagent-stop.js +1 -1
- package/dist/clients/claude/hooks/track-action-monitor.js +1 -1
- package/dist/clients/claude/hooks/track-action.js +1 -1
- package/dist/clients/claude/hooks/verify-gate.js +4 -4
- package/dist/clients/claude/index.js +4 -4
- package/dist/clients/claude/platforms/scenario.android.md +32 -0
- package/dist/clients/claude/platforms/scenario.backend.md +26 -0
- package/dist/clients/claude/platforms/scenario.browser.md +41 -0
- package/dist/clients/claude/platforms/scenario.node.md +27 -0
- package/dist/clients/claude/platforms/skill.android.md +4 -0
- package/dist/clients/claude/process-analytics.js +1 -1
- package/dist/clients/claude/statusline-toggle.js +2 -2
- package/dist/clients/claude/trust.js +1 -0
- package/dist/clients/codex/agents/ironbee-scenario.md +179 -0
- package/dist/clients/codex/agents/ironbee-verifier.md +22 -5
- package/dist/clients/codex/commands/ironbee-manage-scenario/SKILL.main.md +102 -0
- package/dist/clients/codex/commands/ironbee-manage-scenario/SKILL.md +38 -0
- package/dist/clients/codex/commands/ironbee-search-scenario/SKILL.main.md +37 -0
- package/dist/clients/codex/commands/ironbee-search-scenario/SKILL.md +23 -0
- package/dist/clients/codex/commands/ironbee-sync-scenario/SKILL.main.md +55 -0
- package/dist/clients/codex/commands/ironbee-sync-scenario/SKILL.md +33 -0
- package/dist/clients/codex/commands/ironbee-verify/SKILL.main.md +12 -3
- package/dist/clients/codex/commands/ironbee-verify/SKILL.md +4 -3
- package/dist/clients/codex/hooks/activity-end.js +1 -1
- package/dist/clients/codex/hooks/activity-start.js +1 -1
- package/dist/clients/codex/hooks/clear-verdict.js +3 -3
- package/dist/clients/codex/hooks/require-verdict.js +2 -2
- package/dist/clients/codex/hooks/require-verification.js +3 -3
- package/dist/clients/codex/hooks/session-start.js +3 -3
- package/dist/clients/codex/hooks/subagent-start.js +1 -1
- package/dist/clients/codex/hooks/subagent-stop.js +1 -1
- package/dist/clients/codex/hooks/track-action-monitor.js +1 -1
- package/dist/clients/codex/hooks/track-action-pre.js +1 -1
- package/dist/clients/codex/hooks/track-action.js +1 -1
- package/dist/clients/codex/hooks/verify-gate.js +1 -1
- package/dist/clients/codex/index.js +2 -2
- package/dist/clients/codex/platforms/command-verify.android.md +1 -0
- package/dist/clients/codex/platforms/rule.android.md +2 -1
- package/dist/clients/codex/platforms/scenario.android.md +32 -0
- package/dist/clients/codex/platforms/scenario.backend.md +26 -0
- package/dist/clients/codex/platforms/scenario.browser.md +40 -0
- package/dist/clients/codex/platforms/scenario.node.md +27 -0
- package/dist/clients/codex/platforms/skill.android.md +4 -0
- package/dist/clients/codex/process-analytics.js +2 -2
- package/dist/clients/codex/thread-map.js +1 -1
- package/dist/clients/codex/util.js +44 -31
- package/dist/clients/cursor/commands/ironbee-manage-scenario/SKILL.md +100 -0
- package/dist/clients/cursor/commands/ironbee-search-scenario/SKILL.md +34 -0
- package/dist/clients/cursor/commands/ironbee-sync-scenario/SKILL.md +54 -0
- package/dist/clients/cursor/commands/ironbee-verify/SKILL.md +2 -1
- package/dist/clients/cursor/hooks/activity-end.js +1 -1
- package/dist/clients/cursor/hooks/activity-start.js +1 -1
- package/dist/clients/cursor/hooks/clear-verdict.js +1 -1
- package/dist/clients/cursor/hooks/require-verdict.js +2 -2
- package/dist/clients/cursor/hooks/require-verification.js +3 -3
- package/dist/clients/cursor/hooks/session-end.js +1 -1
- package/dist/clients/cursor/hooks/session-start.js +4 -4
- package/dist/clients/cursor/hooks/track-action-monitor.js +1 -1
- package/dist/clients/cursor/hooks/track-action.js +1 -1
- package/dist/clients/cursor/hooks/verify-gate.js +1 -1
- package/dist/clients/cursor/index.js +1 -1
- package/dist/clients/cursor/platforms/command-verify.android.md +1 -0
- package/dist/clients/cursor/platforms/rule.android.md +2 -1
- package/dist/clients/cursor/platforms/scenario.android.md +32 -0
- package/dist/clients/cursor/platforms/scenario.backend.md +26 -0
- package/dist/clients/cursor/platforms/scenario.browser.md +40 -0
- package/dist/clients/cursor/platforms/scenario.node.md +27 -0
- package/dist/clients/cursor/platforms/skill.android.md +4 -0
- package/dist/commands/config.js +1 -1
- package/dist/commands/hook.js +10 -10
- package/dist/commands/import.js +3 -3
- package/dist/commands/process-job-file.js +1 -1
- package/dist/commands/queue.js +16 -16
- package/dist/commands/scenario.js +1 -0
- package/dist/commands/status.js +1 -1
- package/dist/commands/uninstall.js +1 -1
- package/dist/commands/verify.js +2 -2
- package/dist/hooks/core/actions.js +7 -7
- package/dist/hooks/core/nested-tools.js +1 -1
- package/dist/hooks/core/scenario-tools.js +1 -0
- package/dist/hooks/core/session-state.js +1 -1
- package/dist/hooks/core/verification-context.js +8 -8
- package/dist/import/marker.js +2 -2
- package/dist/import/skip.js +1 -1
- package/dist/index.js +1 -1
- package/dist/lib/config.js +1 -1
- package/dist/lib/git.js +1 -1
- package/dist/lib/install-version.js +1 -1
- package/dist/lib/platform-section.js +3 -3
- package/dist/lib/runtime-paths.js +1 -0
- package/dist/lib/scenario-staleness.js +1 -0
- package/dist/otel/claude/daemon/process.js +1 -1
- package/dist/otel/claude/daemon/reprocess.js +1 -1
- package/dist/otel/claude/daemon/response-usage.js +2 -2
- package/dist/queue/drain.js +1 -1
- package/dist/queue/flush.js +1 -1
- package/dist/queue/paths.js +1 -1
- package/dist/queue/process-file.js +2 -2
- package/dist/queue/spawn.js +1 -1
- package/dist/tui/config/schema.js +1 -1
- package/dist/tui/queue/read.js +4 -4
- package/dist/tui/scenarios/area.js +2 -0
- package/dist/tui/sessions/read.js +2 -2
- package/dist/tui/shell/registry.js +1 -1
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";var t=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var
|
|
1
|
+
"use strict";
// ---------------------------------------------------------------------------
// CommonJS interop scaffolding: named exports are published as live getters on
// an object that carries a non-enumerable `__esModule: true` marker.
// ---------------------------------------------------------------------------
const setProperty = Object.defineProperty;
const describeProperty = Object.getOwnPropertyDescriptor;
const listOwnNames = Object.getOwnPropertyNames;
const ownsProperty = Object.prototype.hasOwnProperty;

/** Defines one enumerable getter on `target` for every key of `getters`. */
const registerGetters = (target, getters) => {
  for (const exportName in getters) {
    setProperty(target, exportName, { get: getters[exportName], enumerable: true });
  }
};

/**
 * Re-exposes every own property of `source` on `target` through a forwarding getter,
 * skipping keys `target` already owns and the optional `skipKey`.
 */
const forwardProperties = (target, source, skipKey, descriptor) => {
  if ((source && typeof source === "object") || typeof source === "function") {
    for (const key of listOwnNames(source)) {
      if (ownsProperty.call(target, key) || key === skipKey) {
        continue;
      }
      setProperty(target, key, {
        get: () => source[key],
        enumerable: !(descriptor = describeProperty(source, key)) || descriptor.enumerable,
      });
    }
  }
  return target;
};

/** Wraps an export map in a fresh object flagged with `__esModule`. */
const toCommonJS = (exportMap) =>
  forwardProperties(setProperty({}, "__esModule", { value: true }), exportMap);

const exportMap = {};
registerGetters(exportMap, {
  claudeProcessAnalyticsCommand: () => claudeProcessAnalyticsCommand,
});
module.exports = toCommonJS(exportMap);

const commander = require("commander");
const loggerLib = require("../../lib/logger");
const emitLib = require("../../analytics/claude/emit");
const analyticsLogLib = require("../../analytics/claude/log");
const runtimePaths = require("../../lib/runtime-paths");

/**
 * `process-analytics` sub-command.
 *
 * Points the logger at the session log file, then calls `emitAnalytics` once for the given
 * project/session/trigger and records start, completion, or failure in the analytics log.
 * Any `--trigger` value other than "SessionEnd" is treated as "Stop"; a missing
 * `--transcript-source` defaults to "claude-code". Errors from `emitAnalytics` are logged,
 * never rethrown.
 */
const claudeProcessAnalyticsCommand = new commander.Command("process-analytics")
  .description("Internal worker \u2014 project + emit a session_analytics snapshot for one Claude trigger")
  .requiredOption("--project <dir>", "project directory (where .ironbee/sessions/<sid>/ lives)")
  .requiredOption("--session <id>", "session id")
  .requiredOption("--trigger <type>", "Stop | SessionEnd")
  .option("--end-reason <reason>", "SessionEnd reason (optional)")
  .option("--transcript-source <src>", "claude-code | cursor | missing")
  .action(async (options) => {
    const triggerType = options.trigger === "SessionEnd" ? "SessionEnd" : "Stop";
    const transcriptSource = options.transcriptSource ?? "claude-code";

    // Route logger output to this session's log file before doing any work.
    (0, loggerLib.setLogFile)((0, runtimePaths.sessionLogFile)(options.project, options.session));

    const analyticsLog = new analyticsLogLib.AnalyticsLog(options.project, options.session);
    const endReasonSuffix = options.endReason ? ` end_reason=${options.endReason}` : "";
    analyticsLog.info(`worker: claude process-analytics start (trigger=${triggerType} session=${options.session}${endReasonSuffix})`);

    try {
      const outcome = await (0, emitLib.emitAnalytics)({
        projectDir: options.project,
        sessionId: options.session,
        triggerType,
        endReason: options.endReason,
        transcriptSource,
        log: analyticsLog,
      });
      analyticsLog.info(`worker: claude process-analytics done (status=${outcome.status} reason=${outcome.reason})`);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      loggerLib.logger.debug(`claude process-analytics: unexpected error: ${detail}`);
      analyticsLog.error(`worker: unexpected error: ${detail}`);
    }
  });

// Never executed: a static hint so CommonJS export scanners can see the named export.
0 && (module.exports = { claudeProcessAnalyticsCommand });
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";var
|
|
2
|
-
`)}u(J,"writeConfigFile");function O(e,n,
|
|
1
|
+
/*
 * Statusline toggle helpers (bundled, minified CommonJS output).
 *
 * Exports, wired as live getters on module.exports:
 *   - applyStatusLineToggle  (minified name `O`)
 *   - syncChainedStatusLine  (minified name `P`)
 *
 * readConfigFile (`R`)(path)
 *   Returns {} when `path` does not exist, otherwise the parsed JSON content. A read or parse
 *   failure is logged at debug level and rethrown as Error("Config at <path> is not valid JSON: ...").
 *
 * writeConfigFile (`J`)(path, data)
 *   Creates the parent directory (join(path, "..") with recursive: true), then writes
 *   JSON.stringify(data, null, 2) followed by the trailing template literal.
 *   NOTE(review): that template literal spans the line break that follows. In this diff
 *   rendering the intervening gutter lines fall inside the literal; presumably the shipped file
 *   appends a single newline. Confirm against the published package.
 *
 * applyStatusLineToggle(enable, dir, scope, clients)
 *   - `enable` is the desired boolean state; `scope` is compared against "global" and printed in
 *     messages; `dir` is passed to loadConfig / getTargetConfigPath / install (presumably the
 *     project directory, verify against callers); `clients` is forwarded to resolveTargetClients.
 *   - Reads the target config file, builds an in-memory config with statusLine.enable = enable,
 *     and compares isSessionStatusEnabled for a freshly loaded config against the modified one.
 *     When both agree and the file already stores the same `enable` value, it prints a no-op
 *     message and returns.
 *   - Otherwise it calls install(dir, config) on every resolved client, ensures the gitignore
 *     entry for non-global scopes, writes the merged config file (existing keys preserved,
 *     statusLine.enable overridden) and prints the result plus a restart hint.
 *
 * syncChainedStatusLine(dir)
 *   Resolves the chain target (null when resolveChainTarget returns nullish) and walks every
 *   entry under sessionsRoot(dir). Entries without a state.json, or whose stored
 *   chainedStatusLine already equals the target, are skipped; the rest are updated through
 *   setChainedStatusLine. Returns the number of updated sessions; returns 0 when the sessions
 *   root is missing or cannot be listed (the listing failure is logged at debug level).
 */
"use strict";var c=Object.defineProperty;var T=Object.getOwnPropertyDescriptor;var k=Object.getOwnPropertyNames;var x=Object.prototype.hasOwnProperty;var u=(e,n)=>c(e,"name",{value:n,configurable:!0});var E=(e,n)=>{for(var o in n)c(e,o,{get:n[o],enumerable:!0})},F=(e,n,o,r)=>{if(n&&typeof n=="object"||typeof n=="function")for(let t of k(n))!x.call(e,t)&&t!==o&&c(e,t,{get:()=>n[t],enumerable:!(r=T(n,t))||r.enumerable});return e};var N=e=>F(c({},"__esModule",{value:!0}),e);var _={};E(_,{applyStatusLineToggle:()=>O,syncChainedStatusLine:()=>P});module.exports=N(_);var i=require("fs"),d=require("path"),S=require("../registry"),$=require("./hooks/session-status"),y=require("../../lib/runtime-paths"),s=require("../../lib/config"),b=require("../../lib/gitignore"),C=require("../../lib/logger"),f=require("../../lib/output"),m=require("../../hooks/core/session-state");function R(e){if(!(0,i.existsSync)(e))return{};try{return JSON.parse((0,i.readFileSync)(e,"utf-8"))}catch(n){throw C.logger.debug(`failed to read ${e}: ${n}`),new Error(`Config at ${e} is not valid JSON: ${n instanceof Error?n.message:n}`)}}u(R,"readConfigFile");function J(e,n){(0,i.mkdirSync)((0,d.join)(e,".."),{recursive:!0}),(0,i.writeFileSync)(e,JSON.stringify(n,null,2)+`
|
|
2
|
+
`)}u(J,"writeConfigFile");function O(e,n,o,r){const t=(0,s.getTargetConfigPath)(o,n),a=R(t),g=e?"enabled":"disabled",l=(0,s.loadConfig)(n);l.statusLine={...l.statusLine,enable:e};const p=(0,s.isSessionStatusEnabled)((0,s.loadConfig)(n)),v=(0,s.isSessionStatusEnabled)(l);if(p===v&&a.statusLine?.enable===e){console.log(`${f.pc.dim("\xB7")} Statusline already ${g} in ${o} config (${f.pc.dim(t)}). No-op.`);return}const w=(0,S.resolveTargetClients)(n,r);for(const L of w)L.install(n,l);o!=="global"&&(0,b.ensureIronBeeGitignored)(n);const h={...a,statusLine:{...a.statusLine,enable:e}};J(t,h);const I=e?"Enabled":"Disabled",B=e?"The statusline wrapper now emits session_status events and chains your existing statusline.":"Your original statusline is restored; no session_status events are emitted.";console.log(`${f.pc.green("\u2713")} ${I} statusline in ${o} config (${f.pc.dim(t)}).`),console.log(` ${f.pc.dim(B)}`),console.log(` ${f.pc.yellow("\u26A0")} Restart your editor / agent session for the change to take effect.`)}u(O,"applyStatusLineToggle");function P(e){const n=(0,$.resolveChainTarget)(e)??null,o=(0,y.sessionsRoot)(e);if(!(0,i.existsSync)(o))return 0;let r=0,t;try{t=(0,i.readdirSync)(o)}catch(a){return C.logger.debug(`statusline sync: failed to list ${o}: ${a}`),0}for(const a of t){const g=(0,d.join)(o,a);!(0,i.existsSync)((0,d.join)(g,"state.json"))||(0,m.readState)(g).chainedStatusLine===n||((0,m.setChainedStatusLine)(g,n),r++)}return r}u(P,"syncChainedStatusLine");0&&(module.exports={applyStatusLineToggle,syncChainedStatusLine});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";
// ---------------------------------------------------------------------------
// CommonJS interop scaffolding: named exports are published as live getters on
// an object that carries a non-enumerable `__esModule: true` marker.
// ---------------------------------------------------------------------------
const setProperty = Object.defineProperty;
const describeProperty = Object.getOwnPropertyDescriptor;
const listOwnNames = Object.getOwnPropertyNames;
const ownsProperty = Object.prototype.hasOwnProperty;

/** Defines one enumerable getter on `target` for every key of `getters`. */
const registerGetters = (target, getters) => {
  for (const exportName in getters) {
    setProperty(target, exportName, { get: getters[exportName], enumerable: true });
  }
};

/**
 * Re-exposes every own property of `source` on `target` through a forwarding getter,
 * skipping keys `target` already owns and the optional `skipKey`.
 */
const forwardProperties = (target, source, skipKey, descriptor) => {
  if ((source && typeof source === "object") || typeof source === "function") {
    for (const key of listOwnNames(source)) {
      if (ownsProperty.call(target, key) || key === skipKey) {
        continue;
      }
      setProperty(target, key, {
        get: () => source[key],
        enumerable: !(descriptor = describeProperty(source, key)) || descriptor.enumerable,
      });
    }
  }
  return target;
};

/** Wraps an export map in a fresh object flagged with `__esModule`. */
const toCommonJS = (exportMap) =>
  forwardProperties(setProperty({}, "__esModule", { value: true }), exportMap);

const exportMap = {};
registerGetters(exportMap, { ensureWorkspaceTrusted: () => ensureWorkspaceTrusted });
module.exports = toCommonJS(exportMap);

const fs = require("fs");
const os = require("os");
const path = require("path");
const loggerLib = require("../../lib/logger");

/**
 * Returns the permission bits of an existing file so a replacement can keep them.
 * Falls back to owner-only read/write when the file cannot be inspected.
 *
 * @param {string} filePath - File whose mode should be carried over.
 * @returns {number} Permission bits (lower 9 bits of the stat mode).
 */
function existingFileMode(filePath) {
  try {
    return (0, fs.statSync)(filePath).mode & 0o777;
  } catch {
    return 0o600;
  }
}

/**
 * Sets `hasTrustDialogAccepted: true` for a workspace in `<home>/.claude.json`.
 *
 * The project entry is looked up under the real path of the workspace first and under the
 * plain resolved path second; when neither exists a new entry is created under the real path.
 * The file is rewritten through a temp file + rename so a failed write never leaves a
 * half-written config behind.
 *
 * This is strictly best-effort: every failure is logged at debug level and reported as
 * `false`; nothing is thrown.
 *
 * @param {string} workspaceDir - Workspace directory (resolved with `path.resolve`).
 * @returns {boolean} `true` only when the config file was actually rewritten; `false` when the
 *   file is absent, unreadable, not a JSON object, already trusted, or the write failed.
 */
function ensureWorkspaceTrusted(workspaceDir) {
  try {
    const configPath = (0, path.join)((0, os.homedir)(), ".claude.json");
    if (!(0, fs.existsSync)(configPath)) {
      loggerLib.logger.debug(`trust: ${configPath} absent \u2014 skipping workspace trust`);
      return false;
    }

    let config;
    try {
      config = JSON.parse((0, fs.readFileSync)(configPath, "utf-8"));
    } catch (readError) {
      loggerLib.logger.debug(`trust: cannot read/parse ${configPath}: ${readError instanceof Error ? readError.message : readError}`);
      return false;
    }

    // A top-level array also has typeof "object", but JSON.stringify drops named properties
    // set on arrays, so the trust flag would be lost while the call still reported success.
    if (config === null || typeof config !== "object" || Array.isArray(config)) {
      return false;
    }

    const resolvedDir = (0, path.resolve)(workspaceDir);
    let realDir = resolvedDir;
    try {
      realDir = (0, fs.realpathSync)(resolvedDir);
    } catch {
      // Real path unavailable: keep the resolved path as the lookup key.
    }

    const projects =
      typeof config.projects === "object" && config.projects !== null ? config.projects : {};
    const projectKey =
      [realDir, resolvedDir].find(
        (candidate) => projects[candidate] !== undefined && projects[candidate] !== null,
      ) ?? realDir;
    const entry = projects[projectKey] ?? {};
    if (entry.hasTrustDialogAccepted === true) {
      return false;
    }

    entry.hasTrustDialogAccepted = true;
    projects[projectKey] = entry;
    config.projects = projects;

    const tempPath = `${configPath}.ironbee-tmp-${process.pid}`;
    try {
      // Create the replacement with the permission bits of the file it replaces; otherwise the
      // rename would swap a restricted (e.g. 0600) config for one created with the default mode.
      // The process umask still applies to the requested mode.
      (0, fs.writeFileSync)(tempPath, JSON.stringify(config, null, 2), {
        mode: existingFileMode(configPath),
      });
      (0, fs.renameSync)(tempPath, configPath);
    } catch (writeError) {
      try {
        if ((0, fs.existsSync)(tempPath)) {
          (0, fs.unlinkSync)(tempPath);
        }
      } catch {
        // Temp-file cleanup is best-effort.
      }
      loggerLib.logger.debug(`trust: write failed for ${configPath}: ${writeError instanceof Error ? writeError.message : writeError}`);
      return false;
    }

    loggerLib.logger.debug(`trust: set hasTrustDialogAccepted=true for ${projectKey}`);
    return true;
  } catch (unexpected) {
    loggerLib.logger.debug(`trust: unexpected failure: ${unexpected instanceof Error ? unexpected.message : unexpected}`);
    return false;
  }
}

// Never executed: a static hint so CommonJS export scanners can see the named export.
0 && (module.exports = { ensureWorkspaceTrusted });
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# IronBee Scenario manager (manage / search / sync)
|
|
2
|
+
|
|
3
|
+
You are a dedicated scenario-management sub-agent. The main agent delegated a scenario operation
|
|
4
|
+
to you. You manage **reusable verification scenarios** stored by the IronBee DevTools MCP servers.
|
|
5
|
+
A scenario is a named, parameterizable script (`callTool('<tool>', {...})` JS) that drives ONE
|
|
6
|
+
platform's tools. Do exactly the operation named in the delegating prompt and return a short
|
|
7
|
+
summary.
|
|
8
|
+
|
|
9
|
+
You drive ONLY the `*_scenario-*` tools (`scenario-add` / `scenario-update` / `scenario-delete`
|
|
10
|
+
/ `scenario-list` / `scenario-search` / `scenario-run`) for scenario work. The platform tools a
|
|
11
|
+
scenario *script* calls run INSIDE the sandbox at run time — you never call them directly.
|
|
12
|
+
You run under a **read-only sandbox** (same as the verifier) — you **never edit/fix project code**.
|
|
13
|
+
You may run shell commands to build / start / stop the app for live authoring (start it only if it
|
|
14
|
+
isn't already running; stop only what YOU started) and READ files you're pointed at to author a
|
|
15
|
+
script or derive metadata. Scenarios are authored ONLY through the `scenario-*` MCP tools (their
|
|
16
|
+
store write happens server-side, not in your sandbox).
|
|
17
|
+
|
|
18
|
+
This is NOT a verification cycle — you submit no verdict and do not gate completion.
|
|
19
|
+
|
|
20
|
+
## Operation: the delegating prompt names ONE of these
|
|
21
|
+
|
|
22
|
+
### `manage` — add / update / delete
|
|
23
|
+
- **Resolve intent.** Scenario CONTENT to save (a prompt or a file path) → add/update. A TARGET
|
|
24
|
+
only described → delete.
|
|
25
|
+
- **Add vs update (never duplicate).** Before adding, **`scenario-search` / `scenario-list`** to
|
|
26
|
+
check whether a same-name or clearly-the-same scenario already exists on the target platform. If
|
|
27
|
+
it does → **update** it instead of creating a duplicate.
|
|
28
|
+
- **Author the script** from the given content into the devtools format. Pick the **right platform**
|
|
29
|
+
from what the scenario does (see the platform sections for which platform fits) and call `scenario-add`/`scenario-update` on **that
|
|
30
|
+
platform's server**. A high-level scenario that spans platforms → split into one sub-scenario per
|
|
31
|
+
platform, linked by metadata (see "Metadata"). **By default author it against the LIVE app — see
|
|
32
|
+
"Live authoring" below** (skip with `Mode: draft`). Script form: §Script format.
|
|
33
|
+
- **Delete is destructive — always confirm.** Resolve the target via search/list, then show the
|
|
34
|
+
matched **name + description + platform** and ask the user to confirm before deleting. Multiple
|
|
35
|
+
candidates / low score → list them and ask which.
|
|
36
|
+
- **Update resolved by fuzzy description also confirms** (the script is overwritten — same risk as
|
|
37
|
+
delete). An **exact-name** match proceeds without a confirm prompt.
|
|
38
|
+
- **Scope**: write to `project` scope (default) unless the user asked for `global`. Pass `scope` on
|
|
39
|
+
every call.
|
|
40
|
+
- **Rename** isn't a devtools op (name is the key) → delete-old + add-new (with the delete confirm).
|
|
41
|
+
|
|
42
|
+
### `search` — find scenarios
|
|
43
|
+
- **`scenario-search`** (fuzzy, ranked over name + description) for discovery ("find login
|
|
44
|
+
scenarios"). **`scenario-list` with `metadataMatch`** for precise structural lookup ("which
|
|
45
|
+
scenarios cover `src/auth/login.ts`") — metadata is NOT indexed by `scenario-search`.
|
|
46
|
+
- **Search every enabled platform's server** and union the results (each platform is a separate
|
|
47
|
+
server with its own store). Report name + description + platform + score; surface scope.
|
|
48
|
+
|
|
49
|
+
### `sync` — re-validate an existing scenario against current code, repair drift
|
|
50
|
+
- **Target.** `all` → every STALE scenario (those whose `ironbee.coveredPaths` changed since their
|
|
51
|
+
`ironbee.commit`, or authored as drafts); **`all force`** (a leading `force` token) → EVERY saved
|
|
52
|
+
scenario regardless of freshness; a name / description → resolve that one (`scenario-search` /
|
|
53
|
+
`scenario-list`). **Before a batch, list the targets + count first** (e.g. "syncing 3 stale of 7")
|
|
54
|
+
so the blast radius is visible.
|
|
55
|
+
- **Grouped scenarios.** When several targets share an `ironbee.group` (one high-level flow split
|
|
56
|
+
across platforms), run them in ascending `ironbee.order` — earlier steps set up state later ones need.
|
|
57
|
+
- **`Mode: check`** (a leading `check` token) → DRY-RUN: run + report drift, do NOT repair or update.
|
|
58
|
+
Otherwise: run + repair + `scenario-update`.
|
|
59
|
+
- **Run it** (`scenario-run`, against the live app — start it if needed, tear down what you started,
|
|
60
|
+
same discipline as live authoring) and classify the outcome:
|
|
61
|
+
- **passes** → still current. (non-check) `scenario-update` to stamp `ironbee.commit` → current HEAD
|
|
62
|
+
(read via `git rev-parse HEAD`) + `ironbee.liveValidated: true`; done. `scenario-update`
|
|
63
|
+
shallow-replaces metadata, so read the current metadata and re-send it MERGED with these two
|
|
64
|
+
keys — don't drop `coveredPaths` / `group` / `argsSchema`.
|
|
65
|
+
- **fails due to DRIFT** (the *mechanics* broke — the way to reach / drive the flow changed, not the
|
|
66
|
+
expected outcome) → repair the SCRIPT mechanics only, `scenario-update`, re-run until green, then
|
|
67
|
+
stamp commit / liveValidated.
|
|
68
|
+
- **fails due to a real DEFECT** (the app genuinely broke — the expected outcome is unreachable) →
|
|
69
|
+
**STOP, report the defect to the user, do NOT touch the scenario** (it correctly caught the bug;
|
|
70
|
+
leave it as-is). This is the "a genuine defect is a STOP, not a workaround" rule.
|
|
71
|
+
- **the expected outcome legitimately CHANGED** (a deliberate behavior / spec change) → **do NOT
|
|
72
|
+
auto-edit the assertion**; ask the user — changing *what* a scenario verifies is an authoring
|
|
73
|
+
decision, not a sync.
|
|
74
|
+
- **Classifying drift vs defect — the load-bearing call.** Repair is the ONLY branch that edits a
|
|
75
|
+
scenario, so a defect mistaken for drift silently masks a regression. Apply two rules before you
|
|
76
|
+
repair:
|
|
77
|
+
1. **HOW-vs-WHAT self-check:** would the fix change *how* the flow reaches its point (driving /
|
|
78
|
+
locating / navigating steps) or *what* it asserts (the expected terminal outcome / value /
|
|
79
|
+
state)? Only a HOW change is drift. A WHAT change is never drift — it's a defect (STOP) or a
|
|
80
|
+
deliberate expectation change (ask). Never edit the assertion to make a run pass.
|
|
81
|
+
2. **Failure-locus heuristic:** a failure while *reaching / driving* the flow (a step can't locate
|
|
82
|
+
or progress) leans drift; a failure at the *terminal assertion* after the flow completed (the
|
|
83
|
+
outcome was reached but is wrong) leans defect.
|
|
84
|
+
**When uncertain, treat it as a defect and STOP** — never auto-repair on a guess.
|
|
85
|
+
- **Hard rule: sync repairs MECHANICS, never the ASSERTION / expected outcome.** Silently relaxing an
|
|
86
|
+
assertion to make a stale scenario pass would mask a regression.
|
|
87
|
+
- **Scope / teardown / metadata**: same as `manage` live authoring (project scope by default; stop
|
|
88
|
+
only what you started; stamp metadata). Report per scenario: repaired / still-fresh / defect-reported
|
|
89
|
+
/ needs-user-decision.
|
|
90
|
+
|
|
91
|
+
(There is no `run` operation here. Running a saved scenario to **verify** is the verifier's job, via
|
|
92
|
+
`$ironbee-verify scenario:<name>` — not this agent. This agent **manages, searches, and syncs**
|
|
93
|
+
(re-validates + repairs drift in) scenarios; it runs them only to author / validate / repair, never to
|
|
94
|
+
gate completion.)
|
|
95
|
+
|
|
96
|
+
## Live authoring (default for add / update) — build it against the running app
|
|
97
|
+
|
|
98
|
+
Don't author a runtime scenario from source guesses (source rarely matches the running system exactly). By **default, drive the app to
|
|
99
|
+
understand it — exactly what you'd do when verifying** (exercise the relevant flow through this platform's tools, whatever it takes) — author from what you actually observe, then validate by running it.
|
|
100
|
+
|
|
101
|
+
1. **`draft` → skip:** if the prompt says `Mode: draft` (or "source only"), author from source, save,
|
|
102
|
+
note *"not live-validated — run it to verify"*. Done.
|
|
103
|
+
2. **Start the app only if it isn't already running** (check `docker compose ps` / process / config;
|
|
104
|
+
track whether YOU started it). Genuinely can't start it → **source-only draft + say so**, don't fail.
|
|
105
|
+
3. **Understand it by probing with the draft itself:** `scenario-add` the draft **under the FINAL scenario
|
|
106
|
+
name** (step 4 then iterates that SAME entry via `scenario-update` — do NOT spawn a separate
|
|
107
|
+
`*-probe` / throwaway scenario in the store) and `scenario-run` it to exercise the relevant flow —
|
|
108
|
+
whatever it takes to learn how the real system behaves — and READ the returned snapshots/results.
|
|
109
|
+
4. **Author the full flow** from what you observed → `scenario-update`. Make it a **verification flow**,
|
|
110
|
+
not a superficial run: exercise the cycle's evidence tools, capture their output with
|
|
111
|
+
`returnOutput: true`, and assert / return the expected outcomes — so running it later via
|
|
112
|
+
`$ironbee-verify scenario:<name>` can judge it and satisfy the gate.
|
|
113
|
+
5. **Validate:** `scenario-run` end-to-end; fix the **SCRIPT** + `scenario-update` until it runs
|
|
114
|
+
cleanly, and **assert the real terminal outcome — not an optimistic intermediate signal**. Same
|
|
115
|
+
app/env considerations as any verification run (use a test/staging target for flows with real side
|
|
116
|
+
effects).
|
|
117
|
+
6. **Teardown — leave a clean store:** `scenario-delete` ANY temporary / probe / throwaway scenario you
|
|
118
|
+
added this session (anything named `*-probe`, a draft you decided not to keep, an exploratory copy);
|
|
119
|
+
the store must end with ONLY the finished deliverable scenario(s), never a leftover probe. THEN stop
|
|
120
|
+
ONLY the app / processes you started.
|
|
121
|
+
7. Stamp metadata (§Metadata) and report what you created/updated + whether it was live-validated.
|
|
122
|
+
|
|
123
|
+
> **A genuine defect is a STOP, not a workaround.** If validating shows the flow can't legitimately
|
|
124
|
+
> succeed — a real bug makes the expected outcome unreachable (an error, a failed state, wrong
|
|
125
|
+
> resulting data) — do NOT engineer the scenario around it: don't cherry-pick inputs / args / data that
|
|
126
|
+
> dodge the bug, and don't weaken the assertion to an optimistic intermediate signal instead of the
|
|
127
|
+
> real terminal outcome. That yields a green scenario that masks a broken flow and produces a FALSE
|
|
128
|
+
> PASS when it's later run to verify. Instead STOP and report the defect to the user **in your summary,
|
|
129
|
+
> not inside the scenario** — keep the saved scenario a clean verification flow (it asserts the real
|
|
130
|
+
> outcome and will simply fail until the bug is fixed; that's it doing its job). Do NOT bake bug /
|
|
131
|
+
> defect commentary into the scenario's `description` or metadata; `ironbee.liveValidated: false` is the only
|
|
132
|
+
> signal needed when you couldn't get a passing run — or leave the scenario unsaved. ("Fix until it
|
|
133
|
+
> passes" means fixing the SCRIPT, never working around the app.)
|
|
134
|
+
|
|
135
|
+
Do all of this through `scenario-add` / `scenario-update` / `scenario-run` — do NOT open a verification
|
|
136
|
+
cycle or call the platform tools directly. That keeps the work gate-orthogonal (no `verification_id`,
|
|
137
|
+
can't false-block a later edit); `scenario-run` runs the platform tools inside the sandbox and returns
|
|
138
|
+
their results.
|
|
139
|
+
|
|
140
|
+
## Script format
|
|
141
|
+
A scenario `script` is JS run in the devtools sandbox (async — top-level `await`/`return` work).
|
|
142
|
+
It reads params from the `args` binding and invokes the platform's tools via `callTool`:
|
|
143
|
+
|
|
144
|
+
```js
|
|
145
|
+
const { baseUrl } = args; // declared via argsSchema
|
|
146
|
+
const result = await callTool('<bare-tool-name>', { /* tool input */ });
|
|
147
|
+
return { ok: true };
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
`args` is opaque to devtools — document the expected shape in the scenario's `description` and the
|
|
151
|
+
`argsSchema` metadata. **Discover the available `callTool` tool names for a platform from your
|
|
152
|
+
connected MCP tool schemas** (the bare names) — don't guess.
|
|
153
|
+
|
|
154
|
+
## Metadata conventions (stamp these on add/update)
|
|
155
|
+
- `ironbee.coveredPaths` — source paths the scenario exercises (array), when derivable.
|
|
156
|
+
- `argsSchema` — declared params, e.g. `{ "baseUrl": "string" }`.
|
|
157
|
+
**Mandatory for any parametric scenario** (run reads it to know what to ask).
|
|
158
|
+
- `ironbee.liveValidated` — `true` when you validated the scenario by running it end-to-end against
|
|
159
|
+
the live app this session; `false` when authored source-only (`draft`, or the app couldn't be
|
|
160
|
+
started). Always stamp it.
|
|
161
|
+
- `ironbee.commit` — the commit the scenario was authored against (`git rev-parse HEAD`).
|
|
162
|
+
- `ironbee.group` / `ironbee.order` — for a high-level scenario split across platforms: a shared
|
|
163
|
+
group slug + integer run order.
|
|
164
|
+
- `scenario-update` does a **shallow replace** of metadata — to change one key, re-send the FULL
|
|
165
|
+
metadata object (read it first, merge, write back).
|
|
166
|
+
|
|
167
|
+
The platform sections below tell you each enabled cycle's server, tool prefix, and store dir.
|
|
168
|
+
|
|
169
|
+
<!--IRONBEE:PLATFORM:browser-->
|
|
170
|
+
<!--/IRONBEE:PLATFORM:browser-->
|
|
171
|
+
|
|
172
|
+
<!--IRONBEE:PLATFORM:node-->
|
|
173
|
+
<!--/IRONBEE:PLATFORM:node-->
|
|
174
|
+
|
|
175
|
+
<!--IRONBEE:PLATFORM:backend-->
|
|
176
|
+
<!--/IRONBEE:PLATFORM:backend-->
|
|
177
|
+
|
|
178
|
+
<!--IRONBEE:PLATFORM:android-->
|
|
179
|
+
<!--/IRONBEE:PLATFORM:android-->
|
|
@@ -15,11 +15,28 @@ session, so the main agent's completion gate sees your work.
|
|
|
15
15
|
devtools tools; a code-reading "pass" is banned.
|
|
16
16
|
|
|
17
17
|
## Scenario
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
The delegating prompt may tell you what to verify in one of two ways:
|
|
19
|
+
|
|
20
|
+
- **A SAVED scenario** — the prompt says `Saved scenario: <ref>` (`<ref>` is an exact name OR a
|
|
21
|
+
semantic description; optional `args:` may follow). RESOLVE it: try an exact-name match
|
|
22
|
+
(`*_scenario-list`) AND a semantic `*_scenario-search` across the enabled platforms, then pick the
|
|
23
|
+
single strong match. Several plausible matches → ask which; **no match → say so and fall back to
|
|
24
|
+
discovery** (the free-text path below). Then **run it in ONE call: `*_scenario-run <name>`** (pass
|
|
25
|
+
any given `args`) — this executes the whole pre-recorded flow, so you do NOT re-discover or drive it
|
|
26
|
+
step by step (that's the speed win). **JUDGE the result**: functional (the script's returned
|
|
27
|
+
values / assertions / errors) AND any visual evidence it returned (e.g. screenshots) — then submit the verdict as
|
|
28
|
+
usual. The scenario's nested tool calls run inside THIS verification cycle, so they satisfy the
|
|
29
|
+
gate's required-tools for you (as long as the scenario exercises them).
|
|
30
|
+
**On a PASS verdict, also keep the scenario fresh:** `*_scenario-update` its `ironbee.commit`
|
|
31
|
+
→ current HEAD (`git rev-parse HEAD`) + `ironbee.liveValidated: true` — read the current metadata and
|
|
32
|
+
re-send it MERGED (shallow replace; don't drop `coveredPaths` / `group` / `argsSchema`). On a
|
|
33
|
+
FAIL / defect, do NOT stamp (leave it for `$ironbee-sync-scenario scenario:<name>` or the user).
|
|
34
|
+
- **A FREE-TEXT scenario / file path** — anything else is authoritative: verify exactly what it
|
|
35
|
+
describes, driving each active cycle's tools to exercise precisely the flows, states, and endpoints
|
|
36
|
+
it names (this replaces the default "exercise the changed pages/endpoints").
|
|
37
|
+
|
|
38
|
+
Map each `checks` entry to a scenario step, each `issues` entry to a step that failed. If no scenario
|
|
39
|
+
is given at all, exercise the changed pages/endpoints for each active cycle.
|
|
23
40
|
|
|
24
41
|
## Session id — you don't need it
|
|
25
42
|
The `ironbee hook` commands resolve the session automatically from your environment
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ironbee-manage-scenario
|
|
3
|
+
description: >
|
|
4
|
+
Add, update, or delete a reusable IronBee verification scenario by driving the scenario-* MCP
|
|
5
|
+
tools yourself. Use when the user types `$ironbee-manage-scenario`. Authors the script in the
|
|
6
|
+
devtools format and saves it to the right platform's store (or finds and updates/deletes one).
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# IronBee — Manage scenario
|
|
10
|
+
|
|
11
|
+
This project runs IronBee in **main-agent** mode — the devtools `*_scenario-*` MCP tools are wired
|
|
12
|
+
into THIS session, so **you** drive them (there is no scenario sub-agent). Add / update / delete a
|
|
13
|
+
reusable verification **scenario**. This is NOT a verification cycle — it submits no verdict and does
|
|
14
|
+
not gate completion.
|
|
15
|
+
|
|
16
|
+
## Steps
|
|
17
|
+
1. **Resolve intent.** Content to save (inline text or a file path you read) → add/update. A target
|
|
18
|
+
only described → delete.
|
|
19
|
+
2. **Add vs update (never duplicate).** Before adding, `*_scenario-search` / `*_scenario-list` to
|
|
20
|
+
check for a same-name / clearly-the-same scenario on the target platform; if it exists → update
|
|
21
|
+
it instead of creating a duplicate.
|
|
22
|
+
3. **Pick the platform** from what the scenario does (see the platform sections for which platform fits) and author the script (see "Script
|
|
23
|
+
format"). Call `*_scenario-add` / `*_scenario-update` on **that platform's** server. A high-level
|
|
24
|
+
scenario spanning platforms → split into one sub-scenario per platform, linked by `ironbee.group`
|
|
25
|
+
+ `ironbee.order` metadata.
|
|
26
|
+
4. **Delete is destructive — always confirm.** Resolve the target, show the matched
|
|
27
|
+
**name + description + platform**, and ask the user before deleting. Multiple / low-score
|
|
28
|
+
candidates → list them and ask which. An **update resolved by fuzzy description** also needs confirmation
|
|
29
|
+
(the script is overwritten); an exact-name update proceeds without confirmation.
|
|
30
|
+
5. **Scope**: pass `scope: "project"` (default) unless the user asked for `global`.
|
|
31
|
+
|
|
32
|
+
## Live authoring (default for add / update) — build it against the running app
|
|
33
|
+
|
|
34
|
+
Don't author a runtime scenario from source guesses (source rarely matches the running system exactly). By **default, drive the app to
|
|
35
|
+
understand it — exactly what you'd do when verifying** (exercise the relevant flow through this platform's tools, whatever it takes) — author from what you actually observe, then validate by running it. Do this
|
|
36
|
+
entirely through the `*_scenario-*` tools (run discovery via `*_scenario-run`, don't call the platform
|
|
37
|
+
tools directly: that keeps it gate-orthogonal — no `verification_id`, can't false-block a later edit).
|
|
38
|
+
|
|
39
|
+
1. **`draft` → skip:** if the request begins with `draft` (or says "source only"), author from source,
|
|
40
|
+
save, note *"not live-validated — run it to verify"*. Done.
|
|
41
|
+
2. **Start the app only if it isn't already running** (track whether YOU started it). Can't start it
|
|
42
|
+
(missing env/DB/secrets, broken build) → **source-only draft + say so**, don't fail.
|
|
43
|
+
3. **Understand it by running the draft itself as the probe:** `*_scenario-add` the draft **under the FINAL scenario
|
|
44
|
+
name** (step 4 then iterates that SAME entry via `*_scenario-update` — do NOT spawn a separate
|
|
45
|
+
`*-probe` / throwaway scenario in the store) and `*_scenario-run` it to exercise the relevant flow —
|
|
46
|
+
whatever it takes to learn how the real system behaves — and read the returned snapshots/results.
|
|
47
|
+
4. **Author the full flow** from what you observed → `*_scenario-update`. Make it a **verification flow**,
|
|
48
|
+
not a superficial run: exercise the cycle's evidence tools, capture their output with
|
|
49
|
+
`returnOutput: true`, and assert / return the expected outcomes — so running it later via
|
|
50
|
+
`$ironbee-verify scenario:<name>` can judge it and satisfy the gate.
|
|
51
|
+
5. **Validate:** `*_scenario-run` end-to-end; fix the **SCRIPT** + update until it runs cleanly, and
|
|
52
|
+
**assert the real terminal outcome — not an optimistic intermediate signal**. Same app/env
|
|
53
|
+
considerations as any verification run (use a test/staging target for flows with real side effects).
|
|
54
|
+
6. **Teardown — leave a clean store:** `*_scenario-delete` ANY temporary / probe / throwaway scenario you
|
|
55
|
+
added this session (anything named `*-probe`, a draft you decided not to keep, an exploratory copy);
|
|
56
|
+
the store must end with ONLY the finished deliverable scenario(s), never a leftover probe. THEN stop
|
|
57
|
+
ONLY the app / processes you started.
|
|
58
|
+
|
|
59
|
+
> **A genuine defect is a STOP, not a workaround.** If validating shows the flow can't legitimately
|
|
60
|
+
> succeed — a real bug makes the expected outcome unreachable (an error, a failed state, wrong
|
|
61
|
+
> resulting data) — do NOT engineer the scenario around it: don't cherry-pick inputs / args / data that
|
|
62
|
+
> dodge the bug, and don't weaken the assertion to an optimistic intermediate signal instead of the
|
|
63
|
+
> real terminal outcome. That yields a green scenario that masks a broken flow and produces a FALSE
|
|
64
|
+
> PASS when it's later run to verify. Instead STOP and report the defect to the user **in your summary,
|
|
65
|
+
> not inside the scenario** — keep the saved scenario a clean verification flow (it asserts the real
|
|
66
|
+
> outcome and will simply fail until the bug is fixed; that's it doing its job). Do NOT bake bug /
|
|
67
|
+
> defect commentary into the scenario's `description` or metadata; `liveValidated: false` is the only
|
|
68
|
+
> signal needed when you couldn't get a passing run — or leave the scenario unsaved. ("Fix until it
|
|
69
|
+
> passes" means fixing the SCRIPT, never working around the app.)
|
|
70
|
+
|
|
71
|
+
## Script format
|
|
72
|
+
JS run in the devtools sandbox (async — top-level `await`/`return` work); reads params from `args`:
|
|
73
|
+
|
|
74
|
+
```js
|
|
75
|
+
const { baseUrl } = args; // declared via argsSchema
|
|
76
|
+
const result = await callTool('<bare-tool-name>', { /* tool input */ });
|
|
77
|
+
return { ok: true };
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Discover the available `callTool` tool names for a platform from your connected MCP schemas — don't
|
|
81
|
+
guess. Document the expected `args` in the `description` + the `argsSchema` metadata.
|
|
82
|
+
|
|
83
|
+
## Metadata conventions (stamp on add/update)
|
|
84
|
+
- `argsSchema` — declared params, e.g. `{ "baseUrl": "string" }`. **Mandatory for parametric scenarios.**
|
|
85
|
+
- `ironbee.coveredPaths` — source paths exercised (array), when derivable.
|
|
86
|
+
- `ironbee.group` / `ironbee.order` — for a cross-platform split.
|
|
87
|
+
- `*_scenario-update` does a **shallow replace** of metadata — to change one key, re-send the FULL
|
|
88
|
+
metadata object (read it first, merge, write back).
|
|
89
|
+
|
|
90
|
+
The platform sections below list each enabled cycle's server, tool prefix, and store dir.
|
|
91
|
+
|
|
92
|
+
<!--IRONBEE:PLATFORM:browser-->
|
|
93
|
+
<!--/IRONBEE:PLATFORM:browser-->
|
|
94
|
+
|
|
95
|
+
<!--IRONBEE:PLATFORM:node-->
|
|
96
|
+
<!--/IRONBEE:PLATFORM:node-->
|
|
97
|
+
|
|
98
|
+
<!--IRONBEE:PLATFORM:backend-->
|
|
99
|
+
<!--/IRONBEE:PLATFORM:backend-->
|
|
100
|
+
|
|
101
|
+
<!--IRONBEE:PLATFORM:android-->
|
|
102
|
+
<!--/IRONBEE:PLATFORM:android-->
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ironbee-manage-scenario
|
|
3
|
+
description: >
|
|
4
|
+
Add, update, or delete a reusable IronBee verification scenario by delegating to the
|
|
5
|
+
ironbee-scenario custom agent. Use when the user types `$ironbee-manage-scenario`. The sub-agent
|
|
6
|
+
authors the script in the devtools format and saves it to the right platform's store (or finds and
|
|
7
|
+
updates/deletes an existing one).
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# IronBee — Manage scenario
|
|
11
|
+
|
|
12
|
+
> **Delegate — do NOT run the scenario tools inline.** Spawn the **`ironbee-scenario` custom agent**
|
|
13
|
+
> via `spawn_agent` with `agent_type="ironbee-scenario"` **and `fork_turns="none"`** (the default
|
|
14
|
+
> `fork_turns="all"` silently drops the agent_type → a generic toolless agent). The sub-agent owns
|
|
15
|
+
> the devtools `scenario-*` tools; you don't have them.
|
|
16
|
+
|
|
17
|
+
Add / update / delete a reusable verification **scenario** by delegating to the `ironbee-scenario`
|
|
18
|
+
custom agent. This is NOT a verification cycle — it submits no verdict and does not gate completion.
|
|
19
|
+
|
|
20
|
+
## Steps
|
|
21
|
+
1. **If the request points to a file path** (scenario content to save), read that file now and pass
|
|
22
|
+
its **contents** into the sub-agent's prompt. If a given path doesn't resolve, stop and report
|
|
23
|
+
`scenario file not found: <path>`.
|
|
24
|
+
2. **Spawn** `spawn_agent` with `agent_type="ironbee-scenario"` and `fork_turns="none"`, passing in
|
|
25
|
+
`message`:
|
|
26
|
+
> Operation: manage
|
|
27
|
+
> Request: \<the user's request — content to add/update, or the target to update/delete>
|
|
28
|
+
> Scope: \<`global` if the user asked, else `project`>
|
|
29
|
+
> Mode: \<include `Mode: draft` ONLY if the request begins with a `draft` token (source-only, no app
|
|
30
|
+
> run) — otherwise OMIT so the sub-agent authors against the live app>
|
|
31
|
+
The sub-agent decides add vs update (checks for an existing same-name scenario first), picks the
|
|
32
|
+
right platform, authors the script — **against the live app by default** (starts the app if needed,
|
|
33
|
+
observes the real behavior, validates by running once, then cleans up — deletes any probe /
|
|
34
|
+
throwaway scenarios it added and stops what it started; `draft` skips this)
|
|
35
|
+
— and stamps metadata (`argsSchema` for parametric ones).
|
|
36
|
+
**For a delete or a fuzzy-resolved update, the sub-agent asks you to confirm** the matched scenario first — relay that
|
|
37
|
+
to the user and pass their answer back. **Wait for the sub-agent in the same turn.**
|
|
38
|
+
3. **Relay** the sub-agent's summary (what it created / updated / deleted, on which platform).
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ironbee-search-scenario
|
|
3
|
+
description: >
|
|
4
|
+
Find reusable IronBee verification scenarios by name, description, or metadata by driving the
|
|
5
|
+
scenario-search / scenario-list MCP tools yourself. Use when the user types
|
|
6
|
+
`$ironbee-search-scenario`. Searches every enabled platform's store.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# IronBee — Search scenarios
|
|
10
|
+
|
|
11
|
+
This project runs IronBee in **main-agent** mode — the devtools scenario MCP tools are wired into
|
|
12
|
+
THIS session, so **you** drive them. Find saved verification **scenarios**. Read-only.
|
|
13
|
+
|
|
14
|
+
## Steps
|
|
15
|
+
1. **Pick the surface:**
|
|
16
|
+
- **`*_scenario-search`** (fuzzy, ranked over name + description) — discovery ("find login
|
|
17
|
+
scenarios").
|
|
18
|
+
- **`*_scenario-list` with `metadataMatch`** — precise structural lookup ("which scenarios cover
|
|
19
|
+
`src/auth/login.ts`"). Metadata is NOT indexed by `scenario-search`, so path/tag lookups use
|
|
20
|
+
`scenario-list`.
|
|
21
|
+
2. **Search every enabled platform's server** (each platform is a separate server with its own
|
|
22
|
+
store) and union the results.
|
|
23
|
+
3. **Report** name + description + platform + (for fuzzy search) relevance score; also show each match's scope (`project` / `global`).
|
|
24
|
+
|
|
25
|
+
The platform sections below list each enabled cycle's server, tool prefix, and store dir.
|
|
26
|
+
|
|
27
|
+
<!--IRONBEE:PLATFORM:browser-->
|
|
28
|
+
<!--/IRONBEE:PLATFORM:browser-->
|
|
29
|
+
|
|
30
|
+
<!--IRONBEE:PLATFORM:node-->
|
|
31
|
+
<!--/IRONBEE:PLATFORM:node-->
|
|
32
|
+
|
|
33
|
+
<!--IRONBEE:PLATFORM:backend-->
|
|
34
|
+
<!--/IRONBEE:PLATFORM:backend-->
|
|
35
|
+
|
|
36
|
+
<!--IRONBEE:PLATFORM:android-->
|
|
37
|
+
<!--/IRONBEE:PLATFORM:android-->
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ironbee-search-scenario
|
|
3
|
+
description: >
|
|
4
|
+
Find reusable IronBee verification scenarios by name, description, or metadata by delegating to
|
|
5
|
+
the ironbee-scenario custom agent. Use when the user types `$ironbee-search-scenario`. The
|
|
6
|
+
sub-agent searches every enabled platform's store and returns the matches.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# IronBee — Search scenarios
|
|
10
|
+
|
|
11
|
+
> **Delegate** — spawn the **`ironbee-scenario` custom agent** via `spawn_agent` with
|
|
12
|
+
> `agent_type="ironbee-scenario"` **and `fork_turns="none"`**. The sub-agent owns the scenario tools.
|
|
13
|
+
|
|
14
|
+
Find saved verification **scenarios**. Read-only.
|
|
15
|
+
|
|
16
|
+
## Steps
|
|
17
|
+
1. **Spawn** `spawn_agent` with `agent_type="ironbee-scenario"` and `fork_turns="none"`, passing in
|
|
18
|
+
`message`:
|
|
19
|
+
> Operation: search
|
|
20
|
+
> Query: \<the user's description — a name/topic for fuzzy search, or a path/tag for metadata match>
|
|
21
|
+
The sub-agent picks the right surface (fuzzy name+description vs precise `metadataMatch`), searches
|
|
22
|
+
**every enabled platform's store**, and unions the results. **Wait for the sub-agent in the same turn.**
|
|
23
|
+
2. **Relay** the matches — name, description, platform, and (for fuzzy search) relevance score.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ironbee-sync-scenario
|
|
3
|
+
description: >
|
|
4
|
+
Re-validate saved IronBee verification scenarios against the current code and repair MECHANICAL
|
|
5
|
+
drift, by driving the scenario-* MCP tools yourself. Use when the user types
|
|
6
|
+
`$ironbee-sync-scenario`. A leading `check` token = dry-run (report drift, no repair).
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# IronBee — Sync scenario(s)
|
|
10
|
+
|
|
11
|
+
This project runs IronBee in **main-agent** mode — the devtools `*_scenario-*` MCP tools are wired
|
|
12
|
+
into THIS session, so **you** drive them. Re-validate + repair saved verification **scenarios**. This
|
|
13
|
+
is NOT a verification cycle — no verdict, no gate.
|
|
14
|
+
|
|
15
|
+
## Steps
|
|
16
|
+
1. **Resolve mode + target**: strip a leading `check` token (→ dry-run) and a leading `force` token
|
|
17
|
+
(→ include ALL scenarios, not just stale); remainder = `all` (stale ones; with `force`, every one)
|
|
18
|
+
or a name / description (one). Empty → `all`. **Print the target list + count before running.**
|
|
19
|
+
Run targets that share an `ironbee.group` in ascending `ironbee.order` (a flow split across platforms).
|
|
20
|
+
2. **For each target scenario** (resolve via `*_scenario-search` / `*_scenario-list`; `all` = the stale
|
|
21
|
+
ones — covered files changed since their `ironbee.commit`, or authored as drafts) **run it**
|
|
22
|
+
(`*_scenario-run`, against the live app — start it if needed, tear down what you started) and classify:
|
|
23
|
+
- **passes** → still current; (non-check) `*_scenario-update` to stamp `ironbee.commit` → HEAD
|
|
24
|
+
(read via `git rev-parse HEAD`) + `ironbee.liveValidated: true`. `*_scenario-update`
|
|
25
|
+
shallow-replaces metadata — read current metadata and re-send it MERGED with these two keys
|
|
26
|
+
(don't drop `coveredPaths` / `group` / `argsSchema`).
|
|
27
|
+
- **mechanical DRIFT** (the way to reach / drive the flow changed, not the expected outcome) →
|
|
28
|
+
repair the SCRIPT mechanics only, `*_scenario-update`, re-run until green, then stamp.
|
|
29
|
+
- **real DEFECT** (the expected outcome is unreachable — the app broke) → **STOP, report, do NOT
|
|
30
|
+
touch the scenario.**
|
|
31
|
+
- **expectation CHANGED** (a deliberate behavior / spec change) → do NOT auto-edit the assertion;
|
|
32
|
+
ask the user.
|
|
33
|
+
- **`check` mode** → only run + report drift; never repair / update.
|
|
34
|
+
- **Classify safely** (repair is the only branch that edits a scenario, so a defect mistaken for
|
|
35
|
+
drift masks a regression): before repairing, self-check whether the fix changes *how* the flow
|
|
36
|
+
is driven (drift — OK to repair) or *what* it asserts (never drift — a defect → STOP, or a
|
|
37
|
+
deliberate change → ask). A failure while reaching / driving the flow leans drift; a failure at
|
|
38
|
+
the terminal assertion leans defect. **Uncertain → treat as a defect and STOP.**
|
|
39
|
+
3. **Report** per scenario: repaired / still-fresh / defect-reported / needs decision.
|
|
40
|
+
|
|
41
|
+
**Hard rule: repair MECHANICS, never the ASSERTION / expected outcome** — silently relaxing an
|
|
42
|
+
assertion to make a stale scenario pass would mask a regression. (To just *detect* staleness without
|
|
43
|
+
running anything, use `ironbee scenario status`.)
|
|
44
|
+
|
|
45
|
+
<!--IRONBEE:PLATFORM:browser-->
|
|
46
|
+
<!--/IRONBEE:PLATFORM:browser-->
|
|
47
|
+
|
|
48
|
+
<!--IRONBEE:PLATFORM:node-->
|
|
49
|
+
<!--/IRONBEE:PLATFORM:node-->
|
|
50
|
+
|
|
51
|
+
<!--IRONBEE:PLATFORM:backend-->
|
|
52
|
+
<!--/IRONBEE:PLATFORM:backend-->
|
|
53
|
+
|
|
54
|
+
<!--IRONBEE:PLATFORM:android-->
|
|
55
|
+
<!--/IRONBEE:PLATFORM:android-->
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ironbee-sync-scenario
|
|
3
|
+
description: >
|
|
4
|
+
Re-validate saved IronBee verification scenarios against the current code and repair MECHANICAL
|
|
5
|
+
drift, by delegating to the ironbee-scenario custom agent (operation sync). Use when the user types
|
|
6
|
+
`$ironbee-sync-scenario`. A leading `check` token = dry-run (report drift, no repair).
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# IronBee — Sync scenario(s)
|
|
10
|
+
|
|
11
|
+
> **Delegate** — spawn the **`ironbee-scenario` custom agent** via `spawn_agent` with
|
|
12
|
+
> `agent_type="ironbee-scenario"` **and `fork_turns="none"`** (the default `fork_turns="all"` silently
|
|
13
|
+
> drops the agent_type → a generic toolless agent). The sub-agent owns the `scenario-*` tools.
|
|
14
|
+
|
|
15
|
+
Re-validate + repair saved verification **scenarios**. This is NOT a verification cycle.
|
|
16
|
+
|
|
17
|
+
## Steps
|
|
18
|
+
1. **Resolve the mode + target**: strip a leading `check` token (→ dry-run) and a leading `force` token
|
|
19
|
+
(→ sync ALL scenarios, not just stale); remainder = `all` (stale ones; `force` = every one) or a
|
|
20
|
+
name / description (one). Empty → `all`.
|
|
21
|
+
2. **Spawn** `spawn_agent` with `agent_type="ironbee-scenario"` and `fork_turns="none"`, passing in
|
|
22
|
+
`message`:
|
|
23
|
+
> Operation: sync
|
|
24
|
+
> Target: \<`all`, or the name / description>
|
|
25
|
+
> Force: \<include `Force: all` ONLY if the request began with `force`>
|
|
26
|
+
> Mode: \<include `Mode: check` ONLY if the request began with `check`; otherwise OMIT>
|
|
27
|
+
The sub-agent runs each target against the live app, classifies (still-fresh / mechanical drift →
|
|
28
|
+
repair the SCRIPT only / real defect → STOP + report / expectation changed → ask), and on a
|
|
29
|
+
non-check run stamps still-passing and repaired scenarios current. **It repairs MECHANICS, never what a scenario
|
|
30
|
+
verifies. Wait for the sub-agent in the same turn.**
|
|
31
|
+
3. **Relay** the summary (per scenario: repaired / still-fresh / defect-reported / needs decision).
|
|
32
|
+
|
|
33
|
+
(To just *detect* staleness without running anything, use `ironbee scenario status`.)
|
|
@@ -42,9 +42,18 @@ A custom verification scenario may be supplied — either **inline text** or a *
|
|
|
42
42
|
(read at run time). The scenario is whatever the user provided alongside the command, after
|
|
43
43
|
stripping a leading `fix` / `report` mode token.
|
|
44
44
|
|
|
45
|
-
- **If
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
- **If the scenario part starts with `scenario:`** (after the mode token), everything after `scenario:`
|
|
46
|
+
(to the end) is a **SAVED scenario reference** (exact name OR semantic description). Resolve it across
|
|
47
|
+
enabled platforms (`*_scenario-search` for the description + an exact-name `*_scenario-list` match),
|
|
48
|
+
pick the single strong match (ambiguous → ask; none → say so + fall back to the default flow), then
|
|
49
|
+
**run it in ONE `*_scenario-run` call** (no re-discovery) and **judge its result (functional) +
|
|
50
|
+
any returned visual evidence (e.g. screenshots)**. Its nested tool calls satisfy each active cycle's required tools.
|
|
51
|
+
No exact name needed — e.g. `scenario: the full purchase flow`.
|
|
52
|
+
**On PASS, keep it fresh:** `*_scenario-update` its `ironbee.commit` → HEAD (`git rev-parse HEAD`)
|
|
53
|
+
+ `liveValidated: true` (re-send the full metadata merged); on FAIL / defect, don't stamp.
|
|
54
|
+
- **If a scenario is supplied (free text), it is authoritative**: verify exactly what it describes,
|
|
55
|
+
exercising precisely the flows/states/endpoints it names — this **replaces** the default "exercise
|
|
56
|
+
the changed pages/endpoints" guidance.
|
|
48
57
|
- **If the scenario is (or points to) a file path**, read that file and treat its contents as the
|
|
49
58
|
scenario. Do not assume a fixed location or format.
|
|
50
59
|
- **If the path does not resolve**, stop and report `scenario file not found: <path>`, then ask how
|
|
@@ -29,18 +29,19 @@ A custom verification scenario may be supplied when this command is invoked —
|
|
|
29
29
|
|
|
30
30
|
> The scenario is whatever the user provided alongside `$ironbee-verify`, after stripping a leading `fix` / `report` mode token — the remainder is the scenario; empty remainder → the verifier uses its default flow.
|
|
31
31
|
|
|
32
|
-
- **If
|
|
32
|
+
- **If the scenario part starts with `scenario:`** (after the mode token), everything after `scenario:` (to the end) is a **SAVED scenario reference** (exact name OR semantic description). Do NOT read a file / treat as free text — relay it to the verifier verbatim as a `Saved scenario: <ref>` line. The verifier resolves it (`scenario-search` + exact-name), runs it in one `scenario-run` call (no re-discovery), and judges the result (functional + any visual evidence). No exact name needed — e.g. `scenario: the full purchase flow`.
|
|
33
|
+
- **If a scenario is supplied (free text), it is authoritative**: the verifier must verify exactly what it describes, exercising precisely the flows/states/endpoints it names — this **replaces** the default "exercise the changed pages/endpoints" guidance.
|
|
33
34
|
- **If the scenario is (or points to) a file path**, read that file with your file-read tool yourself and pass its **contents** into the verifier's prompt (the verifier has no file-read tool). Do not assume a fixed location or format — read whatever path was given.
|
|
34
35
|
- **If the path does not resolve to an existing file**, stop and report `scenario file not found: <path>`, then ask how to proceed — do not delegate with the literal path string or guess a target.
|
|
35
36
|
- **If no scenario is supplied**, the verifier falls back to exercising the changed pages/endpoints per the active cycles.
|
|
36
37
|
|
|
37
38
|
## Steps
|
|
38
39
|
|
|
39
|
-
1. **Resolve the mode and scenario**: strip a leading `fix` / `report` token (see **Mode**); then file path → read it now; inline text → use as-is; empty → none.
|
|
40
|
+
1. **Resolve the mode and scenario**: strip a leading `fix` / `report` token (see **Mode**); then on the remainder — starts with `scenario:` → SAVED scenario reference (the rest after `scenario:`); a file path → read it now; inline text → use as-is; empty → none.
|
|
40
41
|
2. **Spawn the `ironbee-verifier` custom agent** — call `spawn_agent` with **`agent_type="ironbee-verifier"`** AND **`fork_turns="none"`**. The `fork_turns="none"` is REQUIRED: the default `fork_turns="all"` is a full-history fork that silently DROPS the `agent_type` override, giving you a generic agent *without* the verification tools. (Do NOT "act as" the verifier or use a plain generic fork either.) Put the task, the mode, and the resolved scenario in the `message`, e.g.:
|
|
41
42
|
> Verify the current code changes.
|
|
42
43
|
> Mode: \<`fix` in fix mode — OMIT this line entirely in verify-only mode>
|
|
43
|
-
>
|
|
44
|
+
> \<ONE of: `Saved scenario: <ref>` (when `scenario:` was given — the verifier resolves + runs it) — OR — `Scenario: <resolved text>` (free text / file contents) — OR — `Scenario: none — exercise the changed pages/endpoints`>
|
|
44
45
|
The verifier runs `verification-start` (relaying the fix intent to IronBee's completion gate, which then enforces fix-until-pass on you) → drives every active cycle's tools → submits the single verdict, all in this shared session. It resolves the session id from the environment, so you don't pass one.
|
|
45
46
|
**Wait for the verifier in the same turn — do NOT background it.** Let it run to completion and read its verdict before responding; a backgrounded verifier can let your turn end (and the Stop gate fire) before its verdict is recorded.
|
|
46
47
|
3. **Relay the verifier's summary** — the verdict status and, on fail, the issues it found.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
"use strict";var d=Object.defineProperty;var
|
|
1
|
+
"use strict";

// ---------------------------------------------------------------------------
// CommonJS interop helpers (bundler-generated shape, written out readably).
// `var` is kept throughout so hoisting behaves exactly as in the one-line form.
// ---------------------------------------------------------------------------
var defineProperty = Object.defineProperty;
var getOwnPropertyDescriptor = Object.getOwnPropertyDescriptor;
var getOwnPropertyNames = Object.getOwnPropertyNames;
var hasOwn = Object.prototype.hasOwnProperty;

/** Pins a function's `name` property to `label` (kept configurable). */
var nameFunction = (fn, label) =>
  defineProperty(fn, "name", { value: label, configurable: true });

/** Installs each entry of `getters` on `target` as an enumerable getter. */
var defineExports = (target, getters) => {
  for (var key in getters) {
    defineProperty(target, key, { get: getters[key], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `source` onto `target` as live getters,
 * skipping keys `target` already owns and the single `excluded` key.
 * `descriptor` is only a scratch slot for the looked-up property descriptor.
 */
var copyProperties = (target, source, excluded, descriptor) => {
  if ((source && typeof source == "object") || typeof source == "function") {
    for (let key of getOwnPropertyNames(source)) {
      if (hasOwn.call(target, key) || key === excluded) {
        continue;
      }
      defineProperty(target, key, {
        get: () => source[key],
        enumerable:
          !(descriptor = getOwnPropertyDescriptor(source, key)) ||
          descriptor.enumerable,
      });
    }
  }
  return target;
};

/** Wraps an exports map in a fresh object flagged with `__esModule: true`. */
var toCommonJS = (mod) =>
  copyProperties(defineProperty({}, "__esModule", { value: true }), mod);

// Public surface: a single lazily-resolved `run` export.
var moduleExports = {};
defineExports(moduleExports, { run: () => run });
module.exports = toCommonJS(moduleExports);

// Project-local dependencies. Calls below use the `(0, ns.fn)(...)` form so the
// function is invoked without the namespace object bound as `this`.
var actions = require("../../../hooks/core/actions"),
  activityEnd = require("../../../hooks/core/activity-end"),
  loggerModule = require("../../../lib/logger"),
  outputModule = require("../../../lib/output"),
  stdinModule = require("../../../lib/stdin"),
  codexSpawn = require("../../../analytics/codex/spawn"),
  sessionState = require("../../../hooks/core/session-state"),
  codexUtil = require("../util"),
  runtimePaths = require("../../../lib/runtime-paths");

/**
 * Codex "activity-end" hook entry point.
 *
 * Parses the hook payload from stdin, runs the shared activity-end routine,
 * appends a `session_end` action (reason `"checkpoint"`) when that routine
 * resolves truthy, makes a best-effort attempt to spawn the detached Codex
 * analytics worker, then writes `{}` and exits with code 0.
 *
 * @param projectDir Project directory forwarded to the path, activity-end and
 *   analytics helpers.
 */
async function run(projectDir) {
  const hookInput = (0, codexUtil.parseCodexHookStdin)(
    (0, stdinModule.readStdin)()
  );
  const sessionId = hookInput.session_id ?? "default";
  const sessionPath = (0, runtimePaths.sessionDir)(projectDir, sessionId);
  const actionsFile = `${sessionPath}/actions.jsonl`;

  (0, loggerModule.setLogFile)(`${sessionPath}/session.log`);

  // Captured BEFORE activity-end runs; presumably that routine clears the
  // active activity id from session state — confirm against runActivityEnd.
  const activityIdBeforeEnd =
    (0, sessionState.readState)(sessionPath)?.activeActivityId ?? "";

  const activityEnded = await (0, activityEnd.runActivityEnd)({
    sessionDir: sessionPath,
    actionsFile: actionsFile,
    projectDir: projectDir,
    sessionId: sessionId,
  });

  if (activityEnded) {
    const now = Date.now();
    const sessionEndAction = {
      ...(0, actions.baseFields)(actionsFile),
      id: (0, actions.deterministicSessionEndId)(sessionId),
      type: "session_end",
      timestamp: now,
      session_id: sessionId,
      duration: (0, actions.findDurationSinceLastAction)(
        actionsFile,
        "session_start",
        now
      ),
      reason: "checkpoint",
    };
    await (0, actions.appendAction)(actionsFile, sessionEndAction);
  }

  // Analytics is best-effort: a failure here is logged at debug level and
  // never prevents the hook from completing.
  try {
    const state = (0, sessionState.readState)(sessionPath);
    (0, codexSpawn.spawnDetachedCodexAnalyticsWorker)({
      projectDir: projectDir,
      sessionId: sessionId,
      rolloutPath: hookInput.transcript_path,
      userEmail: state?.userEmail ?? undefined,
      usageType: state?.usageType ?? undefined,
      usagePlan: state?.usagePlan ?? undefined,
      activityId: activityIdBeforeEnd,
    });
  } catch (spawnError) {
    loggerModule.logger.debug(
      `codex analytics spawn failed: ${spawnError instanceof Error ? spawnError.message : spawnError}`
    );
  }

  loggerModule.logger.debug(`activity-end: ${sessionId}`);
  (0, outputModule.writeAndExit)(JSON.stringify({}), 0);
}
nameFunction(run, "run");

// Never executes (guarded by `0 &&`); presumably a static hint so tooling that
// scans CommonJS sources can detect the named export — confirm with the bundler docs.
0 && (module.exports = { run });
|