@ironbee-ai/cli 0.29.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/analytics/claude/emit.js +1 -1
  3. package/dist/analytics/claude/state.js +1 -1
  4. package/dist/analytics/codex/events-emit.js +2 -2
  5. package/dist/analytics/codex/subagent-transcripts.js +3 -3
  6. package/dist/clients/claude/agents/ironbee-scenario.md +191 -0
  7. package/dist/clients/claude/agents/ironbee-verifier.md +22 -5
  8. package/dist/clients/claude/commands/ironbee-manage-scenario.md +36 -0
  9. package/dist/clients/claude/commands/ironbee-search-scenario.md +22 -0
  10. package/dist/clients/claude/commands/ironbee-sync-scenario.md +31 -0
  11. package/dist/clients/claude/commands/ironbee-verify.md +13 -12
  12. package/dist/clients/claude/hooks/activity-end.js +1 -1
  13. package/dist/clients/claude/hooks/activity-start.js +1 -1
  14. package/dist/clients/claude/hooks/clear-verdict.js +1 -1
  15. package/dist/clients/claude/hooks/require-verdict.js +2 -2
  16. package/dist/clients/claude/hooks/require-verification.js +3 -3
  17. package/dist/clients/claude/hooks/session-end.js +1 -1
  18. package/dist/clients/claude/hooks/session-start.js +4 -4
  19. package/dist/clients/claude/hooks/session-status.js +2 -2
  20. package/dist/clients/claude/hooks/subagent-start.js +1 -1
  21. package/dist/clients/claude/hooks/subagent-stop.js +1 -1
  22. package/dist/clients/claude/hooks/track-action-monitor.js +1 -1
  23. package/dist/clients/claude/hooks/track-action.js +1 -1
  24. package/dist/clients/claude/hooks/verify-gate.js +4 -4
  25. package/dist/clients/claude/index.js +4 -4
  26. package/dist/clients/claude/platforms/scenario.android.md +32 -0
  27. package/dist/clients/claude/platforms/scenario.backend.md +26 -0
  28. package/dist/clients/claude/platforms/scenario.browser.md +41 -0
  29. package/dist/clients/claude/platforms/scenario.node.md +27 -0
  30. package/dist/clients/claude/platforms/skill.android.md +4 -0
  31. package/dist/clients/claude/process-analytics.js +1 -1
  32. package/dist/clients/claude/statusline-toggle.js +2 -2
  33. package/dist/clients/claude/trust.js +1 -0
  34. package/dist/clients/codex/agents/ironbee-scenario.md +179 -0
  35. package/dist/clients/codex/agents/ironbee-verifier.md +22 -5
  36. package/dist/clients/codex/commands/ironbee-manage-scenario/SKILL.main.md +102 -0
  37. package/dist/clients/codex/commands/ironbee-manage-scenario/SKILL.md +38 -0
  38. package/dist/clients/codex/commands/ironbee-search-scenario/SKILL.main.md +37 -0
  39. package/dist/clients/codex/commands/ironbee-search-scenario/SKILL.md +23 -0
  40. package/dist/clients/codex/commands/ironbee-sync-scenario/SKILL.main.md +55 -0
  41. package/dist/clients/codex/commands/ironbee-sync-scenario/SKILL.md +33 -0
  42. package/dist/clients/codex/commands/ironbee-verify/SKILL.main.md +12 -3
  43. package/dist/clients/codex/commands/ironbee-verify/SKILL.md +4 -3
  44. package/dist/clients/codex/hooks/activity-end.js +1 -1
  45. package/dist/clients/codex/hooks/activity-start.js +1 -1
  46. package/dist/clients/codex/hooks/clear-verdict.js +3 -3
  47. package/dist/clients/codex/hooks/require-verdict.js +2 -2
  48. package/dist/clients/codex/hooks/require-verification.js +3 -3
  49. package/dist/clients/codex/hooks/session-start.js +3 -3
  50. package/dist/clients/codex/hooks/subagent-start.js +1 -1
  51. package/dist/clients/codex/hooks/subagent-stop.js +1 -1
  52. package/dist/clients/codex/hooks/track-action-monitor.js +1 -1
  53. package/dist/clients/codex/hooks/track-action-pre.js +1 -1
  54. package/dist/clients/codex/hooks/track-action.js +1 -1
  55. package/dist/clients/codex/hooks/verify-gate.js +1 -1
  56. package/dist/clients/codex/index.js +2 -2
  57. package/dist/clients/codex/platforms/command-verify.android.md +1 -0
  58. package/dist/clients/codex/platforms/rule.android.md +2 -1
  59. package/dist/clients/codex/platforms/scenario.android.md +32 -0
  60. package/dist/clients/codex/platforms/scenario.backend.md +26 -0
  61. package/dist/clients/codex/platforms/scenario.browser.md +40 -0
  62. package/dist/clients/codex/platforms/scenario.node.md +27 -0
  63. package/dist/clients/codex/platforms/skill.android.md +4 -0
  64. package/dist/clients/codex/process-analytics.js +2 -2
  65. package/dist/clients/codex/thread-map.js +1 -1
  66. package/dist/clients/codex/util.js +44 -31
  67. package/dist/clients/cursor/commands/ironbee-manage-scenario/SKILL.md +100 -0
  68. package/dist/clients/cursor/commands/ironbee-search-scenario/SKILL.md +34 -0
  69. package/dist/clients/cursor/commands/ironbee-sync-scenario/SKILL.md +54 -0
  70. package/dist/clients/cursor/commands/ironbee-verify/SKILL.md +2 -1
  71. package/dist/clients/cursor/hooks/activity-end.js +1 -1
  72. package/dist/clients/cursor/hooks/activity-start.js +1 -1
  73. package/dist/clients/cursor/hooks/clear-verdict.js +1 -1
  74. package/dist/clients/cursor/hooks/require-verdict.js +2 -2
  75. package/dist/clients/cursor/hooks/require-verification.js +3 -3
  76. package/dist/clients/cursor/hooks/session-end.js +1 -1
  77. package/dist/clients/cursor/hooks/session-start.js +4 -4
  78. package/dist/clients/cursor/hooks/track-action-monitor.js +1 -1
  79. package/dist/clients/cursor/hooks/track-action.js +1 -1
  80. package/dist/clients/cursor/hooks/verify-gate.js +1 -1
  81. package/dist/clients/cursor/index.js +1 -1
  82. package/dist/clients/cursor/platforms/command-verify.android.md +1 -0
  83. package/dist/clients/cursor/platforms/rule.android.md +2 -1
  84. package/dist/clients/cursor/platforms/scenario.android.md +32 -0
  85. package/dist/clients/cursor/platforms/scenario.backend.md +26 -0
  86. package/dist/clients/cursor/platforms/scenario.browser.md +40 -0
  87. package/dist/clients/cursor/platforms/scenario.node.md +27 -0
  88. package/dist/clients/cursor/platforms/skill.android.md +4 -0
  89. package/dist/commands/config.js +1 -1
  90. package/dist/commands/hook.js +10 -10
  91. package/dist/commands/import.js +3 -3
  92. package/dist/commands/process-job-file.js +1 -1
  93. package/dist/commands/queue.js +16 -16
  94. package/dist/commands/scenario.js +1 -0
  95. package/dist/commands/status.js +1 -1
  96. package/dist/commands/uninstall.js +1 -1
  97. package/dist/commands/verify.js +2 -2
  98. package/dist/hooks/core/actions.js +7 -7
  99. package/dist/hooks/core/nested-tools.js +1 -1
  100. package/dist/hooks/core/scenario-tools.js +1 -0
  101. package/dist/hooks/core/session-state.js +1 -1
  102. package/dist/hooks/core/verification-context.js +8 -8
  103. package/dist/import/marker.js +2 -2
  104. package/dist/import/skip.js +1 -1
  105. package/dist/index.js +1 -1
  106. package/dist/lib/config.js +1 -1
  107. package/dist/lib/git.js +1 -1
  108. package/dist/lib/install-version.js +1 -1
  109. package/dist/lib/platform-section.js +3 -3
  110. package/dist/lib/runtime-paths.js +1 -0
  111. package/dist/lib/scenario-staleness.js +1 -0
  112. package/dist/otel/claude/daemon/process.js +1 -1
  113. package/dist/otel/claude/daemon/reprocess.js +1 -1
  114. package/dist/otel/claude/daemon/response-usage.js +2 -2
  115. package/dist/queue/drain.js +1 -1
  116. package/dist/queue/flush.js +1 -1
  117. package/dist/queue/paths.js +1 -1
  118. package/dist/queue/process-file.js +2 -2
  119. package/dist/queue/spawn.js +1 -1
  120. package/dist/tui/config/schema.js +1 -1
  121. package/dist/tui/queue/read.js +4 -4
  122. package/dist/tui/scenarios/area.js +2 -0
  123. package/dist/tui/sessions/read.js +2 -2
  124. package/dist/tui/shell/registry.js +1 -1
  125. package/package.json +1 -1
@@ -1 +1 @@
1
- /* Minified CommonJS module exporting claudeProcessAnalyticsCommand, a commander Command named process-analytics. */"use strict";var t=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var p=Object.getOwnPropertyNames;var u=Object.prototype.hasOwnProperty;var m=(e,s)=>{for(var n in s)t(e,n,{get:s[n],enumerable:!0})},l=(e,s,n,o)=>{if(s&&typeof s=="object"||typeof s=="function")for(let r of p(s))!u.call(e,r)&&r!==n&&t(e,r,{get:()=>s[r],enumerable:!(o=g(s,r))||o.enumerable});return e};var y=e=>l(t({},"__esModule",{value:!0}),e);/* Helpers above: m defines enumerable getter exports on a target object; l copies own properties of a source onto a target as getters (skipping keys the target already owns); y applies l to a fresh object flagged with __esModule. */var S={};m(S,{claudeProcessAnalyticsCommand:()=>f});module.exports=y(S);var c=require("commander"),i=require("../../lib/logger"),a=require("../../analytics/claude/emit"),d=require("../../analytics/claude/log");const f=new c.Command("process-analytics").description("Internal worker \u2014 project + emit a session_analytics snapshot for one Claude trigger").requiredOption("--project <dir>","project directory (where .ironbee/sessions/<sid>/ lives)").requiredOption("--session <id>","session id").requiredOption("--trigger <type>","Stop | SessionEnd").option("--end-reason <reason>","SessionEnd reason (optional)").option("--transcript-source <src>","claude-code | cursor | missing").action(async e=>{/* Any trigger value other than SessionEnd is handled as Stop; the transcript source defaults to claude-code when the option is absent. The log file path is built inline from the project dir and session id. */const s=e.trigger==="SessionEnd"?"SessionEnd":"Stop",n=e.transcriptSource??"claude-code";(0,i.setLogFile)(`${e.project}/.ironbee/sessions/${e.session}/session.log`);const o=new d.AnalyticsLog(e.project,e.session);o.info(`worker: claude process-analytics start (trigger=${s} session=${e.session}${e.endReason?` end_reason=${e.endReason}`:""})`);/* Errors thrown while emitting are reported to the debug logger and the analytics log rather than rethrown. */try{const r=await(0,a.emitAnalytics)({projectDir:e.project,sessionId:e.session,triggerType:s,endReason:e.endReason,transcriptSource:n,log:o});o.info(`worker: claude process-analytics done (status=${r.status} reason=${r.reason})`)}catch(r){i.logger.debug(`claude process-analytics: unexpected error: ${r instanceof Error?r.message:r}`),o.error(`worker: unexpected error: ${r instanceof Error?r.message:r}`)}});/* The expression below is short-circuited by 0 and never assigns at runtime; presumably kept so tooling can statically detect the named export - TODO confirm. */0&&(module.exports={claudeProcessAnalyticsCommand});
1
+ /* Minified CommonJS module exporting claudeProcessAnalyticsCommand, a commander Command named process-analytics. */"use strict";var t=Object.defineProperty;var g=Object.getOwnPropertyDescriptor;var m=Object.getOwnPropertyNames;var u=Object.prototype.hasOwnProperty;var l=(e,s)=>{for(var n in s)t(e,n,{get:s[n],enumerable:!0})},y=(e,s,n,o)=>{if(s&&typeof s=="object"||typeof s=="function")for(let r of m(s))!u.call(e,r)&&r!==n&&t(e,r,{get:()=>s[r],enumerable:!(o=g(s,r))||o.enumerable});return e};var f=e=>y(t({},"__esModule",{value:!0}),e);/* Helpers above: l defines enumerable getter exports on a target object; y copies own properties of a source onto a target as getters (skipping keys the target already owns); f applies y to a fresh object flagged with __esModule. */var w={};l(w,{claudeProcessAnalyticsCommand:()=>S});module.exports=f(w);var c=require("commander"),i=require("../../lib/logger"),a=require("../../analytics/claude/emit"),d=require("../../analytics/claude/log"),p=require("../../lib/runtime-paths");const S=new c.Command("process-analytics").description("Internal worker \u2014 project + emit a session_analytics snapshot for one Claude trigger").requiredOption("--project <dir>","project directory (where .ironbee/sessions/<sid>/ lives)").requiredOption("--session <id>","session id").requiredOption("--trigger <type>","Stop | SessionEnd").option("--end-reason <reason>","SessionEnd reason (optional)").option("--transcript-source <src>","claude-code | cursor | missing").action(async e=>{/* Any trigger value other than SessionEnd is handled as Stop; the transcript source defaults to claude-code when the option is absent. The log file path comes from sessionLogFile in lib/runtime-paths. */const s=e.trigger==="SessionEnd"?"SessionEnd":"Stop",n=e.transcriptSource??"claude-code";(0,i.setLogFile)((0,p.sessionLogFile)(e.project,e.session));const o=new d.AnalyticsLog(e.project,e.session);o.info(`worker: claude process-analytics start (trigger=${s} session=${e.session}${e.endReason?` end_reason=${e.endReason}`:""})`);/* Errors thrown while emitting are reported to the debug logger and the analytics log rather than rethrown. */try{const r=await(0,a.emitAnalytics)({projectDir:e.project,sessionId:e.session,triggerType:s,endReason:e.endReason,transcriptSource:n,log:o});o.info(`worker: claude process-analytics done (status=${r.status} reason=${r.reason})`)}catch(r){i.logger.debug(`claude process-analytics: unexpected error: ${r instanceof Error?r.message:r}`),o.error(`worker: unexpected error: ${r instanceof Error?r.message:r}`)}});/* The expression below is short-circuited by 0 and never assigns at runtime; presumably kept so tooling can statically detect the named export - TODO confirm. */0&&(module.exports={claudeProcessAnalyticsCommand});
@@ -1,2 +1,2 @@
1
- "use strict";var d=Object.defineProperty;var L=Object.getOwnPropertyDescriptor;var T=Object.getOwnPropertyNames;var k=Object.prototype.hasOwnProperty;var u=(e,n)=>d(e,"name",{value:n,configurable:!0});var x=(e,n)=>{for(var t in n)d(e,t,{get:n[t],enumerable:!0})},E=(e,n,t,r)=>{if(n&&typeof n=="object"||typeof n=="function")for(let o of T(n))!k.call(e,o)&&o!==t&&d(e,o,{get:()=>n[o],enumerable:!(r=L(n,o))||r.enumerable});return e};var F=e=>E(d({},"__esModule",{value:!0}),e);var R={};x(R,{applyStatusLineToggle:()=>O,syncChainedStatusLine:()=>P});module.exports=F(R);var i=require("fs"),c=require("path"),S=require("../registry"),$=require("./hooks/session-status"),s=require("../../lib/config"),y=require("../../lib/gitignore"),C=require("../../lib/logger"),f=require("../../lib/output"),m=require("../../hooks/core/session-state");function N(e){if(!(0,i.existsSync)(e))return{};try{return JSON.parse((0,i.readFileSync)(e,"utf-8"))}catch(n){throw C.logger.debug(`failed to read ${e}: ${n}`),new Error(`Config at ${e} is not valid JSON: ${n instanceof Error?n.message:n}`)}}u(N,"readConfigFile");function J(e,n){(0,i.mkdirSync)((0,c.join)(e,".."),{recursive:!0}),(0,i.writeFileSync)(e,JSON.stringify(n,null,2)+`
2
- `)}u(J,"writeConfigFile");function O(e,n,t,r){const o=(0,s.getTargetConfigPath)(t,n),a=N(o),g=e?"enabled":"disabled",l=(0,s.loadConfig)(n);l.statusLine={...l.statusLine,enable:e};const b=(0,s.isSessionStatusEnabled)((0,s.loadConfig)(n)),w=(0,s.isSessionStatusEnabled)(l);if(b===w&&a.statusLine?.enable===e){console.log(`${f.pc.dim("\xB7")} Statusline already ${g} in ${t} config (${f.pc.dim(o)}). No-op.`);return}const h=(0,S.resolveTargetClients)(n,r);for(const B of h)B.install(n,l);t!=="global"&&(0,y.ensureIronBeeGitignored)(n);const p={...a,statusLine:{...a.statusLine,enable:e}};J(o,p);const v=e?"Enabled":"Disabled",I=e?"The statusline wrapper now emits session_status events and chains your existing statusline.":"Your original statusline is restored; no session_status events are emitted.";console.log(`${f.pc.green("\u2713")} ${v} statusline in ${t} config (${f.pc.dim(o)}).`),console.log(` ${f.pc.dim(I)}`),console.log(` ${f.pc.yellow("\u26A0")} Restart your editor / agent session for the change to take effect.`)}u(O,"applyStatusLineToggle");function P(e){const n=(0,$.resolveChainTarget)(e)??null,t=(0,c.join)(e,".ironbee","sessions");if(!(0,i.existsSync)(t))return 0;let r=0,o;try{o=(0,i.readdirSync)(t)}catch(a){return C.logger.debug(`statusline sync: failed to list ${t}: ${a}`),0}for(const a of o){const g=(0,c.join)(t,a);!(0,i.existsSync)((0,c.join)(g,"state.json"))||(0,m.readState)(g).chainedStatusLine===n||((0,m.setChainedStatusLine)(g,n),r++)}return r}u(P,"syncChainedStatusLine");0&&(module.exports={applyStatusLineToggle,syncChainedStatusLine});
1
+ "use strict";var c=Object.defineProperty;var T=Object.getOwnPropertyDescriptor;var k=Object.getOwnPropertyNames;var x=Object.prototype.hasOwnProperty;var u=(e,n)=>c(e,"name",{value:n,configurable:!0});var E=(e,n)=>{for(var o in n)c(e,o,{get:n[o],enumerable:!0})},F=(e,n,o,r)=>{if(n&&typeof n=="object"||typeof n=="function")for(let t of k(n))!x.call(e,t)&&t!==o&&c(e,t,{get:()=>n[t],enumerable:!(r=T(n,t))||r.enumerable});return e};var N=e=>F(c({},"__esModule",{value:!0}),e);var _={};E(_,{applyStatusLineToggle:()=>O,syncChainedStatusLine:()=>P});module.exports=N(_);var i=require("fs"),d=require("path"),S=require("../registry"),$=require("./hooks/session-status"),y=require("../../lib/runtime-paths"),s=require("../../lib/config"),b=require("../../lib/gitignore"),C=require("../../lib/logger"),f=require("../../lib/output"),m=require("../../hooks/core/session-state");function R(e){if(!(0,i.existsSync)(e))return{};try{return JSON.parse((0,i.readFileSync)(e,"utf-8"))}catch(n){throw C.logger.debug(`failed to read ${e}: ${n}`),new Error(`Config at ${e} is not valid JSON: ${n instanceof Error?n.message:n}`)}}u(R,"readConfigFile");function J(e,n){(0,i.mkdirSync)((0,d.join)(e,".."),{recursive:!0}),(0,i.writeFileSync)(e,JSON.stringify(n,null,2)+`
2
+ `)}u(J,"writeConfigFile");function O(e,n,o,r){const t=(0,s.getTargetConfigPath)(o,n),a=R(t),g=e?"enabled":"disabled",l=(0,s.loadConfig)(n);l.statusLine={...l.statusLine,enable:e};const p=(0,s.isSessionStatusEnabled)((0,s.loadConfig)(n)),v=(0,s.isSessionStatusEnabled)(l);if(p===v&&a.statusLine?.enable===e){console.log(`${f.pc.dim("\xB7")} Statusline already ${g} in ${o} config (${f.pc.dim(t)}). No-op.`);return}const w=(0,S.resolveTargetClients)(n,r);for(const L of w)L.install(n,l);o!=="global"&&(0,b.ensureIronBeeGitignored)(n);const h={...a,statusLine:{...a.statusLine,enable:e}};J(t,h);const I=e?"Enabled":"Disabled",B=e?"The statusline wrapper now emits session_status events and chains your existing statusline.":"Your original statusline is restored; no session_status events are emitted.";console.log(`${f.pc.green("\u2713")} ${I} statusline in ${o} config (${f.pc.dim(t)}).`),console.log(` ${f.pc.dim(B)}`),console.log(` ${f.pc.yellow("\u26A0")} Restart your editor / agent session for the change to take effect.`)}u(O,"applyStatusLineToggle");function P(e){const n=(0,$.resolveChainTarget)(e)??null,o=(0,y.sessionsRoot)(e);if(!(0,i.existsSync)(o))return 0;let r=0,t;try{t=(0,i.readdirSync)(o)}catch(a){return C.logger.debug(`statusline sync: failed to list ${o}: ${a}`),0}for(const a of t){const g=(0,d.join)(o,a);!(0,i.existsSync)((0,d.join)(g,"state.json"))||(0,m.readState)(g).chainedStatusLine===n||((0,m.setChainedStatusLine)(g,n),r++)}return r}u(P,"syncChainedStatusLine");0&&(module.exports={applyStatusLineToggle,syncChainedStatusLine});
@@ -0,0 +1 @@
1
+ /* Minified CommonJS module exporting ensureWorkspaceTrusted. */"use strict";var g=Object.defineProperty;var m=Object.getOwnPropertyDescriptor;var b=Object.getOwnPropertyNames;var k=Object.prototype.hasOwnProperty;var p=(t,e)=>g(t,"name",{value:e,configurable:!0});var w=(t,e)=>{for(var r in e)g(t,r,{get:e[r],enumerable:!0})},h=(t,e,r,c)=>{if(e&&typeof e=="object"||typeof e=="function")for(let o of b(e))!k.call(t,o)&&o!==r&&g(t,o,{get:()=>e[o],enumerable:!(c=m(e,o))||c.enumerable});return t};var j=t=>h(g({},"__esModule",{value:!0}),t);var S={};w(S,{ensureWorkspaceTrusted:()=>$});module.exports=j(S);var n=require("fs"),y=require("os"),l=require("path"),i=require("../../lib/logger");/* ensureWorkspaceTrusted(t): sets hasTrustDialogAccepted to true on the entry for workspace t under the projects key of the .claude.json file in the user home directory. The entry key is the real path of t or its resolved path, whichever already has an entry (real path checked first); with no existing entry the real path is used, or the resolved path when realpath fails. Returns true only when the file was rewritten. Returns false when the file is absent, unreadable, unparsable or not an object, when the flag is already true, or when any step fails; failures are logged at debug level and never thrown. */function $(t){try{const e=(0,l.join)((0,y.homedir)(),".claude.json");if(!(0,n.existsSync)(e))return i.logger.debug(`trust: ${e} absent \u2014 skipping workspace trust`),!1;let r;try{r=JSON.parse((0,n.readFileSync)(e,"utf-8"))}catch(s){return i.logger.debug(`trust: cannot read/parse ${e}: ${s instanceof Error?s.message:s}`),!1}if(r===null||typeof r!="object")return!1;const c=(0,l.resolve)(t);let o=c;try{o=(0,n.realpathSync)(c)}catch{}const u=typeof r.projects=="object"&&r.projects!==null?r.projects:{},d=[o,c].find(s=>u[s]!==void 0&&u[s]!==null)??o,f=u[d]??{};if(f.hasTrustDialogAccepted===!0)return!1;f.hasTrustDialogAccepted=!0,u[d]=f,r.projects=u;/* Write the updated JSON to a pid-suffixed temp file beside the target, then rename it over the target; if either step throws, the temp file is removed on a best-effort basis. */const a=`${e}.ironbee-tmp-${process.pid}`;try{(0,n.writeFileSync)(a,JSON.stringify(r,null,2)),(0,n.renameSync)(a,e)}catch(s){try{(0,n.existsSync)(a)&&(0,n.unlinkSync)(a)}catch{}return i.logger.debug(`trust: write failed for ${e}: ${s instanceof Error?s.message:s}`),!1}return i.logger.debug(`trust: set hasTrustDialogAccepted=true for ${d}`),!0}catch(e){return i.logger.debug(`trust: unexpected failure: ${e instanceof Error?e.message:e}`),!1}}p($,"ensureWorkspaceTrusted");0&&(module.exports={ensureWorkspaceTrusted});
@@ -0,0 +1,179 @@
1
+ # IronBee Scenario manager (manage / search / sync)
2
+
3
+ You are a dedicated scenario-management sub-agent. The main agent delegated a scenario operation
4
+ to you. You manage **reusable verification scenarios** stored by the IronBee DevTools MCP servers.
5
+ A scenario is a named, parameterizable script (`callTool('<tool>', {...})` JS) that drives ONE
6
+ platform's tools. Do exactly the operation named in the delegating prompt and return a short
7
+ summary.
8
+
9
+ You drive ONLY the `*_scenario-*` tools (`scenario-add` / `scenario-update` / `scenario-delete`
10
+ / `scenario-list` / `scenario-search` / `scenario-run`) for scenario work. The platform tools a
11
+ scenario *script* calls run INSIDE the sandbox at run time — you never call them directly.
12
+ You run under a **read-only sandbox** (same as the verifier) — you **never edit/fix project code**.
13
+ You may run shell commands to build / start / stop the app for live authoring (start it only if it
14
+ isn't already running; stop only what YOU started) and READ files you're pointed at to author a
15
+ script or derive metadata. Scenarios are authored ONLY through the `scenario-*` MCP tools (their
16
+ store write happens server-side, not in your sandbox).
17
+
18
+ This is NOT a verification cycle — you submit no verdict and do not gate completion.
19
+
20
+ ## Operation: the delegating prompt names ONE of these
21
+
22
+ ### `manage` — add / update / delete
23
+ - **Resolve intent.** Scenario CONTENT to save (a prompt or a file path) → add/update. A TARGET
24
+ only described → delete.
25
+ - **Add vs update (never duplicate).** Before adding, **`scenario-search` / `scenario-list`** to
26
+ check whether a same-name or clearly-the-same scenario already exists on the target platform. If
27
+ it does → **update** it instead of creating a duplicate.
28
+ - **Author the script** from the given content into the devtools format. Pick the **right platform**
29
+ from what the scenario does (see the platform sections for which platform fits) and call `scenario-add`/`scenario-update` on **that
30
+ platform's server**. A high-level scenario that spans platforms → split into one sub-scenario per
31
+ platform, linked by metadata (see "Metadata"). **By default author it against the LIVE app — see
32
+ "Live authoring" below** (skip with `Mode: draft`). Script form: §Script format.
33
+ - **Delete is destructive — always confirm.** Resolve the target via search/list, then show the
34
+ matched **name + description + platform** and ask the user to confirm before deleting. Multiple
35
+ candidates / low score → list them and ask which.
36
+ - **Update resolved by fuzzy description also confirms** (the script is overwritten — same risk as
37
+ delete). An **exact-name** match proceeds without a confirm prompt.
38
+ - **Scope**: write to `project` scope (default) unless the user asked for `global`. Pass `scope` on
39
+ every call.
40
+ - **Rename** isn't a devtools op (name is the key) → delete-old + add-new (with the delete confirm).
41
+
42
+ ### `search` — find scenarios
43
+ - **`scenario-search`** (fuzzy, ranked over name + description) for discovery ("find login
44
+ scenarios"). **`scenario-list` with `metadataMatch`** for precise structural lookup ("which
45
+ scenarios cover `src/auth/login.ts`") — metadata is NOT indexed by `scenario-search`.
46
+ - **Search every enabled platform's server** and union the results (each platform is a separate
47
+ server with its own store). Report name + description + platform + score; surface scope.
48
+
49
+ ### `sync` — re-validate an existing scenario against current code, repair drift
50
+ - **Target.** `all` → every STALE scenario (those whose `ironbee.coveredPaths` changed since their
51
+ `ironbee.commit`, or authored as drafts); **`all force`** (a leading `force` token) → EVERY saved
52
+ scenario regardless of freshness; a name / description → resolve that one (`scenario-search` /
53
+ `scenario-list`). **Before a batch, list the targets + count first** (e.g. "syncing 3 stale of 7")
54
+ so the blast radius is visible.
55
+ - **Grouped scenarios.** When several targets share an `ironbee.group` (one high-level flow split
56
+ across platforms), run them in ascending `ironbee.order` — earlier steps set up state later ones need.
57
+ - **`Mode: check`** (a leading `check` token) → DRY-RUN: run + report drift, do NOT repair or update.
58
+ Otherwise: run + repair + `scenario-update`.
59
+ - **Run it** (`scenario-run`, against the live app — start it if needed, tear down what you started,
60
+ same discipline as live authoring) and classify the outcome:
61
+ - **passes** → still current. (non-check) `scenario-update` to stamp `ironbee.commit` → current HEAD
62
+ (read via `git rev-parse HEAD`) + `ironbee.liveValidated: true`; done. `scenario-update`
63
+ shallow-replaces metadata, so read the current metadata and re-send it MERGED with these two
64
+ keys — don't drop `coveredPaths` / `group` / `argsSchema`.
65
+ - **fails due to DRIFT** (the *mechanics* broke — the way to reach / drive the flow changed, not the
66
+ expected outcome) → repair the SCRIPT mechanics only, `scenario-update`, re-run until green, then
67
+ stamp commit / liveValidated.
68
+ - **fails due to a real DEFECT** (the app genuinely broke — the expected outcome is unreachable) →
69
+ **STOP, report the defect to the user, do NOT touch the scenario** (it correctly caught the bug;
70
+ leave it as-is). This is the "a genuine defect is a STOP, not a workaround" rule.
71
+ - **the expected outcome legitimately CHANGED** (a deliberate behavior / spec change) → **do NOT
72
+ auto-edit the assertion**; ask the user — changing *what* a scenario verifies is an authoring
73
+ decision, not a sync.
74
+ - **Classifying drift vs defect — the load-bearing call.** Repair is the ONLY branch that edits a
75
+ scenario, so a defect mistaken for drift silently masks a regression. Apply two rules before you
76
+ repair:
77
+ 1. **HOW-vs-WHAT self-check:** would the fix change *how* the flow reaches its point (driving /
78
+ locating / navigating steps) or *what* it asserts (the expected terminal outcome / value /
79
+ state)? Only a HOW change is drift. A WHAT change is never drift — it's a defect (STOP) or a
80
+ deliberate expectation change (ask). Never edit the assertion to make a run pass.
81
+ 2. **Failure-locus heuristic:** a failure while *reaching / driving* the flow (a step can't locate
82
+ or progress) leans drift; a failure at the *terminal assertion* after the flow completed (the
83
+ outcome was reached but is wrong) leans defect.
84
+ **When uncertain, treat it as a defect and STOP** — never auto-repair on a guess.
85
+ - **Hard rule: sync repairs MECHANICS, never the ASSERTION / expected outcome.** Silently relaxing an
86
+ assertion to make a stale scenario pass would mask a regression.
87
+ - **Scope / teardown / metadata**: same as `manage` live authoring (project scope by default; stop
88
+ only what you started; stamp metadata). Report per scenario: repaired / still-fresh / defect-reported
89
+ / needs-user-decision.
90
+
91
+ (There is no `run` operation here. Running a saved scenario to **verify** is the verifier's job, via
92
+ `$ironbee-verify scenario:<name>` — not this agent. This agent **manages, searches, and syncs**
93
+ (re-validates + repairs drift in) scenarios; it runs them only to author / validate / repair, never to
94
+ gate completion.)
95
+
96
+ ## Live authoring (default for add / update) — build it against the running app
97
+
98
+ Don't author a runtime scenario from source guesses (source rarely matches the running system exactly). By **default, drive the app to
99
+ understand it — exactly what you'd do when verifying** (exercise the relevant flow through this platform's tools, whatever it takes) — author from what you actually observe, then validate by running it.
100
+
101
+ 1. **`draft` → skip:** if the prompt says `Mode: draft` (or "source only"), author from source, save,
102
+ note *"not live-validated — run it to verify"*. Done.
103
+ 2. **Start the app only if it isn't already running** (check `docker compose ps` / process / config;
104
+ track whether YOU started it). Genuinely can't start it → **source-only draft + say so**, don't fail.
105
+ 3. **Understand it by probing with the draft itself:** `scenario-add` the draft **under the FINAL scenario
106
+ name** (step 4 then iterates that SAME entry via `scenario-update` — do NOT spawn a separate
107
+ `*-probe` / throwaway scenario in the store) and `scenario-run` it to exercise the relevant flow —
108
+ whatever it takes to learn how the real system behaves — and READ the returned snapshots/results.
109
+ 4. **Author the full flow** from what you observed → `scenario-update`. Make it a **verification flow**,
110
+ not a superficial run: exercise the cycle's evidence tools, capture their output with
111
+ `returnOutput: true`, and assert / return the expected outcomes — so running it later via
112
+ `$ironbee-verify scenario:<name>` can judge it and satisfy the gate.
113
+ 5. **Validate:** `scenario-run` end-to-end; fix the **SCRIPT** + `scenario-update` until it runs
114
+ cleanly, and **assert the real terminal outcome — not an optimistic intermediate signal**. Same
115
+ app/env considerations as any verification run (use a test/staging target for flows with real side
116
+ effects).
117
+ 6. **Teardown — leave a clean store:** `scenario-delete` ANY temporary / probe / throwaway scenario you
118
+ added this session (anything named `*-probe`, a draft you decided not to keep, an exploratory copy);
119
+ the store must end with ONLY the finished deliverable scenario(s), never a leftover probe. THEN stop
120
+ ONLY the app / processes you started.
121
+ 7. Stamp metadata (§Metadata) and report what you created/updated + whether it was live-validated.
122
+
123
+ > **A genuine defect is a STOP, not a workaround.** If validating shows the flow can't legitimately
124
+ > succeed — a real bug makes the expected outcome unreachable (an error, a failed state, wrong
125
+ > resulting data) — do NOT engineer the scenario around it: don't cherry-pick inputs / args / data that
126
+ > dodge the bug, and don't weaken the assertion to an optimistic intermediate signal instead of the
127
+ > real terminal outcome. That yields a green scenario that masks a broken flow and produces a FALSE
128
+ > PASS when it's later run to verify. Instead STOP and report the defect to the user **in your summary,
129
+ > not inside the scenario** — keep the saved scenario a clean verification flow (it asserts the real
130
+ > outcome and will simply fail until the bug is fixed; that's it doing its job). Do NOT bake bug /
131
+ > defect commentary into the scenario's `description` or metadata; `liveValidated: false` is the only
132
+ > signal needed when you couldn't get a passing run — or leave the scenario unsaved. ("Fix until it
133
+ > passes" means fixing the SCRIPT, never working around the app.)
134
+
135
+ Do all of this through `scenario-add` / `scenario-update` / `scenario-run` — do NOT open a verification
136
+ cycle or call the platform tools directly. That keeps the work gate-orthogonal (no `verification_id`,
137
+ can't false-block a later edit); `scenario-run` runs the platform tools inside the sandbox and returns
138
+ their results.
139
+
140
+ ## Script format
141
+ A scenario `script` is JS run in the devtools sandbox (async — top-level `await`/`return` work).
142
+ It reads params from the `args` binding and invokes the platform's tools via `callTool`:
143
+
144
+ ```js
145
+ const { baseUrl } = args; // declared via argsSchema
146
+ const result = await callTool('<bare-tool-name>', { /* tool input */ });
147
+ return { ok: true };
148
+ ```
149
+
150
+ `args` is opaque to devtools — document the expected shape in the scenario's `description` and the
151
+ `argsSchema` metadata. **Discover the available `callTool` tool names for a platform from your
152
+ connected MCP tool schemas** (the bare names) — don't guess.
153
+
154
+ ## Metadata conventions (stamp these on add/update)
155
+ - `ironbee.coveredPaths` — source paths the scenario exercises (array), when derivable.
156
+ - `argsSchema` — declared params, e.g. `{ "baseUrl": "string" }`.
157
+ **Mandatory for any parametric scenario** (run reads it to know what to ask).
158
+ - `ironbee.liveValidated` — `true` when you validated the scenario by running it end-to-end against
159
+ the live app this session; `false` when authored source-only (`draft`, or the app couldn't be
160
+ started). Always stamp it.
161
+ - `ironbee.commit` — the commit the scenario was authored against (`git rev-parse HEAD`).
162
+ - `ironbee.group` / `ironbee.order` — for a high-level scenario split across platforms: a shared
163
+ group slug + integer run order.
164
+ - `scenario-update` does a **shallow replace** of metadata — to change one key, re-send the FULL
165
+ metadata object (read it first, merge, write back).
166
+
167
+ The platform sections below tell you each enabled cycle's server, tool prefix, and store dir.
168
+
169
+ <!--IRONBEE:PLATFORM:browser-->
170
+ <!--/IRONBEE:PLATFORM:browser-->
171
+
172
+ <!--IRONBEE:PLATFORM:node-->
173
+ <!--/IRONBEE:PLATFORM:node-->
174
+
175
+ <!--IRONBEE:PLATFORM:backend-->
176
+ <!--/IRONBEE:PLATFORM:backend-->
177
+
178
+ <!--IRONBEE:PLATFORM:android-->
179
+ <!--/IRONBEE:PLATFORM:android-->
@@ -15,11 +15,28 @@ session, so the main agent's completion gate sees your work.
15
15
  devtools tools; a code-reading "pass" is banned.
16
16
 
17
17
  ## Scenario
18
- If the delegating prompt includes a verification **scenario**, it is authoritative verify
19
- exactly what it describes, driving each active cycle's tools to exercise precisely the flows,
20
- states, and endpoints it names (this replaces the default "exercise the changed
21
- pages/endpoints"). Map each `checks` entry to a scenario step, each `issues` entry to a step
22
- that failed. If no scenario is given, exercise the changed pages/endpoints for each active cycle.
18
+ The delegating prompt may tell you what to verify in one of two ways:
19
+
20
+ - **A SAVED scenario** — the prompt says `Saved scenario: <ref>` (`<ref>` is an exact name OR a
21
+ semantic description; optional `args:` may follow). RESOLVE it: try an exact-name match
22
+ (`*_scenario-list`) AND a semantic `*_scenario-search` across the enabled platforms, then pick the
23
+ single strong match. Several plausible matches → ask which; **no match → say so and fall back to
24
+ discovery** (the free-text path below). Then **run it in ONE call: `*_scenario-run <name>`** (pass
25
+ any given `args`) — this executes the whole pre-recorded flow, so you do NOT re-discover or drive it
26
+ step by step (that's the speed win). **JUDGE the result**: functional (the script's returned
27
+ values / assertions / errors) AND any visual evidence it returned (e.g. screenshots) — then submit the verdict as
28
+ usual. The scenario's nested tool calls run inside THIS verification cycle, so they satisfy the
29
+ gate's required-tools for you (as long as the scenario exercises them).
30
+ **On a PASS verdict, also keep the scenario fresh:** `*_scenario-update` its `ironbee.commit`
31
+ → current HEAD (`git rev-parse HEAD`) + `ironbee.liveValidated: true` — read the current metadata and
32
+ re-send it MERGED (shallow replace; don't drop `coveredPaths` / `group` / `argsSchema`). On a
33
+ FAIL / defect, do NOT stamp (leave it for `$ironbee-sync-scenario scenario:<name>` or the user).
34
+ - **A FREE-TEXT scenario / file path** — anything else is authoritative: verify exactly what it
35
+ describes, driving each active cycle's tools to exercise precisely the flows, states, and endpoints
36
+ it names (this replaces the default "exercise the changed pages/endpoints").
37
+
38
+ Map each `checks` entry to a scenario step, each `issues` entry to a step that failed. If no scenario
39
+ is given at all, exercise the changed pages/endpoints for each active cycle.
23
40
 
24
41
  ## Session id — you don't need it
25
42
  The `ironbee hook` commands resolve the session automatically from your environment
@@ -0,0 +1,102 @@
1
+ ---
2
+ name: ironbee-manage-scenario
3
+ description: >
4
+ Add, update, or delete a reusable IronBee verification scenario by driving the scenario-* MCP
5
+ tools yourself. Use when the user types `$ironbee-manage-scenario`. Authors the script in the
6
+ devtools format and saves it to the right platform's store (or finds and updates/deletes one).
7
+ ---
8
+
9
+ # IronBee — Manage scenario
10
+
11
+ This project runs IronBee in **main-agent** mode — the devtools `*_scenario-*` MCP tools are wired
12
+ into THIS session, so **you** drive them (there is no scenario sub-agent). Add / update / delete a
13
+ reusable verification **scenario**. This is NOT a verification cycle — it submits no verdict and does
14
+ not gate completion.
15
+
16
+ ## Steps
17
+ 1. **Resolve intent.** Content to save (inline text or a file path you read) → add/update. Only a
18
+ target described (no content to save) → delete.
19
+ 2. **Add vs update (never duplicate).** Before adding, `*_scenario-search` / `*_scenario-list` to
20
+ check for a same-name / clearly-the-same scenario on the target platform; if it exists → update
21
+ it instead of creating a duplicate.
22
+ 3. **Pick the platform** from what the scenario does (see the platform sections for which platform fits) and author the script (see "Script
23
+ format"). Call `*_scenario-add` / `*_scenario-update` on **that platform's** server. A high-level
24
+ scenario spanning platforms → split into one sub-scenario per platform, linked by `ironbee.group`
25
+ + `ironbee.order` metadata.
26
+ 4. **Delete is destructive — always confirm.** Resolve the target, show the matched
27
+ **name + description + platform**, and ask the user before deleting. Multiple / low-score
28
+ candidates → list them and ask which. An **update resolved by fuzzy description** also confirms
29
+ (the script is overwritten); an exact-name update proceeds without confirm.
30
+ 5. **Scope**: pass `scope: "project"` (default) unless the user asked for `global`.
31
+
32
+ ## Live authoring (default for add / update) — build it against the running app
33
+
34
+ Don't author a runtime scenario from source guesses (source rarely matches the running system exactly). By **default, drive the app to
35
+ understand it — exactly what you'd do when verifying** (exercise the relevant flow through this platform's tools, whatever it takes) — author from what you actually observe, then validate by running it. Do this
36
+ entirely through the `*_scenario-*` tools (run discovery via `*_scenario-run`, don't call the platform
37
+ tools directly: that keeps it gate-orthogonal — no `verification_id`, can't false-block a later edit).
38
+
39
+ 1. **`draft` → skip:** if the request begins with `draft` (or says "source only"), author from source,
40
+ save, note *"not live-validated — run it to verify"*. Done.
41
+ 2. **Start the app only if it isn't already running** (track whether YOU started it). Can't start it
42
+ (missing env/DB/secrets, broken build) → **source-only draft + say so**, don't fail.
43
+ 3. **Understand it by running the draft as a probe:** `*_scenario-add` the draft **under the FINAL scenario
44
+ name** (step 4 then iterates that SAME entry via `*_scenario-update` — do NOT spawn a separate
45
+ `*-probe` / throwaway scenario in the store) and `*_scenario-run` it to exercise the relevant flow —
46
+ whatever it takes to learn how the real system behaves — and read the returned snapshots/results.
47
+ 4. **Author the full flow** from what you observed → `*_scenario-update`. Make it a **verification flow**,
48
+ not a superficial run: exercise the cycle's evidence tools, capture their output with
49
+ `returnOutput: true`, and assert / return the expected outcomes — so running it later via
50
+ `$ironbee-verify scenario:<name>` can judge it and satisfy the gate.
51
+ 5. **Validate:** `*_scenario-run` end-to-end; fix the **SCRIPT** + update until it runs cleanly, and
52
+ **assert the real terminal outcome — not an optimistic intermediate signal**. Same app/env
53
+ considerations as any verification run (use a test/staging target for flows with real side effects).
54
+ 6. **Teardown — leave a clean store:** `*_scenario-delete` ANY temporary / probe / throwaway scenario you
55
+ added this session (anything named `*-probe`, a draft you decided not to keep, an exploratory copy);
56
+ the store must end with ONLY the finished deliverable scenario(s), never a leftover probe. THEN stop
57
+ ONLY the app / processes you started.
58
+
59
+ > **A genuine defect is a STOP, not a workaround.** If validating shows the flow can't legitimately
60
+ > succeed — a real bug makes the expected outcome unreachable (an error, a failed state, wrong
61
+ > resulting data) — do NOT engineer the scenario around it: don't cherry-pick inputs / args / data that
62
+ > dodge the bug, and don't weaken the assertion to an optimistic intermediate signal instead of the
63
+ > real terminal outcome. That yields a green scenario that masks a broken flow and produces a FALSE
64
+ > PASS when it's later run to verify. Instead STOP and report the defect to the user **in your summary,
65
+ > not inside the scenario** — keep the saved scenario a clean verification flow (it asserts the real
66
+ > outcome and will simply fail until the bug is fixed; that's it doing its job). Do NOT bake bug /
67
+ > defect commentary into the scenario's `description` or metadata; `ironbee.liveValidated: false` is the only
68
+ > signal needed when you couldn't get a passing run — or leave the scenario unsaved. ("Fix until it
69
+ > passes" means fixing the SCRIPT, never working around the app.)
70
+
71
+ ## Script format
72
+ JS run in the devtools sandbox (async — top-level `await`/`return` work); reads params from `args`:
73
+
74
+ ```js
75
+ const { baseUrl } = args; // declared via argsSchema
76
+ const result = await callTool('<bare-tool-name>', { /* tool input */ });
77
+ return { ok: true };
78
+ ```
79
+
80
+ Discover the available `callTool` tool names for a platform from your connected MCP schemas — don't
81
+ guess. Document the expected `args` in the `description` + the `argsSchema` metadata.
82
+
83
+ ## Metadata conventions (stamp on add/update)
84
+ - `argsSchema` — declared params, e.g. `{ "baseUrl": "string" }`. **Mandatory for parametric scenarios.**
85
+ - `ironbee.coveredPaths` — source paths exercised (array), when derivable.
86
+ - `ironbee.group` / `ironbee.order` — for a cross-platform split.
87
+ - `*_scenario-update` does a **shallow replace** of metadata — to change one key, re-send the FULL
88
+ metadata object (read it first, merge, write back).
89
+
90
+ The platform sections below list each enabled cycle's server, tool prefix, and store dir.
91
+
92
+ <!--IRONBEE:PLATFORM:browser-->
93
+ <!--/IRONBEE:PLATFORM:browser-->
94
+
95
+ <!--IRONBEE:PLATFORM:node-->
96
+ <!--/IRONBEE:PLATFORM:node-->
97
+
98
+ <!--IRONBEE:PLATFORM:backend-->
99
+ <!--/IRONBEE:PLATFORM:backend-->
100
+
101
+ <!--IRONBEE:PLATFORM:android-->
102
+ <!--/IRONBEE:PLATFORM:android-->
@@ -0,0 +1,38 @@
1
+ ---
2
+ name: ironbee-manage-scenario
3
+ description: >
4
+ Add, update, or delete a reusable IronBee verification scenario by delegating to the
5
+ ironbee-scenario custom agent. Use when the user types `$ironbee-manage-scenario`. The sub-agent
6
+ authors the script in the devtools format and saves it to the right platform's store (or finds and
7
+ updates/deletes an existing one).
8
+ ---
9
+
10
+ # IronBee — Manage scenario
11
+
12
+ > **Delegate — do NOT run the scenario tools inline.** Spawn the **`ironbee-scenario` custom agent**
13
+ > via `spawn_agent` with `agent_type="ironbee-scenario"` **and `fork_turns="none"`** (the default
14
+ > `fork_turns="all"` silently drops the agent_type → a generic toolless agent). The sub-agent owns
15
+ > the devtools `scenario-*` tools; you don't have them.
16
+
17
+ Add / update / delete a reusable verification **scenario** by delegating to the `ironbee-scenario`
18
+ custom agent. This is NOT a verification cycle — it submits no verdict and does not gate completion.
19
+
20
+ ## Steps
21
+ 1. **If the request points to a file path** (scenario content to save), read that file now and pass
22
+ its **contents** into the sub-agent's prompt. If a given path doesn't resolve, stop and report
23
+ `scenario file not found: <path>`.
24
+ 2. **Spawn** `spawn_agent` with `agent_type="ironbee-scenario"` and `fork_turns="none"`, passing in
25
+ `message`:
26
+ > Operation: manage
27
+ > Request: \<the user's request — content to add/update, or the target to update/delete>
28
+ > Scope: \<`global` if the user asked, else `project`>
29
+ > Mode: \<include `Mode: draft` ONLY if the request begins with a `draft` token (source-only, no app
30
+ > run) — otherwise OMIT so the sub-agent authors against the live app>
31
+ The sub-agent decides add vs update (checks for an existing same-name scenario first), picks the
32
+ right platform, authors the script — **against the live app by default** (starts the app if needed,
33
+ observes the real behavior, validates by running once, then cleans up — deletes any probe /
34
+ throwaway scenarios it added and stops what it started; `draft` skips this)
35
+ — and stamps metadata (`argsSchema` for parametric ones).
36
+ **Deletes and fuzzy-resolved updates ask you to confirm** the matched scenario first — relay that
37
+ to the user and pass their answer back. **Wait for the sub-agent in the same turn.**
38
+ 3. **Relay** the sub-agent's summary (what it created / updated / deleted, on which platform).
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: ironbee-search-scenario
3
+ description: >
4
+ Find reusable IronBee verification scenarios by name, description, or metadata by driving the
5
+ scenario-search / scenario-list MCP tools yourself. Use when the user types
6
+ `$ironbee-search-scenario`. Searches every enabled platform's store.
7
+ ---
8
+
9
+ # IronBee — Search scenarios
10
+
11
+ This project runs IronBee in **main-agent** mode — the devtools scenario MCP tools are wired into
12
+ THIS session, so **you** drive them. Find saved verification **scenarios**. Read-only.
13
+
14
+ ## Steps
15
+ 1. **Pick the surface:**
16
+ - **`*_scenario-search`** (fuzzy, ranked over name + description) — discovery ("find login
17
+ scenarios").
18
+ - **`*_scenario-list` with `metadataMatch`** — precise structural lookup ("which scenarios cover
19
+ `src/auth/login.ts`"). Metadata is NOT indexed by `scenario-search`, so path/tag lookups use
20
+ `scenario-list`.
21
+ 2. **Search every enabled platform's server** (each platform is a separate server with its own
22
+ store) and union the results.
23
+ 3. **Report** name + description + platform + (for fuzzy search) relevance score; also show each scenario's scope (`project` / `global`).
24
+
25
+ The platform sections below list each enabled cycle's server, tool prefix, and store dir.
26
+
27
+ <!--IRONBEE:PLATFORM:browser-->
28
+ <!--/IRONBEE:PLATFORM:browser-->
29
+
30
+ <!--IRONBEE:PLATFORM:node-->
31
+ <!--/IRONBEE:PLATFORM:node-->
32
+
33
+ <!--IRONBEE:PLATFORM:backend-->
34
+ <!--/IRONBEE:PLATFORM:backend-->
35
+
36
+ <!--IRONBEE:PLATFORM:android-->
37
+ <!--/IRONBEE:PLATFORM:android-->
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: ironbee-search-scenario
3
+ description: >
4
+ Find reusable IronBee verification scenarios by name, description, or metadata by delegating to
5
+ the ironbee-scenario custom agent. Use when the user types `$ironbee-search-scenario`. The
6
+ sub-agent searches every enabled platform's store and returns the matches.
7
+ ---
8
+
9
+ # IronBee — Search scenarios
10
+
11
+ > **Delegate** — spawn the **`ironbee-scenario` custom agent** via `spawn_agent` with
12
+ > `agent_type="ironbee-scenario"` **and `fork_turns="none"`**. The sub-agent owns the scenario tools.
13
+
14
+ Find saved verification **scenarios**. Read-only.
15
+
16
+ ## Steps
17
+ 1. **Spawn** `spawn_agent` with `agent_type="ironbee-scenario"` and `fork_turns="none"`, passing in
18
+ `message`:
19
+ > Operation: search
20
+ > Query: \<the user's description — a name/topic for fuzzy search, or a path/tag for metadata match>
21
+ The sub-agent picks the right surface (fuzzy name+description vs precise `metadataMatch`), searches
22
+ **every enabled platform's store**, and unions the results. **Wait for the sub-agent in the same turn.**
23
+ 2. **Relay** the matches — name, description, platform, and (for fuzzy search) relevance score.
@@ -0,0 +1,55 @@
1
+ ---
2
+ name: ironbee-sync-scenario
3
+ description: >
4
+ Re-validate saved IronBee verification scenarios against the current code and repair MECHANICAL
5
+ drift, by driving the scenario-* MCP tools yourself. Use when the user types
6
+ `$ironbee-sync-scenario`. A leading `check` token = dry-run (report drift, no repair).
7
+ ---
8
+
9
+ # IronBee — Sync scenario(s)
10
+
11
+ This project runs IronBee in **main-agent** mode — the devtools `*_scenario-*` MCP tools are wired
12
+ into THIS session, so **you** drive them. Re-validate + repair saved verification **scenarios**. This
13
+ is NOT a verification cycle — no verdict, no gate.
14
+
15
+ ## Steps
16
+ 1. **Resolve mode + target**: strip a leading `check` token (→ dry-run) and a leading `force` token
17
+ (→ include ALL scenarios, not just stale); remainder = `all` (stale ones; with `force`, every one)
18
+ or a name / description (one). Empty → `all`. **Print the target list + count before running.**
19
+ Run targets that share an `ironbee.group` in ascending `ironbee.order` (a flow split across platforms).
20
+ 2. **For each target scenario** (resolve via `*_scenario-search` / `*_scenario-list`; `all` = the stale
21
+ ones — covered files changed since their `ironbee.commit`, or authored as drafts) **run it**
22
+ (`*_scenario-run`, against the live app — start it if needed, tear down what you started) and classify:
23
+ - **passes** → still current; (non-check) `*_scenario-update` to stamp `ironbee.commit` → HEAD
24
+ (read via `git rev-parse HEAD`) + `ironbee.liveValidated: true`. `*_scenario-update`
25
+ shallow-replaces metadata — read current metadata and re-send it MERGED with these two keys
26
+ (don't drop `coveredPaths` / `group` / `argsSchema`).
27
+ - **mechanical DRIFT** (the way to reach / drive the flow changed, not the expected outcome) →
28
+ repair the SCRIPT mechanics only, `*_scenario-update`, re-run until green, then stamp.
29
+ - **real DEFECT** (the expected outcome is unreachable — the app broke) → **STOP, report, do NOT
30
+ touch the scenario.**
31
+ - **expectation CHANGED** (a deliberate behavior / spec change) → do NOT auto-edit the assertion;
32
+ ask the user.
33
+ - **`check` mode** → only run + report drift; never repair / update.
34
+ - **Classify safely** (repair is the only branch that edits a scenario, so a defect mistaken for
35
+ drift masks a regression): before repairing, self-check whether the fix changes *how* the flow
36
+ is driven (drift — OK to repair) or *what* it asserts (never drift — a defect → STOP, or a
37
+ deliberate change → ask). A failure while reaching / driving the flow leans drift; a failure at
38
+ the terminal assertion leans defect. **Uncertain → treat as a defect and STOP.**
39
+ 3. **Report** per scenario: repaired / still-fresh / defect-reported / needs decision.
40
+
41
+ **Hard rule: repair MECHANICS, never the ASSERTION / expected outcome** — silently relaxing an
42
+ assertion to make a stale scenario pass would mask a regression. (To just *detect* staleness without
43
+ running anything, use `ironbee scenario status`.)
44
+
45
+ <!--IRONBEE:PLATFORM:browser-->
46
+ <!--/IRONBEE:PLATFORM:browser-->
47
+
48
+ <!--IRONBEE:PLATFORM:node-->
49
+ <!--/IRONBEE:PLATFORM:node-->
50
+
51
+ <!--IRONBEE:PLATFORM:backend-->
52
+ <!--/IRONBEE:PLATFORM:backend-->
53
+
54
+ <!--IRONBEE:PLATFORM:android-->
55
+ <!--/IRONBEE:PLATFORM:android-->
@@ -0,0 +1,33 @@
1
+ ---
2
+ name: ironbee-sync-scenario
3
+ description: >
4
+ Re-validate saved IronBee verification scenarios against the current code and repair MECHANICAL
5
+ drift, by delegating to the ironbee-scenario custom agent (operation sync). Use when the user types
6
+ `$ironbee-sync-scenario`. A leading `check` token = dry-run (report drift, no repair).
7
+ ---
8
+
9
+ # IronBee — Sync scenario(s)
10
+
11
+ > **Delegate** — spawn the **`ironbee-scenario` custom agent** via `spawn_agent` with
12
+ > `agent_type="ironbee-scenario"` **and `fork_turns="none"`** (the default `fork_turns="all"` silently
13
+ > drops the agent_type → a generic toolless agent). The sub-agent owns the `scenario-*` tools.
14
+
15
+ Re-validate + repair saved verification **scenarios**. This is NOT a verification cycle.
16
+
17
+ ## Steps
18
+ 1. **Resolve the mode + target**: strip a leading `check` token (→ dry-run) and a leading `force` token
19
+ (→ sync ALL scenarios, not just stale); remainder = `all` (stale ones; `force` = every one) or a
20
+ name / description (one). Empty → `all`.
21
+ 2. **Spawn** `spawn_agent` with `agent_type="ironbee-scenario"` and `fork_turns="none"`, passing in
22
+ `message`:
23
+ > Operation: sync
24
+ > Target: \<`all`, or the name / description>
25
+ > Force: \<include `Force: all` ONLY if the request began with `force`>
26
+ > Mode: \<include `Mode: check` ONLY if the request began with `check`; otherwise OMIT>
27
+ The sub-agent runs each target against the live app, classifies (still-fresh / mechanical drift →
28
+ repair the SCRIPT only / real defect → STOP + report / expectation changed → ask), and on a
29
+ non-check run stamps passing and repaired scenarios current. **It repairs MECHANICS, never what a scenario
30
+ verifies. Wait for the sub-agent in the same turn.**
31
+ 3. **Relay** the summary (per scenario: repaired / still-fresh / defect-reported / needs decision).
32
+
33
+ (To just *detect* staleness without running anything, use `ironbee scenario status`.)
@@ -42,9 +42,18 @@ A custom verification scenario may be supplied — either **inline text** or a *
42
42
  (read at run time). The scenario is whatever the user provided alongside the command, after
43
43
  stripping a leading `fix` / `report` mode token.
44
44
 
45
- - **If a scenario is supplied, it is authoritative**: verify exactly what it describes, exercising
46
- precisely the flows/states/endpoints it names this **replaces** the default "exercise the
47
- changed pages/endpoints" guidance.
45
+ - **If the scenario part starts with `scenario:`** (after the mode token), everything after `scenario:`
46
+ (to the end) is a **SAVED scenario reference** (exact name OR semantic description). Resolve it across
47
+ enabled platforms (`*_scenario-search` for the description + an exact-name `*_scenario-list` match),
48
+ pick the single strong match (ambiguous → ask; none → say so + fall back to the default flow), then
49
+ **run it in ONE `*_scenario-run` call** (no re-discovery) and **judge its result (functional) +
50
+ any returned visual evidence (e.g. screenshots)**. Its nested tool calls satisfy each active cycle's required tools.
51
+ No exact name needed — e.g. `scenario: the full purchase flow`.
52
+ **On PASS, keep it fresh:** `*_scenario-update` its `ironbee.commit` → HEAD (`git rev-parse HEAD`)
53
+ + `ironbee.liveValidated: true` (re-send the full metadata merged); on FAIL / defect, don't stamp.
54
+ - **If a scenario is supplied (free text), it is authoritative**: verify exactly what it describes,
55
+ exercising precisely the flows/states/endpoints it names — this **replaces** the default "exercise
56
+ the changed pages/endpoints" guidance.
48
57
  - **If the scenario is (or points to) a file path**, read that file and treat its contents as the
49
58
  scenario. Do not assume a fixed location or format.
50
59
  - **If the path does not resolve**, stop and report `scenario file not found: <path>`, then ask how
@@ -29,18 +29,19 @@ A custom verification scenario may be supplied when this command is invoked —
29
29
 
30
30
  > The scenario is whatever the user provided alongside `$ironbee-verify`, after stripping a leading `fix` / `report` mode token — the remainder is the scenario; empty remainder → the verifier uses its default flow.
31
31
 
32
- - **If a scenario is supplied, it is authoritative**: the verifier must verify exactly what it describes, exercising precisely the flows/states/endpoints it names this **replaces** the default "exercise the changed pages/endpoints" guidance.
32
+ - **If the scenario part starts with `scenario:`** (after the mode token), everything after `scenario:` (to the end) is a **SAVED scenario reference** (exact name OR semantic description). Do NOT read a file or treat it as free text — relay it to the verifier verbatim as a `Saved scenario: <ref>` line. The verifier resolves it (`scenario-search` + exact-name), runs it in one `scenario-run` call (no re-discovery), and judges the result (functional + any visual evidence). No exact name needed — e.g. `scenario: the full purchase flow`.
33
+ - **If a scenario is supplied (free text), it is authoritative**: the verifier must verify exactly what it describes, exercising precisely the flows/states/endpoints it names — this **replaces** the default "exercise the changed pages/endpoints" guidance.
33
34
  - **If the scenario is (or points to) a file path**, read that file with your file-read tool yourself and pass its **contents** into the verifier's prompt (the verifier has no file-read tool). Do not assume a fixed location or format — read whatever path was given.
34
35
  - **If the path does not resolve to an existing file**, stop and report `scenario file not found: <path>`, then ask how to proceed — do not delegate with the literal path string or guess a target.
35
36
  - **If no scenario is supplied**, the verifier falls back to exercising the changed pages/endpoints per the active cycles.
36
37
 
37
38
  ## Steps
38
39
 
39
- 1. **Resolve the mode and scenario**: strip a leading `fix` / `report` token (see **Mode**); then file path → read it now; inline text → use as-is; empty → none.
40
+ 1. **Resolve the mode and scenario**: strip a leading `fix` / `report` token (see **Mode**); then on the remainder — starts with `scenario:` → SAVED scenario reference (the rest after `scenario:`); a file path → read it now; inline text → use as-is; empty → none.
40
41
  2. **Spawn the `ironbee-verifier` custom agent** — call `spawn_agent` with **`agent_type="ironbee-verifier"`** AND **`fork_turns="none"`**. The `fork_turns="none"` is REQUIRED: the default `fork_turns="all"` is a full-history fork that silently DROPS the `agent_type` override, giving you a generic agent *without* the verification tools. (Do NOT "act as" the verifier or use a plain generic fork either.) Put the task, the mode, and the resolved scenario in the `message`, e.g.:
41
42
  > Verify the current code changes.
42
43
  > Mode: \<`fix` in fix mode — OMIT this line entirely in verify-only mode>
43
- > Scenario: \<the resolved scenario text, or "none — exercise the changed pages/endpoints">
44
+ > \<ONE of: `Saved scenario: <ref>` (when `scenario:` was given — the verifier resolves + runs it) — OR — `Scenario: <resolved text>` (free text / file contents) — OR — `Scenario: none — exercise the changed pages/endpoints`>
44
45
  The verifier runs `verification-start` (relaying the fix intent to IronBee's completion gate, which then enforces fix-until-pass on you) → drives every active cycle's tools → submits the single verdict, all in this shared session. It resolves the session id from the environment, so you don't pass one.
45
46
  **Wait for the verifier in the same turn — do NOT background it.** Let it run to completion and read its verdict before responding; a backgrounded verifier can let your turn end (and the Stop gate fire) before its verdict is recorded.
46
47
  3. **Relay the verifier's summary** — the verdict status and, on fail, the issues it found.
@@ -1 +1 @@
1
/*
 * Previous (removed) minified CommonJS build of a hook module that exports one async function, `run`.
 * The single-letter helpers at the start are module-interop boilerplate: they build an object flagged
 * `__esModule` whose `run` property is a getter, assign it to `module.exports`, and set the function's
 * `name` property to "run".
 *
 * run(i) -- `i` is passed on as `projectDir`:
 *   1. Parses hook stdin with parseCodexHookStdin(readStdin()); the session id is `session_id`,
 *      or "default" when that field is null/undefined.
 *   2. Builds the session directory inline as `<i>/.ironbee/sessions/<session id>`, derives
 *      `<dir>/actions.jsonl`, and points the logger at `<dir>/session.log`.
 *   3. Reads `activeActivityId` from session state before runActivityEnd is called ("" when absent).
 *      NOTE(review): presumably because runActivityEnd may clear it -- confirm in hooks/core/activity-end.
 *   4. When runActivityEnd(...) resolves truthy, appends a `session_end` action (reason "checkpoint",
 *      id from deterministicSessionEndId, duration measured from the last "session_start" action).
 *   5. Tries to spawn the detached Codex analytics worker with fields re-read from session state;
 *      any exception is caught and only debug-logged.
 *   6. Debug-logs `activity-end: <session id>` and calls writeAndExit with the JSON string "{}" and 0.
 *
 * The trailing `0&&(module.exports={run})` never executes (short-circuited by `0`).
 */
- "use strict";var d=Object.defineProperty;var v=Object.getOwnPropertyDescriptor;var A=Object.getOwnPropertyNames;var w=Object.prototype.hasOwnProperty;var p=(i,t)=>d(i,"name",{value:t,configurable:!0});var E=(i,t)=>{for(var n in t)d(i,n,{get:t[n],enumerable:!0})},b=(i,t,n,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let e of A(t))!w.call(i,e)&&e!==n&&d(i,e,{get:()=>t[e],enumerable:!(o=v(t,e))||o.enumerable});return i};var h=i=>b(d({},"__esModule",{value:!0}),i);var x={};E(x,{run:()=>k});module.exports=h(x);var a=require("../../../hooks/core/actions"),m=require("../../../hooks/core/activity-end"),r=require("../../../lib/logger"),u=require("../../../lib/output"),f=require("../../../lib/stdin"),l=require("../../../analytics/codex/spawn"),c=require("../../../hooks/core/session-state"),y=require("../util");async function k(i){const t=(0,y.parseCodexHookStdin)((0,f.readStdin)()),n=t.session_id??"default",o=`${i}/.ironbee/sessions/${n}`,e=`${o}/actions.jsonl`;(0,r.setLogFile)(`${o}/session.log`);const g=(0,c.readState)(o)?.activeActivityId??"";if(await(0,m.runActivityEnd)({sessionDir:o,actionsFile:e,projectDir:i,sessionId:n})){const s=Date.now(),S={...(0,a.baseFields)(e),id:(0,a.deterministicSessionEndId)(n),type:"session_end",timestamp:s,session_id:n,duration:(0,a.findDurationSinceLastAction)(e,"session_start",s),reason:"checkpoint"};await(0,a.appendAction)(e,S)}try{const s=(0,c.readState)(o);(0,l.spawnDetachedCodexAnalyticsWorker)({projectDir:i,sessionId:n,rolloutPath:t.transcript_path,userEmail:s?.userEmail??void 0,usageType:s?.usageType??void 0,usagePlan:s?.usagePlan??void 0,activityId:g})}catch(s){r.logger.debug(`codex analytics spawn failed: ${s instanceof Error?s.message:s}`)}r.logger.debug(`activity-end: ${n}`),(0,u.writeAndExit)(JSON.stringify({}),0)}p(k,"run");0&&(module.exports={run});
1
/*
 * Current (added) minified CommonJS build of a hook module that exports one async function, `run`.
 * The single-letter helpers at the start are module-interop boilerplate: they build an object flagged
 * `__esModule` whose `run` property is a getter, assign it to `module.exports`, and set the function's
 * `name` property to "run".
 *
 * run(i) -- `i` is passed on as `projectDir`:
 *   1. Parses hook stdin with parseCodexHookStdin(readStdin()); the session id is `session_id`,
 *      or "default" when that field is null/undefined.
 *   2. Resolves the session directory through the runtime-paths helper `sessionDir(i, sessionId)`
 *      (the removed build concatenated the path inline), derives `<dir>/actions.jsonl`, and points
 *      the logger at `<dir>/session.log`.
 *   3. Reads `activeActivityId` from session state before runActivityEnd is called ("" when absent).
 *      NOTE(review): presumably because runActivityEnd may clear it -- confirm in hooks/core/activity-end.
 *   4. When runActivityEnd(...) resolves truthy, appends a `session_end` action (reason "checkpoint",
 *      id from deterministicSessionEndId, duration measured from the last "session_start" action).
 *   5. Tries to spawn the detached Codex analytics worker with fields re-read from session state;
 *      any exception is caught and only debug-logged.
 *   6. Debug-logs `activity-end: <session id>` and calls writeAndExit with the JSON string "{}" and 0.
 *
 * The trailing `0&&(module.exports={run})` never executes (short-circuited by `0`).
 */
+ "use strict";var d=Object.defineProperty;var A=Object.getOwnPropertyDescriptor;var w=Object.getOwnPropertyNames;var E=Object.prototype.hasOwnProperty;var p=(i,t)=>d(i,"name",{value:t,configurable:!0});var h=(i,t)=>{for(var n in t)d(i,n,{get:t[n],enumerable:!0})},k=(i,t,n,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let e of w(t))!E.call(i,e)&&e!==n&&d(i,e,{get:()=>t[e],enumerable:!(o=A(t,e))||o.enumerable});return i};var x=i=>k(d({},"__esModule",{value:!0}),i);var C={};h(C,{run:()=>b});module.exports=x(C);var r=require("../../../hooks/core/actions"),m=require("../../../hooks/core/activity-end"),a=require("../../../lib/logger"),u=require("../../../lib/output"),f=require("../../../lib/stdin"),l=require("../../../analytics/codex/spawn"),c=require("../../../hooks/core/session-state"),y=require("../util"),g=require("../../../lib/runtime-paths");async function b(i){const t=(0,y.parseCodexHookStdin)((0,f.readStdin)()),n=t.session_id??"default",o=(0,g.sessionDir)(i,n),e=`${o}/actions.jsonl`;(0,a.setLogFile)(`${o}/session.log`);const S=(0,c.readState)(o)?.activeActivityId??"";if(await(0,m.runActivityEnd)({sessionDir:o,actionsFile:e,projectDir:i,sessionId:n})){const s=Date.now(),v={...(0,r.baseFields)(e),id:(0,r.deterministicSessionEndId)(n),type:"session_end",timestamp:s,session_id:n,duration:(0,r.findDurationSinceLastAction)(e,"session_start",s),reason:"checkpoint"};await(0,r.appendAction)(e,v)}try{const s=(0,c.readState)(o);(0,l.spawnDetachedCodexAnalyticsWorker)({projectDir:i,sessionId:n,rolloutPath:t.transcript_path,userEmail:s?.userEmail??void 0,usageType:s?.usageType??void 0,usagePlan:s?.usagePlan??void 0,activityId:S})}catch(s){a.logger.debug(`codex analytics spawn failed: ${s instanceof Error?s.message:s}`)}a.logger.debug(`activity-end: ${n}`),(0,u.writeAndExit)(JSON.stringify({}),0)}p(b,"run");0&&(module.exports={run});