instar 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,16 +22,17 @@ export interface CodexCliIntelligenceProviderOptions {
22
22
  */
23
23
  sandboxMode?: 'read-only' | 'workspace-write' | 'danger-full-access';
24
24
  /**
25
- * Working directory for the codex CLI invocation. Defaults to
26
- * process.cwd(). Reviewer / canary calls don't depend on cwd content
27
- * but Codex CLI honors the flag so it's safe to pass.
25
+ * Retained for API compatibility. NOTE: this is intentionally NOT used as
26
+ * the `codex exec --cd` for judgment calls. Those always run in an empty
27
+ * instar-managed scratch dir (see `resolveIntelligenceScratchDir`) so the
28
+ * agent's project identity + hooks never load. These calls don't depend on
29
+ * cwd content, so ignoring this value is safe.
28
30
  */
29
31
  workingDirectory?: string;
30
32
  }
31
33
  export declare class CodexCliIntelligenceProvider implements IntelligenceProvider {
32
34
  private readonly codexPath;
33
35
  private readonly sandboxMode;
34
- private readonly workingDirectory;
35
36
  constructor(options: CodexCliIntelligenceProviderOptions);
36
37
  evaluate(prompt: string, options?: IntelligenceOptions): Promise<string>;
37
38
  }
@@ -1 +1 @@
1
- {"version":3,"file":"CodexCliIntelligenceProvider.d.ts","sourceRoot":"","sources":["../../src/core/CodexCliIntelligenceProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,KAAK,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAM5E,MAAM,WAAW,mCAAmC;IAClD,+CAA+C;IAC/C,SAAS,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,WAAW,CAAC,EAAE,WAAW,GAAG,iBAAiB,GAAG,oBAAoB,CAAC;IACrE;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,qBAAa,4BAA6B,YAAW,oBAAoB;IACvE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAyD;IACrF,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;gBAE9B,OAAO,EAAE,mCAAmC;IAMlD,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,MAAM,CAAC;CAkE/E"}
1
+ {"version":3,"file":"CodexCliIntelligenceProvider.d.ts","sourceRoot":"","sources":["../../src/core/CodexCliIntelligenceProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAMH,OAAO,KAAK,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AA6C5E,MAAM,WAAW,mCAAmC;IAClD,+CAA+C;IAC/C,SAAS,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,WAAW,CAAC,EAAE,WAAW,GAAG,iBAAiB,GAAG,oBAAoB,CAAC;IACrE;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,qBAAa,4BAA6B,YAAW,oBAAoB;IACvE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAyD;gBAEzE,OAAO,EAAE,mCAAmC;IASlD,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,MAAM,CAAC;CA6E/E"}
@@ -12,26 +12,78 @@
12
12
  * implementation based on the agent's configured framework.
13
13
  */
14
14
  import { execFile } from 'node:child_process';
15
+ import { mkdtempSync, existsSync } from 'node:fs';
16
+ import { tmpdir } from 'node:os';
17
+ import { join } from 'node:path';
15
18
  import { resolveCliModelFlag } from '../providers/adapters/openai-codex/models.js';
16
19
  import { buildCodexChildEnv } from '../providers/adapters/openai-codex/transport/codexSpawn.js';
17
20
  const DEFAULT_TIMEOUT_MS = 30_000;
21
+ const INTELLIGENCE_SCRATCH_DIR_PREFIX = 'instar-codex-intel-scratch-';
22
+ let cachedScratchDir = null;
23
+ /**
24
+ * Lazily create and return an EMPTY scratch directory used as the `--cd`
25
+ * for every judgment call.
26
+ *
27
+ * Running `codex exec` in the agent's project directory loads the full
28
+ * ~26 KB `AGENTS.md` identity AND fires the project's `.codex/hooks.json`
29
+ * hooks (session_start / user_prompt_submit / stop) on every call — turning
30
+ * a one-word classification into a full agent boot. An empty, hook-free
31
+ * scratch dir gives these calls the clean-notepad guarantee that
32
+ * `ClaudeCliIntelligenceProvider` gets via `--setting-sources user`:
33
+ * no project doc, and no project hooks.
34
+ *
35
+ * SECURITY (why mkdtemp, not a fixed name): Codex discovers hooks by walking
36
+ * UP from the cwd and fires any `.codex/hooks.json` it finds — and
37
+ * `project_doc_max_bytes=0` does NOT cover hooks. On Linux `os.tmpdir()` is
38
+ * the world-writable `/tmp`, so a fixed, guessable dir name could be
39
+ * pre-created (or symlinked) by another local user with a planted
40
+ * `.codex/hooks.json`, re-introducing hook execution under our identity.
41
+ * `mkdtempSync` defeats this: it appends an unguessable random suffix, creates
42
+ * the dir with mode 0700 owned by this process, and refuses to follow a
43
+ * pre-existing path — so nothing can be planted in the cwd these calls run in.
44
+ *
45
+ * The dir is re-verified on each call: a tmp-reaper may delete it during a
46
+ * long-lived process, so we recreate it if it has gone missing.
47
+ *
48
+ * Bug (2026-05-26): ~1,550 such judgment spawns/day were re-injecting the
49
+ * identity + firing session_start, causing notification spam and spawn-storm
50
+ * delivery failures. Spec: CODEX-INTELLIGENCE-PROVIDER-CLEAN-CALL-SPEC.md.
51
+ */
52
+ function resolveIntelligenceScratchDir() {
53
+ if (cachedScratchDir && existsSync(cachedScratchDir))
54
+ return cachedScratchDir;
55
+ cachedScratchDir = mkdtempSync(join(tmpdir(), INTELLIGENCE_SCRATCH_DIR_PREFIX));
56
+ return cachedScratchDir;
57
+ }
18
58
  export class CodexCliIntelligenceProvider {
19
59
  codexPath;
20
60
  sandboxMode;
21
- workingDirectory;
22
61
  constructor(options) {
23
62
  this.codexPath = options.codexPath;
24
63
  this.sandboxMode = options.sandboxMode ?? 'read-only';
25
- this.workingDirectory = options.workingDirectory ?? process.cwd();
64
+ // options.workingDirectory is intentionally NOT stored: judgment calls
65
+ // always run in an empty scratch dir (resolveIntelligenceScratchDir), so
66
+ // the agent's project identity + hooks never load. The option is retained
67
+ // on the type for API compatibility (the factory still forwards it).
26
68
  }
27
69
  async evaluate(prompt, options) {
28
70
  const model = resolveCliModelFlag(options?.model);
71
+ const scratchDir = resolveIntelligenceScratchDir();
29
72
  return new Promise((resolve, reject) => {
30
73
  const args = [
31
74
  'exec',
32
75
  '--model', model,
33
76
  '--sandbox', this.sandboxMode,
34
- '--cd', this.workingDirectory,
77
+ // Run judgment calls in an empty scratch dir, NOT the agent's project
78
+ // dir. The project dir loads the full ~26 KB AGENTS.md identity AND
79
+ // fires the project's .codex/hooks.json (session_start /
80
+ // user_prompt_submit / stop) on every call. The scratch dir is the
81
+ // Codex analog of ClaudeCliIntelligenceProvider's `--setting-sources
82
+ // user`. See resolveIntelligenceScratchDir + the spec.
83
+ '--cd', scratchDir,
84
+ // Belt-and-suspenders: hard-disable project-doc (AGENTS.md) loading
85
+ // even if a stray doc ever lands at or above the scratch path.
86
+ '-c', 'project_doc_max_bytes=0',
35
87
  // Reviewer/sentinel/canary calls are deterministic short prompts
36
88
  // that don't depend on the cwd being a trusted git repo. Codex
37
89
  // CLI's default behavior is to refuse to run when --cd points at
@@ -1 +1 @@
1
- {"version":3,"file":"CodexCliIntelligenceProvider.js","sourceRoot":"","sources":["../../src/core/CodexCliIntelligenceProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C,OAAO,EAAE,mBAAmB,EAAE,MAAM,8CAA8C,CAAC;AACnF,OAAO,EAAE,kBAAkB,EAAE,MAAM,4DAA4D,CAAC;AAEhG,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAmBlC,MAAM,OAAO,4BAA4B;IACtB,SAAS,CAAS;IAClB,WAAW,CAAyD;IACpE,gBAAgB,CAAS;IAE1C,YAAY,OAA4C;QACtD,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;QACnC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,WAAW,CAAC;QACtD,IAAI,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACpE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,MAAc,EAAE,OAA6B;QAC1D,MAAM,KAAK,GAAG,mBAAmB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAElD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,IAAI,GAAG;gBACX,MAAM;gBACN,SAAS,EAAE,KAAK;gBAChB,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,MAAM,EAAE,IAAI,CAAC,gBAAgB;gBAC7B,iEAAiE;gBACjE,+DAA+D;gBAC/D,iEAAiE;gBACjE,sCAAsC;gBACtC,kFAAkF;gBAClF,6DAA6D;gBAC7D,iEAAiE;gBACjE,4BAA4B;gBAC5B,uBAAuB;gBACvB,6DAA6D;gBAC7D,gEAAgE;gBAChE,gEAAgE;gBAChE,YAAY;gBACZ,MAAM;aACP,CAAC;YAEF,mEAAmE;YACnE,uEAAuE;YACvE,mEAAmE;YACnE,oEAAoE;YACpE,mEAAmE;YACnE,qEAAqE;YACrE,2DAA2D;YAC3D,EAAE;YACF,kEAAkE;YAClE,iEAAiE;YACjE,kEAAkE;YAClE,MAAM,QAAQ,GAAG,kBAAkB,EAAE,CAAC;YAEtC,MAAM,KAAK,GAAG,QAAQ,CACpB,IAAI,CAAC,SAAS,EACd,IAAI,EACJ;gBACE,OAAO,EAAE,kBAAkB;gBAC3B,SAAS,EAAE,IAAI,GAAG,IAAI;gBACtB,GAAG,EAAE,QAAQ;aACd,EACD,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE;gBACxB,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,CACJ,IAAI,KAAK,CACP,oBAAoB,KAAK,CAAC,OAAO,EAAE;wBACjC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAC/C,CACF,CAAC;oBACF,OAAO;gBACT,CAAC;gBACD,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;YACzB,CAAC,CACF,CAAC;YAEF,iEAAiE;YACjE,kEAAkE;YAClE,kEAAkE;YAClE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC;QACrB,CAAC,CAAC,CAAC;IACL,CAAC;CACF"}
1
+ {"version":3,"file":"CodexCliIntelligenceProvider.js","sourceRoot":"","sources":["../../src/core/CodexCliIntelligenceProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAClD,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAAE,mBAAmB,EAAE,MAAM,8CAA8C,CAAC;AACnF,OAAO,EAAE,kBAAkB,EAAE,MAAM,4DAA4D,CAAC;AAEhG,MAAM,kBAAkB,GAAG,MAAM,CAAC;AAElC,MAAM,+BAA+B,GAAG,6BAA6B,CAAC;AAEtE,IAAI,gBAAgB,GAAkB,IAAI,CAAC;AAE3C;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,SAAS,6BAA6B;IACpC,IAAI,gBAAgB,IAAI,UAAU,CAAC,gBAAgB,CAAC;QAAE,OAAO,gBAAgB,CAAC;IAC9E,gBAAgB,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,+BAA+B,CAAC,CAAC,CAAC;IAChF,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAqBD,MAAM,OAAO,4BAA4B;IACtB,SAAS,CAAS;IAClB,WAAW,CAAyD;IAErF,YAAY,OAA4C;QACtD,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;QACnC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,WAAW,CAAC;QACtD,uEAAuE;QACvE,yEAAyE;QACzE,0EAA0E;QAC1E,qEAAqE;IACvE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,MAAc,EAAE,OAA6B;QAC1D,MAAM,KAAK,GAAG,mBAAmB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAElD,MAAM,UAAU,GAAG,6BAA6B,EAAE,CAAC;QAEnD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,IAAI,GAAG;gBACX,MAAM;gBACN,SAAS,EAAE,KAAK;gBAChB,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,sEAAsE;gBACtE,oEAAoE;gBACpE,yDAAyD;gBACzD,mEAAmE;gBACnE,qEAAqE;gBACrE,uDAAuD;gBACvD,MAAM,EAAE,UAAU;gBAClB,oEAAoE;gBACpE,+DAA+D;gBAC/D,IAAI,EAAE,yBAAyB;gBAC/B,iEAAiE;gBACjE,+DAA+D;gBAC/D,iEAAiE;gBACjE,sCAAsC;gBACtC,kFAAkF;gBAClF,6DAA6D;gBAC7D,iEAAiE;gBACjE,4BAA4B;gBAC5B,uBAAuB;gBACvB,6DAA6D;gBAC7D,gEAAgE;gBAChE,gEAAgE;gBAChE,YAAY;gBACZ,MAAM;aACP,CAAC;YAEF,mEAAmE;YACnE,uEAAuE;YACvE,mEAAmE;YACnE,oEAAoE;YACpE,mEAAmE;YACnE,qEAAqE;YACrE,2DAA2D;YAC3D,EAAE;YACF,kEAAkE;YAClE,iEAAiE;YACjE,kEAAkE;YAClE,MAAM,QAAQ,GAAG,kBAAkB,EAAE,CAAC;YAEtC,MAAM,KAAK,GAAG,QAAQ,CACpB,IAAI,CAAC,SAAS,EACd,IAAI,EACJ;gBACE,OAAO,EAAE,kBAAkB;gBAC3B,SAAS,EAAE,IAAI,GAAG,IAAI;gBACtB,GAAG,EAAE,QAAQ;aACd,EACD,CAAC,KAAK,EAAE,MAAM,EAA
E,MAAM,EAAE,EAAE;gBACxB,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,CACJ,IAAI,KAAK,CACP,oBAAoB,KAAK,CAAC,OAAO,EAAE;wBACjC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAC/C,CACF,CAAC;oBACF,OAAO;gBACT,CAAC;gBACD,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;YACzB,CAAC,CACF,CAAC;YAEF,iEAAiE;YACjE,kEAAkE;YAClE,kEAAkE;YAClE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC;QACrB,CAAC,CAAC,CAAC;IACL,CAAC;CACF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "instar",
3
- "version": "1.3.1",
3
+ "version": "1.3.2",
4
4
  "description": "Coherence infrastructure for self-evolving AI agents — on the Claude Code or Codex subscription you already have.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "$schema": "./builtin-manifest.schema.json",
3
3
  "schemaVersion": 1,
4
- "generatedAt": "2026-05-26T20:50:07.446Z",
5
- "instarVersion": "1.3.1",
4
+ "generatedAt": "2026-05-26T21:00:08.875Z",
5
+ "instarVersion": "1.3.2",
6
6
  "entryCount": 192,
7
7
  "entries": {
8
8
  "hook:session-start": {
@@ -0,0 +1,49 @@
1
+ # Upgrade Guide — NEXT
2
+
3
+ <!-- bump: patch -->
4
+ <!-- Valid values: patch, minor, major -->
5
+ <!-- patch = bug fixes, refactors, test additions, doc updates -->
6
+ <!-- minor = new features, new APIs, new capabilities (backwards-compatible) -->
7
+ <!-- major = breaking changes to existing APIs or behavior -->
8
+
9
+ ## What Changed
10
+
11
+ **Codex-powered agents stop reloading their full identity on every background "judgment" call.** Instar makes ~1,500+ tiny internal LLM calls per agent per day — classify this message, did that turn finish, summarize this chunk, extract the intent. On a Codex-powered agent, each of those ran `codex exec` *inside the agent's project directory*, which made Codex load the agent's entire ~26 KB `AGENTS.md` identity AND fire the project's `.codex/hooks.json` (session_start / user_prompt_submit / stop) **every single time** — just to answer one word like "normal."
12
+
13
+ This was the dominant cause of two visible problems on Codex agents: the flood of "actively working / message delivered / still working" notifications (the session_start hook firing on ~1,550 spawns/day, so the monitoring layer thought a real session was constantly starting), and intermittent "couldn't deliver — please resend" failures (a dozen of these heavyweight spawns landing in one minute saturated the machine so a real inbound message couldn't get a process slot).
14
+
15
+ The fix gives those calls a clean notepad — the Codex analog of what `ClaudeCliIntelligenceProvider` already does with `--setting-sources user`. `CodexCliIntelligenceProvider` now runs judgment calls in an empty, private (0700, unguessable-name via `mkdtempSync`) scratch directory instead of the project dir, plus `-c project_doc_max_bytes=0`. No identity load, no project hooks. Claude-powered agents are unaffected (they were already clean).
16
+
17
+ ## Evidence
18
+
19
+ Reproduced live on this machine's Codex install (codex-cli 0.133.0), before/after.
20
+
21
+ **Before (production incident, 2026-05-25 rollout logs in `~/.codex/sessions/`):** 1,601
22
+ `codex exec` spawns in one day, ~1,550 of them internal judgment calls. A sampled
23
+ message-classifier rollout (21:52) re-injected the full ~26 KB `AGENTS.md` identity AND a
24
+ `SESSION START` block — firing session_start — just to output the single word `normal`.
25
+ Those rollouts ran with `cwd` = the agent's project dir and were 63–110 KB each.
26
+
27
+ **After (controlled run of the built fixed provider against the real codex binary,
28
+ 2026-05-26 13:49):** called `CodexCliIntelligenceProvider.evaluate()` with a unique marker
29
+ prompt; located the exact rollout it produced
30
+ (`rollout-2026-05-26T13-49-18-…019e660c….jsonl`). Observed:
31
+ - `cwd` = `/var/folders/…/T/instar-codex-intel-scratch-AOYJWS` (the mkdtemp scratch dir) ✓
32
+ - `AGENTS.md instructions` blocks: **0** (was ≥1) ✓
33
+ - `SESSION START` blocks: **0** (was 1) ✓
34
+ - `CURRENT TIME` hook markers (user_prompt_submit): **0** ✓
35
+ - rollout size 29.6 KB (was 63–110 KB) — the residue is codex's own base prompt, not instar identity.
36
+
37
+ The identity load and the session_start/user_prompt_submit hook firing are gone for
38
+ judgment calls. The agent still returned the correct answer.
39
+
40
+ ## What to Tell Your User
41
+
42
+ - **If you run a Codex-powered agent, it should get noticeably quieter and more reliable — no action needed.** The "still working" notification spam and the occasional dropped/"please resend" messages were mostly this one plumbing bug; the agent was effectively re-reading its whole identity ~1,500 times a day. Claude-powered agents won't notice anything (they were never affected).
43
+
44
+ ## Summary of New Capabilities
45
+
46
+ | Capability | How to Use |
47
+ |-----------|-----------|
48
+ | Codex judgment calls run identity-free + hook-free | Automatic. `CodexCliIntelligenceProvider` runs `codex exec` in an empty `mkdtempSync` scratch dir + `-c project_doc_max_bytes=0` instead of the project dir. |
49
+ | Hardened scratch dir | Automatic. Unguessable random name, 0700 perms, recreated if a tmp-reaper deletes it — nothing can be planted in the cwd these calls run from. |
@@ -0,0 +1,108 @@
1
+ # Side-Effects Review — Codex Intelligence-Provider Clean-Call Fix
2
+
3
+ **Version / slug:** `fix-codex-intel-clean-call`
4
+ **Date:** 2026-05-26
5
+ **Author:** Echo
6
+ **Spec:** `docs/specs/CODEX-INTELLIGENCE-PROVIDER-CLEAN-CALL-SPEC.md` (converged + approved)
7
+
8
+ ## Summary of the change
9
+
10
+ `CodexCliIntelligenceProvider.evaluate()` ran `codex exec --cd <agent project dir>` for
11
+ every internal LLM "judgment" call (message classification, terminal-output analysis,
12
+ arc extraction, usher, coherence, etc.). Running in the project dir made Codex load the
13
+ full ~26 KB `AGENTS.md` identity AND fire the project's `.codex/hooks.json`
14
+ (session_start / user_prompt_submit / stop) on **every** call — ~1,550 such calls/day,
15
+ causing notification spam (session_start firing constantly) and spawn-storm delivery
16
+ failures (12 heavyweight spawns/minute saturating the machine).
17
+
18
+ The fix runs these calls in an empty, owner-only scratch dir instead — the Codex analog
19
+ of `ClaudeCliIntelligenceProvider`'s `--setting-sources user`. No identity, no project
20
+ hooks.
21
+
22
+ **Files changed (source):**
23
+ - `src/core/CodexCliIntelligenceProvider.ts` — `evaluate()` now uses an `mkdtempSync`
24
+ scratch dir for `--cd` (not the project dir) + `-c project_doc_max_bytes=0`; added the
25
+ `resolveIntelligenceScratchDir()` helper; removed the now-dead `workingDirectory` field
26
+ (kept on the options type for API compat).
27
+
28
+ **Files changed (tests):**
29
+ - `tests/unit/CodexCliIntelligenceProvider.test.ts` — updated the `--cd` assertion (it
30
+ previously asserted the buggy project-dir behavior) + added 7 cases covering the
31
+ scratch-dir contract, 0700 perms, unguessable name, and tmp-reaper recovery (12 total).
32
+
33
+ **Files changed (spec / report / release notes):**
34
+ - `docs/specs/CODEX-INTELLIGENCE-PROVIDER-CLEAN-CALL-SPEC.md` (+ `.eli16.md`)
35
+ - `docs/specs/reports/codex-intelligence-provider-clean-call-convergence.md`
36
+ - `upgrades/NEXT.md`
37
+
38
+ ## Decision-point inventory
39
+
40
+ - **Scratch dir, not the project dir** — the core fix. Judgment calls are cwd-independent
41
+ (per the existing code comment), so an empty cwd is correct.
42
+ - **`mkdtempSync` (random suffix, 0700), not a fixed name** — convergence security finding:
43
+ a fixed `/tmp` name on Linux is plantable (`.codex/hooks.json` squatting; not gated by
44
+ `project_doc_max_bytes`). The unguessable, owner-only dir closes that vector.
45
+ - **Re-verify-before-use** — recreate the dir if a tmp-reaper deleted it during a
46
+ long-lived process.
47
+ - **`-c project_doc_max_bytes=0`** — belt-and-suspenders for an `AGENTS.md` on the cwd
48
+ walk-up; it is a real Codex config key, already used in `contextScopeControl.ts`.
49
+ - **Drop `workingDirectory` as exec cwd** — verified that only `route.ts` passes it, and only
50
+ for its own PreferenceStore DB path, never the codex cwd.
51
+
52
+ ## Over-block / under-block analysis
53
+
54
+ - **Over-block:** none. The provider gates nothing; it only changes the cwd of a spawn.
55
+ Judgment calls that worked before continue to work (the fake-codex unit tests confirm
56
+ the full arg contract).
57
+ - **Under-block:** the *intended* behavioral subtraction is "stop loading identity + firing
58
+ hooks for judgment calls." There is no path where a judgment call legitimately needed the
59
+ identity or hooks — they are stateless classifications/extractions. If a future caller
60
+ does need project context, it must pass it in the prompt (as all current callers do), not
61
+ rely on cwd.
62
+
63
+ ## Level-of-abstraction fit
64
+
65
+ The fix lives in the single provider that owns the `codex exec` invocation — the same layer
66
+ where the Claude sibling already solves the identical problem with `--setting-sources user`.
67
+ No higher-level orchestration or config knob is introduced; the concern is local to the
68
+ spawn, so the fix is local to the spawn. Correct altitude.
69
+
70
+ ## Signal-vs-authority compliance
71
+
72
+ N/A in the gate sense — this change neither detects nor blocks anything. It is a pure
73
+ invocation-hygiene fix. It does not touch any sentinel/gate authority boundary.
74
+
75
+ ## Interactions
76
+
77
+ - **Claude provider:** untouched; asymmetry (flag vs scratch-cwd) is intentional and
78
+ documented — Codex has no single equivalent flag.
79
+ - **Callers (`reflect.ts`, `route.ts`, `server.ts`):** none depend on the codex cwd
80
+ content; verified during integration review. No behavior change for them beyond the
81
+ intended one.
82
+ - **Concurrency:** `mkdtempSync` once + cached + `existsSync` re-check; no race under the
83
+ high call volume (the check-and-create is synchronous and idempotent, and the provider never writes into the dir).
84
+ - **Monitoring layer:** positive interaction — the session_start hook no longer fires on
85
+ judgment spawns, so PresenceProxy/standby stops mistaking them for real sessions
86
+ (the notification-spam root cause).
87
+
88
+ ## Rollback cost
89
+
90
+ Trivial and isolated. Revert the single source file (and its test). No persisted state, no
91
+ schema, no config/hook/template/migration to unwind — the only on-disk footprint is an
92
+ empty 0700 tmp dir that the OS reaps on its own. Reverting restores the prior (buggy but
93
+ functional) behavior with zero data implications.
94
+
95
+ ## Migration parity
96
+
97
+ Code-only change inside the compiled provider. No agent-installed file
98
+ (settings/hooks/config/templates/skills) references the old behavior, so **no
99
+ `PostUpdateMigrator` entry is required** — existing Codex agents receive the fix via the
100
+ normal package update path. Verified by grep during integration review.
101
+
102
+ ## Testing evidence
103
+
104
+ - Unit: 12 tests in `CodexCliIntelligenceProvider.test.ts` pass; sibling env-allowlist (4)
105
+ + factory (10) tests unaffected; clean `tsc` build.
106
+ - Live / bug-fix evidence bar: the before/after rollout reproduction on a real Codex agent
107
+ (identity-loaded before, bare after) is run as the post-merge test-as-self gate and
108
+ recorded before the fix is declared shipped.