@kodax-ai/kodax 0.7.39 → 0.7.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +100 -0
  2. package/README.md +58 -0
  3. package/README_CN.md +31 -0
  4. package/dist/chunks/{chunk-SF7WD7E5.js → chunk-5TFLMGER.js} +1 -1
  5. package/dist/chunks/{chunk-HUAU4KB3.js → chunk-6OB4AJOM.js} +1 -1
  6. package/dist/chunks/chunk-6QO6HWGU.js +30 -0
  7. package/dist/chunks/{chunk-SONW6AC7.js → chunk-EQ5DGS2W.js} +1 -1
  8. package/dist/chunks/chunk-EVIDQWMF.js +5 -0
  9. package/dist/chunks/chunk-HYWVRTFA.js +1233 -0
  10. package/dist/chunks/chunk-SX2IS5JP.js +16 -0
  11. package/dist/chunks/chunk-V4WSBIXB.js +2 -0
  12. package/dist/chunks/chunk-ZPJPNLBK.js +462 -0
  13. package/dist/chunks/compaction-config-LT5PEXPT.js +2 -0
  14. package/dist/chunks/{construction-bootstrap-XSE7ZABG.js → construction-bootstrap-HBCWJFHC.js} +1 -1
  15. package/dist/chunks/{devtools-MOFU7YQF.js → devtools-EYGFOXEU.js} +1 -1
  16. package/dist/chunks/{dist-WKW4CBG6.js → dist-M57GIWR4.js} +1 -1
  17. package/dist/chunks/dist-V3BS2NKB.js +2 -0
  18. package/dist/chunks/paste-5DSTHQGK.js +2 -0
  19. package/dist/chunks/{utils-3HW4KOGE.js → utils-FAFUQJ2A.js} +1 -1
  20. package/dist/index.d.ts +232 -7
  21. package/dist/index.js +2 -2
  22. package/dist/kodax_cli.js +945 -923
  23. package/dist/sdk-agent.d.ts +1459 -10
  24. package/dist/sdk-agent.js +1 -1
  25. package/dist/sdk-coding.d.ts +4543 -14
  26. package/dist/sdk-coding.js +1 -1
  27. package/dist/sdk-llm.d.ts +209 -10
  28. package/dist/sdk-llm.js +1 -1
  29. package/dist/sdk-repl.d.ts +2694 -13
  30. package/dist/sdk-repl.js +1 -1
  31. package/dist/sdk-skills.d.ts +487 -11
  32. package/dist/sdk-skills.js +1 -1
  33. package/dist/types-chunks/bash-prefix-extractor.d-B2iliwdi.d.ts +2432 -0
  34. package/dist/types-chunks/capability.d-BxNgd1-c.d.ts +368 -0
  35. package/dist/types-chunks/cost-tracker.d-C4dMlQuV.d.ts +342 -0
  36. package/dist/types-chunks/history-cleanup.d-q1vAvCss.d.ts +1266 -0
  37. package/dist/types-chunks/instance-discovery.d-DZhp77vb.d.ts +1217 -0
  38. package/dist/types-chunks/resolver.d-BwD6TKz7.d.ts +262 -0
  39. package/dist/types-chunks/storage.d-Bv9T99Qu.d.ts +584 -0
  40. package/dist/types-chunks/types.d-C5mHR87z.d.ts +119 -0
  41. package/package.json +8 -2
  42. package/dist/acp_events.d.ts +0 -109
  43. package/dist/acp_logger.d.ts +0 -20
  44. package/dist/acp_server.d.ts +0 -92
  45. package/dist/chunks/chunk-4E76FLZ3.js +0 -2
  46. package/dist/chunks/chunk-7LQ2NCHF.js +0 -1221
  47. package/dist/chunks/chunk-N2VZ2MJF.js +0 -11
  48. package/dist/chunks/chunk-WEEQZYZS.js +0 -460
  49. package/dist/chunks/chunk-XI75LZIO.js +0 -30
  50. package/dist/chunks/compaction-config-YL4SWWII.js +0 -2
  51. package/dist/chunks/dist-AMUYI7R5.js +0 -2
  52. package/dist/cli_commands.d.ts +0 -17
  53. package/dist/cli_option_helpers.d.ts +0 -49
  54. package/dist/cli_option_helpers.test.d.ts +0 -1
  55. package/dist/constructed_cli.d.ts +0 -82
  56. package/dist/constructed_cli.test.d.ts +0 -1
  57. package/dist/kodax_cli.d.ts +0 -7
  58. package/dist/self_modify_cli.d.ts +0 -81
  59. package/dist/self_modify_cli.test.d.ts +0 -9
  60. package/dist/skill_cli.d.ts +0 -15
  61. package/dist/skill_cli.test.d.ts +0 -1
package/CHANGELOG.md CHANGED
@@ -4,6 +4,106 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  > Full history for versions prior to v0.7.0: [CHANGELOG_ARCHIVE.md](docs/CHANGELOG_ARCHIVE.md)
6
6
 
7
+ ## [0.7.41] - 2026-05-19
8
+
9
+ ### Theme
10
+
11
+ **KodaX Team Mode + AMA Reliability + Source-Tree Modularization + REPL Render & TTFB Perf** — The release lands a third-axis differentiator (multi-instance auto coordination) alongside four AMA-path reliability fixes (mid-turn inject, pending-children handoff gate, post-handoff label flip, terminal-verdict fallback), the Todo V2 per-task CRUD migration with extension hooks, and the largest source-tree refactor since v0.7.25 (`runner-driven.ts` 6406 → 1897 lines, -70.4%, byte-identical). FEATURE_125 KodaX Team Mode is the headline: zero-cognitive-load multi-session awareness (no `/team create`, no `team_id`) — each KodaX instance writes per-pid state to `<configHome>/instances/<pid>/`, every LLM round injects a sibling-snapshot block into all 5 managed roles' system prompts (Scout / Planner / Generator / Evaluator / Worker), and a runtime content-hash safety net catches the only genuine data-race surface (concurrent overwrite of a file another session already read). The LLM-First design contrasts with claude code Team Mode's mode-based 4-stage workflow and no-conflict-resolution semantics. FEATURE_167 closes a structural `signal:'COMPLETE'` false-positive on V2 Evaluator turns (3-layer probe-gated defense: B0 parser SKIP / B1 retry cap / B2 synthesized verdict accept). FEATURE_165 + 166 land together as a Worker→Evaluator handoff hardening pair — runtime gate blocks `emit_handoff` while child registry is non-empty (covers V1+V2 shared `handoffEmit` path), and the REPL surface flips role labels immediately on `agentSwitched` (previously lagged by a turn). FEATURE_170 migrates the todo subsystem from monolithic init/replace to per-task add/patch/remove with extension events + hooks + new `todo_create` tool; Layer 2 LLM-judge eval (Layer A + Layer B 3.2% disagreement → DATA VALID) clears gate (a)+(b) MET, (c) saturation-artifact noted. 
FEATURE_171 extracts 12 submodules from `runner-driven.ts` across R1–R4 — zero behavior change, 4 reviewer APPROVE rounds, 4314/4314 tests pass each commit, ADR-026 + HLD §3.5.1 documented.
12
+
13
+ ### Added
14
+
15
+ - **FEATURE_125 — KodaX Team Mode (Multi-Instance Auto Coordination)**. 11 commits S1–S7 + W1–W4 (`acef3c5e` → `9225ad31` → S7 `e2916675` + `e6bc5d7b` audit) + release-prep wiring `0cfc8bc4`. KodaX 自创的多 session 自动协调机制:用户**零认知负担**(无 `/team create`、无 `team_id` 概念),KodaX 自动感知本机其他 KodaX session 状态,把状态注入 LLM system prompt 让 LLM 自决避让/协作/调度;runtime 仅在 race condition 物理边界(content hash mismatch)兜底,不强制 lock、不强制等待。这是与 claude code Team Mode(mode-based 4-stage workflow + 完全无 conflict resolution)的核心差异化。**5 layers**: (S1) per-instance state writer at `<configHome>/instances/<pid>/{state.json,meta.json,heartbeat}` with atomic writes + 1s heartbeat + register/refresh/shutdown lifecycle; (S2) sibling-instance discovery + stale detection + reap with `PersistedSessionState v1` version guard + per-instance failure isolation; (S3) pure system-prompt formatter for the `=== Other active KodaX sessions ===` block (LLM-First wording, truncation, no behavior dictation); (S4) `KodaXToolExecutionContext.contentHashCache?` sha256-based stale-write detection with `recordRead` / `checkStale` / `recordWrite` per-task lifetime; (S5) tool-time soft-warning formatter for exact-path overlap match (no blocking, just an informational banner). 
**Wiring** (W1–W4): Read tool records sha256 on every successful read up to 5 MB (size cap so huge files don't pay the hash cost); Edit / Write / MultiEdit pre-mutation `checkStale` block + post-mutation `recordWrite` + sibling-overlap warning banner via `ctx.siblingSnapshot`; REPL bootstrap helper `bootstrapTeamMode()` with process-level singleton + `/exit` + SIGTERM lifecycle hooks (mirror wiring landed for both `runInteractiveMode` legacy path AND `runInkInteractiveMode` Ink REPL path — the latter was the release-blocker fix in commit `0cfc8bc4`); runner-driven adapter does per-LLM-round sibling discovery, injects `teamModeSection` into all 5 managed roles' system prompts (Scout / Planner / Generator / Evaluator / Worker) via a mutable `siblingSnapshot` ref + `Object.defineProperty` getter so tool ctx always reads the freshest snapshot. **S7 Layer 2 panel + audit**: `tests/feature-125-team-mode-awareness.eval.ts` + `benchmark/datasets/feature-125-team-mode-awareness/cases.ts`; 5 aliases × 2 cases × 5 runs = 50 LLM calls; **SHIP** per pre-registered matrix after audit-corrected regex extension (`buildToolNamePatterns` expanded from 4 to 9 syntax variants to capture kimi `read:0>{...}` and zhipu `<tool_name>read</tool_name>` forms). Layer A + Layer B audit-corrected primary verdict: case 1 84% / case 2 60% overall (4/5 aliases ≥60% — kimi case 2 narrate-without-tool documented as `feedback_model_structural_floor_not_prompt_tunable`; not addressable via prompt iteration). Design doc: [docs/features/v0.7.41.md#feature_125-kodax-team-mode--multi-instance-auto-coordination](docs/features/v0.7.41.md#feature_125-kodax-team-mode--multi-instance-auto-coordination). Test guide: `docs/test-guides/FEATURE_125_v0.7.41_TEST_GUIDE.md`.
16
+ - **FEATURE_165 — Worker `emit_handoff` pending-children gate**. Commit `0ebeb15f`. Runtime gate at `runner-driven.ts:2402` blocks `emit_handoff` when the child registry is non-empty (covers both V1 and V2's shared `handoffEmit` path). 9 unit tests + 1 integration test pin the gate semantics across both paths. **Prompt addition PARTIAL/dropped**: Layer 2 probe (250 calls × 5 aliases) showed negative-case D/E already 100% on the baseline (`Δ=0pp`), so the pre-registered SHIP condition (2) failed mathematically; the runtime gate is the production-load-bearing change. Probe also confirmed zhipu intent-vs-action floor reproduces in canned-history sessions (structural, not context-length-driven). Design doc: [docs/features/v0.7.41.md#feature_165--worker-emit_handoff-pending-children-gatev0741-hotfix](docs/features/v0.7.41.md#feature_165--worker-emit_handoff-pending-children-gatev0741-hotfix).
17
+ - **FEATURE_166 — Post-handoff role label flip**. Commit `0ebeb15f`. New `onAgentSwitched` hook on agent-runtime + `ObserverBridge.agentSwitched(role)` on coding-side. Fixes the V2 Worker→Evaluator handoff label-lag (`[Worker]` would persist on the next Evaluator turn until the assistant produced output). Production session `20260515_185354` gave a directly reproducible verdict trace. 7 unit tests + 1 pre-existing test corrected. Same session also surfaced FEATURE_167 (Evaluator text-only termination leaves `recorder.verdict === undefined`, V2 runner-driven never wired the `parseManagedTaskVerdictDirectiveFromJson` fallback — landed in FEATURE_167 below). Design doc: [docs/features/v0.7.41.md#feature_166--post-handoff-role-label-flipshipped](docs/features/v0.7.41.md#feature_166--post-handoff-role-label-flipshipped).
18
+ - **FEATURE_167 — Evaluator terminal-verdict fallback (B0 parser + B1 retry + B2 synthesized accept)**. Commit `d537c784` 2026-05-15. Three-layer probe-gated defense closes the structural `signal:'COMPLETE'` false-positive on V2 Evaluator turns where the model exits text-only without calling `emit_verdict` and `recorder.verdict` therefore stays `undefined`. **Layer B0** — parser SKIP path (regex+JSON parse on the assistant's terminal text looking for `{"signal":"COMPLETE","grade":...}` directives); **Layer B1** — retry gate with per-alias cap (default 2, zhipu cap 1 to avoid amplifying the intent-vs-action floor — see `project_zhipu_send_message_floor` memory); **Layer B2** — synthesized verdict accept (fabricates a `{signal:'NEEDS_REVISION', grade:'C', summary:'…inferred from terminal text…'}` envelope so the V2 task engine can complete instead of hanging on the missing verdict). Reviewer-suggested change "include `revise` in the gate" was rejected — correct invariant is `recorder.verdict` object identity comparison (NOT status comparison), otherwise a stale `revise` from a prior turn would falsely satisfy the gate. 29 tests (16 retry-config + 9 predicate + 4 integration); audit panel 0/75 disagreement → DATA VALID. Design doc: [docs/features/v0.7.41.md#feature_167--evaluator-terminal-verdict-兜底shipped](docs/features/v0.7.41.md#feature_167--evaluator-terminal-verdict-兜底shipped).
19
+ - **FEATURE_170 — Todo V2 Migration (per-task CRUD + extension hooks + `todo_create` tool)**. C1–C6 across 8 commits (`e45ddaa8` → `20e02103`). Replaces v0.7.x's monolithic init/replace todo-store API with per-task `add` / `patch` / `remove` operations + monotonic counter + metadata + extension events (`todo:added` / `todo:patched` / `todo:removed` / `todo:before-complete`) + before-complete hook for downstream consumers. New `todo_create` tool added to the registry + role wiring + throttle reset. Worker / legacy / throttle prompts updated to teach the per-item API (C5) with activeForm parity fix (C5 follow-up). **Layer 2 LLM-judge eval (Layer A + Layer B)**: 250-call panel + Layer A 5-sub-agent self-judge + Layer B 3-judge majority (750 calls), Layer B 3.2% disagreement → DATA VALID; gate (a)+(b) MET; gate (c) FAIL as a **pre-registered SHIP gate saturation artifact** (C2 baseline 96% / C3 100% — mathematically unable to add +20pp from a near-saturated baseline). C1 +32pp / mmx 0→100% are direct prompt-cause evidence — SHIP, keep the prompt rewrite. Lessons captured in two new memory entries: `feedback_pre_registered_gate_saturation` (pilot for baseline ceiling before deferring on Δ ≥+N pp) and `feedback_simplifying_prompt_can_regress` (Prefer over X when Y comparative clauses are load-bearing). Design doc: [docs/features/v0.7.41.md#feature_170--todo-v2-migration-per-task-crud--extension-hookssshipped-2026-05-16](docs/features/v0.7.41.md#feature_170--todo-v2-migration-per-task-crud--extension-hookssshipped-2026-05-16).
20
+ - **FEATURE_164 — Mid-turn user-input injection** (shipped as part of commit `0ebeb15f`, the FEATURE_164+165+166 triple). Closes the gap where a user prompt typed during an active LLM round was queued but only delivered as a synthetic `[user]` banner on the next idle-yield wake — semantically incorrect for the user's intent ("inject as if I'd typed it mid-turn"). Now the runner-driven adapter checks the `MessageQueue` snapshot before each LLM call and prepends any queued real-user messages as proper non-synthetic user-bubble messages within the same round.
21
+
22
+ ### Fixed
23
+
24
+ - **FEATURE_125 W3 — Ink REPL Team Mode bootstrap wiring** (release-blocker fix). Commit `0cfc8bc4`. Discovered during v0.7.41 release prep audit: FEATURE_125 W3 (commit `1a073ecc`) wired `bootstrapTeamMode` into the legacy `runInteractiveMode` path but never into `runInkInteractiveMode`, so the Ink REPL (the default REPL path on all platforms since v0.7.25) ran with Team Mode dormant — `<configHome>/instances/<pid>/` was never created, no heartbeat thread started, sibling discovery returned empty, and the system-prompt `teamModeSection` was a no-op for every Ink-launched session. Mirror-wires the bootstrap + `process.on('exit')` + `process.on('SIGTERM')` + clean-exit cleanup into `runInkInteractiveMode` at the same insertion point (after `gitRoot` resolution, before render). 37 lines net.
25
+ - **Issue 132 — h2-boundary `session.jsonl` ENOENT race**. Commit `bf3006fb`. Eager-read in `agent-task-runner` resolves the timing window where benchmark h2-boundary cases would call `tail -f session.jsonl` before the file existed on disk; pre-reads on task start instead of awaiting the first append.
26
+ - **FEATURE_166 stale-test correction** (1 pre-existing test): `agent-runtime.test.ts` had been asserting the buggy label-lag behavior as-correct — corrected to pin the fixed semantics so future regressions surface immediately.
27
+ - **FEATURE_171 build break + decl emit** (covered transitively by the R1–R4 chain test-pass discipline): every refactor commit ran `tsc -b tsconfig.build.json` + 4314 tests green; no stage shipped a partial transform.
28
+ - **Bundle SDK `.d.ts` so consumer `tsc` resolves types** (commit `af623000`). Footgun caught at the SDK consumer surface: tarball shipped `dist/index.js` + subpath bundles but no matching `.d.ts`, so `import { runKodaX } from '@kodax-ai/kodax'` worked at runtime while consumer `tsc` reported missing types. Build pipeline now layers `tsc --emitDeclarationOnly` on top of the esbuild bundle so every published subpath ships real types.
29
+ - **`KODAX_RENDER_TRACE` default path uses `os.tmpdir()` not `homedir()`** (commit `54a59caa`). Phase A.0 review follow-up — `homedir()` pollutes the user's home with per-pid trace files; `os.tmpdir()` is the conventional location for ephemeral diagnostic output and gets cleaned up by the OS.
30
+
31
+ ### Performance
32
+
33
+ - **FEATURE_172 — REPL Render Path Optimization (Phase 1 + Phase A.0/A.1)**. Triggered by user SSH long-session (`kodax -c` with 200+ history items) reporting "every 2-3s a frame refresh" during streaming. Two-phase work, with a mid-feature scope correction.
34
+ - **Phase 1 (data layer)** — 5 commits `19c6aff3` → `26d47084`. Split `transcript-layout.ts` into pure static/dynamic helpers (`buildTranscriptStaticPortion` / `buildTranscriptDynamicPortion` / `composeTranscriptRenderModel`); split `promptMainScreenRenderModel` + `transcriptMainScreenRenderModel` `useMemo` into static + dynamic with a static-cache-key invariant (streaming-state changes no longer invalidate the static portion); added `React.memo` `areTranscriptRowPropsEqual` comparator on `TranscriptRowRenderer`. **Data-layer bench** (`baseline-26d47084.json`, 800 items): streaming-tick p95 94.18ms → 0.52ms (-99.4%).
35
+ - **Phase 1 scope correction (2026-05-19)** — Phase 1 ship review with 3 parallel Explore-agent traces + claudecode end-to-end pipeline comparison revealed the data-layer bench (`benchmark/perf/repl-render-perf.bench.ts`) only measured `buildTranscriptRenderModel` inner function (~3-5% of total per-frame cost). The real ~80% lives in `tui/substrate/ink/` rendering substrate: `renderNodeToOutput` full-tree recursion (~55%), `setCellAt` `cells.slice()` O(N²) (~12%), `Output.getGrid()` rebuild (~12%), `diffEach` full-screen walk (~10%), `markDirty` propagation gap (~5%). **Lesson** captured to feedback memory: bench must measure end-to-end wall-time, not isolated inner functions; static analysis of a hot loop can miss the actual cost center.
36
+ - **Phase A.0 — `KODAX_RENDER_TRACE` env-gated per-frame trace + end-to-end bench scaffold** (commits `5ca91970` + `54a59caa` + `dae85141` + `99e7f2af`). Env-gated trace writes one `frame=N renderTime=X bytes_per_frame=Y writes=Z` line per render to `<tmpdir>/kodax-render-trace-<pid>.log`; bench scaffold parametrizes viewport at the user's real SSH dimensions (148×43) and measures the full engine `onRender` pipeline with a mock stdout so `setCellAt` / `outputToScreen` / `diff` costs are real.
37
+ - **Phase A.1 — `ScreenBuilder` eliminates `setCellAt` O(N²) `cells.slice()`** (commit `25bf0f52`). New mutable builder pattern at `output-to-screen.ts:211`: original `setCellAt(screen, ...)` did `screen.cells.slice()` (full width×height ref copy) + `{...screen, cells}` per non-empty cell — on a 148×43 viewport with ~500 non-empty cells/frame that's ~3.18M element-copies + 500 fresh arrays + 500 fresh Screen objects per frame. `createScreenBuilder(width, height)` exposes O(1) `setCellAt` writes + one-shot `build()` that returns a frozen Screen; only the `outputToScreen` hot loop migrated, public `setCellAt(Screen, ...)` API preserved for tests + future immutable callers. **End-to-end bench delta** (148×43, `mainscreen-windowed-800` scenario): renderer p95 14.804ms → 3.095ms (-79%, 4.78× speedup). 193 substrate-ink tests + 7 new ScreenBuilder unit tests (byte-equal vs `setCellAt`, OOB rejection, post-build-write rejection, 10k-write soft budget) + last-write-wins test `1105a181` close the review loop. 1426/1427 full repl PASS.
38
+ - **Phase A.2-E deferred pending user SSH trace measurement after A.1 ship.** ADR-028 documents the full claudecode port plan (Phase B nodeCache + markDirty / Phase C screen.damage bounding box / Phase D Output.charCache + StylePool / Phase E FRAME_INTERVAL + viewport culling). Layer 0 G1 (transcript render goldens, `925a4d77`) + G2 (perf bench + baseline, `4641ebb9`) + G4 (hit-test + selection 22 edge tests, `4fb590f3`) shipped as Phase 0 planning artifacts; ADR-027 + ADR-028 + `docs/test-guides/FEATURE_172_v0.7.41_REGRESSION_GUIDE.md` document the full pipeline.
39
+ - **First-round TTFB compression — drop `refresh:true` tax + parallel pre-LLM + REPL-mount prewarm** (commit `e8b336ed`). Triggered by user observation: review-type prompts on a medium repo paid ~24s pre-LLM wall-time (after parallel/memoize work) before any LLM token streamed. Compressed via L1+L2 to ~10-15s (LLM-TTFB-bound). 5 stacked changes:
40
+ - **L1 — `middleware/repo-intelligence.ts` first-round NEVER forces `refresh:true`**. 4 sites of `refresh: isNewSession` → `refresh: false`. The 30s `PREMIUM_REFRESH_TIMEOUT_MS` budget was paid on every new session, but the daemon's own background polling keeps its on-disk state fresh; the 4s budget path returns the daemon's already-cached state immediately. Single biggest saving (~10-15s).
41
+ - **L2 — REPL-mount prewarm** (new `prewarmRepoIntelligenceCaches` helper exported from `@kodax-ai/coding` + Ink-REPL `useEffect`). Fires `getRepoRoutingSignals` + `getRepoPreturnBundle` with refresh:false at REPL mount, fire-and-forget. Cache-coherent with L1 (both refresh:false) so user-path either coalesces onto in-flight prewarm Promise (~2s) or hits warmed P3+ cache (~0ms). Default-on; opt-out via `KODAX_PREWARM_REPO_INTELLIGENCE=0`.
42
+ - **P1.a — middleware parallel fan-out**. Two-phase `Promise.all`: Phase 1 races OSS overview (git+fs) with premium preturn (daemon); Phase 2 races module + impact direct-call fallbacks ONLY for slots not already filled by preturn. Behavioral pins preserved (preturn gating + `.catch(() => null)` error isolation + emit order: preturn → module → impact).
43
+ - **P1.b — run-substrate parallel**. `hydrateSession` (MCP state restore) and `getRepoRoutingSignals` collapsed to one wall-time slot via `Promise.all`; hydration error propagation unchanged, routing has independent `.catch(() => null)`.
44
+ - **P2 / P3 / P3+ — multi-tier cache stack**. P2 in-flight Promise sharing in `tryPremiumPreturn` (1.5s TTL, cacheKey DELIBERATELY includes `refresh` so explicit `refresh:true` callers — `/repointel warm`, eval harness — get their own daemon work). P3/P3+ session-scoped caches (60s TTL on routing signals + preturn bundle, cacheKey OMITS refresh so prewarm + first-round share one entry under the "data within 60s is fresh by definition" semantics). `normalizeCachePath` helper makes cacheKey robust to Windows drive-letter case + relative-vs-absolute caller variations + Promise rejection paths.
45
+ - **Default repo-intelligence mode preserved as `'auto'`**. Briefly experimented with flipping default to `'oss'` for users without repointel; cost analysis showed `'auto'` fallback path is ~10ms localhost TCP RST + 2s `PREMIUM_FAILURE_TTL_MS` cache → 0ms within TTL + ~5-10ms per >2s gap (negligible vs LLM TTFB). Auto-detection of installed repointel is the right default per README:182.
46
+ - **Inline spinner-row stats tail — elapsed + tokens (claudecode parity)** (commit `58682cbf`). REPL spinner row gains an inline `Xs · Y tokens` running tail (matches claudecode's status indicator). The first in a series of claudecode-parity surface improvements; documented in ADR-027 Phase 0.
47
+
48
+ ### Internal / architecture
49
+
50
+ - **FEATURE_171 — `runner-driven.ts` modular split**. R1 `2fef1c31` (4 leaf modules + `types.ts`) → R2 `f0be2d4e` (4 mid-coupling modules) → R3 `bfb2b818` (agent-chain + llm-adapter) → R4 `62dc1c58` (payload-builder + checkpoint-flow). 12 submodules extracted; **6406 → 1897 lines (-70.4%)**; **zero behavior change**; **4 reviewer APPROVE rounds**; **4314/4314 tests pass on each commit**. ADR-026 + HLD.md §3.5.1 documented in R5 (`4d108af9`). The refactor preserves the closure pattern around `baseCtx` / `siblingSnapshot` / `contextTokenSnapshotRef` — what was a 6400-line monolith is now a stack of named factories each under 800 lines. Module map: `types.ts`, `agent-chain.ts`, `payload-builder.ts`, `checkpoint-flow.ts`, `llm-adapter.ts`, `compaction-bridge.ts`, `manager-input-builder.ts`, `result-projection.ts`, `tool-ctx-builder.ts`, `child-task-orchestration.ts`, `recorder-bridge.ts`, plus the residual `runner-driven.ts` entry. Side benefit: faster IDE hover-pop on the public surface; the public export shape is unchanged so all consumers are byte-equivalent.
51
+ - **`bootstrapTeamMode` + `TeamModeHandle` exports added to `@kodax-ai/agent`** so the Ink REPL can import them without depending on legacy-CLI internals. The handle exposes `shutdown()` and is opaque otherwise (per the layer-independence guarantee — REPL has no business poking at the per-instance writer's internals).
52
+ - **`KodaXToolExecutionContext.contentHashCache?`** field added with `recordRead` / `checkStale` / `recordWrite` API surface. Per-task lifetime (created at task start, destroyed at completion). Wired into Read / Edit / Write / MultiEdit tool implementations so the FEATURE_125 race-detection works without per-tool plumbing.
53
+ - **`KodaXToolExecutionContext.siblingSnapshot?`** field added (as a mutable ref) with `Object.defineProperty` getter on the tool ctx so each tool invocation reads the freshest snapshot from the runner-driven adapter's per-round refresh. Avoids stale-snapshot reads when the LLM stream spans multiple seconds.
54
+ - **`buildToolNamePatterns` extended from 4 to 9 syntax variants** in the benchmark harness regex tooling (`benchmark/datasets/feature-125-team-mode-awareness/cases.ts` + downstream). Captures kimi `read:0>{...}`, zhipu `<tool_name>read</tool_name>` and 3 other non-canonical syntaxes; lesson saved as `feedback_regex_audit_per_new_eval`.
55
+ - **`JudgeContext.toolCalls?`** plumbed through `benchmark/harness/judges.ts` + both call sites in `benchmark/harness/harness.ts`. Optional `judge(output, context?)` arg lets binding-only providers (zhipu/glm51, mmx/m27, etc. — they emit `text=""` and put the tool call in the structured `tool_calls` field) be judged on what the harness actually captured, not on the empty raw text. Existing text-only judges ignore the arg and continue to work unchanged. Per `feedback_audit_must_see_binding` + `feedback_audit_binding_priority_in_prompt`: also requires the audit judge prompt to label the binding as "ABSOLUTE GROUND TRUTH" + a `CRITICAL RULE` system prompt section, or judges over-anchor on the empty raw text.
56
+ - **2 prompt-eval datasets** added under `benchmark/datasets/`: `feature-125-team-mode-awareness/` (S7 Layer 2 panel: peer-active-file-acknowledge-read-first + peer-recently-modified-reread) and `tool-schema-slim/` (Layer 2 eval of v2_slim ~half + v3_aggressive ~quarter description variants for `ask_user_question` + `todo_create` — see "Tool schema slim eval" below).
57
+ - **Tool schema slim eval (DEFER both v2 + v3)**. Commit `d68141ea`. Designed + ran the largest two-Scout-tool slim attempt: `ask_user_question` (2760 B / ~690 tok) + `todo_create` (2384 B / ~596 tok) — combined ~785–990 tokens potentially saved. 4-alias panel × 9 cases × 5 runs + panel-internal majority audit (initial 85–97% disagreement on AUQ_6 / 18–30% on TC_1 fixed by switching to v2 `CRITICAL RULE` prompt → 0% disagreement, data validated). Both variants **DEFER**: v2 gate (a) violations AUQ_1 zhipu −20pp + TC_1 zhipu/ds/kimi −20 to −40pp; v3 gate (a) violations AUQ_1 zhipu/ds −20 to −40pp + TC_1 zhipu/ds −40pp. Reason: `"For X use Y, NOT Z"` comparative clauses in schema descriptions are load-bearing disambiguation priors — slimming caused zhipu/ds to mis-classify simple cases. Pattern matches existing `feedback_simplifying_prompt_can_regress` + `feedback_model_structural_floor_not_prompt_tunable`. Future schema-slim work: don't touch "use X for ... NOT for ..." clauses; safe to slim version prefixes + return-value descriptions + "use sparingly" style instructions + property description secondary detail. Net cost ~$23 within ~$27 budget.
58
+
59
+ ### Test coverage delta
60
+
61
+ - New: 17 (S1) + 20 (S2) + 20 (S3) + 15 (S4) + 15 (S5) + 7 (S6 integration) + 4 (W1) + 14 (W2) + 10 (W3) + 9 (W4) = 131 FEATURE_125 tests; 9 (FEATURE_165) + 7 (FEATURE_166) + 29 (FEATURE_167) + ~30 (FEATURE_170 C1–C6 follow-ups) = ~75 reliability tests; 50 (FEATURE_171 R4 tool wiring contract) + 0 net new for R1–R3 (all R-series ran the full pre-existing 4314 each round); ~95 FEATURE_172 Phase 1 (65 transcript-layout helpers + 8 golden snapshot + 22 hit-test/selection edge) + 17 React.memo comparator + 7 ScreenBuilder + 1 last-write-wins + 1 KODAX_RENDER_TRACE = ~120 FEATURE_172 tests; 4 cache-coalesce regression tests for the TTFB stack (P2 in-flight, P3 cross-call, P3+ multi-round, refresh:true-within-TTL).
62
+ - Total green at HEAD: **5,081 tests pass + 23 todo + 1 skipped across 8 workspaces** (agent 477 / coding 2712 / llm 276 / mcp 28 / repl 1419 / repo-intel & skills 136 / repointel-protocol & session-lineage 18 / tracing 15). `tsc -p packages/coding/tsconfig.json --noEmit` + `tsc -p packages/repl/tsconfig.json --noEmit` both clean.
63
+
64
+ ## [0.7.40] - 2026-05-13
65
+
66
+ ### Theme
67
+
68
+ **Envelope Spillover + Vision Bridge** — Two parallel-developed features close gaps in the child-agent communication path (FEATURE_121) and the REPL input path (FEATURE_134). FEATURE_121 routes child task summaries through the existing `tool-result-policy.ts` spillover system (50KB per-banner + 200KB envelope aggregate cap, mirror of claudecode's `MAX_TOOL_RESULTS_PER_MESSAGE_CHARS`), removing two prior hard truncations (`orchestration.ts:1033` 1600-char slice + `dispatch-child-tasks.ts:256` 200-char slice) that were silently losing 95%+ of child output bytes. A follow-up LLM blob summarizer fallback handles the residual `spillFailed + content > 100KB` data-loss edge that the main slice's inline fallback would otherwise resolve by blowing the Worker context window. FEATURE_134 adds 5 paste sources (bracketed paste / `@<path>` file refs / macOS Cmd+V auto-link / Windows Alt+V explicit / macOS-Linux Ctrl+V backup) on top of the existing `KodaXImageBlock` AI-layer vision serialization, completing the round-trip from screenshot in clipboard to multimodal `user` message at any of the 12 KodaX providers. A late P0 regression in REPL transcript rendering (transcript items invisible during agent execution under AMA mode on Windows ConPTY) was diagnosed as `useDeferredValue` starvation in Ink under Node.js (no DOM idle-scheduling bridge), fixed by removing the deferral indirection — the 200-item UUID-anchored cap from FEATURE_060 Tier 2 retains the perf protection that was the actual fix for SSH-resume O(N) blow-up.
69
+
70
+ ### Added
71
+
72
+ - **FEATURE_121 — Envelope Spillover Gap-Fix + LLM Blob Summarizer Fallback**. Two-commit landing: main slice (`0a0f844e`) + follow-up LLM blob summarizer (`ba0c82f9` + review fixes `05259ab2`). Removes the `orchestration.ts:1033` 1600-char `truncateText` + `dispatch-child-tasks.ts:256` 200-char `slice` two-layer hard truncation that silently dropped 95%+ of child task output (a 25KB audit report reached the Worker as ~50 tokens). Routes every `<task-completed>` banner through `applyToolResultGuardrail('child_task_summary', ...)` — 50KB head + spill-to-file under `getAgentConfigPath('tool-results')/<id>.txt`, banner now carries a preview + spill path that the Worker reads via standard Read tool. `composeIdleYieldUserMessage` in `@kodax/agent/orchestration/idle-yield.ts` gains a 200_000-chars aggregate envelope cap (mirror of claudecode's `MAX_TOOL_RESULTS_PER_MESSAGE_CHARS=200_000`) — when N banners individually fit but together exceed envelope budget, the enforcer calls `applyToolResultGuardrail(..., { forceSpill: true })` to reclaim space. Capability sections include a `LARGE CHILD OUTPUT (FEATURE_121 v0.7.40)` block teaching the Worker the spillover-path Read pattern. 4-alias × 3-case × 5-runs Layer 2 eval clears PARTIAL SHIP (3/4 aliases ≥80% on each case; mmx/m27 weak floor 60-80% documented in test guide). **Follow-up — LLM Blob Summarizer (last-resort fallback)**: when `persistToolOutput` fails (ENOSPC / EACCES / EROFS / SELinux denial) AND raw content > `LARGE_CONTENT_THRESHOLD_BYTES` (100KB), `dispatch-child-tasks` now calls `ctx.summarizeBlob(content, {maxChars: 8000})` to compress to a ~2-8KB lossy summary preserving file paths / line numbers / error codes / identifiers / findings verbatim, banner-wrapped with `[SPILL FAILED — original ${size} compressed via LLM summarizer; raw content unavailable. Worker: treat this summary as LOSSY...]`. 
The summarizer callback is injected into `KodaXToolExecutionContext` via lazy-once memoization in `runner-driven.ts` bound to the Worker's own provider/model — layer-independent (`@kodax/agent` stays unaware of LLM client). If the summarizer itself fails (provider error / abort), falls back to inline full content with an emergency banner `[SPILL FAILED AND LLM SUMMARIZER FAILED — original ${size} inlined as last-resort emergency dump...]` so the Worker never silently receives oversized opaque content. Honors the FEATURE_121 contract: **silent data loss is the worst outcome; over-budget but observable is acceptable**. 4-alias × 2-case × 3-runs Layer 2 eval clears SHIP gate (4/4 aliases × 100% retention on audit_report + grep_findings cases). Test guide: `docs/test-guides/FEATURE_121_v0.7.40_TEST_GUIDE.md`. Design doc: `docs/features/v0.7.40.md#feature_121-envelope-spillover-gap-fix--child-task-summary-接入-tool-result-policy`.
73
+ - **FEATURE_159 — MessageQueue as Single Source of Truth + Idle-Yield Mode-Split**. Main commit `948b8879` (Phase 3 mode-split synthetic + unified queued-followup predicate) + design `daa8e846` + follow-up `9d4c6ae4` (queue filter scope + verdict.summary echo) + test-isolation `29369a2a` (MessageQueue test isolation + compaction flake fix). User-reported during v0.7.40 RC: after the Worker dispatched 3 child tasks and while the main agent was still tool-calling, the user typed a follow-up "你是派出了子Agent再做嘛?" — the status bar showed `Queue 1` / `Queued follow-ups: 1`, but the prompt looked **swallowed** when the agent finished its investigation (not folded into the answer). Forensic analysis showed two stacked failures: (1) `waitForWakeEvent` consumed messages via `MessageQueue.dequeue()` with no reverse notification to REPL, so the React `state.pendingInputs` retained stale entries and the `Queue N` indicator never cleared; (2) drained prompts were marked `_synthetic: true` and hidden from transcript — the user prompt appeared dropped even though it had landed in the model context. Fix flips the substrate: `MessageQueue` becomes the **single source of truth** with typed event subscribe + frozen snapshot read + `mode`/`id`/`predicate` filtering APIs; REPL reverses its sync direction (queue→React mirror via `subscribe`); `composeIdleYieldUserMessage` branches on `msg.mode` — real user prompts emit as **non-synthetic** user-bubble messages (visible in transcript), task-notifications stay synthetic (silent background framing). Net result: claudecode-parity messageQueueManager semantics + SDK-grade observable substrate, without importing claudecode's `commandLifecycle` module, `recordQueueOperation` file sink, or 3-tier priority (kept KodaX's binary user/background). Net −70 LoC, zero new modules. Test guide: `docs/test-guides/FEATURE_159_v0.7.40_TEST_GUIDE.md`. Design doc: `docs/features/v0.7.40.md#feature_159-messagequeue-as-single-source-of-truth--idle-yield-mode-split`.
74
+ - **FEATURE_134 — Image / Screenshot Paste Input (REPL Vision Bridge)**. Commit `2e9674bb`. Adds the REPL paste entry point on top of KodaX's existing `KodaXImageBlock` AI-layer vision serialization (already implemented in `packages/ai/src/providers/anthropic.ts:770` + `openai.ts:904` + `image-serialization.ts`). 5 paste sources per claudecode `usePasteHandler.ts` parity: (1) bracketed paste base infra (DEC 2004 `ESC[200~/201~` wrapping — note FEATURE_134's redundant `enableBracketedPasteMode()` pre-render write was removed in `ca009b3a` since `KeypressContext.tsx:134` already owns DEC 2004 lifecycle via Ink's managed stdout); (2) file path paste — `extractImagePaths` splits on `/` or `[A-Za-z]:\\` + extension; (3) macOS Cmd+V auto-link via `osascript NSPasteboard` read; (4) Windows Alt+V explicit keybind (Ctrl+V is system-paste-reserved on Windows, same as claudecode); (5) macOS/Linux Ctrl+V backup via `wl-paste` / `xclip` / `osascript`. Building blocks in `packages/repl/src/paste/`: `bracketed-paste-mode.ts` (DEC 2004 lifecycle), `image-normalize.ts` (jimp decode + clamp 2000px + PNG→JPEG quality ladder 80/60/40 to fit 3.75MB), `clipboard-image.ts` (cross-platform reader, never throws), `persist-image.ts` (writes to `$TMPDIR/kodax-paste/` + returns path-based `KodaXImageBlock`), `paste-handler.ts` (5-source orchestrator). Library choice: jimp (~10MB pure JS) over sharp (~30MB native binary) — jimp install never fails on cross-platform CI; claudecode picked sharp for broader use cases but KodaX only paste-uses, so jimp is the simpler fit. Integration: paste event → `@<resolved-path>` text-token translation via existing `common/input-artifacts.ts:preparePromptInputArtifacts` pipeline which already converts `@<path>` refs to `KodaXInputArtifact[]` on submit — no changes needed downstream. Test guide: `docs/test-guides/FEATURE_134_v0.7.40_TEST_GUIDE.md`. Design doc: `docs/features/v0.7.40.md#feature_134-image--screenshot-paste-input--repl-vision-bridge`.
75
+
76
+ ### Fixed
77
+
78
+ - **REPL transcript rendering starvation — `useDeferredValue` removed from `displayHistory` chain**. Commit `ca009b3a`. User-reported P0 (AMA mode, Windows ConPTY): pressing Enter on a query showed nothing in the transcript for the entire agent run — header banner + TodoListSurface + spinner + status bar rendered normally, but assistant thinking blocks / tool calls / tool results / assistant text remained invisible until task completion forced a re-render. Root cause: `useDeferredValue(displayHistory)` (introduced in v0.7.30 FEATURE_060 Tier 2 as a polish on top of the 200-item cap that was the real perf fix) marks transcript rebuild as low-priority React work. Under React DOM, this work flushes during browser idle time via the scheduler's idle-callback bridge. Ink uses react-reconciler without that bridge — under Node.js, high-priority work (spinner ticks @30fps, streaming setState bursts, tool-state updates) perpetually pre-empts the deferred work, so the low-priority track never flushes until a "big" state change like `setIsLoading(false)` forces a sync re-render. v0.7.40 surfaced the latent starvation: FEATURE_121's envelope spillover replaced `slice(0, 200)` child task truncation with up-to-50KB head + spill content, raising per-item `buildPromptSurfaceItems` cost from ~0.1ms (200 chars) to ~10-50ms (50KB) — single-item cost crossed the React scheduler's low-priority deferral threshold, starving the deferred update perpetually under high-frequency setState bursts. Fix: replace `useDeferredValue(displayHistory)` with direct passthrough. The 200-item UUID-anchored cap (the real perf protection from FEATURE_060 Tier 2) and the transcript-mode 30-message cap remain — together with React's built-in `useMemo` memoization (which prevents spinner ticks from triggering `buildPromptSurfaceItems` when history is unchanged), per-render cost stays bounded at O(min(N, 200)). 
Trade-off: resuming a long session with `kodax -c` over Windows-SSH may add a one-time first-paint cost of ~10-40ms — well below the human perception threshold (~100ms). A length-thresholded fallback pattern (`displayHistory.length > 100 ? lazyDeferred : displayHistory`) is documented inline for use if the slowdown reproduces in the future.
79
+ - **FEATURE_121 v0.7.40 follow-up build break + memoize + emergency banner**. Commit `05259ab2`. Fixes three issues uncovered during review of `ba0c82f9`: (1) **CRITICAL build break** — `blob-summarizer.ts` imported `KodaXProvider` (not exported from `@kodax-ai/llm`); replaced with `KodaXBaseProvider` everywhere. `tsc --noEmit` missed this; `tsc -b` with declaration emission caught it during `npm run build:packages`. (2) **HIGH memoize** — `runner-driven.ts` `summarizeBlob` was rebuilding the factory closure (including `resolveProvider`) on every call; changed to lazy-once memoize (cached on first invocation, reused for the rest of the Worker run). (3) **MEDIUM emergency banner** — when the LLM summarizer ITSELF failed AND we fell back to inline 100KB+ content, the Worker received the raw blob with no banner; the fallback now prepends `[SPILL FAILED AND LLM SUMMARIZER FAILED — original ${size} inlined as last-resort emergency dump]` so the Worker sees a clear signal to expect possible downstream truncation and re-run upstream with narrower scope.
80
+ - **`fix(build,v0.7.39): make 'npm run build' produce shippable dist/`** (commit `b77fa0a3`). Footgun caught during clean-room publish dry-run: `npm run build` previously ran `npm run build:packages && tsc` — the trailing bare `tsc` read root `tsconfig.json` (`outDir: ./dist`, `declaration: true`) and overwrote the esbuild bundle with unbundled `tsc` output containing bare `import '@kodax-ai/coding'` specifiers, which would have shipped a broken tarball — consumers running `npm install @kodax-ai/kodax` would hit `ERR_MODULE_NOT_FOUND`. Fix: `"build": "npm run build:packages && npm run build:bundle && tsc --emitDeclarationOnly"`. The `--emitDeclarationOnly` flag is the critical guard — tsc skips `.js` entirely and only writes `.d.ts` on top of the esbuild bundle. Now any caller (developer, CI, `release.mjs`) can run `npm run build` at any time and get a complete, shippable dist/. Side benefit: SDK consumers get real TypeScript types for all 5 subpath entries (was untyped before).
81
+ - **`fix(release,v0.7.40): bake KODAX_VERSION into bundle via esbuild --define`** (commit `b70048b7`). The previous v0.7.39 release was bundled with `process.env.KODAX_VERSION` left unresolved, so the runtime version was `undefined` in the published `dist/kodax_cli.js` — visible to users as `KodaX undefined` in the banner. Fix: `scripts/build-bundle.mjs` reads root `package.json` at build time and injects the version via esbuild `--define`, so the bundled CLI carries the correct version literal. The `release.mjs` flow is unchanged: it bumps `package.json` first, then runs `npm run build` (which reads the bumped version).
82
+ - **FEATURE_134 follow-up: Alt+V duplicate-image-file accumulation in temp dir**. User-reported regression during v0.7.40 RC validation: pressing Alt+V on the same screenshot created many identical-content files with different UUID names under `$TMPDIR/kodax-paste/`. Two root causes: (1) `prompt-input-controller.ts:triggerExplicitImagePaste` had no single-flight guard — OS-level key autorepeat (Windows ConPTY in particular) fires multiple Alt+V keypresses on a brief hold, each spawning a concurrent `readClipboardImage` + `persistImageAsBlock` pair; (2) `persist-image.ts` used `randomUUID()` filenames so even identical buffer content wrote N distinct files. Fix in [packages/repl/src/paste/persist-image.ts](packages/repl/src/paste/persist-image.ts): filename derived from `sha256(buffer).slice(0,16)` — identical content reuses one path, `writeFile` is idempotent on rewrite. Fix in [packages/repl/src/ui/utils/prompt-input-controller.ts](packages/repl/src/ui/utils/prompt-input-controller.ts): `explicitImagePasteInflightRef` boolean ref guard drops re-entrant invocations until the in-flight clipboard read settles. New tests: `persist-image.test.ts` adds "reuses the same path for identical content" + "produces distinct paths for different content"; `prompt-input-controller.test.ts` adds "Alt+V autorepeat fires the clipboard read only once (single-flight guard)". Together the two fixes cap temp-dir growth per unique screenshot at one file per session.
83
+ - **FEATURE_134 follow-up: Gemini-CLI vision via `@<path>` token injection** (commit `71d45783`). The ACP base class (`packages/ai/src/providers/acp-base.ts`) was silently dropping image blocks at the prompt flatten step (`.filter(b => b.type === 'text')`) before forwarding to the CLI bridge. New extension point `serializeImageBlockToPromptToken(block)` on `KodaXAcpProvider` defaults to `null` (preserves silent-drop for Codex-CLI which has no `codex exec --json` image surface), and `KodaXGeminiCliProvider` overrides to return `@<absolutePath>` — Gemini CLI 2.x's file-include syntax inlines any readable file (including images) into the model context. The CLI-bridge capability profile gets a new sibling `IMAGE_INPUT_CLI_BRIDGE_PROVIDER_CAPABILITY_PROFILE` so the policy gate sees the right metadata. 2 new tests in `cli-bridge-providers.test.ts` pin the exact wire format for both Gemini (`@<path>`) and Codex (null).
84
+ - **FEATURE_134 follow-up: paste tmp dir age-based GC at REPL bootstrap** (commit `0eb6cbb4`). Per-session content-hash dedup (commit `12589a46`) prevented Alt+V autorepeat from stacking duplicates within one session, but cross-session accumulation was still unbounded — files only cleared on OS tmpdir reboot. New `prunePasteTmpDir(now?)` exported from `packages/repl/src/paste/persist-image.ts` deletes `paste-*` files older than `PASTE_TMP_TTL_MS` (24h). Non-paste files in the same dir survive (e.g. user-dropped `notes.txt`); per-file errors swallowed so concurrent KodaX instance races don't break REPL startup. Wired in `InkREPL.tsx` bootstrap as a fire-and-forget dynamic import — never blocks first paint. Active-session paste files (always within TTL) are preserved across overnight idle. 4 new tests cover dir-not-exist / age-based deletion / non-paste preservation / TTL boundary.
85
+ - **FEATURE_134 follow-up: custom provider vision opt-in documented** (commit `db36dc69`). README.md + README_CN.md gain a complete `customProviders` example showing the `capabilityProfile.multimodalSupport: 'image-input'` nested shape. The 12 vision-capable built-in providers ship with the flag already enabled; only custom providers needed opt-in instructions, because the field was implicitly available via `KodaXCustomProviderConfig.capabilityProfile` but undocumented.
86
+ - **AMA in-turn compaction parity gap — three-phase lifecycle restored (microcompact + snapshot-aware trigger + graceful fallback)**. User-reported during v0.7.40 RC validation: status bar climbed from 124k → 138k → 150k across three AMA rounds with zhipu-coding/glm-5.1, with no compaction / microcompaction / graceful prune ever firing despite crossing the 60% × 200k = 120k threshold. Latent since FEATURE_114 v0.7.36 (4→2 role consolidation) but masked by FEATURE_076's over-aggressive round-exit reshape collapsing context to ~1k after every round; once the v0.7.40 reshape fix (see the FEATURE_076 follow-up entry below) let context grow naturally, the gap surfaced. Three structural deltas vs SA path's `runCompactionLifecycle` ([compaction-orchestration.ts:358](packages/coding/src/agent-runtime/middleware/compaction-orchestration.ts#L358)): (1) **trigger metric mismatch** — the AMA `compactionHook` ([_internal/managed-task/compaction.ts](packages/coding/src/task-engine/_internal/managed-task/compaction.ts)) compared `estimateTokens(transcript)` to the threshold, but transcript-only estimate excludes the system prompt + tools schema (Worker role prompt + AGENTS.md + REPO INTELLIGENCE TOOLS teaching + repo-intel capsule + 12 tool definitions ≈ 20–35k tokens after FEATURE_114 worker consolidation + FEATURE_161 prompt growth). 200k-window 60% trigger needs API total > 120k, but the hook saw ~95–115k transcript and never fired — the status bar's 138k was the real total (system + tools + transcript) sourced from `streamResult.usage.totalTokens`. (2) **no microcompact phase** — SA path runs `microcompact(messages, DEFAULT_MICROCOMPACTION_CONFIG)` every turn at zero LLM cost ([run-substrate.ts:603](packages/coding/src/agent-runtime/run-substrate.ts#L603)) to prune old tool_results / image blocks past `maxAge=20` turns; AMA hook bypassed it entirely. 
(3) **no graceful degradation fallback** — SA path's third phase `applyGracefulDegradationGate` ([compaction-orchestration.ts:250](packages/coding/src/agent-runtime/middleware/compaction-orchestration.ts#L250)) deterministically prunes tool_results when LLM compact threw / returned `compacted: false` / left context still above `triggerTokens × pruningGapRatio`; AMA hook bailed silently on LLM failure, letting context grow unbounded. Fix in [packages/coding/src/task-engine/_internal/managed-task/compaction.ts](packages/coding/src/task-engine/_internal/managed-task/compaction.ts): hook now mirrors SA path's three-phase lifecycle — (Phase 1) `microcompact` every call (free, prunes old tool blocks); (Phase 2) `intelligentCompact` LLM summary with snapshot-aware trigger check `resolveContextTokenCount(transcript, snapshot)` instead of raw `estimateTokens` — `snapshot.currentTokens` carries the LAST LLM call's API-reported `usage.totalTokens` (system + tools + transcript) so the threshold check uses the same metric the status bar displays; the snapshot is refreshed by the LLM adapter ([runner-driven.ts](packages/coding/src/task-engine/runner-driven.ts) `buildRunnerLlmAdapter`) after every stream completion via the new `contextTokenSnapshotRef` shared between adapter (writer) and hook (reader); (Phase 3) `gracefulCompactDegradation` deterministic prune fallback when LLM compact failed / partial / circuit-breaker-tripped — the gate's "still over" check also uses snapshot-aware accounting for symmetry with Phase 2. Snapshot rebases to `createEstimatedContextTokenSnapshot(compacted)` after any compaction (LLM or graceful) so subsequent delta corrections start from the compacted baseline rather than the stale pre-compaction API total. 
New test file [packages/coding/src/task-engine/_internal/managed-task/compaction.test.ts](packages/coding/src/task-engine/_internal/managed-task/compaction.test.ts) pins all three phases with 9 tests covering: microcompact-only return path (below trigger); microcompact identity (no diff returns undefined); snapshot-aware trigger fires when transcript-only estimate is sub-threshold but API-total crosses; no-snapshot cold-start falls back to transcript estimate (unchanged baseline); graceful fallback on LLM throw / `compacted: false` / partial success above gap-ratio; snapshot rebase after successful LLM compaction; circuit-breaker semantics (3 strikes skip LLM but graceful still fires). Side fix in [packages/coding/vitest.config.ts](packages/coding/vitest.config.ts) + [packages/repl/vitest.config.ts](packages/repl/vitest.config.ts): `@kodax-ai/llm` alias updated from stale `packages/ai/` to `packages/llm/` (rename leftover from v0.7.40 directory rename above). **Behaviour delta vs pre-fix**: AMA mode compaction now triggers at the same effective API-total threshold as SA mode; graceful fallback prevents zhipu/kimi/mimo provider-side LLM summary failures from leading to monotonic context growth; microcompact's per-turn pruning recovers ~5–15% of context per long-running Worker turn even without crossing the LLM trigger threshold. Full coding suite green: **2446 tests pass** (239 test files, 23 todo; +9 new compaction hook tests).
87
+ - **FEATURE_076 follow-up: round-boundary reshape now preserves tool_use / tool_result chains across rounds (cache + re-read regression fix)**. User-reported during v0.7.40 release validation: status bar dropped from `121k/200k` to `1.1k/200k` after an AMA round, and follow-up rounds were re-reading files the prior worker had already read. Root cause: FEATURE_076 (v0.7.25)'s `reshapeToUserConversation` replaced `result.messages` wholesale with a synthetic `[user, assistant]` dialog at every round exit, discarding the entire `tool_use` / `tool_result` chain. The replacement was the correct fix for v0.7.25's actual problem (cross-round role-prompt pollution — Evaluator role prompts leaking into the next round's worker context as user messages) but over-corrected by stripping structurally useful content, with two concrete production costs: (1) **cross-round file re-reads** — round 2 worker had no visible `tool_result` for files round 1 already read, so common follow-ups like "now modify that file you reviewed" forced re-reads; (2) **provider prompt-cache miss on the dialog prefix** — round 1's first LLM call prefix was `[system, user, assistant(tool_use), user(tool_result), …]`, round 2's is `[system, user, assistant(final), user_2, …]`; the prefixes diverge immediately after the first user message, so the dialog portion gets zero cache reuse across rounds. 
Fix in [packages/coding/src/task-engine/_internal/round-boundary.ts](packages/coding/src/task-engine/_internal/round-boundary.ts): new `preserveTranscriptForRoundExit` helper runs a 4-step pipeline — (Step 1) strip the leading stale role-prompt system message (Runner.run leaves the last-active agent's role prompt at `transcript[0]`; round 2's entry agent injects its own at position 0, so keeping the previous one would create two conflicting `system` instructions back-to-back); CompactionSummary system messages are preserved via the now-exported `COMPACTION_SUMMARY_PREFIX` discriminator from `@kodax-ai/session-lineage`. (Step 2) Apply `normalizeLoadedSessionMessages` to strip V1-legacy role-prompt-wrapped trailing `{user, assistant}` pairs (`"You are the Evaluator role..."` phrased as a user message); no-op for V2 AMA where role prompts are system-message-shaped. (Step 3) Ensure the round's user prompt is observable — V2 sessions retain it via `runnerInput`, V1 paths may have lost it when normalisation stripped the wrapper. (Step 4) Ensure the transcript ends with a plain-text assistant carrying the sanitised final answer; **replaces** (not appends) when the last message is an assistant with array content (typically `emit_verdict` / `emit_handoff` tool_use blocks — KodaX protocol machinery whose user-facing payload is captured in `result.lastText`). The replace avoids two consecutive `role: 'assistant'` messages which Anthropic's API rejects on the next request. Net behaviour: round 2 worker sees what round 1 read/edited, prompt-cache prefix stays continuous, status bar reflects the actual context size (e.g. ~50k instead of 1.1k after a heavy worker round) — a side-effect users will perceive as the bar "no longer collapsing after each turn". 
Updated [packages/coding/src/task-engine/_internal/round-boundary.test.ts](packages/coding/src/task-engine/_internal/round-boundary.test.ts) with 7 new cases covering: V2 worker-shape preservation, terminal `emit_verdict` tool_use replacement (no consecutive-assistant violation), `CompactionSummary` system message preservation, dedup of already-correct trailing assistant, empty `result.messages`, compaction-system-only transcript, terminal `thinking`-only assistant. `COMPACTION_SUMMARY_PREFIX` exported from [packages/session-lineage/src/compaction/compaction.ts](packages/session-lineage/src/compaction/compaction.ts) so the producer-consumer pair no longer maintains the literal in two places. Test guide: regression case lives in [docs/test-guides/FEATURE_134_v0.7.40_TEST_GUIDE.md](docs/test-guides/FEATURE_134_v0.7.40_TEST_GUIDE.md) once that doc's "round-boundary cache + re-read" section is appended. **Behaviour delta vs FEATURE_076 baseline**: the v0.7.25 cross-round-coherence guarantee is preserved (stale role prompts still get stripped); only the over-corrective full-transcript replacement is reverted. Full test suite green at HEAD: **5745 tests pass** (was 5742; +3 new edge-case tests).
88
+ - **FEATURE_168 — AMA agent tool wiring (exclude-based, registry as source of truth)**. Commit `56330d1c` + doc `e902b194`. Root cause investigation surfaced from production trace 2026-05-15 (zhipu/glm51 Worker blocked at `emit_handoff` pending-children gate, told by the gate's own error message to call `task_stop`, model honestly responded that `task_stop` "is not registered as a callable tool" — and the model was correct). `runner-driven.ts::buildRunnerAgentChain` had used **include-mode hand-written `agent.tools` arrays** for all 5 AMA roles (Scout / Planner / Generator / Evaluator / Worker) since v0.7.26, while the SA path defaults to `listToolDefinitions()` minus `excludeTools`. The two paths drifted across three features: **FEATURE_120 v0.7.39** registered `send_message` + `task_stop`, taught them in the Worker prompt, gated `emit_handoff` with a "call `task_stop` first" error message — but never wired the `RunnableTool` instances into any AMA agent's tools array. **FEATURE_161 v0.7.40 prompt teaching** taught the Worker 8 repo-intel pull tools (`module_context` / `symbol_context` / `process_context` / `impact_estimate` + 4 shallow ones), `worker-role-prompt.ts:127` comment claimed "the 8 pull tools get stripped from the LLM-visible tool list (see agent-runtime/tool-resolution.ts)", but AMA path doesn't go through `tool-resolution.ts` — it reads `agent.tools` directly, so only 4 of the 8 ever landed in the schema. **Web tools / ask_user_question / worktree_create / worktree_remove / insert_after_anchor / undo**: registered + `permission` system gave them entries, but no AMA agent could see them. Total **17 registered tools silently dropped** from production AMA; no test layer caught it because no test asserted "agent.tools actually contains a schema entry with this name". Fix: switch AMA path to exclude-based wiring matching SA semantics. 
New `buildAgentToolsFromRegistry(role, ctx, budget, events, overrides)` helper enumerates `listToolDefinitions()` and applies `AMA_BASELINE_EXCLUDE ∪ <ROLE>_EXTRA_EXCLUDE`. Role-specific wraps (mutation-guarded bash/write/edit/multi_edit for Generator/Worker, read-only bash for Evaluator, `dispatch_child_task` per-role drain wrappers, FEATURE_097 throttle-aware `todo_update`) flow through the `overrides` map. Evaluator security boundary made **architectural, not prompt-dependent**: write/edit/multi_edit/insert_after_anchor/undo/dispatch_child_task/send_message/task_stop/worktree_create/worktree_remove/exit_plan_mode/todo_update/ask_user_question all hard-excluded from `EVALUATOR_EXTRA_EXCLUDE` — a prompt-jailbroken or tool-confused Evaluator is physically unable to mutate, dispatch, or change plan state. Planner kept as read-only inspection role (no bash, no mutation, no dispatch, no user interaction). Scout/Worker carry the full execution surface. Generator unchanged behaviorally; new tools (web/repo-intel/coordinator) added to schema. New contract test [packages/coding/src/task-engine/runner-driven-tool-wiring.test.ts](packages/coding/src/task-engine/runner-driven-tool-wiring.test.ts) pins each role's full tool-name set against `getAmaRoleExpectedToolNames(role)` derivation plus spot-checks for FEATURE_120 / FEATURE_161 coverage / Evaluator boundary / Planner boundary / no-orphan invariant — 50 assertions total. Any future EXCLUDE-set change or registry addition surfaces as a concrete test failure, not a silent production schema gap. **Test impact**: 2507/2507 coding-package tests pass (240 files, +50 new contract assertions). No regression in `runner-driven.test.ts` (130 tests including FEATURE_165 race-regression), `child-executor.test.ts` (30), `task-stop` / `send-message` handler tests (28), `worker-role-prompt.test.ts`. 
Design doc: [docs/features/v0.7.40.md#feature_168--ama-agent-tool-wiringexclude-based-registry-as-source-of-truth](docs/features/v0.7.40.md#feature_168--ama-agent-tool-wiringexclude-based-registry-as-source-of-truth).
89
+ - **FEATURE_169 — Pull-Tool Prompt Adoption Hardening** (commit `519af4b9`). Production trace after FEATURE_161 wiring + FEATURE_168 schema fix surfaced 3 residual adoption gaps the wiring alone did not close: (1) **Worker hand-feeding bash in `dispatch_child_task.objective`** — 18% of production dispatch (3/17 in 2026-05-15 audit) embedded literal `git diff v0.7.39..HEAD`-style command directives, overriding the child's prompt-side tool teaching; 0/17 objectives recommended a pull-tool family. (2) **Child agent prompt stayed read+grep-first** — `CHILD_AGENT_SYSTEM_PROMPT` had taught "3-8 parallel tool calls (glob + grep + key file reads)" since v0.7.18 with no mention of pull-tools; children defaulted to grep/read in review and exploration tasks. (3) **Worker self-review still picked `bash git diff` first** — F7 taught tool existence but not "which one to use for review tasks". Four localized prompt strengthenings (F0a / F0b / F1v2 / F3) ship; F2 (3-tier order injection) **rejected** post-eval as zero-value churn. F0a teaches Worker to keep dispatch objective as data ("scope: v0.7.39..HEAD") not command ("使用 `git diff v0.7.39..HEAD`"); F0b adds an explicit pull-tool-family recommendation to the dispatch objective teaching; F1v2 adds child-agent reverse-steering toward pull-tools when the task is review/exploration-shaped; F3 reframes the Worker change-review surface from `bash git diff` first to `module_context` / `symbol_context` first. No wiring change, no new tool, no new permission. Prompt eval (`tests/feature-169-pull-tool-adoption.eval.ts`) clears SHIP gate (4/4 aliases, F0a/F0b/F1v2/F3 each ≥80% on 3 cases). Design doc: `docs/features/v0.7.40.md#feature_169--pull-tool-prompt-adoption-hardening-worker-dispatch-objective--child-reverse-steering--change-review-reframe`.
90
+ - **FEATURE_134 follow-up: vision capability flag widened to 9 additional providers**. User empirically validated 2026-05-13 that `kimi-code` accepts and processes image input despite its v0.7.40 RC snapshot flag claiming `multimodalSupport: 'none'`. Root cause: AMA path (`runner-driven.ts` `provider.stream` direct call) bypasses the SA-path `applyProviderPolicyGate` (`run-substrate.ts:660`) where multimodal block enforcement lives, so the latent flag mismatch was never observable in production. The pre-v0.7.40 RC flag was over-conservative: every Anthropic-compat clone inherits the image-block forwarding serializer at `anthropic.ts:770`, and every OpenAI-compat clone inherits the `image_url` forwarding serializer at `openai.ts:904`. Flag widened in `packages/ai/src/providers/registry.ts` from `NATIVE_PROVIDER_CAPABILITY_PROFILE` to `IMAGE_INPUT_NATIVE_PROVIDER_CAPABILITY_PROFILE` for: **Anthropic-compat clones** — `kimi-code`, `zhipu-coding`, `mimo-coding`, `ark-coding`, `minimax-coding`; **OpenAI-compat clones** — `deepseek`, `kimi`, `qwen`, `zhipu`. Plus a separate follow-up commit (`71d45783`) wires **Gemini-CLI** vision via the new `serializeImageBlockToPromptToken` extension point on `KodaXAcpProvider` (Gemini CLI 2.x `@<path>` file-include syntax), bringing the final post-release total to **12 vision-capable providers (was 2)** — only `codex-cli` remains text-only because `codex exec --json --full-auto` has no image-input surface today. Production semantics: the flag controls only KodaX's SA-path artificial block; the actual model-level vision contract remains the upstream provider's responsibility — if a specific model alias is text-only, users now see the real API error from the provider instead of a KodaX-side `[Provider Policy] multimodal requests are unsupported` rejection. 
`capability-profile.test.ts` gains an explicit pin test asserting all 12 vision-capable providers report `multimodalSupport === 'image-input'` and `codex-cli` reports `'none'` so future regressions are caught.
91
+
92
+ ### Internal / architecture
93
+
94
+ - **Directory rename: `packages/ai/` → `packages/llm/` for package-name parity**. Closes the directory-vs-npm-name discrepancy left behind by FEATURE_147 v0.7.37, which renamed the npm package from `@kodax/ai` to `@kodax-ai/llm` but left the on-disk directory at `packages/ai/`. New contributors had to mentally translate "ai is actually llm"; prompt examples and tool-description hints saying "Audit packages/ai" pointed to a path that disagreed with the package name. Scope: `git mv packages/ai packages/llm` + 8 tsconfig path/reference updates (root + agent + coding + mcp + session-lineage) + 2 real relative imports in `tests/feature-116-active-cache-control.eval.ts` + ~30 prompt/comment/eval-fixture string updates (`worker-role-prompt.ts` / `role-prompt.ts` / `tools/registry.ts` / `tools/todo-list.test.ts` / `paste/persist-image.ts` / 5 `tests/*.eval.ts` files / 5 `benchmark/datasets/*` fixture files including `h2-plan-execute-boundary/cases.ts` path-prefix `mustNotTouchFiles` assertions). Frozen historical artifacts NOT touched: `benchmark/results/**` snapshots, `.agent/**` runner cache, `.repointel/**` index cache, `docs/CHANGELOG_ARCHIVE.md` + `docs/features/v0.7.0-39.md` historical design records, and `.claude/settings.local.json` user-private allowlist. Active operational docs (CHANGELOG, README, README_CN, DD.md, KNOWN_ISSUES.md, root CLAUDE.md, docs/CLAUDE.md) all updated. No behavior change; git rename detection preserves blame across the move. Pure naming-consistency refactor.
95
+ - **`@kodax/coding` blob summarizer module** (`packages/coding/src/tools/blob-summarizer.ts`, 187 lines). `createBlobSummarizer({provider, model, timeoutMs?})` returns a `SummarizeBlob` callback. Combines caller's `abortSignal` with a 30-second timeout via a fresh `AbortController` + listener fan-out. Throws `BlobSummarizerError` on empty input / empty output / provider error. System prompt + user prompt builder exported as constants (`SUMMARIZER_SYSTEM_PROMPT`, `buildSummarizerUserMessage`) so the Layer 2 eval can pin the EXACT production prompt text. 8 deterministic-shell unit tests cover the contract surface (timeout / abort / empty rejection / error wrapping / threshold constants).
96
+ - **`KodaXToolExecutionContext.summarizeBlob?` field** (`packages/coding/src/types.ts`). Optional callback shape `(content: string, options?: {readonly maxChars?: number; readonly abortSignal?: AbortSignal}) => Promise<string>`. Wired in `runner-driven.ts` baseCtx with lazy-once memoization bound to the Worker's own provider/model. The dispatch tool calls it via `ctx.summarizeBlob` without owning provider construction — preserves `@kodax/agent` layer independence (agent stays unaware of LLM client).
97
+ - **`GuardedToolResult.spillFailed?` flag** (`packages/coding/src/tools/tool-result-policy.ts`). Set when `persistToolOutput` throws and content was returned inline as the data-loss-guard fallback. Callers that need an LLM-summary follow-up (`dispatch-child-tasks` for `child_task_summary` >100KB) branch on this flag. The `console.warn` is intentionally NOT gated on `KODAX_DEBUG_TOOL_GUARDRAILS` — disk failure is a severe operational event an operator must see immediately.
98
+ - **`applyChildSummaryGuardrailWithSummarizer` helper in `dispatch-child-tasks.ts`**. Unifies the 4 dispatch call sites (async success / async crash / sync success / sync failure) through a single helper that chains: `applyToolResultGuardrail` → if `spillFailed` + content > 100KB + `ctx.summarizeBlob` exists, attempt LLM summarize → on success, banner-wrap with LOSSY marker → on summarizer failure, console.warn + fall back to inline full content with emergency banner. Symmetric error handling across all 4 sites; banner strings consistent.
99
+ - **2 tracker entries marked Released**: FEATURE_121 (Envelope Spillover Gap-Fix) and FEATURE_134 (Image / Screenshot Paste Input). Both originally `Planned` → now `v0.7.40 Released`. FEATURE_LIST.md `Current released version` bumped from v0.7.39 to v0.7.40.
100
+ - **Layer 2 eval datasets** at `benchmark/datasets/feature-121-envelope-spillover/` (3 cases: `preview_sufficient` / `detail_required` / `inline_no_spillover`) and `benchmark/datasets/feature-121-blob-summarizer/` (2 cases: `audit_report` / `grep_findings`). Eval drivers at `tests/feature-121-envelope-spillover.eval.ts` and `tests/feature-121-blob-summarizer.eval.ts` (gated on `KODAX_EVAL_F121_SUMMARIZER=1`). Raw outputs preserved per `EVAL_GUIDELINES.md` §"Raw output preservation" under `os.tmpdir()/kodax-eval-dumps/feature-121-*/`.
101
+
102
+ ### Test coverage delta
103
+
104
+ - New: 8 blob-summarizer unit tests + 25 blob-summarizer dataset shape tests + 27 envelope-spillover dataset shape tests + 9 tool-output-gc tests + 4 new worker-role-prompt tests (LARGE CHILD OUTPUT block + spill-path hint). Plus FEATURE_134's 52 new tests (45 foundation + 7 integration in `prompt-input-controller`).
105
+ - Total green at HEAD: **5745 tests pass** (507 test files, 1 skipped, 23 todo).
106
+
7
107
  ## [0.7.39] - 2026-05-12
8
108
 
9
109
  ### Theme
package/README.md CHANGED
@@ -81,6 +81,37 @@ If you need a custom base URL or an OpenAI/Anthropic-compatible endpoint, define
81
81
 
82
82
  `userAgentMode` defaults to `"compat"`, which sends `KodaX` instead of the official SDK User-Agent. Switch it to `"sdk"` only when your gateway expects the upstream SDK header.
83
83
 
84
+ #### Opting a custom provider into image / vision input (FEATURE_134 v0.7.40)
85
+
86
+ If your custom provider's underlying model supports image input (vision), add a `capabilityProfile.multimodalSupport: "image-input"` block so KodaX does not artificially block multimodal requests at the SA-path policy gate. The 12 built-in vision-capable providers (Anthropic, OpenAI, the 9 Anthropic-/OpenAI-compat clones — DeepSeek, Kimi, Kimi-code, Qwen, Zhipu, Zhipu-coding, MiniMax-coding, MiMo-coding, Ark-coding — plus Gemini-CLI via the CLI's `@<path>` file-include syntax) already ship with this flag enabled by default. Codex-CLI remains text-only because `codex exec` has no image-input surface today, so custom providers are the only ones that need to opt in.
87
+
88
+ ```json
89
+ {
90
+ "customProviders": [
91
+ {
92
+ "name": "my-vision-provider",
93
+ "protocol": "openai",
94
+ "baseUrl": "https://example.com/v1",
95
+ "apiKeyEnv": "MY_LLM_API_KEY",
96
+ "model": "my-vision-model",
97
+ "capabilityProfile": {
98
+ "transport": "native-api",
99
+ "conversationSemantics": "full-history",
100
+ "mcpSupport": "none",
101
+ "contextFidelity": "full",
102
+ "toolCallingFidelity": "full",
103
+ "sessionSupport": "full",
104
+ "longRunningSupport": "full",
105
+ "multimodalSupport": "image-input",
106
+ "evidenceSupport": "full"
107
+ }
108
+ }
109
+ ]
110
+ }
111
+ ```
112
+
113
+ The serializer layer (`packages/llm/src/providers/anthropic.ts:770` for Anthropic-compat, `openai.ts:904` for OpenAI-compat) forwards image blocks automatically through base-class inheritance. The flag only gates whether KodaX's policy layer pre-rejects multimodal requests — the model-level vision contract remains your upstream provider's responsibility. If the model is actually text-only, you'll see the real upstream API error instead of a KodaX-side rejection.
114
+
84
115
  ### 3. Start in REPL or run a one-shot task
85
116
 
86
117
  ```bash
@@ -674,6 +705,33 @@ kodax -h team # Multi-agent parallel execution
674
705
  kodax -h print # Print configuration
675
706
  ```
676
707
 
708
+ ### Environment Variables
709
+
710
+ KodaX recognizes a number of environment variables for tuning runtime behavior. The most commonly used ones are listed below; for the full list, search the repo for `process.env.KODAX_`.
711
+
712
+ #### `KODAX_MAX_OUTPUT_TOKENS`
713
+
714
+ Overrides the per-turn `max_tokens` value sent to **every** provider (Anthropic, OpenAI, Zhipu, Kimi, MiniMax, Qwen, DeepSeek, MiMo, Gemini, Codex, …). Set to a positive integer; unset or non-numeric values are ignored. This is an **explicit user intent**: when set, it wins over the provider's model descriptor cap, over the provider config default, and over the global `KODAX_MAX_TOKENS` fallback. The runtime's automatic safety caps (e.g. the v0.7.28 P2b RST-prone write-turn cap that limits write/edit turns to 8K tokens on Zhipu/Kimi/MiniMax) are **bypassed** when this variable is set, so the user override is also a way to opt out of those caps.
715
+
716
+ ```bash
717
+ # Allow up to 48K output tokens per turn (use a higher cap when generating long files)
718
+ export KODAX_MAX_OUTPUT_TOKENS=48000
719
+ kodax "generate the full implementation"
720
+
721
+ # Unset to restore default behavior
722
+ unset KODAX_MAX_OUTPUT_TOKENS
723
+ ```
724
+
725
+ Precedence used by every provider's `getEffectiveMaxOutputTokens()` (see `packages/llm/src/providers/base.ts`):
726
+
727
+ 1. One-shot per-request override (agent-loop escalation / context-overflow recovery — internal)
728
+ 2. **`KODAX_MAX_OUTPUT_TOKENS`** (this variable, explicit user intent)
729
+ 3. Active model descriptor's `maxOutputTokens` (FEATURE_098 per-model cap)
730
+ 4. Provider config default
731
+ 5. Global `KODAX_MAX_TOKENS` fallback
732
+
733
+ Related variables: `KODAX_MAX_TOKENS` (global fallback when no provider/model cap applies), `KODAX_RST_PRONE_PROVIDERS` and `KODAX_WRITE_TURN_MAX_TOKENS` (v0.7.28 P2b write-turn safety cap configuration), `KODAX_ESCALATED_MAX_OUTPUT_TOKENS` (escalation budget used by the agent loop when a turn returns `stop_reason: max_tokens`).
734
+
677
735
  ## Advanced Library Usage
678
736
 
679
737
  #### Simple Mode (runKodaX)
package/README_CN.md CHANGED
@@ -131,6 +131,37 @@ import { loadConfig } from '@kodax-ai/kodax/repl'; // REPL 配置 / session
131
131
 
132
132
  `userAgentMode` 默认 `"compat"`(发送 `KodaX` 而非上游 SDK 的 User-Agent);如果你的网关要求原生 SDK header,再切到 `"sdk"`。
133
133
 
134
+ #### 给自定义 provider 开图片 / vision 输入(FEATURE_134 v0.7.40)
135
+
136
+ 如果你的自定义 provider 后面的模型支持 vision,加 `capabilityProfile.multimodalSupport: "image-input"` 显式开启,KodaX 的 SA-path policy gate 就不会人为拦截多模态请求。内置的 12 个 vision-capable provider(Anthropic、OpenAI、9 个 Anthropic-/OpenAI-compat clone:DeepSeek / Kimi / Kimi-code / Qwen / Zhipu / Zhipu-coding / MiniMax-coding / MiMo-coding / Ark-coding,加 Gemini-CLI 通过 CLI 的 `@<path>` file-include 语法)已经默认开了这个 flag。Codex-CLI 因为 `codex exec` 目前没有图片输入通道,仍然是 text-only;需要手动 opt-in 的只有自定义 provider。
137
+
138
+ ```json
139
+ {
140
+ "customProviders": [
141
+ {
142
+ "name": "my-vision-provider",
143
+ "protocol": "openai",
144
+ "baseUrl": "https://example.com/v1",
145
+ "apiKeyEnv": "MY_LLM_API_KEY",
146
+ "model": "my-vision-model",
147
+ "capabilityProfile": {
148
+ "transport": "native-api",
149
+ "conversationSemantics": "full-history",
150
+ "mcpSupport": "none",
151
+ "contextFidelity": "full",
152
+ "toolCallingFidelity": "full",
153
+ "sessionSupport": "full",
154
+ "longRunningSupport": "full",
155
+ "multimodalSupport": "image-input",
156
+ "evidenceSupport": "full"
157
+ }
158
+ }
159
+ ]
160
+ }
161
+ ```
162
+
163
+ 序列化层(Anthropic-compat 走 `packages/llm/src/providers/anthropic.ts:770`,OpenAI-compat 走 `openai.ts:904`)通过基类继承自动转发 image block。这个 flag 只控制 KodaX 自身是否预先拒绝多模态请求 —— 上游模型到底支不支持 vision 由 provider 自己决定。如果模型实际是 text-only,你会看到真实的上游 API 错误,而不是 KodaX 一侧的 `[Provider Policy] multimodal requests are unsupported` 预拦截。
164
+
134
165
  库模式下用 `registerCustomProviders()` 显式注册:
135
166
 
136
167
  ```typescript
@@ -1,2 +1,2 @@
1
1
  // @kodax-ai/kodax — bundled distribution. See docs/ADR.md ADR-022 + ADR-024.
2
- import{ic as A,kc as M}from"./chunk-7LQ2NCHF.js";import{ia as O}from"./chunk-N2VZ2MJF.js";import{$ as m,S as w,T as S,U as x,V as I,W as L,X as h,_ as k,ca as b,ja as R}from"./chunk-XI75LZIO.js";import{a as i}from"./chunk-4E76FLZ3.js";import a from"fs";import c from"path";import{exec as F,spawnSync as G}from"child_process";import{promisify as V}from"util";import{fileURLToPath as q}from"url";var E="en";function U(){return(process.env.LC_ALL||process.env.LC_MESSAGES||process.env.LANG||"").toLowerCase().startsWith("zh")?"zh":"en"}i(U,"detectSystemLocale");function N(e){if(!e||e==="auto"){E=U();return}E=e.toLowerCase().replace(/[-_].*/,"")==="zh"?"zh":"en"}i(N,"setLocale");var T={en:{"dialog.confirm":"[Confirm]","dialog.select":"[Select]","dialog.input":"[Input]","confirm.instruction.basic":"Press (y) yes, (n) no","confirm.instruction.always":"Press (y) yes, (a) always yes for this tool, (n) no","confirm.instruction.protected":"Press (y) to confirm, (n) to cancel (protected path)","confirm.result.approved":"Approved","confirm.result.approved_always":"Approved (always)","confirm.result.denied":"Denied","tool.bash.title":"Execute bash command?","tool.shell.title":"Execute shell command?","tool.write.title":"Write to file?","tool.edit.title":"Edit file?","tool.generic.title":"Execute {tool}?","field.reason":"Reason","field.intent":"Intent","field.target":"Target","field.scope":"Scope","field.risk":"Risk","field.summary":"Summary","intent.read":"Read project files","intent.delete":"Delete files","intent.deps":"Modify dependencies or environment","intent.modify":"Modify files","intent.execute":"Execute command","intent.write_file":"Write file","intent.edit_file":"Edit file","intent.use_tool":"Use {tool}","risk.destructive":"Destructive change","risk.deps":"May change dependencies or local tools","risk.modify":"May modify files","risk.unknown":"Command effects depend on its arguments","risk.network":"May access network","scope.outside":"Outside 
project","scope.protected":"Protected path","waiting.confirm":"Waiting: approval required","waiting.select":"Waiting: choose an option","waiting.input":"Waiting: answer the prompt","placeholder.confirm":"Respond to the approval prompt above...","placeholder.select":"Choose an option above...","placeholder.input":"Answer the prompt above...","placeholder.busy":"Agent is busy...","placeholder.queue":"Queue a follow-up for the next round...","placeholder.idle":"Type a message...","select.choice":"Choice:","select.type_number":"(type a number)","select.more":"{count} more choices...","select.more_above":"\u2191 {count} more above","select.more_below":"\u2193 {count} more below","select.confirm_hint":"Press Enter to confirm, Esc to cancel","select.navigate_hint":"Use \u2191\u2193 to navigate, Enter to confirm, Esc to cancel","select.multiselect_hint":"Use \u2191\u2193 to navigate, Space to toggle, Enter to confirm, Esc to cancel","select.multiselect_empty":"Select at least one option with Space before confirming.","select.back_prev":"\u2190 Back to previous question","input.default":"Default:","input.value":"Value:","input.type_response":"(type your response)","managed.completed":"Task completed","managed.completed.blocked":"Task blocked","managed.completed.continuation":"Task needs continuation",cancelled:"[Cancelled] Operation cancelled by user"},zh:{"dialog.confirm":"[\u786E\u8BA4]","dialog.select":"[\u9009\u62E9]","dialog.input":"[\u8F93\u5165]","confirm.instruction.basic":"\u6309 (y) \u786E\u8BA4, (n) \u62D2\u7EDD","confirm.instruction.always":"\u6309 (y) \u786E\u8BA4, (a) \u59CB\u7EC8\u5141\u8BB8\u6B64\u5DE5\u5177, (n) \u62D2\u7EDD","confirm.instruction.protected":"\u6309 (y) \u786E\u8BA4, (n) \u53D6\u6D88 (\u53D7\u4FDD\u62A4\u8DEF\u5F84)","confirm.result.approved":"\u5DF2\u6279\u51C6","confirm.result.approved_always":"\u5DF2\u6279\u51C6 (\u59CB\u7EC8\u5141\u8BB8)","confirm.result.denied":"\u5DF2\u62D2\u7EDD","tool.bash.title":"\u6267\u884C bash 
\u547D\u4EE4\uFF1F","tool.shell.title":"\u6267\u884C shell \u547D\u4EE4\uFF1F","tool.write.title":"\u5199\u5165\u6587\u4EF6\uFF1F","tool.edit.title":"\u7F16\u8F91\u6587\u4EF6\uFF1F","tool.generic.title":"\u6267\u884C {tool}\uFF1F","field.reason":"\u539F\u56E0","field.intent":"\u610F\u56FE","field.target":"\u76EE\u6807","field.scope":"\u8303\u56F4","field.risk":"\u98CE\u9669","field.summary":"\u6458\u8981","intent.read":"\u8BFB\u53D6\u9879\u76EE\u6587\u4EF6","intent.delete":"\u5220\u9664\u6587\u4EF6","intent.deps":"\u4FEE\u6539\u4F9D\u8D56\u6216\u73AF\u5883","intent.modify":"\u4FEE\u6539\u6587\u4EF6","intent.execute":"\u6267\u884C\u547D\u4EE4","intent.write_file":"\u5199\u5165\u6587\u4EF6","intent.edit_file":"\u7F16\u8F91\u6587\u4EF6","intent.use_tool":"\u4F7F\u7528 {tool}","risk.destructive":"\u7834\u574F\u6027\u53D8\u66F4","risk.deps":"\u53EF\u80FD\u4FEE\u6539\u4F9D\u8D56\u6216\u672C\u5730\u5DE5\u5177","risk.modify":"\u53EF\u80FD\u4FEE\u6539\u6587\u4EF6","risk.unknown":"\u547D\u4EE4\u6548\u679C\u53D6\u51B3\u4E8E\u53C2\u6570","risk.network":"\u53EF\u80FD\u8BBF\u95EE\u7F51\u7EDC","scope.outside":"\u9879\u76EE\u5916\u90E8","scope.protected":"\u53D7\u4FDD\u62A4\u8DEF\u5F84","waiting.confirm":"\u7B49\u5F85\u4E2D\uFF1A\u9700\u8981\u5BA1\u6279","waiting.select":"\u7B49\u5F85\u4E2D\uFF1A\u8BF7\u9009\u62E9","waiting.input":"\u7B49\u5F85\u4E2D\uFF1A\u8BF7\u56DE\u7B54","placeholder.confirm":"\u8BF7\u56DE\u5E94\u4E0A\u65B9\u7684\u5BA1\u6279\u63D0\u793A...","placeholder.select":"\u8BF7\u5728\u4E0A\u65B9\u9009\u62E9\u4E00\u4E2A\u9009\u9879...","placeholder.input":"\u8BF7\u56DE\u7B54\u4E0A\u65B9\u7684\u63D0\u793A...","placeholder.busy":"\u4EE3\u7406\u6B63\u5728\u5DE5\u4F5C\u4E2D...","placeholder.queue":"\u6392\u961F\u7B49\u5F85\u4E0B\u4E00\u8F6E\u8DDF\u8FDB...","placeholder.idle":"\u8F93\u5165\u6D88\u606F...","select.choice":"\u9009\u9879\uFF1A","select.type_number":"(\u8F93\u5165\u7F16\u53F7)","select.more":"\u8FD8\u6709 {count} 
\u4E2A\u9009\u9879...","select.more_above":"\u2191 \u4E0A\u65B9\u8FD8\u6709 {count} \u4E2A","select.more_below":"\u2193 \u4E0B\u65B9\u8FD8\u6709 {count} \u4E2A","select.confirm_hint":"\u6309 Enter \u786E\u8BA4\uFF0CEsc \u53D6\u6D88","select.navigate_hint":"\u4F7F\u7528 \u2191\u2193 \u5BFC\u822A\uFF0CEnter \u786E\u8BA4\uFF0CEsc \u53D6\u6D88","select.multiselect_hint":"\u4F7F\u7528 \u2191\u2193 \u5BFC\u822A\uFF0C\u7A7A\u683C \u5207\u6362\u9009\u4E2D\uFF0CEnter \u786E\u8BA4\uFF0CEsc \u53D6\u6D88","select.multiselect_empty":"\u8BF7\u5148\u4F7F\u7528\u7A7A\u683C\u9009\u62E9\u81F3\u5C11\u4E00\u4E2A\u9009\u9879\u3002","select.back_prev":"\u2190 \u8FD4\u56DE\u4E0A\u4E00\u9898","input.default":"\u9ED8\u8BA4\u503C\uFF1A","input.value":"\u503C\uFF1A","input.type_response":"(\u8F93\u5165\u4F60\u7684\u56DE\u7B54)","managed.completed":"\u4EFB\u52A1\u5B8C\u6210","managed.completed.blocked":"\u4EFB\u52A1\u53D7\u963B","managed.completed.continuation":"\u4EFB\u52A1\u9700\u8981\u7EE7\u7EED",cancelled:"[\u5DF2\u53D6\u6D88] \u64CD\u4F5C\u5DF2\u88AB\u7528\u6237\u53D6\u6D88"}};function pe(e,t){let o=T[E][e]??T.en[e]??e;if(t)for(let[r,l]of Object.entries(t))o=o.replace(`{${r}}`,String(l));return o}i(pe,"t");var W=V(F),X=O(),Ce=c.join(X,"sessions"),d=c.join(X,"config.json"),we=60,s=null,_=!1,J="dumb";function Q(e){let t=c.basename(e).toLowerCase(),n="__KODAX_SHELL_ENV_START__",o=`printf '%s\\0' '${n}'; env -0`;return t==="fish"?{args:["-i","-c",o],sentinel:n}:{args:t==="bash"||t==="zsh"?["-ic",o]:["-lc",o],sentinel:n}}i(Q,"buildShellEnvCommand");function Y(e,t){let n=`${t}\0`,o=e.lastIndexOf(n);if(o===-1)return{};let r=e.slice(o+n.length),l={};for(let u of r.split("\0")){if(!u)continue;let p=u.indexOf("=");p<=0||(l[u.slice(0,p)]=u.slice(p+1))}return l}i(Y,"parseNullDelimitedShellEnv");function Z(e={}){let t=e.env??process.env;if((e.platform??process.platform)==="win32"||t.KODAX_DISABLE_SHELL_ENV_HYDRATION==="1")return!1;let 
o=e.shell??t.SHELL;if(!o||!c.isAbsolute(o))return!1;let{args:r,sentinel:l}=Q(o),u=e.run??G,p={...t,TERM:J},f=u(o,r,{encoding:"utf8",env:p,maxBuffer:1024*1024,timeout:5e3,windowsHide:!0,detached:!0,stdio:["ignore","pipe","pipe"]});if(f.status!==0||!f.stdout)return!1;let H=typeof f.stdout=="string"?f.stdout:f.stdout.toString("utf8"),j=Y(H,l),C=!1;for(let[v,z]of Object.entries(j))v!=="TERM"&&t[v]===void 0&&(t[v]=z,C=!0);return C}i(Z,"hydrateProcessEnvFromShell");function ee(){if(!_){_=!0;try{Z()}catch{}}}i(ee,"ensureShellEnvironmentHydrated");function Se(){_=!1}i(Se,"resetShellEnvironmentHydrationForTesting");function te(e){k(e.customProviders??[])}i(te,"registerConfiguredCustomProviders");function $(e){if(!Array.isArray(e))return;let t=e.filter(n=>typeof n=="string").map(n=>n.trim()).filter(n=>n.length>0);return t.length>0?t:[]}i($,"normalizeConfiguredExtensions");function ne(e){if(e.permissionMode!=="default")return e;let t={...e,permissionMode:"accept-edits"};try{a.mkdirSync(c.dirname(d),{recursive:!0}),a.writeFileSync(d,JSON.stringify(t,null,2))}catch{}return t}i(ne,"migrateLegacyPermissionModeInConfig");function oe(){if(s)return s;let e=process.env.KODAX_VERSION;if(e)return s=e,s;let t=c.join(c.dirname(q(import.meta.url)),"../../package.json");if(a.existsSync(t))try{return s=JSON.parse(a.readFileSync(t,"utf-8")).version??"0.0.0",s??"0.0.0"}catch{}return s="0.0.0",s}i(oe,"getVersion");var xe=oe();function Ie(e){return S(e)}i(Ie,"getProviderModel");function P(e,t){let n=new Set(e.map(r=>r.toLowerCase())),o=[...e];for(let r of t)n.has(r.toLowerCase())||o.push(r);return o}i(P,"mergeModels");function ie(e,t){t||(t=y().providerModels);let n=t?.[e];if(n&&n.length>0){try{let o=h(e);if(o.length>0)return P(n,o)}catch{}try{let o=m(e);if(o)return P(n,o.getAvailableModels())}catch{}return n}try{let o=h(e);if(o.length>0)return o}catch{}try{let o=m(e);if(o)return o.getAvailableModels()}catch{}return[]}i(ie,"getProviderAvailableModels");function re(e,t){let 
n=x(e,t);if(n!=="unknown")return n;try{let o=m(e);if(o)return o.getReasoningCapability(t)}catch{}return"unknown"}i(re,"getProviderReasoningCapability");function se(e){let t=I(e);if(t)return t;try{return b().find(o=>o.name===e)?.capabilityProfile??null}catch{return null}}i(se,"getProviderCapabilityProfile");function B(e,t){let n=se(e),o=re(e,t);if(n)return{capabilityProfile:n,reasoningCapability:o};try{let r=R(e);return{capabilityProfile:r.getCapabilityProfile(),reasoningCapability:r.getReasoningCapability(t)}}catch{return null}}i(B,"getProviderCapabilityMetadata");function Le(e,t){let n=B(e,t);return n?A({providerName:e,model:t,capabilityProfile:n.capabilityProfile,reasoningCapability:n.reasoningCapability==="unknown"?void 0:n.reasoningCapability}):null}i(Le,"getProviderCapabilitySnapshot");function ae(e,t,n,o){let r=B(e,t);return r?M({providerName:e,model:t,capabilityProfile:r.capabilityProfile,reasoningCapability:r.reasoningCapability==="unknown"?void 0:r.reasoningCapability,reasoningMode:n,hints:o}):null}i(ae,"getProviderPolicyDecision");function ke(e){let t=e.transport==="cli-bridge"?"CLI bridge":"Native API",n=e.conversationSemantics==="last-user-message"?"forwards only the latest user message":"preserves full conversation history",o=e.mcpSupport==="native"?"MCP available":"MCP unavailable";return`${t}; ${n}; ${o}`}i(ke,"describeProviderCapabilitySummary");function Re(e){switch(e){case"native-budget":return"B";case"native-effort":return"E";case"native-toggle":return"T";default:return"-"}}i(Re,"formatReasoningCapabilityShort");function ce(e){switch(e){case"native-budget":return"budget";case"native-effort":return"effort";case"native-toggle":return"toggle";default:return"none"}}i(ce,"describeReasoningCapabilityControl");function Oe(e,t){if(e==="off")return"Reasoning disabled";switch(t){case"native-budget":return"Uses native thinking budget control";case"native-effort":return"Uses native reasoning effort control";case"native-toggle":return"Uses provider-native 
thinking toggle only";case"none":return"Runs without native reasoning parameters";case"prompt-only":return"Uses prompt overlays only; no native reasoning parameter";default:return"Runs without native reasoning parameters"}}i(Oe,"describeReasoningExecution");function Ae(e){let t=[];e||(e=y().providerModels);for(let n of L())t.push({name:n.name,model:n.model,models:ie(n.name,e),configured:n.capabilityProfile.transport==="cli-bridge"?!0:n.configured,reasoningCapability:n.reasoningCapability,capabilityProfile:n.capabilityProfile});try{let n=b().map(o=>({...o,models:(()=>{let r=e?.[o.name];return r&&r.length>0?P(r,o.models):o.models})()}));t.push(...n)}catch{}return t}i(Ae,"getProviderList");function Me(e){if(w(e))return!0;try{return m(e)?.isConfigured()??!1}catch{return!1}}i(Me,"isProviderConfigured");function y(){try{if(a.existsSync(d)){let e=JSON.parse(a.readFileSync(d,"utf-8")),t=e.reasoningCeiling??e.reasoningMode;return ne({...e,reasoningMode:t,extensions:$(e.extensions)})}}catch{}return{}}i(y,"loadConfig");function le(e){e.streamIdleTimeoutMs&&!process.env.KODAX_STREAM_IDLE_TIMEOUT_MS&&(process.env.KODAX_STREAM_IDLE_TIMEOUT_MS=String(e.streamIdleTimeoutMs))}i(le,"applyResilienceRuntimeEnv");function ue(e){e.repoIntelligenceMode&&!process.env.KODAX_REPO_INTELLIGENCE_MODE&&(process.env.KODAX_REPO_INTELLIGENCE_MODE=e.repoIntelligenceMode),e.repointelEndpoint&&!process.env.KODAX_REPOINTEL_ENDPOINT&&(process.env.KODAX_REPOINTEL_ENDPOINT=e.repointelEndpoint),e.repointelBin&&!process.env.KODAX_REPOINTEL_BIN&&(process.env.KODAX_REPOINTEL_BIN=e.repointelBin),e.repoIntelligenceTrace===!0&&!process.env.KODAX_REPO_INTELLIGENCE_TRACE&&(process.env.KODAX_REPO_INTELLIGENCE_TRACE="1")}i(ue,"applyRepoIntelligenceRuntimeEnv");function Te(){ee();let e=y();return le(e),ue(e),te(e),N(e.locale),e}i(Te,"prepareRuntimeConfig");function Ne(e){let n={...y(),...e},o=$(n.extensions);o!==void 0&&(n.extensions=o);for(let r of Object.keys(e))e[r]===void 0&&delete 
n[r];a.mkdirSync(c.dirname(d),{recursive:!0}),a.writeFileSync(d,JSON.stringify(n,null,2))}i(Ne,"saveConfig");async function De(){try{let{stdout:e}=await W("git rev-parse --show-toplevel");return e.trim()}catch{return null}}i(De,"getGitRoot");function de(e){switch(e){case"builtin":return"Built-in";case"runtime":return"Runtime extension";case"custom":return"Custom config";default:return"Unknown"}}i(de,"formatProviderSourceKind");function Ke(e){let t=e.transport==="cli-bridge"?"CLI bridge":"Native API",n=e.conversationSemantics==="last-user-message"?"latest-user-message only":"full conversation history";return[`Source: ${de(e.sourceKind)}`,`Transport: ${t}`,`Conversation semantics: ${n}`,`Context fidelity: ${e.contextFidelity}`,`Tool calling: ${e.toolCallingFidelity}`,`Session behavior: ${e.sessionSupport}`,`Long-running support: ${e.longRunningSupport}`,`Evidence-heavy flows: ${e.evidenceSupport}`,`Multimodal support: ${e.multimodalSupport}`,`MCP support: ${e.mcpSupport}`,`Reasoning control: ${ce(e.reasoningCapability)}`]}i(Ke,"formatProviderCapabilityDetailLines");function Xe(e,t,n){return[{label:"General coding",hints:{}},{label:"Evidence-heavy review",hints:{evidenceHeavy:!0}},{label:"Long-running task",hints:{longRunning:!0}}].map(r=>({label:r.label,decision:ae(e,t,n,r.hints)})).filter(r=>r.decision!==null)}i(Xe,"getProviderCommonPolicyScenarios");var D=.5,K=0,g={locked:!1,queue:[]};async function $e(e){for(;g.locked;)await new Promise(t=>g.queue.push(t));g.locked=!0;try{let t=(Date.now()-K)/1e3;t<D&&await new Promise(o=>setTimeout(o,(D-t)*1e3));let n=await e();return K=Date.now(),n}finally{g.locked=!1;let t=g.queue.shift();t&&t()}}i($e,"rateLimitedCall");export{pe as a,X as b,Ce as c,d,we as e,Z as f,Se as g,te as h,oe as i,xe as j,Ie as k,ie as l,re as m,se as n,Le as o,ae as p,ke as q,Re as r,ce as s,Oe as t,Ae as u,Me as v,y as w,Te as x,Ne as y,De as z,de as A,Ke as B,Xe as C,$e as D};
2
+ import{pc as A,rc as M}from"./chunk-HYWVRTFA.js";import{ia as O}from"./chunk-SX2IS5JP.js";import{$ as m,S as w,T as S,U as x,V as I,W as L,X as h,_ as k,ca as b,ja as R}from"./chunk-6QO6HWGU.js";import{a as i}from"./chunk-V4WSBIXB.js";import a from"fs";import c from"path";import{exec as F,spawnSync as G}from"child_process";import{promisify as V}from"util";import{fileURLToPath as q}from"url";var E="en";function U(){return(process.env.LC_ALL||process.env.LC_MESSAGES||process.env.LANG||"").toLowerCase().startsWith("zh")?"zh":"en"}i(U,"detectSystemLocale");function N(e){if(!e||e==="auto"){E=U();return}E=e.toLowerCase().replace(/[-_].*/,"")==="zh"?"zh":"en"}i(N,"setLocale");var T={en:{"dialog.confirm":"[Confirm]","dialog.select":"[Select]","dialog.input":"[Input]","confirm.instruction.basic":"Press (y) yes, (n) no","confirm.instruction.always":"Press (y) yes, (a) always yes for this tool, (n) no","confirm.instruction.protected":"Press (y) to confirm, (n) to cancel (protected path)","confirm.result.approved":"Approved","confirm.result.approved_always":"Approved (always)","confirm.result.denied":"Denied","tool.bash.title":"Execute bash command?","tool.shell.title":"Execute shell command?","tool.write.title":"Write to file?","tool.edit.title":"Edit file?","tool.generic.title":"Execute {tool}?","field.reason":"Reason","field.intent":"Intent","field.target":"Target","field.scope":"Scope","field.risk":"Risk","field.summary":"Summary","intent.read":"Read project files","intent.delete":"Delete files","intent.deps":"Modify dependencies or environment","intent.modify":"Modify files","intent.execute":"Execute command","intent.write_file":"Write file","intent.edit_file":"Edit file","intent.use_tool":"Use {tool}","risk.destructive":"Destructive change","risk.deps":"May change dependencies or local tools","risk.modify":"May modify files","risk.unknown":"Command effects depend on its arguments","risk.network":"May access network","scope.outside":"Outside 
project","scope.protected":"Protected path","waiting.confirm":"Waiting: approval required","waiting.select":"Waiting: choose an option","waiting.input":"Waiting: answer the prompt","placeholder.confirm":"Respond to the approval prompt above...","placeholder.select":"Choose an option above...","placeholder.input":"Answer the prompt above...","placeholder.busy":"Agent is busy...","placeholder.queue":"Queue a follow-up for the next round...","placeholder.idle":"Type a message...","select.choice":"Choice:","select.type_number":"(type a number)","select.more":"{count} more choices...","select.more_above":"\u2191 {count} more above","select.more_below":"\u2193 {count} more below","select.confirm_hint":"Press Enter to confirm, Esc to cancel","select.navigate_hint":"Use \u2191\u2193 to navigate, Enter to confirm, Esc to cancel","select.multiselect_hint":"Use \u2191\u2193 to navigate, Space to toggle, Enter to confirm, Esc to cancel","select.multiselect_empty":"Select at least one option with Space before confirming.","select.back_prev":"\u2190 Back to previous question","input.default":"Default:","input.value":"Value:","input.type_response":"(type your response)","managed.completed":"Task completed","managed.completed.blocked":"Task blocked","managed.completed.continuation":"Task needs continuation",cancelled:"[Cancelled] Operation cancelled by user"},zh:{"dialog.confirm":"[\u786E\u8BA4]","dialog.select":"[\u9009\u62E9]","dialog.input":"[\u8F93\u5165]","confirm.instruction.basic":"\u6309 (y) \u786E\u8BA4, (n) \u62D2\u7EDD","confirm.instruction.always":"\u6309 (y) \u786E\u8BA4, (a) \u59CB\u7EC8\u5141\u8BB8\u6B64\u5DE5\u5177, (n) \u62D2\u7EDD","confirm.instruction.protected":"\u6309 (y) \u786E\u8BA4, (n) \u53D6\u6D88 (\u53D7\u4FDD\u62A4\u8DEF\u5F84)","confirm.result.approved":"\u5DF2\u6279\u51C6","confirm.result.approved_always":"\u5DF2\u6279\u51C6 (\u59CB\u7EC8\u5141\u8BB8)","confirm.result.denied":"\u5DF2\u62D2\u7EDD","tool.bash.title":"\u6267\u884C bash 
\u547D\u4EE4\uFF1F","tool.shell.title":"\u6267\u884C shell \u547D\u4EE4\uFF1F","tool.write.title":"\u5199\u5165\u6587\u4EF6\uFF1F","tool.edit.title":"\u7F16\u8F91\u6587\u4EF6\uFF1F","tool.generic.title":"\u6267\u884C {tool}\uFF1F","field.reason":"\u539F\u56E0","field.intent":"\u610F\u56FE","field.target":"\u76EE\u6807","field.scope":"\u8303\u56F4","field.risk":"\u98CE\u9669","field.summary":"\u6458\u8981","intent.read":"\u8BFB\u53D6\u9879\u76EE\u6587\u4EF6","intent.delete":"\u5220\u9664\u6587\u4EF6","intent.deps":"\u4FEE\u6539\u4F9D\u8D56\u6216\u73AF\u5883","intent.modify":"\u4FEE\u6539\u6587\u4EF6","intent.execute":"\u6267\u884C\u547D\u4EE4","intent.write_file":"\u5199\u5165\u6587\u4EF6","intent.edit_file":"\u7F16\u8F91\u6587\u4EF6","intent.use_tool":"\u4F7F\u7528 {tool}","risk.destructive":"\u7834\u574F\u6027\u53D8\u66F4","risk.deps":"\u53EF\u80FD\u4FEE\u6539\u4F9D\u8D56\u6216\u672C\u5730\u5DE5\u5177","risk.modify":"\u53EF\u80FD\u4FEE\u6539\u6587\u4EF6","risk.unknown":"\u547D\u4EE4\u6548\u679C\u53D6\u51B3\u4E8E\u53C2\u6570","risk.network":"\u53EF\u80FD\u8BBF\u95EE\u7F51\u7EDC","scope.outside":"\u9879\u76EE\u5916\u90E8","scope.protected":"\u53D7\u4FDD\u62A4\u8DEF\u5F84","waiting.confirm":"\u7B49\u5F85\u4E2D\uFF1A\u9700\u8981\u5BA1\u6279","waiting.select":"\u7B49\u5F85\u4E2D\uFF1A\u8BF7\u9009\u62E9","waiting.input":"\u7B49\u5F85\u4E2D\uFF1A\u8BF7\u56DE\u7B54","placeholder.confirm":"\u8BF7\u56DE\u5E94\u4E0A\u65B9\u7684\u5BA1\u6279\u63D0\u793A...","placeholder.select":"\u8BF7\u5728\u4E0A\u65B9\u9009\u62E9\u4E00\u4E2A\u9009\u9879...","placeholder.input":"\u8BF7\u56DE\u7B54\u4E0A\u65B9\u7684\u63D0\u793A...","placeholder.busy":"\u4EE3\u7406\u6B63\u5728\u5DE5\u4F5C\u4E2D...","placeholder.queue":"\u6392\u961F\u7B49\u5F85\u4E0B\u4E00\u8F6E\u8DDF\u8FDB...","placeholder.idle":"\u8F93\u5165\u6D88\u606F...","select.choice":"\u9009\u9879\uFF1A","select.type_number":"(\u8F93\u5165\u7F16\u53F7)","select.more":"\u8FD8\u6709 {count} 
\u4E2A\u9009\u9879...","select.more_above":"\u2191 \u4E0A\u65B9\u8FD8\u6709 {count} \u4E2A","select.more_below":"\u2193 \u4E0B\u65B9\u8FD8\u6709 {count} \u4E2A","select.confirm_hint":"\u6309 Enter \u786E\u8BA4\uFF0CEsc \u53D6\u6D88","select.navigate_hint":"\u4F7F\u7528 \u2191\u2193 \u5BFC\u822A\uFF0CEnter \u786E\u8BA4\uFF0CEsc \u53D6\u6D88","select.multiselect_hint":"\u4F7F\u7528 \u2191\u2193 \u5BFC\u822A\uFF0C\u7A7A\u683C \u5207\u6362\u9009\u4E2D\uFF0CEnter \u786E\u8BA4\uFF0CEsc \u53D6\u6D88","select.multiselect_empty":"\u8BF7\u5148\u4F7F\u7528\u7A7A\u683C\u9009\u62E9\u81F3\u5C11\u4E00\u4E2A\u9009\u9879\u3002","select.back_prev":"\u2190 \u8FD4\u56DE\u4E0A\u4E00\u9898","input.default":"\u9ED8\u8BA4\u503C\uFF1A","input.value":"\u503C\uFF1A","input.type_response":"(\u8F93\u5165\u4F60\u7684\u56DE\u7B54)","managed.completed":"\u4EFB\u52A1\u5B8C\u6210","managed.completed.blocked":"\u4EFB\u52A1\u53D7\u963B","managed.completed.continuation":"\u4EFB\u52A1\u9700\u8981\u7EE7\u7EED",cancelled:"[\u5DF2\u53D6\u6D88] \u64CD\u4F5C\u5DF2\u88AB\u7528\u6237\u53D6\u6D88"}};function pe(e,t){let o=T[E][e]??T.en[e]??e;if(t)for(let[r,l]of Object.entries(t))o=o.replace(`{${r}}`,String(l));return o}i(pe,"t");var W=V(F),X=O(),Ce=c.join(X,"sessions"),d=c.join(X,"config.json"),we=60,s=null,_=!1,J="dumb";function Q(e){let t=c.basename(e).toLowerCase(),n="__KODAX_SHELL_ENV_START__",o=`printf '%s\\0' '${n}'; env -0`;return t==="fish"?{args:["-i","-c",o],sentinel:n}:{args:t==="bash"||t==="zsh"?["-ic",o]:["-lc",o],sentinel:n}}i(Q,"buildShellEnvCommand");function Y(e,t){let n=`${t}\0`,o=e.lastIndexOf(n);if(o===-1)return{};let r=e.slice(o+n.length),l={};for(let u of r.split("\0")){if(!u)continue;let p=u.indexOf("=");p<=0||(l[u.slice(0,p)]=u.slice(p+1))}return l}i(Y,"parseNullDelimitedShellEnv");function Z(e={}){let t=e.env??process.env;if((e.platform??process.platform)==="win32"||t.KODAX_DISABLE_SHELL_ENV_HYDRATION==="1")return!1;let 
o=e.shell??t.SHELL;if(!o||!c.isAbsolute(o))return!1;let{args:r,sentinel:l}=Q(o),u=e.run??G,p={...t,TERM:J},f=u(o,r,{encoding:"utf8",env:p,maxBuffer:1024*1024,timeout:5e3,windowsHide:!0,detached:!0,stdio:["ignore","pipe","pipe"]});if(f.status!==0||!f.stdout)return!1;let H=typeof f.stdout=="string"?f.stdout:f.stdout.toString("utf8"),j=Y(H,l),C=!1;for(let[v,z]of Object.entries(j))v!=="TERM"&&t[v]===void 0&&(t[v]=z,C=!0);return C}i(Z,"hydrateProcessEnvFromShell");function ee(){if(!_){_=!0;try{Z()}catch{}}}i(ee,"ensureShellEnvironmentHydrated");function Se(){_=!1}i(Se,"resetShellEnvironmentHydrationForTesting");function te(e){k(e.customProviders??[])}i(te,"registerConfiguredCustomProviders");function $(e){if(!Array.isArray(e))return;let t=e.filter(n=>typeof n=="string").map(n=>n.trim()).filter(n=>n.length>0);return t.length>0?t:[]}i($,"normalizeConfiguredExtensions");function ne(e){if(e.permissionMode!=="default")return e;let t={...e,permissionMode:"accept-edits"};try{a.mkdirSync(c.dirname(d),{recursive:!0}),a.writeFileSync(d,JSON.stringify(t,null,2))}catch{}return t}i(ne,"migrateLegacyPermissionModeInConfig");function oe(){if(s)return s;let e="0.7.41";if(e)return s=e,s;let t=c.join(c.dirname(q(import.meta.url)),"../../package.json");if(a.existsSync(t))try{return s=JSON.parse(a.readFileSync(t,"utf-8")).version??"0.0.0",s??"0.0.0"}catch{}return s="0.0.0",s}i(oe,"getVersion");var xe=oe();function Ie(e){return S(e)}i(Ie,"getProviderModel");function P(e,t){let n=new Set(e.map(r=>r.toLowerCase())),o=[...e];for(let r of t)n.has(r.toLowerCase())||o.push(r);return o}i(P,"mergeModels");function ie(e,t){t||(t=y().providerModels);let n=t?.[e];if(n&&n.length>0){try{let o=h(e);if(o.length>0)return P(n,o)}catch{}try{let o=m(e);if(o)return P(n,o.getAvailableModels())}catch{}return n}try{let o=h(e);if(o.length>0)return o}catch{}try{let o=m(e);if(o)return o.getAvailableModels()}catch{}return[]}i(ie,"getProviderAvailableModels");function re(e,t){let n=x(e,t);if(n!=="unknown")return 
n;try{let o=m(e);if(o)return o.getReasoningCapability(t)}catch{}return"unknown"}i(re,"getProviderReasoningCapability");function se(e){let t=I(e);if(t)return t;try{return b().find(o=>o.name===e)?.capabilityProfile??null}catch{return null}}i(se,"getProviderCapabilityProfile");function B(e,t){let n=se(e),o=re(e,t);if(n)return{capabilityProfile:n,reasoningCapability:o};try{let r=R(e);return{capabilityProfile:r.getCapabilityProfile(),reasoningCapability:r.getReasoningCapability(t)}}catch{return null}}i(B,"getProviderCapabilityMetadata");function Le(e,t){let n=B(e,t);return n?A({providerName:e,model:t,capabilityProfile:n.capabilityProfile,reasoningCapability:n.reasoningCapability==="unknown"?void 0:n.reasoningCapability}):null}i(Le,"getProviderCapabilitySnapshot");function ae(e,t,n,o){let r=B(e,t);return r?M({providerName:e,model:t,capabilityProfile:r.capabilityProfile,reasoningCapability:r.reasoningCapability==="unknown"?void 0:r.reasoningCapability,reasoningMode:n,hints:o}):null}i(ae,"getProviderPolicyDecision");function ke(e){let t=e.transport==="cli-bridge"?"CLI bridge":"Native API",n=e.conversationSemantics==="last-user-message"?"forwards only the latest user message":"preserves full conversation history",o=e.mcpSupport==="native"?"MCP available":"MCP unavailable";return`${t}; ${n}; ${o}`}i(ke,"describeProviderCapabilitySummary");function Re(e){switch(e){case"native-budget":return"B";case"native-effort":return"E";case"native-toggle":return"T";default:return"-"}}i(Re,"formatReasoningCapabilityShort");function ce(e){switch(e){case"native-budget":return"budget";case"native-effort":return"effort";case"native-toggle":return"toggle";default:return"none"}}i(ce,"describeReasoningCapabilityControl");function Oe(e,t){if(e==="off")return"Reasoning disabled";switch(t){case"native-budget":return"Uses native thinking budget control";case"native-effort":return"Uses native reasoning effort control";case"native-toggle":return"Uses provider-native thinking toggle 
only";case"none":return"Runs without native reasoning parameters";case"prompt-only":return"Uses prompt overlays only; no native reasoning parameter";default:return"Runs without native reasoning parameters"}}i(Oe,"describeReasoningExecution");function Ae(e){let t=[];e||(e=y().providerModels);for(let n of L())t.push({name:n.name,model:n.model,models:ie(n.name,e),configured:n.capabilityProfile.transport==="cli-bridge"?!0:n.configured,reasoningCapability:n.reasoningCapability,capabilityProfile:n.capabilityProfile});try{let n=b().map(o=>({...o,models:(()=>{let r=e?.[o.name];return r&&r.length>0?P(r,o.models):o.models})()}));t.push(...n)}catch{}return t}i(Ae,"getProviderList");function Me(e){if(w(e))return!0;try{return m(e)?.isConfigured()??!1}catch{return!1}}i(Me,"isProviderConfigured");function y(){try{if(a.existsSync(d)){let e=JSON.parse(a.readFileSync(d,"utf-8")),t=e.reasoningCeiling??e.reasoningMode;return ne({...e,reasoningMode:t,extensions:$(e.extensions)})}}catch{}return{}}i(y,"loadConfig");function le(e){e.streamIdleTimeoutMs&&!process.env.KODAX_STREAM_IDLE_TIMEOUT_MS&&(process.env.KODAX_STREAM_IDLE_TIMEOUT_MS=String(e.streamIdleTimeoutMs))}i(le,"applyResilienceRuntimeEnv");function ue(e){e.repoIntelligenceMode&&!process.env.KODAX_REPO_INTELLIGENCE_MODE&&(process.env.KODAX_REPO_INTELLIGENCE_MODE=e.repoIntelligenceMode),e.repointelEndpoint&&!process.env.KODAX_REPOINTEL_ENDPOINT&&(process.env.KODAX_REPOINTEL_ENDPOINT=e.repointelEndpoint),e.repointelBin&&!process.env.KODAX_REPOINTEL_BIN&&(process.env.KODAX_REPOINTEL_BIN=e.repointelBin),e.repoIntelligenceTrace===!0&&!process.env.KODAX_REPO_INTELLIGENCE_TRACE&&(process.env.KODAX_REPO_INTELLIGENCE_TRACE="1")}i(ue,"applyRepoIntelligenceRuntimeEnv");function Te(){ee();let e=y();return le(e),ue(e),te(e),N(e.locale),e}i(Te,"prepareRuntimeConfig");function Ne(e){let n={...y(),...e},o=$(n.extensions);o!==void 0&&(n.extensions=o);for(let r of Object.keys(e))e[r]===void 0&&delete 
n[r];a.mkdirSync(c.dirname(d),{recursive:!0}),a.writeFileSync(d,JSON.stringify(n,null,2))}i(Ne,"saveConfig");async function De(){try{let{stdout:e}=await W("git rev-parse --show-toplevel");return e.trim()}catch{return null}}i(De,"getGitRoot");function de(e){switch(e){case"builtin":return"Built-in";case"runtime":return"Runtime extension";case"custom":return"Custom config";default:return"Unknown"}}i(de,"formatProviderSourceKind");function Ke(e){let t=e.transport==="cli-bridge"?"CLI bridge":"Native API",n=e.conversationSemantics==="last-user-message"?"latest-user-message only":"full conversation history";return[`Source: ${de(e.sourceKind)}`,`Transport: ${t}`,`Conversation semantics: ${n}`,`Context fidelity: ${e.contextFidelity}`,`Tool calling: ${e.toolCallingFidelity}`,`Session behavior: ${e.sessionSupport}`,`Long-running support: ${e.longRunningSupport}`,`Evidence-heavy flows: ${e.evidenceSupport}`,`Multimodal support: ${e.multimodalSupport}`,`MCP support: ${e.mcpSupport}`,`Reasoning control: ${ce(e.reasoningCapability)}`]}i(Ke,"formatProviderCapabilityDetailLines");function Xe(e,t,n){return[{label:"General coding",hints:{}},{label:"Evidence-heavy review",hints:{evidenceHeavy:!0}},{label:"Long-running task",hints:{longRunning:!0}}].map(r=>({label:r.label,decision:ae(e,t,n,r.hints)})).filter(r=>r.decision!==null)}i(Xe,"getProviderCommonPolicyScenarios");var D=.5,K=0,g={locked:!1,queue:[]};async function $e(e){for(;g.locked;)await new Promise(t=>g.queue.push(t));g.locked=!0;try{let t=(Date.now()-K)/1e3;t<D&&await new Promise(o=>setTimeout(o,(D-t)*1e3));let n=await e();return K=Date.now(),n}finally{g.locked=!1;let t=g.queue.shift();t&&t()}}i($e,"rateLimitedCall");export{pe as a,X as b,Ce as c,d,we as e,Z as f,Se as g,te as h,oe as i,xe as j,Ie as k,ie as l,re as m,se as n,Le as o,ae as p,ke as q,Re as r,ce as s,Oe as t,Ae as u,Me as v,y as w,Te as x,Ne as y,De as z,de as A,Ke as B,Xe as C,$e as D};
@@ -1,2 +1,2 @@
1
1
  // @kodax-ai/kodax — bundled distribution. See docs/ADR.md ADR-022 + ADR-024.
2
- import{ja as e}from"./chunk-N2VZ2MJF.js";import{a as n}from"./chunk-4E76FLZ3.js";import{readFile as i}from"fs/promises";var c={enabled:!0,triggerPercent:75};async function g(o){let t=e("config.json");try{let r=await a(t);if(r?.compaction)return{...c,...r.compaction}}catch{}return c}n(g,"loadCompactionConfig");async function a(o){try{let t=await i(o,"utf-8");return JSON.parse(t)}catch{return null}}n(a,"readConfigFile");export{g as a};
2
+ import{ja as e}from"./chunk-SX2IS5JP.js";import{a as n}from"./chunk-V4WSBIXB.js";import{readFile as i}from"fs/promises";var c={enabled:!0,triggerPercent:75};async function g(o){let t=e("config.json");try{let r=await a(t);if(r?.compaction)return{...c,...r.compaction}}catch{}return c}n(g,"loadCompactionConfig");async function a(o){try{let t=await i(o,"utf-8");return JSON.parse(t)}catch{return null}}n(a,"readConfigFile");export{g as a};