npm - @linnlabs/linnkit - Versions diffs - 0.9.0 → 0.10.0 - Mend

@linnlabs/linnkit 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

package/CHANGELOG.md +13 -0
package/bin/linnkit.cjs +7 -0
package/dist/{agentSpec-EkmviZjy.d.cts → agentSpec-Du4Iye0q.d.cts} +16 -1
package/dist/{agentSpec-EkmviZjy.d.ts → agentSpec-Du4Iye0q.d.ts} +16 -1
package/dist/cli.cjs +118 -65
package/dist/cli.cjs.map +1 -1
package/dist/cli.js +118 -65
package/dist/cli.js.map +1 -1
package/dist/context-manager.cjs +234 -32
package/dist/context-manager.cjs.map +1 -1
package/dist/context-manager.d.cts +52 -15
package/dist/context-manager.d.ts +52 -15
package/dist/context-manager.js +234 -33
package/dist/context-manager.js.map +1 -1
package/dist/{context-trace-HE2qY5Q-.d.cts → context-trace-BHKDS-eq.d.cts} +2 -2
package/dist/{context-trace-DRi5M4lX.d.ts → context-trace-CHbqHmyE.d.ts} +2 -2
package/dist/contracts.cjs +3 -1
package/dist/contracts.cjs.map +1 -1
package/dist/contracts.d.cts +3 -3
package/dist/contracts.d.ts +3 -3
package/dist/contracts.js +3 -1
package/dist/contracts.js.map +1 -1
package/dist/{defaultGraphExecutor-BBswR8wn.d.ts → defaultGraphExecutor-B29_qTHy.d.ts} +16 -15
package/dist/{defaultGraphExecutor-BIjJj7WF.d.cts → defaultGraphExecutor-C2E59v_R.d.cts} +16 -15
package/dist/{index-BanRABEt.d.cts → index-BAaUP9yU.d.cts} +26 -14
package/dist/{index-Z8NXKNwI.d.ts → index-BaVpVNi2.d.ts} +26 -14
package/dist/{index-DO4dQgf2.d.cts → index-BnYCS8Zg.d.cts} +2 -2
package/dist/{index-CJeWHopy.d.ts → index-C0DAjsdX.d.ts} +2 -2
package/dist/{index-Dl5PLgAv.d.cts → index-CKQzzZ5Y.d.cts} +2 -2
package/dist/{index-CHqwkvGp.d.ts → index-D0mKxTR5.d.ts} +2 -2
package/dist/index.cjs +186 -85
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +10 -10
package/dist/index.d.ts +10 -10
package/dist/index.js +186 -85
package/dist/index.js.map +1 -1
package/dist/{ports-DnLuKfpE.d.ts → ports-DpPTFhSd.d.ts} +2 -2
package/dist/{ports-DaatKJXp.d.cts → ports-s-tSp3sB.d.cts} +2 -2
package/dist/quickstart.cjs +119 -65
package/dist/quickstart.cjs.map +1 -1
package/dist/quickstart.d.cts +7 -7
package/dist/quickstart.d.ts +7 -7
package/dist/quickstart.js +119 -65
package/dist/quickstart.js.map +1 -1
package/dist/{runAgent-CPj_9e58.d.ts → runAgent-C6F-399C.d.ts} +5 -5
package/dist/{runAgent-HYKlXbVr.d.cts → runAgent-ilEj66Ik.d.cts} +5 -5
package/dist/{runHandle-D3gPsD7B.d.cts → runHandle-BNOqS-Bl.d.cts} +3 -3
package/dist/{runHandle-CyXvzgzk.d.ts → runHandle-BdLXOFqF.d.ts} +3 -3
package/dist/runtime-kernel/events.cjs +1 -0
package/dist/runtime-kernel/events.cjs.map +1 -1
package/dist/runtime-kernel/events.d.cts +4 -4
package/dist/runtime-kernel/events.d.ts +4 -4
package/dist/runtime-kernel/events.js +1 -0
package/dist/runtime-kernel/events.js.map +1 -1
package/dist/runtime-kernel.cjs +181 -82
package/dist/runtime-kernel.cjs.map +1 -1
package/dist/runtime-kernel.d.cts +8 -8
package/dist/runtime-kernel.d.ts +8 -8
package/dist/runtime-kernel.js +181 -83
package/dist/runtime-kernel.js.map +1 -1
package/dist/testkit.cjs +181 -82
package/dist/testkit.cjs.map +1 -1
package/dist/testkit.d.cts +8 -8
package/dist/testkit.d.ts +8 -8
package/dist/testkit.js +181 -82
package/dist/testkit.js.map +1 -1
package/dist/{todo-B1PmDlp3.d.cts → todo-Ca8llpRQ.d.cts} +1 -1
package/dist/{todo-B1PmDlp3.d.ts → todo-Ca8llpRQ.d.ts} +1 -1
package/dist/{toolContracts-CLkQmhTG.d.cts → toolContracts-Bm3EZ1UM.d.cts} +13 -2
package/dist/{toolContracts-Blll0241.d.ts → toolContracts-f8lzZBNa.d.ts} +13 -2
package/docs/integration/README.md +1 -1
package/docs/integration/agent-registration-guide.md +1 -1
package/docs/integration/child-runs.md +4 -1
package/docs/integration/context-engineering.md +30 -15
package/docs/integration/context-fences.md +32 -3
package/docs/integration/llm-provider.md +1 -1
package/docs/integration/persistence.md +1 -0
package/docs/integration/run-supervisor.md +3 -0
package/docs/integration/tool-development-guide.md +7 -5
package/docs/integration/tool-history.md +43 -17
package/package.json +4 -3

package/dist/{todo-B1PmDlp3.d.cts → todo-Ca8llpRQ.d.cts} RENAMED Viewed

@@ -2250,4 +2250,4 @@ interface AgentTodoSnapshot {
     items: AgentTodoItem[];
 }
-export { AgentTodoItem$1 as A, BaseEvent as B, type ControlEvent as C, validateRuntimeEvent as D, type ErrorEvent as E, type FinalAnswerChunkEvent as F, validateRuntimeEvents as G, type HistorySummaryEvent as H, ProviderReasoningDetailsPayload as P, type RequiresUserInteractionEvent as R, Status as S, type ThoughtEvent as T, type UserInputEvent as U, type AgentTodoSnapshot as a, AgentTodoStatus as b, type AuditEnvelopeEvent as c, type FinalAnswerEvent as d, RuntimeEvent as e, type StreamEndEvent as f, type SubRunTraceEvent as g, type TodoUpdatedEvent as h, type ToolCallDecisionEvent as i, ToolCallDecisionPayload as j, ToolCallPhase as k, type ToolOutputEvent as l, type ToolProcessEvent as m, createAuditEnvelopeEvent as n, createErrorEvent as o, createFinalAnswerChunkEvent as p, createFinalAnswerEvent as q, createHistorySummaryEvent as r, createStreamEndEvent as s, createSubRunTraceEvent as t, createThoughtEvent as u, createTodoUpdatedEvent as v, createToolCallDecisionEvent as w, createToolOutputEvent as x, createToolProcessEvent as y, createUserInputEvent as z };
+export { AgentTodoItem$1 as A, BaseEvent as B, type ControlEvent as C, validateRuntimeEvent as D, type ErrorEvent as E, type FinalAnswerChunkEvent as F, validateRuntimeEvents as G, type HistorySummaryEvent as H, ProviderReasoningDetailsPayload as P, RuntimeEvent as R, Status as S, type ThoughtEvent as T, type UserInputEvent as U, type AgentTodoSnapshot as a, AgentTodoStatus as b, type AuditEnvelopeEvent as c, type FinalAnswerEvent as d, type RequiresUserInteractionEvent as e, type StreamEndEvent as f, type SubRunTraceEvent as g, type TodoUpdatedEvent as h, type ToolCallDecisionEvent as i, ToolCallDecisionPayload as j, ToolCallPhase as k, type ToolOutputEvent as l, type ToolProcessEvent as m, createAuditEnvelopeEvent as n, createErrorEvent as o, createFinalAnswerChunkEvent as p, createFinalAnswerEvent as q, createHistorySummaryEvent as r, createStreamEndEvent as s, createSubRunTraceEvent as t, createThoughtEvent as u, createTodoUpdatedEvent as v, createToolCallDecisionEvent as w, createToolOutputEvent as x, createToolProcessEvent as y, createUserInputEvent as z };

package/dist/{todo-B1PmDlp3.d.ts → todo-Ca8llpRQ.d.ts} RENAMED Viewed

@@ -2250,4 +2250,4 @@ interface AgentTodoSnapshot {
     items: AgentTodoItem[];
 }
-export { AgentTodoItem$1 as A, BaseEvent as B, type ControlEvent as C, validateRuntimeEvent as D, type ErrorEvent as E, type FinalAnswerChunkEvent as F, validateRuntimeEvents as G, type HistorySummaryEvent as H, ProviderReasoningDetailsPayload as P, type RequiresUserInteractionEvent as R, Status as S, type ThoughtEvent as T, type UserInputEvent as U, type AgentTodoSnapshot as a, AgentTodoStatus as b, type AuditEnvelopeEvent as c, type FinalAnswerEvent as d, RuntimeEvent as e, type StreamEndEvent as f, type SubRunTraceEvent as g, type TodoUpdatedEvent as h, type ToolCallDecisionEvent as i, ToolCallDecisionPayload as j, ToolCallPhase as k, type ToolOutputEvent as l, type ToolProcessEvent as m, createAuditEnvelopeEvent as n, createErrorEvent as o, createFinalAnswerChunkEvent as p, createFinalAnswerEvent as q, createHistorySummaryEvent as r, createStreamEndEvent as s, createSubRunTraceEvent as t, createThoughtEvent as u, createTodoUpdatedEvent as v, createToolCallDecisionEvent as w, createToolOutputEvent as x, createToolProcessEvent as y, createUserInputEvent as z };
+export { AgentTodoItem$1 as A, BaseEvent as B, type ControlEvent as C, validateRuntimeEvent as D, type ErrorEvent as E, type FinalAnswerChunkEvent as F, validateRuntimeEvents as G, type HistorySummaryEvent as H, ProviderReasoningDetailsPayload as P, RuntimeEvent as R, Status as S, type ThoughtEvent as T, type UserInputEvent as U, type AgentTodoSnapshot as a, AgentTodoStatus as b, type AuditEnvelopeEvent as c, type FinalAnswerEvent as d, type RequiresUserInteractionEvent as e, type StreamEndEvent as f, type SubRunTraceEvent as g, type TodoUpdatedEvent as h, type ToolCallDecisionEvent as i, ToolCallDecisionPayload as j, ToolCallPhase as k, type ToolOutputEvent as l, type ToolProcessEvent as m, createAuditEnvelopeEvent as n, createErrorEvent as o, createFinalAnswerChunkEvent as p, createFinalAnswerEvent as q, createHistorySummaryEvent as r, createStreamEndEvent as s, createSubRunTraceEvent as t, createThoughtEvent as u, createTodoUpdatedEvent as v, createToolCallDecisionEvent as w, createToolOutputEvent as x, createToolProcessEvent as y, createUserInputEvent as z };

package/dist/{toolContracts-CLkQmhTG.d.cts → toolContracts-Bm3EZ1UM.d.cts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { e as RuntimeEvent, g as SubRunTraceEvent, a as AgentTodoSnapshot } from './todo-B1PmDlp3.cjs';
+import { R as RuntimeEvent, g as SubRunTraceEvent, a as AgentTodoSnapshot } from './todo-Ca8llpRQ.cjs';
 interface ToolContextConversationView {
     /**
@@ -163,6 +163,15 @@ interface ChildRunExecutionPolicy {
      * 显式 child-run runId。默认由 host adapter 取 subrunId。
      */
     runId?: string;
+    /**
+     * child-run 的宿主会话归属。
+     *
+     * 中文备注：
+     * - 同步 child-run 通常复用父 conversationId，方便 EventStore / Audit / Telemetry
+     *   在同一条会话链路下写入 child run；
+     * - 框架内部仍可使用独立 checkpoint key 隔离 GraphExecutor 状态。
+     */
+    conversationId?: string;
     /**
      * 父 runId。默认从 parentToolContext.runId 继承。
      */
@@ -365,6 +374,8 @@ interface ToolParameterProperty {
     description: string;
     default?: unknown;
     enum?: string[];
+    minimum?: number;
+    maximum?: number;
     properties?: Record<string, ToolParameterProperty>;
     items?: ToolParameterProperty;
     required?: string[];
@@ -460,4 +471,4 @@ interface ToolCallResult<TResult = unknown> {
     durationMs: number;
 }
-export { type AgentTool as A, BaseTool as B, CommonParameterTypes as C, type JsonObjectSchema as J, type OpenAIToolSchema as O, type RunContext as R, type StructuredToolResult as S, type ToolExecutionContext as T, type UnifiedToolResult as U, type ToolArgs as a, type ToolParameterSchema as b, type ToolParameterProperty as c, type ToolContextConversationView as d, type ToolCallResult as e, type ToolContextCompatibilityFields as f, type ToolControlInfo as g, type ToolDisplayOptions as h, type ToolLayoutOptions as i, type ToolRegistryEntry as j, type ToolResult as k, computeToolIdempotencyKey as l, findCachedToolOutputByIdempotencyKey as m, readToolContextRunContext as n, readToolContextUserQuery as o, createDefaultRunContext as p, type ChildRunParentContext as q, readToolContextModelId as r, type SubRunTracePublisher as s, type ChildRunHistoryPolicy as t, type ChildRunInvokerPort as u, type ChildRunRequest as v, type ChildRunResult as w, type SubRunTraceEnvelope as x, type ToolIdempotencyPolicy as y };
+export { type AgentTool as A, BaseTool as B, CommonParameterTypes as C, type JsonObjectSchema as J, type OpenAIToolSchema as O, type RunContext as R, type StructuredToolResult as S, type ToolArgs as T, type UnifiedToolResult as U, type ToolExecutionContext as a, type ToolParameterSchema as b, type ToolParameterProperty as c, type ToolContextConversationView as d, type ToolCallResult as e, type ToolContextCompatibilityFields as f, type ToolControlInfo as g, type ToolDisplayOptions as h, type ToolLayoutOptions as i, type ToolRegistryEntry as j, type ToolResult as k, computeToolIdempotencyKey as l, findCachedToolOutputByIdempotencyKey as m, readToolContextRunContext as n, readToolContextUserQuery as o, createDefaultRunContext as p, type ChildRunParentContext as q, readToolContextModelId as r, type SubRunTracePublisher as s, type ChildRunHistoryPolicy as t, type ChildRunInvokerPort as u, type ChildRunRequest as v, type ChildRunResult as w, type SubRunTraceEnvelope as x, type ToolIdempotencyPolicy as y };

package/dist/{toolContracts-Blll0241.d.ts → toolContracts-f8lzZBNa.d.ts} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { e as RuntimeEvent, g as SubRunTraceEvent, a as AgentTodoSnapshot } from './todo-B1PmDlp3.js';
+import { R as RuntimeEvent, g as SubRunTraceEvent, a as AgentTodoSnapshot } from './todo-Ca8llpRQ.js';
 interface ToolContextConversationView {
     /**
@@ -163,6 +163,15 @@ interface ChildRunExecutionPolicy {
      * 显式 child-run runId。默认由 host adapter 取 subrunId。
      */
     runId?: string;
+    /**
+     * child-run 的宿主会话归属。
+     *
+     * 中文备注：
+     * - 同步 child-run 通常复用父 conversationId，方便 EventStore / Audit / Telemetry
+     *   在同一条会话链路下写入 child run；
+     * - 框架内部仍可使用独立 checkpoint key 隔离 GraphExecutor 状态。
+     */
+    conversationId?: string;
     /**
      * 父 runId。默认从 parentToolContext.runId 继承。
      */
@@ -365,6 +374,8 @@ interface ToolParameterProperty {
     description: string;
     default?: unknown;
     enum?: string[];
+    minimum?: number;
+    maximum?: number;
     properties?: Record<string, ToolParameterProperty>;
     items?: ToolParameterProperty;
     required?: string[];
@@ -460,4 +471,4 @@ interface ToolCallResult<TResult = unknown> {
     durationMs: number;
 }
-export { type AgentTool as A, BaseTool as B, CommonParameterTypes as C, type JsonObjectSchema as J, type OpenAIToolSchema as O, type RunContext as R, type StructuredToolResult as S, type ToolExecutionContext as T, type UnifiedToolResult as U, type ToolArgs as a, type ToolParameterSchema as b, type ToolParameterProperty as c, type ToolContextConversationView as d, type ToolCallResult as e, type ToolContextCompatibilityFields as f, type ToolControlInfo as g, type ToolDisplayOptions as h, type ToolLayoutOptions as i, type ToolRegistryEntry as j, type ToolResult as k, computeToolIdempotencyKey as l, findCachedToolOutputByIdempotencyKey as m, readToolContextRunContext as n, readToolContextUserQuery as o, createDefaultRunContext as p, type ChildRunParentContext as q, readToolContextModelId as r, type SubRunTracePublisher as s, type ChildRunHistoryPolicy as t, type ChildRunInvokerPort as u, type ChildRunRequest as v, type ChildRunResult as w, type SubRunTraceEnvelope as x, type ToolIdempotencyPolicy as y };
+export { type AgentTool as A, BaseTool as B, CommonParameterTypes as C, type JsonObjectSchema as J, type OpenAIToolSchema as O, type RunContext as R, type StructuredToolResult as S, type ToolArgs as T, type UnifiedToolResult as U, type ToolExecutionContext as a, type ToolParameterSchema as b, type ToolParameterProperty as c, type ToolContextConversationView as d, type ToolCallResult as e, type ToolContextCompatibilityFields as f, type ToolControlInfo as g, type ToolDisplayOptions as h, type ToolLayoutOptions as i, type ToolRegistryEntry as j, type ToolResult as k, computeToolIdempotencyKey as l, findCachedToolOutputByIdempotencyKey as m, readToolContextRunContext as n, readToolContextUserQuery as o, createDefaultRunContext as p, type ChildRunParentContext as q, readToolContextModelId as r, type SubRunTracePublisher as s, type ChildRunHistoryPolicy as t, type ChildRunInvokerPort as u, type ChildRunRequest as v, type ChildRunResult as w, type SubRunTraceEnvelope as x, type ToolIdempotencyPolicy as y };

package/docs/integration/README.md CHANGED Viewed

@@ -131,7 +131,7 @@ import type { RuntimeEvent } from '@linnlabs/linnkit/contracts';
 | **上下文工程总览**（所有作用在 messages 上的机制 + `contextPolicy` / `ContextTrace` 可观测闭环）⭐            | [context-engineering.md](./context-engineering.md)                                                                          |
 | **接 context engineering（fence 注册 + 注入）⭐ 一等接入面**                                       | [context-fences.md](./context-fences.md)                                                                                    |
 | 自定义 token 估算（`TokenizerPort` 替换默认 tokenizer · 0.8.0+）                                 | [context-engineering.md §9.4](./context-engineering.md) / [agent-registration-guide.md](./agent-registration-guide.md) §4.1 |
-| 配置工具历史压缩策略（per-pair / per-run / none）                                                 | [tool-history.md](./tool-history.md)                                                                                        |
+| 配置工具历史保留策略（per-pair / per-run / none；drop / compress）                              | [tool-history.md](./tool-history.md)                                                                                        |
 | 接持久化（Checkpointer / EventStore / RunRegistryStore）                                    | [persistence.md](./persistence.md)                                                                                          |
 | 接 RunSupervisor + RunHandle                                                           | [run-supervisor.md](./run-supervisor.md)                                                                                    |
 | 同步嵌入 vs 异步后台子 agent                                                                   | [child-runs.md](./child-runs.md)                                                                                            |

package/docs/integration/agent-registration-guide.md CHANGED Viewed

@@ -270,7 +270,7 @@ await executor.run({
 | 修改 | 版本号 | 配套动作 |
 |------|--------|----------|
 | 加工具 / 加 capability | minor | audit log 记"能力扩展" |
-| 改 `contextPolicy.budget` / `toolHistory.strategy` | minor | audit log；考虑 replay 验证 |
+| 改 `contextPolicy.budget` / `toolHistory.strategy` / `toolHistory.retentionMode` | minor | audit log；考虑 replay 验证 |
 | 删工具 / 删 capability | **major** | audit log；既有 run 进 deprecated 路径 |
 | 改 `metadata` / `role` / `description` 文案 | patch | 一般不需要 audit |

package/docs/integration/child-runs.md CHANGED Viewed

@@ -79,9 +79,12 @@ const outcome = await supervisor.waitForTerminal(handle.runId);
 - **不要**把 `invokeChildRun` 当成"小一号的 run"——它本质上是父 run 内部的一个嵌入式调用，与父 run 共享 abort signal、cost 聚合、enrichment registry。
 - **不要**把 `spawnDetached` 用于工具调用流（HTTP 端到端响应里不应该等 spawnDetached 完成）——那是 invokeChildRun 的场景。
 - 父子 run 的 cost 通过 `scope.parentRunId` 关联；如果你的 telemetry adapter 没把 `parentRunId` 透传到 sink，那 `childrenTotal` 字段就是 0。
+- 同步 child-run 的 `conversationId` 是宿主审计/事件归属，不是内部 checkpoint key。host 如果先用 `RunSupervisor.registerRun({ runId: childRunId, conversationId })` 注册 child run，随后调用 `ChildRunInvoker` / `invokeChildRun` 时也必须传入同一个 `conversationId`。框架内部会继续使用独立 checkpoint key 隔离子图状态，但 RuntimeEvent / Audit / Telemetry 会落在这个 host conversation 下。
+- 异步 `spawnDetached` 不创建同步 child-run 的内部 checkpoint key；它注册的就是一个真实 run，`RunExecutionContext.conversationId/runId/parentRunId` 必须与 `RunRecord` 对齐。executor 读取的是注册时的 `AgentSpec` / request / metadata 快照，不应依赖调用方之后继续修改对象。
 ## 6. 最小验证
 - 单测：父 agent 工具内 `invokeChildRun` → 父 run 的 `cost().childrenTotal.llmCost > 0`
-- 单测：`spawnDetached` 立刻返回；`waitForTerminal` 在执行结束后 resolve 出最终 status
+- 集成测：child run 内部调用工具时，`model.select` / `tool.allow` audit envelope 的 `scope.conversationId` 与注册 child run 的 conversationId 一致，`scope.runId` 是 child runId。
+- 单测：`spawnDetached` 立刻返回；executor 收到注册时的 `conversationId/runId/parentRunId` 与 request 快照；`waitForTerminal` 在执行结束后 resolve 出最终 status
 - 单测：`spawnDetached` 中的 run 被 `cancel()` 后，executor 收到的 `ctx.signal.aborted === true`

package/docs/integration/context-engineering.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Context Engineering · linnkit 的上下文工程总览
-> **What** · 所有作用在 messages 上的机制总览 —— `contextPolicy` 12 大分组 + `ContextTrace` 可观测闭环 + `TokenizerPort` + 摘要 / 围栏 / 工具历史压缩。
+> **What** · 所有作用在 messages 上的机制总览 —— `contextPolicy` 12 大分组 + `ContextTrace` 可观测闭环 + `TokenizerPort` + 摘要 / 围栏 / 工具历史保留。
 > **When to read** · 想精确控制每个 token；上下文超长被裁；要诊断"为什么这条消息被丢了"；自定义 tokenizer；做 token 预算选型。
 > **Prerequisites** · [`agent-registration-guide.md`](./agent-registration-guide.md) ⭐（先理解 `AgentSpec.contextPolicy` 字段结构）。
 > **Key exports** · `ContextTrace` from `@linnlabs/linnkit/contracts` · `TokenizerPort` from `@linnlabs/linnkit/ports` · `formatAgentLlmMessages` / `createMessageFormatter` from `@linnlabs/linnkit/context-manager`。
@@ -27,7 +27,7 @@ host 发出 invoke request
   │
   ▼
 [B] Preprocessor Pipeline（按优先级跑）
-    1. ToolHistoryCompressorPreprocessor   ─ 工具历史压缩
+    1. ToolHistoryCompressorPreprocessor   ─ 工具历史保留/删除（可选压缩）
     2. ToolReplayProtocolGuardPreprocessor ─ 工具回放协议守卫
     3. HistoryPurificationPreprocessor     ─ 历史净化（清孤儿 / 同 ID 去重）
     4. FenceLifetimePreprocessor           ─ 剥离旧轮 turn-only fence
@@ -35,7 +35,7 @@ host 发出 invoke request
   ▼
 [C] ContextProvider 三阶段填充
     1. AgentCoreContextProvider          ─ 不可裁的核心层（system / user）
-    2. AgentWorkingMemoryProvider        ─ 工作记忆按 P1-P4 优先级填到预算上限
+    2. AgentWorkingMemoryProvider        ─ 工作记忆按 P1-P3 优先级填到预算上限
     3. CheckpointSummarizationProvider   ─ checkpoint 前的旧轮裁干净
     4. (自动触发) SummarizationProvider   ─ 超预算时整段历史摘要
   │
@@ -66,6 +66,18 @@ linnkit 的内部消息（`AiMessage` union）最终都会按 LLM 协议的三
 | `assistant` | LLM 自己产的 `final_answer` / `thought`（reasoning_content）/ `tool_calls`；以及配对的 `tool_output`（在物理 wire 上挂 `tool` role） |
 | `user` | 用户的 `user_input`；`placement: 'before-current-user'` / `'after-current-user'` 的 fence（经常变化的高频上下文，例如用户上传的文件、当前时间等）；以及触发后的 `<system-reminder>` 注入 |
+当前轮 `llmRole: 'user'` 且 `placement` 指向当前请求附近的 fence，会先按 host 提供的 `formatter` 组装进同一条 `user_input`：
+```text
+<formatter(before-current-user fence)>   # 只有真实注入时才出现
+<user_request>
+用户原始请求
+</user_request>
+<formatter(after-current-user fence)>    # 只有真实注入时才出现
+```
+这里的 `<formatter(...)>` 不是字面输出；真正发给 LLM 的是 host 自己声明的 XML/tag，例如 `<document_context>`。未注入某类 fence 时，不会生成空标签或占位符。
 **重要不变量**：`tool_calls` 和 `tool_output` **必须成对出现**——任何一边丢了另一边就废了。这条不变量贯穿所有压缩 / 裁剪机制。
 ---
@@ -120,9 +132,11 @@ contextPolicy: {
 这一层在"消息进 ContextProvider 之前"跑。按 priority 顺序执行；任何一个抛 fatal `ContextProviderError` 都会中断 pipeline。
-### 4.1 ToolHistoryCompressorPreprocessor —— 工具历史压缩（AgentSpec 高度可配置 ✅）
+### 4.1 ToolHistoryCompressorPreprocessor —— 工具历史保留（AgentSpec 高度可配置 ✅）
+控制旧的 `tool_calls` + `tool_output` 配对如何进入上下文。三种选择窗口：`'per-run'`（默认，按 user_input 边界保留最近 K 个 run）/ `'per-pair'`（保留最近 N 个工具对）/ `'none'`（不做常规筛选）。未进入保留窗口的旧工具组默认按 `retentionMode: 'drop'` 整组删除；需要旧摘要线索时可显式设 `retentionMode: 'compress'`，兼容旧的 assistant 摘要行为。安全阀：`maxInteractionGroups` 硬上限 + `overflowStrategy`（`'keep-latest'` / `'fail-fast'`）。
-把旧的 `tool_calls` + `tool_output` 配对压缩成简短 assistant 文本，控制工具历史不无限膨胀。三种策略：`'per-run'`（默认，按 user_input 边界保留最近 K 个 run）/ `'per-pair'`（保留最近 N 个工具对）/ `'none'`（不压缩）。安全阀：`maxInteractionGroups` 硬上限 + `overflowStrategy`（`'keep-latest'` / `'fail-fast'`）。
+这里有一个容易误解的边界：`compress` 只发生在 preprocessor 阶段，表示“旧 raw 工具组先被替换成自然语言摘要”；这些摘要后续在 working memory 里仍算历史工具交互，会继续受到 `maxInteractionGroups` 和 token budget 限制。也就是说，压缩摘要不是永久保留，只是给窗口外旧工具组一次以摘要形式进入最终 prompt 的机会；`drop` 则连这条摘要都不生成。
 完整字段、默认值与选型对比见 [`tool-history.md`](./tool-history.md) ⭐。
@@ -142,7 +156,7 @@ contextPolicy: {
 ### 4.4 FenceLifetimePreprocessor —— 旧轮 turn-only 剥离（自动跟 FenceRegistry 走）
-上一轮注入的 `lifetime: 'turn-only'` 的 fence（比如临时引用文本、临时记忆片段），这一轮自动剥掉。
+上一轮注入的 `lifetime: 'turn-only'` 的 fence（比如临时引用文本、临时记忆片段），这一轮自动剥掉。当前轮 system/user side fence 会先由 `CurrentTurnMessageAssembler` 合并进本轮 `system_prompt` / `user_input`，因此不会因为 `placement: 'before-current-user'` 位于用户请求前而被误判成历史上下文。
 **配置位置**：注册 fence 时通过 `lifetime` 字段控制；不需要单独配 preprocessor。
@@ -156,29 +170,30 @@ contextPolicy: {
 **开放状态**：行为完全由 MustKeepPolicy 决定（见 §3）。
-### 5.2 AgentWorkingMemoryProvider —— 工作记忆按 P1-P4 优先级填充（AgentSpec 已运行时接线 ✅）
+### 5.2 AgentWorkingMemoryProvider —— 工作记忆按优先级填充（AgentSpec 已运行时接线 ✅）
-扣掉核心层之后剩多少预算，按 4 个优先级倒着塞进消息：
+以本次输入总预算为基准划出工作记忆额度；扣掉核心层后，剩余工作记忆按优先级倒着塞进消息：
 | 优先级 | 内容 |
 |--------|------|
 | **P1** | 最近的工具交互对（tool_calls + tool_output） |
 | **P2** | 纯文本对话（final_answer + user 消息） |
-| **P3** | 更早的工具交互（可能已被 ToolHistoryCompressor 压成 assistant 文本） |
-| **P4** | 循环填充剩余空间 |
+| **P3** | 更早的工具交互（包括 `retentionMode: 'compress'` 生成的压缩摘要） |
+压缩摘要虽然物理上是 `assistant.final_answer`，但不会按 P2 普通助手文本处理；它会在 P3 中和 raw 工具组共用历史工具交互预算。
 **可配置字段**（写在 `AgentSpec.contextPolicy`）：
 | 字段 | 默认 | 含义 | 开放状态 |
 |------|------|------|---------|
-| `budget.maxTokens` | `120000` | 总预算上限 | ✅ AgentSpec + runtime |
+| `budget.maxTokens` | `232000` | 总预算上限 | ✅ AgentSpec + runtime |
 | `budget.reservedForResponse` | `2400` | 留给 LLM 输出的 token | ✅ AgentSpec + runtime |
 | `budget.workingMemoryBudgetPercentage` | `0.70` | 工作记忆占可用预算的比例 | ✅ AgentSpec + runtime |
 | `reasoningRetention.keepLatestThoughts` | `1` | 最近保留多少条 thought | ✅ AgentSpec + runtime |
 | `workingMemory.minToolInteractionsToKeep` | `2` | 即便预算不够也至少保留多少组工具对 | ✅ AgentSpec + runtime |
 | `workingMemory.maxRecentToolInteractions` | `2` | 原始 tool_calls 形态保留的最大组数 | ✅ AgentSpec + runtime |
 | `workingMemory.toolPairingSearchRange` | `10` | 搜工具配对的窗口范围 | ✅ AgentSpec + runtime |
-| P1-P4 优先级数字 | `1/2/3/4` | 优先级编号 | ❌ 不开放|
+| P1-P3 优先级数字 | `1/2/3` | 优先级编号 | ❌ 不开放|
 ### 5.3 CheckpointSummarizationProvider —— Checkpoint 主动压缩（开放方式特殊 ✅）
@@ -390,7 +405,7 @@ contextPolicy: {
 | 字段 | 默认 | 含义 | 开放状态 |
 |------|------|------|---------|
-| `budget.maxTokens` | `120000` | 总预算 | ✅ AgentSpec |
+| `budget.maxTokens` | `232000` | 总预算 | ✅ AgentSpec |
 | `budget.reservedForResponse` | `2400` | 留给响应的 token | ✅ AgentSpec |
 | `budget.workingMemoryBudgetPercentage` | `0.70` | 工作记忆占可用预算的比例 | ✅ AgentSpec |
 | `tokenEstimation.encoding` | `'cl100k_base'` | 估算用的 tiktoken encoding 名 | ✅ AgentSpec + runtime |
@@ -556,7 +571,7 @@ contextPolicy: {
 ### ✅ 已通过 AgentSpec 协议化开放，且 runtime 已接线
 - `budget.maxTokens` / `reservedForResponse` / `workingMemoryBudgetPercentage`
-- `toolHistory.{strategy, keepLatestToolPairs, keepLatestRuns, maxInteractionGroups, overflowStrategy, maxPairTokens, maxOutputSummaryTokens}`
+- `toolHistory.{strategy, retentionMode, keepLatestToolPairs, keepLatestRuns, maxInteractionGroups, overflowStrategy, maxPairTokens, maxOutputSummaryTokens}`
 - `toolOutput.observationGovernance.{enabled, maxChars, maxLines}`
 - `providerReplay.{provider, requiresReasoningDetailsForToolReplay, missingSidecarBehavior}`
 - `summarization.{triggerThreshold, budgetPercentage, oldestMessagesPercentage, agentId, failureBehavior}`
@@ -578,7 +593,7 @@ contextPolicy: {
 - system reminder **持久化进 history**（违反 reminder 协议本质——若需持久化请走 fence `lifetime: 'persisted'`）
 - ContextProvider 三阶段顺序（核心 → 工作记忆 → 摘要）
-- 工具压缩的 P1-P4 优先级数字
+- 工具历史填充的 P1-P3 优先级数字
 - `tool_calls` / `tool_output` 配对不变量
 ---

package/docs/integration/context-fences.md CHANGED Viewed

@@ -18,7 +18,7 @@ linnkit 设计原则：
 - **任意 host 都能注册自己的围栏家族**——例如 `<additional_context>`、`<memory-context>`、`<system-event>`、`<file_context>`，都通过同一套机制插入，不需要任何 host 改 framework 源码
 - **注入消息有稳定协议载体**——`AiMessage.type = 'context_injection'` 是唯一通用类型，`metadata.fenceKind` 表达开放的 host kind
-正确的做法：把每类上下文声明成一个"围栏家族"（fence kind），通过 `FenceRegistry` 注册，运行时由 `BaseAgentTask` 把 host 请求里的 `fences[]` 自动展开成 `context_injection` 消息，按 `placement` 落到正确位置；旧轮 `lifetime: 'turn-only'` 的注入由 `FenceLifetimePreprocessor` 自动剥离。
+正确的做法：把每类上下文声明成一个"围栏家族"（fence kind），通过 `FenceRegistry` 注册，运行时由 `BaseAgentTask` 把 host 请求里的 `fences[]` 自动展开成 `context_injection` 消息，按 `placement` 落到正确位置；当前轮 prompt block 由 `CurrentTurnMessageAssembler` 先组装，旧轮 `lifetime: 'turn-only'` 的注入再由 `FenceLifetimePreprocessor` 自动剥离。
 ## 2. 概念三元组
@@ -75,6 +75,32 @@ export const myFenceRegistry = createMyFenceRegistry();
 - 同一个 `kind` 在同一个 registry 不能重复 register
 - `maxBudgetFraction` 必须落在 `(0, 1]`
+当前轮 user-side fence 会按实际注入内容组装到同一条 user request。比如 host 注册了 `document-fragment`：
+```ts
+{
+  kind: 'document-fragment',
+  llmRole: 'user',
+  placement: 'before-current-user',
+  lifetime: 'turn-only',
+  formatter: content => `<document_fragment>\n${content}\n</document_fragment>`,
+}
+```
+本轮只注入这一类 fence 时，最终给 LLM 的 user 内容是：
+```xml
+<document_fragment>
+...
+</document_fragment>
+<user_request>
+用户原始请求
+</user_request>
+```
+没有注入的 fence 不会产生空 XML，也不会出现 `<formatter(before-current-user fence)>` 这类概念占位符。
 ## 4. 写一个 host 适配器：把请求字段转成 `FenceInjection[]`
 这是把 host 自己的产品语义（"项目名"、"被选中的段落"、"用户引用的句子"等）翻成通用 fence 注入的关键一层。
@@ -175,6 +201,8 @@ const llmMessages = formatAgentLlmMessages(processingResult.messages, {
 如果你完全自定义了 preprocessor pipeline，那 `FenceLifetimePreprocessor` 要从 `@linnlabs/linnkit/context-manager` 导入并手动加进去（构造参数：`{ fenceRegistry }`）。
+如果你完全自定义 pipeline，也要保留 `CurrentTurnMessageAssembler`，并让它在 `FenceLifetimePreprocessor` 之前执行。否则当前轮 `before-current-user` 的 turn-only fence 仍可能被后续生命周期清理误判为旧轮上下文。
 ## 6. 配 MustKeepPolicy（控制 working memory 裁剪）
 `AgentCoreContextProvider` 通过 `contextPolicy.mustKeep` 决定哪些消息一律不被裁。它有两类输入：
@@ -225,12 +253,13 @@ withMyFenceInjections()      ← 你写的适配
   ▼
 AgentMessageOrchestrator     ← linnkit
   │  · BaseAgentTask 展开为 AiMessage(type='context_injection', metadata.fenceKind=...)
+  │  · CurrentTurnMessageAssembler 把当前轮 user/system fence 组装进唯一 user_input / system_prompt
   │  · FenceLifetimePreprocessor 剥离旧轮 turn-only 注入
   │  · AgentCoreContextProvider 按 MustKeepPolicy 决定 working memory 是否裁掉
   ▼
 formatAgentLlmMessages(..., { fenceRegistry })
-  │  · 找到 metadata.fenceKind → registry.get(kind).formatter(content, attrs)
-  │  · 出关成具体 LLM messages（system / user 各按 llmRole）
+  │  · 对尚未组装的 context_injection，找到 metadata.fenceKind → registry.get(kind).formatter(content, attrs)
+  │  · 出关成具体 LLM messages（system / user 各按 llmRole）；不盲目合并相邻 user 消息
   ▼
 AgentAiEngine.chatCompletionStream(llmMessages, ...)
 ```

package/docs/integration/llm-provider.md CHANGED Viewed

@@ -31,7 +31,7 @@
 出关到 LLM 时，host 默认装配应当用 `formatAgentLlmMessages(messages, { fenceRegistry })`（来自 `@linnlabs/linnkit/context-manager`）；它走 native tool 回放形态，会自动把 sidecar 写回去。
-> ⚠️ **注意**：被工具历史压缩 / 历史摘要替换 / chat formatter 处理过的旧工具组，不再保证 sidecar 可回放——这是 token 预算与 chat 兼容层的设计取舍。如果某个 provider 强要求 reasoning blocks 必须随回传，请确保该工具组以原始 `tool_call_decision + tool_output` 结构进入下一轮上下文。
+> ⚠️ **注意**：被工具历史删除 / 工具历史可选压缩 / 历史摘要替换 / chat formatter 处理过的旧工具组，不再保证 sidecar 可回放——这是 token 预算与 chat 兼容层的设计取舍。如果某个 provider 强要求 reasoning blocks 必须随回传，请确保该工具组以原始 `tool_call_decision + tool_output` 结构进入下一轮上下文。
 ### 3.1 缺 sidecar 时怎么办

package/docs/integration/persistence.md CHANGED Viewed

@@ -11,6 +11,7 @@
 ## 1. linnkit 给你的合同
 - `Checkpointer`（来自 `@linnlabs/linnkit/runtime-kernel`，在 `graph` namespace 下）：`load` / `save` / `clear` 三个必需方法 + `peekMeta` / `list` 两个可选。
+- `Checkpointer` 的 key 参数叫 `checkpointKey`：它只是 EngineState 快照索引。普通顶层 run 可以让它等于 `conversationId`，但不要在 adapter 里假设两者永远同义；同步 child-run 会使用内部 checkpoint key 隔离执行状态。
 - `EventStore`（来自 `@linnlabs/linnkit/runtime-kernel`，在 `graph` namespace 下）：`append` / `range` / `latestEventId` 三个必需 + `truncate` 可选。配套 `createMonotonicEventIdFactory()` 帮你生成单调 id。
 - `RunRegistryStore`（来自 `@linnlabs/linnkit/runtime-kernel`，在 `runSupervisor` namespace 下）：run lifecycle 元数据落库。
 - `RuntimeEvent` / `EventEnvelope` / `PersistedEvent` 类型来自 `@linnlabs/linnkit/contracts` 与 `runtime-kernel`。

package/docs/integration/run-supervisor.md CHANGED Viewed

@@ -32,10 +32,12 @@ const handle = await supervisor.registerRun({
 ## 2. 接入规则
 - `runId` 建议由 host 显式传入。如果 host 已有稳定的 `turnId` / request id，可以直接用 `runId = turnId`，这样 `RunHandle.observe({ includePersisted: true })` 能复用 EventStore 里的 runId 索引。
+- `conversationId` 是 host 的审计/事件归属；`runId` 是本次 run 身份；`parentRunId` 只表达父子成本与审计关联。不要把 GraphExecutor 的 checkpoint key 当成这三个字段之一。
 - `RunHandle.signal` 是 runner 内部唯一信号来源；不要再给 GraphExecutor 另起一根 ad-hoc `AbortController`。
 - `AgentRunnerService.run()` 一类 host runner 应同步返回 `{ handle, result }`：UI 可以立刻拿 handle 做 cancel/observe/cost，执行结果继续等 `result`。
 - runner 生命周期必须显式写：启动前 `markRunning()`，正常结束 `markCompleted()`，异常结束 `markFailed()`，取消由 `handle.cancel({ reason })` 写 `cancelled`。
 - `WaitUserNode` 触发的 `requires_user_interaction` 是正式 pause 事实事件。事件必须带 `metadata.run_context.runId`，host runner 要把 `runUntilYield().events` 中的这类事件发布/持久化，并调用 `markAwaitingUser()`；`DefaultRunSupervisor` 也会订阅该事件作为兜底联动。这样 `supervisor.peek(runId).status` 才会从 `running` 变成 `awaiting_user`。
+- `registerRun()` / `spawnDetached()` 会把 `AgentSpec` 与 request 作为注册时快照保存；`spawnDetached()` 的 executor 也读取这份快照。调用方后续修改原始对象不会改变已经注册的后台 run。
 - `pause/resume/runTree/handleFailure` 仍是冷暂停/树管理/故障策略占位，调用时应抛 `NotImplementedError`，不要给假实现。
 ## 3. RunHandle 完整 API（截至 0.5.0）
@@ -66,4 +68,5 @@ const handle = await supervisor.registerRun({
 - 单测：显式 `runId` 注册后，`handle.runId` 和 `registryStore.load(runId)` 对齐。
 - 单测：同一个 `runId` 注册两次抛 `RunAlreadyRegisteredError`。
 - 单测：`parentSignal.abort('reason')` 后 `handle.signal.aborted === true`。
+- 单测：`spawnDetached()` executor 收到的 `runId / parentRunId / conversationId / AgentSpec / request / metadata` 与注册时一致。
 - 集成测：取消时 `RunRecord.errorIfAny.message` 能透传到你的 `stream_end.reason_message`。

package/docs/integration/tool-development-guide.md CHANGED Viewed

@@ -161,7 +161,7 @@ Returns top-K documents ranked by relevance, each with id / title / snippet.`;
 `tool_output.status` 是协议层契约，**驱动**三个下游消费者：
 1. **UI 渲染**：失败应该红色卡片 + 错误信息；伪装成功 → UI 显示绿色，但内容是错误，用户困惑
-2. **`toolHistoryCompressor`**：失败的工具可以被压缩成 "tool X failed"，伪装成功会被当成成功结果保留正文
+2. **工具历史可选压缩**：当 agent 显式使用 `toolHistory.retentionMode: 'compress'` 时，失败的工具可以被压缩成 "tool X failed"，伪装成功会被当成成功结果保留正文
 3. **`AuditEnvelope`**：tool retry / tool deny 等审计决策依赖 `tool_output.status` 准确
 **违反这一条的 bug 是最难排查的一类**。`throw` 一次，三处一致；伪装一次，三处全错。
@@ -195,9 +195,11 @@ for (const field of required) {
 ---
-## 5. `getExecutionSummary` —— 把工具产出压成历史摘要
+## 5. `getExecutionSummary` —— 可选压缩模式下的工具历史摘要
-`toolHistoryCompressor` 在 `strategy: 'per-run'` / `'per-pair'` 下，会用 `getExecutionSummary(output)` 把历史轮次的工具产出**压缩成一行摘要**——这是 linnkit 上下文工程的核心机制之一。
+默认情况下，未进入保留窗口的旧工具组会被直接删除，不会生成摘要。只有当 agent 显式设置 `toolHistory.retentionMode: 'compress'` 时，`toolHistoryCompressor` 才会用 `getExecutionSummary(output)` 把历史轮次的工具产出**压缩成一行摘要**。
+这条摘要只是进入后续 working memory 的候选历史工具交互，仍会受到工具组数量上限和 token budget 约束；不要把 `getExecutionSummary` 理解成“永久保留旧工具结果”的存储机制。
 **默认实现**（`BaseTool.getExecutionSummary` 已提供）：
@@ -223,7 +225,7 @@ getExecutionSummary(output: string): string {
 }
 ```
-**对 token 的影响**：在 `strategy: 'per-run'` 下，一次 run 的 N-1 历史轮次工具产出都会被压成 `getExecutionSummary` 一行摘要——一个高质量的 summary 能把工具历史的 token 占用从几万压到几百。这是 linnkit "对每一个发给 AI 的 token 进行精细化管理" 的真实落地点。
+**对 token 的影响**：在 `retentionMode: 'compress'` 下，未进入保留窗口的历史轮次工具产出会被压成 `getExecutionSummary` 一行摘要；一个高质量的 summary 能把单组候选历史工具交互的 token 占用从几万压到几百，但摘要是否进入最终 prompt 仍取决于 working memory 预算。默认 `retentionMode: 'drop'` 则直接删除这些旧工具组，不调用 `getExecutionSummary`。
 ---
@@ -326,7 +328,7 @@ const toolRuntime = new QuickstartMemoryToolRuntime([
 | 错误处理（throw vs 返回）| host（遵守 §3）| runtime 接住 throw → `tool_output.status = 'error'` |
 | 必填参数校验 | linnkit 协议层 | `BaseTool.validateArguments()` |
 | 超长 observation 治理 | linnkit 协议层 + host | `contextPolicy.toolOutput.observationGovernance` + `ObservationPreviewPort` |
-| 工具历史压缩 | linnkit 协议层 | `contextPolicy.toolHistory.strategy` + `getExecutionSummary` |
+| 工具历史保留 / 可选压缩 | linnkit 协议层 | `contextPolicy.toolHistory.strategy` + `contextPolicy.toolHistory.retentionMode` + `getExecutionSummary` |
 | 工具配对一致性 | linnkit 协议层 | tool 配对不变量 C10 + `ToolReplayProtocolGuard` |
 | 交互工具的 wait_user 路由 | linnkit 协议层 | `WaitUserNode` + `requires_user_interaction` 事件 |
 | `data` 字段名约定 / 前端 registry 注册 | host（工具作者 + 前端工程师）| linnkit 不规定 |

package/docs/integration/tool-history.md CHANGED Viewed

@@ -1,19 +1,21 @@
-# Tool History · 工具历史压缩策略
+# Tool History · 工具历史保留策略
-> **What** · 工具历史压缩策略配置 —— `per-pair` / `per-run` / `none` 三种策略 + `overflowStrategy` 溢出兜底。
+> **What** · 工具历史保留策略配置 —— `per-pair` / `per-run` / `none` 三种选择窗口 + `retentionMode` 处理旧工具组 + `overflowStrategy` 溢出兜底。
 > **When to read** · 上下文里工具调用反复占满 token；想配置工具调用历史的保留窗口，或决定窗口外的旧工具组是删除还是压缩；做长 run 任务的成本控制。
 > **Prerequisites** · [`tools.md`](./tools.md) · [`context-engineering.md` §5](./context-engineering.md)。
 > **Key exports** · `toolHistory` field in `AgentSpec.contextPolicy` from `@linnlabs/linnkit` · `ToolHistoryCompressor` preprocessor 由 framework 内置自动注入。
 > **Related** · [`context-engineering.md`](./context-engineering.md) ⭐ · [`tools.md`](./tools.md) · [`agent-registration-guide.md`](./agent-registration-guide.md) ⭐
-linnkit 的 agent preprocessor 支持三种工具历史压缩策略，host 可在 `AgentDefinition.config.contextPolicy.toolHistory` 中显式声明。
+linnkit 的 agent preprocessor 支持三种工具历史保留窗口，host 可在 `AgentDefinition.config.contextPolicy.toolHistory` 中显式声明。未进入保留窗口的旧工具组默认直接删除；确实需要旧摘要线索的 agent 可以显式设置 `retentionMode: 'compress'`。
+注意：`retentionMode: 'compress'` 只表示**预处理阶段先把旧工具组替换成摘要**，不表示这些摘要会永久进入后续每一次 LLM 输入。压缩摘要在 working memory 阶段仍被当作“历史工具交互”计数，会继续受到工具组上限和 token budget 约束。
 ## 1. 三种策略对比
 | 策略 | 适用场景 | 行为 | 风险 |
 |------|----------|------|------|
-| `per-pair` | 4K/8K 小上下文模型；需要强力控 token | 全局保留最近 N 组完整工具交互，其余压成自然语言摘要 | 可能跨 run 腰斩同一轮工具链，prompt cache prefix 不稳定 |
-| **`per-run`**（默认推荐）| 多步 agent、review、workspace 操作 | 按 `user_input` 划 run，完整保留最近 K 个历史 run 的工具序列 | token 使用量可能高于 per-pair |
+| `per-pair` | 4K/8K 小上下文模型；需要强力控 token | 全局保留最近 N 组完整工具交互，其余按 `retentionMode` 处理 | 可能跨 run 腰斩同一轮工具链，prompt cache prefix 不稳定 |
+| **`per-run`**（默认推荐）| 多步 agent、review、workspace 操作 | 按 `user_input` 划 run，完整保留最近 K 个历史 run 的工具序列，其余按 `retentionMode` 处理 | token 使用量可能高于 per-pair |
 | `none` | 200K+ 长上下文模型；调试回放；审计敏感链路 | 不做常规压缩，只保留安全阀 | 长历史会明显涨 token |
 **未传配置时默认走 `per-run` + `keepLatestRuns: 1`**。host 仍应在各自的 `AgentDefinition.config.contextPolicy.toolHistory` 中显式声明策略，避免依赖全局默认。
@@ -23,6 +25,7 @@ linnkit 的 agent preprocessor 支持三种工具历史压缩策略，host 可
 | 字段 | 默认 |
 |------|------|
 | `toolHistory.strategy` | `'per-run'` |
+| `toolHistory.retentionMode` | `'drop'`（未保留旧工具组直接删除；可显式设 `'compress'` 兼容旧摘要行为）|
 | `toolHistory.keepLatestRuns` | `1`（保留上一个 run 完整工具序列）|
 | `toolHistory.keepLatestToolPairs` | `2`（仅 `strategy='per-pair'` 时生效）|
 | `toolHistory.maxInteractionGroups` | `12` |
@@ -30,15 +33,29 @@ linnkit 的 agent preprocessor 支持三种工具历史压缩策略，host 可
 | `toolHistory.maxPairTokens` | `6000` |
 | `toolHistory.maxOutputSummaryTokens` | `1000` |
-## 3. 安全阀
+## 3. 压缩与不压缩的行为边界
+`toolHistory` 分两步生效，读配置时不要把这两步混在一起：
+| 阶段 | `retentionMode: 'drop'` | `retentionMode: 'compress'` |
+|------|--------------------------|------------------------------|
+| Preprocessor | 保留窗口外的旧完整工具组被整组删除，不生成替代消息 | 保留窗口外的旧完整工具组被替换成一条 `assistant.final_answer` 摘要 |
+| Working memory | 已删除的旧工具组不会再进入最终 prompt | 压缩摘要作为一组“历史工具交互”参与 P3 填充，仍受 `maxInteractionGroups` 和 token budget 限制 |
+| Provider replay | 只剩保留窗口内的 raw 工具组具备结构化 replay 能力 | 压缩摘要只保留自然语言线索，不再具备结构化 replay 能力 |
+因此旧策略可以概括为：**K 轮外先压缩，但压缩摘要不保证一直保留**。新默认策略可以概括为：**K 轮外直接删除，不再制造摘要消息**。
+## 4. 安全阀
 所有策略共用：
 - `maxInteractionGroups`：硬上限，默认 12
-- `overflowStrategy: 'keep-latest'`：超过上限时保留最近工具组，压缩更旧组
+- `overflowStrategy: 'keep-latest'`：预处理阶段的保留窗口超过上限时保留最近工具组，更旧组按 `retentionMode` 删除或压缩
 - `overflowStrategy: 'fail-fast'`：超过上限时抛 `ContextProviderError`，`code = 'TOOL_HISTORY_OVERFLOW'`，适合 CI 或生产 invariant
-## 4. `AgentSpecContextPolicy.toolHistory` 字段
+Working memory 阶段还会再用同一个 `maxInteractionGroups` 控制最终 prompt 中的历史工具交互数量。此处的历史工具交互同时包括 raw 工具组和 `retentionMode: 'compress'` 生成的压缩摘要。
+## 5. `AgentSpecContextPolicy.toolHistory` 字段
 ```ts
 interface AgentSpecContextPolicy {
@@ -52,7 +69,7 @@ interface AgentSpecContextPolicy {
   toolHistory?: {
     /**
-     * 压缩策略类型（默认 'per-run'）
+     * 保留窗口策略类型（默认 'per-run'）
      * - 'per-pair'：按工具对个数裁（旧默认；适合 4K/8K 等超紧上下文模型）
      * - 'per-run'：按 user_input 划 run 边界，保留最近 K 个 run 完整工具序列（prompt cache 友好；通用默认）
      * - 'none'：不做常规删除或压缩（适合 200K+ 长 context 模型；仅靠单 tool_output token cap 兜底）
@@ -61,6 +78,13 @@ interface AgentSpecContextPolicy {
      */
     strategy?: 'per-pair' | 'per-run' | 'none';
+    /**
+     * 未进入保留窗口的旧工具组如何处理（默认 'drop'）
+     * - 'drop'：整组删除旧 tool_calls/tool_output，不生成摘要；更利于减少 token 与避免伪 assistant answer
+     * - 'compress'：兼容旧行为，把旧工具组替换为自然语言摘要
+     */
+    retentionMode?: 'drop' | 'compress';
     /** strategy='per-pair' 时：保留最近 N 组完整工具对（默认 2）*/
     keepLatestToolPairs?: number;
@@ -94,7 +118,7 @@ interface AgentSpecContextPolicy {
 }
 ```
-## 5. AgentSpec 装配
+## 6. AgentSpec 装配
 AgentSpec schema 已落到 `@linnlabs/linnkit/contracts`，host 装配时可用 `contextPolicy.toolHistory` 控制策略：
@@ -118,7 +142,7 @@ const myAgentSpec: AgentSpec = {
 };
 ```
-## 6. 低层 preprocessor 注入
+## 7. 低层 preprocessor 注入
 测试或自定义 registry 也可以直接从默认 preprocessor registry 注入：
@@ -135,16 +159,18 @@ const registry = createDefaultAgentPreprocessorRegistry({
 });
 ```
-## 7. 默认值变更的兼容声明
+## 8. 默认值变更的兼容声明
-`strategy` 默认从历史隐式的 `per-pair`（N=2）→ `'per-run'`（K=1）。对所有 host 来说：
+`strategy` 默认从历史隐式的 `per-pair`（N=2）→ `'per-run'`（K=1），`retentionMode` 默认从旧摘要行为 → `'drop'`。对所有 host 来说：
-- 不会引入新 bug（per-run 是 per-pair 的超集——保留更多消息，不会少留）
-- 平均 history token 数 +20-40%（具体看 history 中工具调用密度）
+- 不改变当前 run 和保留窗口内 raw 工具组的结构化 replay 行为
+- 窗口外旧工具组不再保留摘要线索；依赖旧摘要做长程回忆的 agent 需要显式设 `retentionMode: 'compress'`
+- 未保留的旧工具组不再变成 assistant 摘要，prompt cache 前缀更稳定，也避免伪 `final_answer` 污染真实对话历史
 - prompt cache 命中率上升
 - host 想保持旧的保留窗口策略：在 AgentSpec 显式设 `toolHistory.strategy: 'per-pair'`
+- host 想保持旧摘要行为：显式设 `toolHistory.retentionMode: 'compress'`
-## 8. 每个 agent 显式声明（推荐做法）
+## 9. 每个 agent 显式声明（推荐做法）
 每个 agent 在自己的 `index.ts` 显式声明 `contextPolicy`，**不走预设模板、不依赖全局默认**：
@@ -175,7 +201,7 @@ export const researchAgent: AgentDefinition = {
 - 全局默认 per-run K=1 是合理兜底，但高密度子调度 agent 应该显式 K=2-3；translation / autocomplete / 内部子 agent 应该显式 N=0 极致省 token
 - 预设模板会增加间接耦合——换模型时不知道哪些预设需要联动改
-## 9. 用 ContextTrace 验证策略真的生效
+## 10. 用 ContextTrace 验证策略真的生效
 `toolHistory` 最终会影响 preprocessor 与 working-memory provider 的消息选择。调试时建议临时打开：

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@linnlabs/linnkit",
-  "version": "0.9.0",
+  "version": "0.10.0",
   "type": "module",
   "description": "A fine-grained context engineering framework for Agent applications — control every token sent to the model, with clear run lifecycle, audit records, and testable protocol boundaries.",
   "license": "MIT",
@@ -18,6 +18,7 @@
     "provenance": true
   },
   "files": [
+    "bin",
     "dist",
     "LICENSE",
     "CHANGELOG.md",
@@ -30,7 +31,7 @@
   "module": "./dist/index.js",
   "types": "./dist/index.d.ts",
   "bin": {
-    "linnkit": "./dist/cli.cjs"
+    "linnkit": "bin/linnkit.cjs"
   },
   "exports": {
     ".": {
@@ -104,7 +105,7 @@
     "vitest": "^3.2.4"
   },
   "linnkit": {
-    "phase": "released (0.9.0 — stream reasoning_details 归并 + ToolNode 完整消费 tool_calls batch)",
+    "phase": "released (0.10.0 — checkpointKey contract + child-run conversation alignment)",
     "sourceOfTruth": "CHANGELOG.md",
     "notes": [
       "子入口 ./runtime-kernel/events 是 browser-safe slim seam：仅暴露 events governance 纯函数，禁止扩展任何 Node-only 依赖（如 node:async_hooks / crypto / fs）",