@parkgogogo/openclaw-reflection 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # OpenClaw Reflection
2
2
 
3
+ <p align="center">
4
+ <img src="./assets/openclaw-reflection-logo.png" alt="OpenClaw Reflection logo" width="180" />
5
+ </p>
6
+
7
+ <p align="center"><strong>Make OpenClaw's native memory system sharper without replacing it.</strong></p>
8
+
3
9
  ![OpenClaw Plugin](https://img.shields.io/badge/OpenClaw-Plugin-111111?style=flat-square)
4
10
  ![TypeScript](https://img.shields.io/badge/TypeScript-5.x-3178c6?style=flat-square)
5
11
  ![memory_gate 18 cases](https://img.shields.io/badge/memory_gate-18%20benchmark%20cases-2ea043?style=flat-square)
@@ -7,8 +13,6 @@
7
13
 
8
14
  Chinese version: [README.zh-CN.md](./README.zh-CN.md)
9
15
 
10
- **Make OpenClaw's native memory system sharper without replacing it.**
11
-
12
16
  OpenClaw Reflection is an additive layer on top of OpenClaw's built-in Markdown memory system. It captures message flow, keeps thread noise out of long-term memory, writes durable knowledge into the same human-readable memory files OpenClaw already uses, and periodically consolidates them so your agent gets sharper over time instead of messier.
13
17
 
14
18
  ## Current Scope
@@ -103,6 +107,13 @@ Put the following under `plugins.entries.openclaw-reflection` in your OpenClaw c
103
107
 
104
108
  Once the gateway restarts, Reflection will begin listening to `message_received` and `before_message_write`, then writing curated memory files into your configured `workspaceDir`.
105
109
 
110
+ ### Observability command
111
+
112
+ - Reflection now writes an independent write_guardian audit log to:
113
+ - `<workspaceDir>/.openclaw-reflection/write-guardian.log.jsonl`
114
+ - Register command: `/openclaw-reflection`
115
+ - Returns the most recent 10 write_guardian behaviors (written/refused/failed/skipped), including decision, target file, and reason.
116
+
106
117
  ## What You Get
107
118
 
108
119
  | You want | Reflection gives you |
@@ -114,15 +125,7 @@ Once the gateway restarts, Reflection will begin listening to `message_received`
114
125
 
115
126
  ## How It Works
116
127
 
117
- ```mermaid
118
- flowchart LR
119
- A["Incoming conversation"] --> B["Session buffer"]
120
- B --> C["memory_gate"]
121
- C -->|durable fact| D["write_guardian"]
122
- C -->|thread noise| E["No write"]
123
- D --> F["MEMORY.md / USER.md / SOUL.md / IDENTITY.md / TOOLS.md"]
124
- F --> G["Scheduled consolidation"]
125
- ```
128
+ ![OpenClaw Reflection flowchart](./assets/memory-flowchart.png)
126
129
 
127
130
  In practice, the pipeline is simple:
128
131
 
@@ -200,10 +203,43 @@ pnpm run typecheck
200
203
  pnpm run eval:memory-gate
201
204
  pnpm run eval:write-guardian
202
205
  pnpm run eval:all
206
+
207
+ node evals/run.mjs \
208
+ --suite memory-gate \
209
+ --models-config evals/models.json \
210
+ --baseline grok-fast \
211
+ --output evals/results/$(date +%F)-memory-gate-matrix.json \
212
+ --markdown-output evals/results/$(date +%F)-memory-gate-matrix.md
203
213
  ```
204
214
 
215
+ `evals/models.json` defines only the comparison matrix. The shared provider endpoint and key still come from `EVAL_BASE_URL` and `EVAL_API_KEY`. JSON output is the source of truth for automation and history, while the Markdown artifact is the readable leaderboard summary.
216
+
205
217
  More eval details: [evals/README.md](./evals/README.md)
206
218
 
219
+ ## Model Selection
220
+
221
+ Benchmark date: `2026-03-09`
222
+ Scope: `memory_gate` only, `18` cases, shared OpenRouter-compatible `EVAL_*` route
223
+
224
+ | Model | Pass/Total | Accuracy | Errors (P/S/E) | Recommendation | Best For |
225
+ | --- | --- | --- | --- | --- | --- |
226
+ | `x-ai/grok-4.1-fast` | `17/18` | `94.4%` | `0/0/0` | Default baseline | Daily eval baseline |
227
+ | `qwen/qwen3.5-flash-02-23` | `17/18` | `94.4%` | `0/1/0` | Good backup option | Cost-sensitive cross-checks |
228
+ | `google/gemini-2.5-flash-lite` | `16/18` | `88.9%` | `0/0/0` | Fast iteration candidate | Cheap prompt iteration |
229
+ | `inception/mercury-2` | `11/18` | `61.1%` | `0/0/0` | Not recommended as default | Exploratory comparisons only |
230
+ | `minimax/minimax-m2.5` | `9/18` | `50.0%` | `0/0/0` | Not recommended as default | Occasional sanity checks only |
231
+ | `openai/gpt-4o-mini` | `4/18` | `22.2%` | `18/0/0` | Not recommended on current route | Avoid on current OpenRouter path |
232
+
233
+ How to choose:
234
+
235
+ - Default to `x-ai/grok-4.1-fast` because it had the best overall stability in this round with no internal errors.
236
+ - Use `qwen/qwen3.5-flash-02-23` as the strongest backup when you want similar accuracy but can tolerate one schema failure in this benchmark.
237
+ - Use `google/gemini-2.5-flash-lite` for cheaper, faster prompt iteration when slightly lower boundary accuracy is acceptable.
238
+ - Avoid `inception/mercury-2` and `minimax/minimax-m2.5` as defaults because they frequently collapse `SOUL`, `IDENTITY`, or `NO_WRITE` boundaries into the wrong bucket.
239
+ - Avoid `openai/gpt-4o-mini` on the current OpenRouter/Azure-backed route because all `18` cases surfaced provider-side structured-output errors.
240
+
241
+ Source artifact: [2026-03-09-memory-gate-openrouter-model-benchmark.md](./evals/results/2026-03-09-memory-gate-openrouter-model-benchmark.md)
242
+
207
243
  ## Links
208
244
 
209
245
  - OpenClaw plugin docs: [docs.openclaw.ai/tools/plugin](https://docs.openclaw.ai/tools/plugin)
package/README.zh-CN.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # OpenClaw Reflection
2
2
 
3
+ <p align="center">
4
+ <img src="./assets/openclaw-reflection-logo.png" alt="OpenClaw Reflection logo" width="180" />
5
+ </p>
6
+
7
+ <p align="center"><strong>在不替换 OpenClaw 原生记忆体系的前提下,让 Markdown 记忆更干净、更稳定、更可持续。</strong></p>
8
+
3
9
  英文版: [README.md](./README.md)
4
10
 
5
11
  ![OpenClaw Plugin](https://img.shields.io/badge/OpenClaw-Plugin-111111?style=flat-square)
@@ -7,8 +13,6 @@
7
13
  ![memory_gate 18 cases](https://img.shields.io/badge/memory_gate-18%20benchmark%20cases-2ea043?style=flat-square)
8
14
  ![write_guardian 14 cases](https://img.shields.io/badge/write_guardian-14%20benchmark%20cases-2ea043?style=flat-square)
9
15
 
10
- **在不替换 OpenClaw 原生记忆体系的前提下,让 Markdown 记忆更干净、更稳定、更可持续。**
11
-
12
16
  OpenClaw Reflection 是叠加在 OpenClaw 原生 Markdown memory 之上的一层增强插件。它负责监听消息流,过滤线程噪音,把真正长期有效的信息写回 OpenClaw 的核心记忆文件,并定期整理这些文件,避免长期使用后越记越乱。
13
17
 
14
18
  ## 当前支持范围
@@ -98,6 +102,13 @@ openclaw plugins install @parkgogogo/openclaw-reflection
98
102
 
99
103
  Gateway 重启后,Reflection 就会开始监听 `message_received` 和 `before_message_write`,并把整理后的长期信息写入你配置的 `workspaceDir`。
100
104
 
105
+ ### 可观测性命令
106
+
107
+ - Reflection 现在会给 write_guardian 单独写一份审计日志:
108
+ - `<workspaceDir>/.openclaw-reflection/write-guardian.log.jsonl`
109
+ - 注册命令:`/openclaw-reflection`
110
+ - 返回最近 10 条 write_guardian 行为(written/refused/failed/skipped),包含 decision、目标文件和原因。
111
+
101
112
  ## 你会得到什么
102
113
 
103
114
  | 你想要的能力 | Reflection 提供的结果 |
@@ -109,15 +120,7 @@ Gateway 重启后,Reflection 就会开始监听 `message_received` 和 `before
109
120
 
110
121
  ## 它如何工作
111
122
 
112
- ```mermaid
113
- flowchart LR
114
- A["Incoming conversation"] --> B["Session buffer"]
115
- B --> C["memory_gate"]
116
- C -->|durable fact| D["write_guardian"]
117
- C -->|thread noise| E["No write"]
118
- D --> F["MEMORY.md / USER.md / SOUL.md / IDENTITY.md / TOOLS.md"]
119
- F --> G["Scheduled consolidation"]
120
- ```
123
+ ![OpenClaw Reflection flowchart](./assets/memory-flowchart.png)
121
124
 
122
125
  流程很直接:
123
126
 
@@ -174,10 +177,43 @@ pnpm run typecheck
174
177
  pnpm run eval:memory-gate
175
178
  pnpm run eval:write-guardian
176
179
  pnpm run eval:all
180
+
181
+ node evals/run.mjs \
182
+ --suite memory-gate \
183
+ --models-config evals/models.json \
184
+ --baseline grok-fast \
185
+ --output evals/results/$(date +%F)-memory-gate-matrix.json \
186
+ --markdown-output evals/results/$(date +%F)-memory-gate-matrix.md
177
187
  ```
178
188
 
189
+ `evals/models.json` 只用来定义多模型对比矩阵;共享的 provider endpoint 和 key 仍然来自 `EVAL_BASE_URL` 与 `EVAL_API_KEY`。JSON 输出是后续自动化和历史追踪的基准,Markdown 输出则是给人看的 leaderboard 摘要。
190
+
179
191
  更多评测说明见 [evals/README.md](./evals/README.md)。
180
192
 
193
+ ## 模型选择
194
+
195
+ 评测日期:`2026-03-09`
196
+ 范围:仅 `memory_gate`,共 `18` 个 case,共享 OpenRouter 兼容的 `EVAL_*` 路由
197
+
198
+ | 模型 | Pass/Total | 准确率 | 错误数 (P/S/E) | 建议 | 适用场景 |
199
+ | --- | --- | --- | --- | --- | --- |
200
+ | `x-ai/grok-4.1-fast` | `17/18` | `94.4%` | `0/0/0` | 默认基线 | 日常 eval 基线 |
201
+ | `qwen/qwen3.5-flash-02-23` | `17/18` | `94.4%` | `0/1/0` | 优秀备选 | 对成本敏感的交叉验证 |
202
+ | `google/gemini-2.5-flash-lite` | `16/18` | `88.9%` | `0/0/0` | 便宜快速候选 | 低成本 prompt 迭代 |
203
+ | `inception/mercury-2` | `11/18` | `61.1%` | `0/0/0` | 不建议默认使用 | 仅做探索性对比 |
204
+ | `minimax/minimax-m2.5` | `9/18` | `50.0%` | `0/0/0` | 不建议默认使用 | 偶尔做 sanity check |
205
+ | `openai/gpt-4o-mini` | `4/18` | `22.2%` | `18/0/0` | 当前路由下不建议使用 | 避免在当前 OpenRouter 路径使用 |
206
+
207
+ 如何选择:
208
+
209
+ - 默认优先用 `x-ai/grok-4.1-fast`,因为这一轮里它的整体稳定性最好,而且没有内部错误。
210
+ - 如果想要接近的准确率,同时能接受一次 schema 失败,可以把 `qwen/qwen3.5-flash-02-23` 作为最强备选。
211
+ - 如果更看重低成本和快速迭代,可以用 `google/gemini-2.5-flash-lite`,但要接受它在部分 `TOOLS` 边界上略弱。
212
+ - 不要把 `inception/mercury-2` 和 `minimax/minimax-m2.5` 当默认基线,因为它们经常把 `SOUL`、`IDENTITY` 或 `NO_WRITE` 判到错误类别。
213
+ - 当前 OpenRouter/Azure 路由下不要选 `openai/gpt-4o-mini`,因为 `18` 个 case 全都触发了 provider 侧 structured-output 错误。
214
+
215
+ 源结果见:[2026-03-09-memory-gate-openrouter-model-benchmark.md](./evals/results/2026-03-09-memory-gate-openrouter-model-benchmark.md)
216
+
181
217
  ## 链接
182
218
 
183
219
  - OpenClaw plugin docs: [docs.openclaw.ai/tools/plugin](https://docs.openclaw.ai/tools/plugin)
Binary file
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "@parkgogogo/openclaw-reflection",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "OpenClaw plugin that enhances native Markdown memory with filtering, curation, and consolidation",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
7
7
  "files": [
8
+ "assets/",
8
9
  "src/",
9
10
  "openclaw.plugin.json",
10
11
  "README.md",
@@ -20,8 +21,9 @@
20
21
  "url": "https://github.com/parkgogogo/openclaw-reflection/issues"
21
22
  },
22
23
  "scripts": {
23
- "build": "tsc --noEmit",
24
+ "build": "tsc -p tsconfig.json",
24
25
  "clean": "rm -rf logs",
26
+ "test": "pnpm run build && node --test tests/*.test.mjs",
25
27
  "typecheck": "tsc --noEmit",
26
28
  "e2e:openclaw-plugin": "bash scripts/e2e-openclaw-plugin.sh",
27
29
  "eval:memory-gate": "pnpm exec tsc && node evals/run.mjs --suite memory-gate",
package/src/evals/cli.ts CHANGED
@@ -7,6 +7,11 @@ export interface EvalCliOptions {
7
7
  sharedDatasetPath?: string;
8
8
  memoryGateDatasetPath?: string;
9
9
  writeGuardianDatasetPath?: string;
10
+ modelsConfigPath?: string;
11
+ models?: string[];
12
+ baselineModelId?: string;
13
+ outputPath?: string;
14
+ markdownOutputPath?: string;
10
15
  }
11
16
 
12
17
  function getArgValue(argv: string[], flag: string): string | undefined {
@@ -34,6 +39,11 @@ function parseSuite(value: string | undefined): EvalSuite {
34
39
  }
35
40
 
36
41
  export function parseEvalCliOptions(argv: string[]): EvalCliOptions {
42
+ const models = getArgValue(argv, "--models")
43
+ ?.split(",")
44
+ .map((modelId) => modelId.trim())
45
+ .filter((modelId) => modelId !== "");
46
+
37
47
  return {
38
48
  suite: parseSuite(getArgValue(argv, "--suite")),
39
49
  useJudge: !argv.includes("--no-judge"),
@@ -41,5 +51,10 @@ export function parseEvalCliOptions(argv: string[]): EvalCliOptions {
41
51
  sharedDatasetPath: getArgValue(argv, "--shared-dataset"),
42
52
  memoryGateDatasetPath: getArgValue(argv, "--memory-gate-dataset"),
43
53
  writeGuardianDatasetPath: getArgValue(argv, "--write-guardian-dataset"),
54
+ modelsConfigPath: getArgValue(argv, "--models-config"),
55
+ models,
56
+ baselineModelId: getArgValue(argv, "--baseline"),
57
+ outputPath: getArgValue(argv, "--output"),
58
+ markdownOutputPath: getArgValue(argv, "--markdown-output"),
44
59
  };
45
60
  }
@@ -0,0 +1,248 @@
1
+ import type {
2
+ MemoryGateCaseResult,
3
+ SingleModelRunReport,
4
+ WriteGuardianCaseResult,
5
+ } from "./runner.js";
6
+ import type { EvalSuite } from "./cli.js";
7
+
8
+ export interface RankedModelReport {
9
+ modelId: string;
10
+ passed: number;
11
+ total: number;
12
+ errorCounts?: SingleModelRunReport["summary"]["errorCounts"];
13
+ }
14
+
15
+ export interface BaselineDiff {
16
+ modelId: string;
17
+ regressedCases: string[];
18
+ improvedCases: string[];
19
+ disagreementCases: string[];
20
+ }
21
+
22
+ export interface HardestCase {
23
+ scenarioId: string;
24
+ failedBy: string[];
25
+ }
26
+
27
+ export interface DisagreementCase {
28
+ scenarioId: string;
29
+ modelIds: string[];
30
+ }
31
+
32
+ export interface MultiModelComparisonReport {
33
+ runId: string;
34
+ timestamp: string;
35
+ suite: EvalSuite;
36
+ baselineModelId?: string;
37
+ models: SingleModelRunReport[];
38
+ comparison: {
39
+ ranking: RankedModelReport[];
40
+ baselineDiffs: BaselineDiff[];
41
+ hardestCases: HardestCase[];
42
+ disagreementCases: DisagreementCase[];
43
+ };
44
+ }
45
+
46
+ type EvalCaseResult = MemoryGateCaseResult | WriteGuardianCaseResult;
47
+
48
+ function getScenarioId(result: EvalCaseResult): string {
49
+ return result.scenarioId;
50
+ }
51
+
52
+ function getTotalErrors(report: SingleModelRunReport): number {
53
+ const errorCounts = report.summary.errorCounts;
54
+ if (!errorCounts) {
55
+ return 0;
56
+ }
57
+
58
+ return (
59
+ errorCounts.provider_error +
60
+ errorCounts.schema_error +
61
+ errorCounts.execution_error
62
+ );
63
+ }
64
+
65
+ function getCaseSignature(result: EvalCaseResult): string {
66
+ if ("actualDecision" in result) {
67
+ return JSON.stringify({
68
+ pass: result.pass,
69
+ actualDecision: result.actualDecision,
70
+ decisionPass: result.decisionPass,
71
+ candidatePass: result.candidatePass,
72
+ errorType: result.errorType,
73
+ });
74
+ }
75
+
76
+ return JSON.stringify({
77
+ pass: result.pass,
78
+ actualShouldWrite: result.actualShouldWrite,
79
+ toolTrace: result.actualToolTrace,
80
+ });
81
+ }
82
+
83
+ function buildResultMap(report: SingleModelRunReport): Map<string, EvalCaseResult> {
84
+ return new Map(
85
+ report.results.map((result) => [getScenarioId(result as EvalCaseResult), result as EvalCaseResult])
86
+ );
87
+ }
88
+
89
+ export function rankModelReports(
90
+ reports: SingleModelRunReport[]
91
+ ): RankedModelReport[] {
92
+ return reports
93
+ .map((report) => ({
94
+ modelId: report.modelId,
95
+ passed: report.summary.passed,
96
+ total: report.summary.total,
97
+ errorCounts: report.summary.errorCounts,
98
+ totalErrors: getTotalErrors(report),
99
+ }))
100
+ .sort((left, right) => {
101
+ if (right.passed !== left.passed) {
102
+ return right.passed - left.passed;
103
+ }
104
+
105
+ if (left.totalErrors !== right.totalErrors) {
106
+ return left.totalErrors - right.totalErrors;
107
+ }
108
+
109
+ return left.modelId.localeCompare(right.modelId);
110
+ })
111
+ .map(({ totalErrors: _totalErrors, ...report }) => report);
112
+ }
113
+
114
+ export function buildBaselineDiffs(
115
+ reports: SingleModelRunReport[],
116
+ baselineModelId: string
117
+ ): BaselineDiff[] {
118
+ const baselineReport = reports.find((report) => report.modelId === baselineModelId);
119
+ if (!baselineReport) {
120
+ throw new Error(`Missing baseline model: ${baselineModelId}`);
121
+ }
122
+
123
+ const baselineResults = buildResultMap(baselineReport);
124
+
125
+ return reports
126
+ .filter((report) => report.modelId !== baselineModelId)
127
+ .map((report) => {
128
+ const reportResults = buildResultMap(report);
129
+ const regressedCases: string[] = [];
130
+ const improvedCases: string[] = [];
131
+ const disagreementCases: string[] = [];
132
+
133
+ for (const [scenarioId, baselineResult] of baselineResults.entries()) {
134
+ const candidateResult = reportResults.get(scenarioId);
135
+ if (!candidateResult) {
136
+ continue;
137
+ }
138
+
139
+ if (baselineResult.pass && !candidateResult.pass) {
140
+ regressedCases.push(scenarioId);
141
+ }
142
+
143
+ if (!baselineResult.pass && candidateResult.pass) {
144
+ improvedCases.push(scenarioId);
145
+ }
146
+
147
+ if (getCaseSignature(baselineResult) !== getCaseSignature(candidateResult)) {
148
+ disagreementCases.push(scenarioId);
149
+ }
150
+ }
151
+
152
+ return {
153
+ modelId: report.modelId,
154
+ regressedCases,
155
+ improvedCases,
156
+ disagreementCases,
157
+ };
158
+ });
159
+ }
160
+
161
+ export function findHardestCases(
162
+ reports: SingleModelRunReport[]
163
+ ): HardestCase[] {
164
+ const failedByScenario = new Map<string, string[]>();
165
+
166
+ for (const report of reports) {
167
+ for (const result of report.results) {
168
+ const caseResult = result as EvalCaseResult;
169
+ if (caseResult.pass) {
170
+ continue;
171
+ }
172
+
173
+ const scenarioId = getScenarioId(caseResult);
174
+ const failedBy = failedByScenario.get(scenarioId) ?? [];
175
+ failedBy.push(report.modelId);
176
+ failedByScenario.set(scenarioId, failedBy);
177
+ }
178
+ }
179
+
180
+ return [...failedByScenario.entries()]
181
+ .map(([scenarioId, failedBy]) => ({
182
+ scenarioId,
183
+ failedBy,
184
+ }))
185
+ .sort((left, right) => {
186
+ if (right.failedBy.length !== left.failedBy.length) {
187
+ return right.failedBy.length - left.failedBy.length;
188
+ }
189
+
190
+ return left.scenarioId.localeCompare(right.scenarioId);
191
+ });
192
+ }
193
+
194
+ export function findDisagreementCases(
195
+ reports: SingleModelRunReport[]
196
+ ): DisagreementCase[] {
197
+ const cases = new Map<string, Array<{ modelId: string; signature: string }>>();
198
+
199
+ for (const report of reports) {
200
+ for (const result of report.results) {
201
+ const caseResult = result as EvalCaseResult;
202
+ const scenarioId = getScenarioId(caseResult);
203
+ const entries = cases.get(scenarioId) ?? [];
204
+ entries.push({
205
+ modelId: report.modelId,
206
+ signature: getCaseSignature(caseResult),
207
+ });
208
+ cases.set(scenarioId, entries);
209
+ }
210
+ }
211
+
212
+ return [...cases.entries()]
213
+ .filter(([, entries]) => new Set(entries.map((entry) => entry.signature)).size > 1)
214
+ .map(([scenarioId, entries]) => ({
215
+ scenarioId,
216
+ modelIds: entries.map((entry) => entry.modelId),
217
+ }))
218
+ .sort((left, right) => left.scenarioId.localeCompare(right.scenarioId));
219
+ }
220
+
221
+ export function buildMultiModelComparisonReport(input: {
222
+ suite: EvalSuite;
223
+ modelReports: SingleModelRunReport[];
224
+ baselineModelId?: string;
225
+ timestamp?: string;
226
+ runId?: string;
227
+ }): MultiModelComparisonReport {
228
+ const timestamp = input.timestamp ?? new Date().toISOString();
229
+ const baselineModelId =
230
+ input.baselineModelId ??
231
+ (input.modelReports.length > 0 ? input.modelReports[0].modelId : undefined);
232
+
233
+ return {
234
+ runId: input.runId ?? `${input.suite}-${timestamp}`,
235
+ timestamp,
236
+ suite: input.suite,
237
+ baselineModelId,
238
+ models: input.modelReports,
239
+ comparison: {
240
+ ranking: rankModelReports(input.modelReports),
241
+ baselineDiffs: baselineModelId
242
+ ? buildBaselineDiffs(input.modelReports, baselineModelId)
243
+ : [],
244
+ hardestCases: findHardestCases(input.modelReports),
245
+ disagreementCases: findDisagreementCases(input.modelReports),
246
+ },
247
+ };
248
+ }
@@ -0,0 +1,125 @@
1
+ import { readFile } from "node:fs/promises";
2
+
3
+ export interface EvalModelProfile {
4
+ id: string;
5
+ label: string;
6
+ model: string;
7
+ enabled: boolean;
8
+ tags?: string[];
9
+ }
10
+
11
+ export interface ResolvedEvalModelProfile extends EvalModelProfile {
12
+ baseURL: string;
13
+ apiKey: string;
14
+ }
15
+
16
+ interface LoadEvalModelProfilesInput {
17
+ configPath: string;
18
+ selectedModelIds?: string[];
19
+ env?: NodeJS.ProcessEnv;
20
+ }
21
+
22
+ function isRecord(value: unknown): value is Record<string, unknown> {
23
+ return typeof value === "object" && value !== null && !Array.isArray(value);
24
+ }
25
+
26
+ function parseEvalModelProfile(value: unknown): EvalModelProfile {
27
+ if (!isRecord(value)) {
28
+ throw new Error("Eval model profile must be an object");
29
+ }
30
+
31
+ const {
32
+ id,
33
+ label,
34
+ model,
35
+ enabled,
36
+ tags,
37
+ } = value;
38
+
39
+ if (typeof id !== "string" || id.trim() === "") {
40
+ throw new Error("Eval model profile id must be a non-empty string");
41
+ }
42
+
43
+ if (typeof label !== "string" || label.trim() === "") {
44
+ throw new Error(`Eval model profile ${id} label must be a non-empty string`);
45
+ }
46
+
47
+ if (typeof model !== "string" || model.trim() === "") {
48
+ throw new Error(`Eval model profile ${id} model must be a non-empty string`);
49
+ }
50
+
51
+ if (typeof enabled !== "boolean") {
52
+ throw new Error(`Eval model profile ${id} enabled must be a boolean`);
53
+ }
54
+
55
+ if (
56
+ tags !== undefined &&
57
+ (!Array.isArray(tags) || tags.some((tag) => typeof tag !== "string"))
58
+ ) {
59
+ throw new Error(`Eval model profile ${id} tags must be a string array`);
60
+ }
61
+
62
+ return {
63
+ id,
64
+ label,
65
+ model,
66
+ enabled,
67
+ tags,
68
+ };
69
+ }
70
+
71
+ function parseEvalModelConfig(content: string): EvalModelProfile[] {
72
+ const parsed: unknown = JSON.parse(content);
73
+
74
+ if (!isRecord(parsed) || !Array.isArray(parsed.profiles)) {
75
+ throw new Error("Eval model config must contain a profiles array");
76
+ }
77
+
78
+ return parsed.profiles.map((profile) => parseEvalModelProfile(profile));
79
+ }
80
+
81
+ export async function loadEvalModelProfiles(
82
+ input: LoadEvalModelProfilesInput
83
+ ): Promise<ResolvedEvalModelProfile[]> {
84
+ const env = input.env ?? process.env;
85
+ const baseURL = env.EVAL_BASE_URL;
86
+ const apiKey = env.EVAL_API_KEY;
87
+ if (
88
+ typeof baseURL !== "string" ||
89
+ baseURL.trim() === "" ||
90
+ typeof apiKey !== "string" ||
91
+ apiKey.trim() === ""
92
+ ) {
93
+ throw new Error(
94
+ "Missing required env vars for model comparison: EVAL_BASE_URL, EVAL_API_KEY"
95
+ );
96
+ }
97
+
98
+ const profiles = parseEvalModelConfig(await readFile(input.configPath, "utf8"));
99
+ const enabledProfiles = profiles.filter((profile) => profile.enabled);
100
+
101
+ if (enabledProfiles.length === 0) {
102
+ throw new Error("Eval model config has no enabled profiles");
103
+ }
104
+
105
+ const selectedModelIds =
106
+ input.selectedModelIds?.filter((modelId) => modelId.trim() !== "") ?? [];
107
+
108
+ const filteredProfiles =
109
+ selectedModelIds.length === 0
110
+ ? enabledProfiles
111
+ : selectedModelIds.map((modelId) => {
112
+ const profile = enabledProfiles.find((candidate) => candidate.id === modelId);
113
+ if (!profile) {
114
+ throw new Error(`Unknown model ids: ${modelId}`);
115
+ }
116
+
117
+ return profile;
118
+ });
119
+
120
+ return filteredProfiles.map((profile) => ({
121
+ ...profile,
122
+ baseURL,
123
+ apiKey,
124
+ }));
125
+ }
@@ -0,0 +1,123 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
+
4
+ import type { MultiModelComparisonReport } from "./comparison.js";
5
+
6
+ function formatErrorCounts(
7
+ errorCounts: MultiModelComparisonReport["models"][number]["summary"]["errorCounts"]
8
+ ): string {
9
+ if (!errorCounts) {
10
+ return "0/0/0";
11
+ }
12
+
13
+ return `${errorCounts.provider_error}/${errorCounts.schema_error}/${errorCounts.execution_error}`;
14
+ }
15
+
16
+ export function renderComparisonMarkdown(
17
+ report: MultiModelComparisonReport
18
+ ): string {
19
+ const lines = [
20
+ "# Eval Comparison Report",
21
+ "",
22
+ `- Run ID: ${report.runId}`,
23
+ `- Timestamp: ${report.timestamp}`,
24
+ `- Suite: ${report.suite}`,
25
+ ];
26
+
27
+ if (report.baselineModelId) {
28
+ lines.push(`- Baseline: ${report.baselineModelId}`);
29
+ }
30
+
31
+ lines.push(
32
+ "",
33
+ "## Leaderboard",
34
+ "",
35
+ "| Model | Passed | Total | Errors (provider/schema/execution) |",
36
+ "| --- | --- | --- | --- |"
37
+ );
38
+
39
+ for (const entry of report.comparison.ranking) {
40
+ lines.push(
41
+ `| ${entry.modelId} | ${entry.passed} | ${entry.total} | ${formatErrorCounts(
42
+ entry.errorCounts
43
+ )} |`
44
+ );
45
+ }
46
+
47
+ lines.push("", "## Baseline Diffs", "");
48
+ if (report.comparison.baselineDiffs.length === 0) {
49
+ lines.push("No baseline diffs.");
50
+ } else {
51
+ for (const diff of report.comparison.baselineDiffs) {
52
+ lines.push(`### ${diff.modelId}`);
53
+ lines.push(`- Regressed: ${diff.regressedCases.join(", ") || "(none)"}`);
54
+ lines.push(`- Improved: ${diff.improvedCases.join(", ") || "(none)"}`);
55
+ lines.push(
56
+ `- Disagreements: ${diff.disagreementCases.join(", ") || "(none)"}`
57
+ );
58
+ lines.push("");
59
+ }
60
+ }
61
+
62
+ lines.push("## Hardest Cases", "");
63
+ if (report.comparison.hardestCases.length === 0) {
64
+ lines.push("No failed cases.");
65
+ } else {
66
+ for (const hardestCase of report.comparison.hardestCases) {
67
+ lines.push(
68
+ `- ${hardestCase.scenarioId}: ${hardestCase.failedBy.join(", ")}`
69
+ );
70
+ }
71
+ }
72
+
73
+ lines.push("", "## Disagreement Cases", "");
74
+ if (report.comparison.disagreementCases.length === 0) {
75
+ lines.push("No disagreement cases.");
76
+ } else {
77
+ for (const disagreement of report.comparison.disagreementCases) {
78
+ lines.push(
79
+ `- ${disagreement.scenarioId}: ${disagreement.modelIds.join(", ")}`
80
+ );
81
+ }
82
+ }
83
+
84
+ return `${lines.join("\n")}\n`;
85
+ }
86
+
87
+ export async function writeComparisonReports(input: {
88
+ report: MultiModelComparisonReport;
89
+ outputPath?: string;
90
+ markdownOutputPath?: string;
91
+ }): Promise<{
92
+ jsonWritten: boolean;
93
+ markdownWritten: boolean;
94
+ writtenPaths: string[];
95
+ }> {
96
+ const writtenPaths: string[] = [];
97
+
98
+ if (input.outputPath) {
99
+ await mkdir(path.dirname(input.outputPath), { recursive: true });
100
+ await writeFile(
101
+ input.outputPath,
102
+ `${JSON.stringify(input.report, null, 2)}\n`,
103
+ "utf8"
104
+ );
105
+ writtenPaths.push(input.outputPath);
106
+ }
107
+
108
+ if (input.markdownOutputPath) {
109
+ await mkdir(path.dirname(input.markdownOutputPath), { recursive: true });
110
+ await writeFile(
111
+ input.markdownOutputPath,
112
+ renderComparisonMarkdown(input.report),
113
+ "utf8"
114
+ );
115
+ writtenPaths.push(input.markdownOutputPath);
116
+ }
117
+
118
+ return {
119
+ jsonWritten: Boolean(input.outputPath),
120
+ markdownWritten: Boolean(input.markdownOutputPath),
121
+ writtenPaths,
122
+ };
123
+ }
@@ -5,6 +5,7 @@ import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
5
5
  import { LLMService } from "../llm/service.js";
6
6
  import { MemoryGateAnalyzer } from "../memory-gate/analyzer.js";
7
7
  import { WriteGuardian } from "../write-guardian/index.js";
8
+ import type { EvalSuite } from "./cli.js";
8
9
  import type {
9
10
  AgentStep,
10
11
  LLMService as LLMServiceContract,
@@ -59,6 +60,7 @@ export interface MemoryGateCaseResult {
59
60
  expectedDecision: MemoryGateOutput["decision"];
60
61
  actualCandidateFact?: string;
61
62
  expectedCandidateFact?: string;
63
+ errorType?: "provider_error" | "schema_error" | "execution_error";
62
64
  error?: string;
63
65
  }
64
66
 
@@ -77,6 +79,21 @@ export interface WriteGuardianCaseResult {
77
79
  export interface BenchmarkSummary {
78
80
  total: number;
79
81
  passed: number;
82
+ errorCounts?: {
83
+ provider_error: number;
84
+ schema_error: number;
85
+ execution_error: number;
86
+ };
87
+ }
88
+
89
+ export interface SingleModelRunReport {
90
+ modelId: string;
91
+ modelLabel: string;
92
+ suite: EvalSuite;
93
+ startedAt: string;
94
+ finishedAt: string;
95
+ summary: BenchmarkSummary;
96
+ results: MemoryGateCaseResult[] | WriteGuardianCaseResult[];
80
97
  }
81
98
 
82
99
  export interface Judge {
@@ -138,6 +155,38 @@ function normalizeFileContent(content: string): string {
138
155
  return normalized.endsWith("\n") ? normalized : `${normalized}\n`;
139
156
  }
140
157
 
158
+ function createEmptyErrorCounts(): NonNullable<BenchmarkSummary["errorCounts"]> {
159
+ return {
160
+ provider_error: 0,
161
+ schema_error: 0,
162
+ execution_error: 0,
163
+ };
164
+ }
165
+
166
+ function classifyMemoryGateError(
167
+ message: string | undefined
168
+ ): MemoryGateCaseResult["errorType"] | undefined {
169
+ if (!message) {
170
+ return undefined;
171
+ }
172
+
173
+ if (message.includes("Provider request failed")) {
174
+ return "provider_error";
175
+ }
176
+
177
+ if (message.includes("Schema validation failed")) {
178
+ return "schema_error";
179
+ }
180
+
181
+ return undefined;
182
+ }
183
+
184
+ export function buildSingleModelRunReport(
185
+ input: SingleModelRunReport
186
+ ): SingleModelRunReport {
187
+ return { ...input };
188
+ }
189
+
141
190
  export async function evaluateMemoryGateBenchmark(input: {
142
191
  scenarios: SharedScenario[];
143
192
  benchmarkCases: MemoryGateBenchmarkCase[];
@@ -148,6 +197,7 @@ export async function evaluateMemoryGateBenchmark(input: {
148
197
  const scenarioMap = buildScenarioMap(input.scenarios);
149
198
  const results: MemoryGateCaseResult[] = [];
150
199
  const logger = input.logger ?? createNoopLogger();
200
+ const errorCounts = createEmptyErrorCounts();
151
201
 
152
202
  for (const benchmarkCase of input.benchmarkCases) {
153
203
  const scenario = scenarioMap.get(benchmarkCase.scenario_id);
@@ -188,6 +238,10 @@ export async function evaluateMemoryGateBenchmark(input: {
188
238
  }
189
239
 
190
240
  const pass = decisionPass && candidatePass;
241
+ const errorType = classifyMemoryGateError(actual.reason);
242
+ if (errorType) {
243
+ errorCounts[errorType] += 1;
244
+ }
191
245
  results.push({
192
246
  scenarioId: benchmarkCase.scenario_id,
193
247
  pass,
@@ -198,6 +252,8 @@ export async function evaluateMemoryGateBenchmark(input: {
198
252
  expectedDecision: benchmarkCase.expected_decision,
199
253
  actualCandidateFact: actual.candidateFact,
200
254
  expectedCandidateFact: benchmarkCase.expected_candidate_fact,
255
+ errorType,
256
+ error: errorType ? actual.reason : undefined,
201
257
  });
202
258
  logger.info("EvalRunner", "Completed memory_gate case", {
203
259
  scenarioId: benchmarkCase.scenario_id,
@@ -206,9 +262,12 @@ export async function evaluateMemoryGateBenchmark(input: {
206
262
  candidatePass,
207
263
  judgeUsed,
208
264
  actualDecision: actual.decision,
265
+ errorType,
209
266
  });
210
267
  } catch (error) {
211
268
  const reason = getErrorMessage(error);
269
+ const errorType = classifyMemoryGateError(reason) ?? "execution_error";
270
+ errorCounts[errorType] += 1;
212
271
  results.push({
213
272
  scenarioId: benchmarkCase.scenario_id,
214
273
  pass: false,
@@ -218,11 +277,13 @@ export async function evaluateMemoryGateBenchmark(input: {
218
277
  actualDecision: "NO_WRITE",
219
278
  expectedDecision: benchmarkCase.expected_decision,
220
279
  expectedCandidateFact: benchmarkCase.expected_candidate_fact,
280
+ errorType,
221
281
  error: reason,
222
282
  });
223
283
  logger.error("EvalRunner", "memory_gate case failed", {
224
284
  scenarioId: benchmarkCase.scenario_id,
225
285
  reason,
286
+ errorType,
226
287
  });
227
288
  }
228
289
  }
@@ -231,6 +292,7 @@ export async function evaluateMemoryGateBenchmark(input: {
231
292
  summary: {
232
293
  total: results.length,
233
294
  passed: results.filter((result) => result.pass).length,
295
+ errorCounts,
234
296
  },
235
297
  results,
236
298
  };
package/src/index.ts CHANGED
@@ -12,6 +12,10 @@ import {
12
12
  MemoryGateAnalyzer,
13
13
  } from "./memory-gate/index.js";
14
14
  import { WriteGuardian } from "./write-guardian/index.js";
15
+ import {
16
+ WriteGuardianAuditLog,
17
+ type WriteGuardianAuditEntry,
18
+ } from "./write-guardian/audit-log.js";
15
19
  import {
16
20
  handleBeforeMessageWrite,
17
21
  handleMessageReceived,
@@ -47,6 +51,10 @@ export interface PluginAPI {
47
51
  handler: (event: unknown, context?: unknown) => void,
48
52
  options?: { priority?: number }
49
53
  ) => void;
54
+ registerCommand?: (
55
+ command: string,
56
+ handler: (args?: string) => string | Promise<string>
57
+ ) => void;
50
58
  }
51
59
 
52
60
  let bufferManager: SessionBufferManager | null = null;
@@ -54,6 +62,54 @@ let gatewayLogger: PluginLogger | null = null;
54
62
  let fileLogger: FileLogger | null = null;
55
63
  let isRegistered = false;
56
64
 
65
+ function formatWriteGuardianAudit(entries: WriteGuardianAuditEntry[]): string {
66
+ if (entries.length === 0) {
67
+ return "No write_guardian records found.";
68
+ }
69
+
70
+ const lines = entries.map((entry, index) => {
71
+ const summary = [
72
+ `${index + 1}. [${entry.timestamp}] ${entry.status}`,
73
+ `decision=${entry.decision}`,
74
+ entry.targetFile ? `file=${entry.targetFile}` : undefined,
75
+ entry.reason ? `reason=${entry.reason}` : undefined,
76
+ entry.candidateFact ? `fact=${entry.candidateFact}` : undefined,
77
+ ]
78
+ .filter((part): part is string => Boolean(part))
79
+ .join(" | ");
80
+
81
+ return summary;
82
+ });
83
+
84
+ return lines.join("\n");
85
+ }
86
+
87
+ function registerReflectionCommand(
88
+ api: PluginAPI,
89
+ logger: FileLogger,
90
+ auditLog?: WriteGuardianAuditLog
91
+ ): void {
92
+ if (typeof api.registerCommand !== "function") {
93
+ logger.info("PluginLifecycle", "registerCommand unavailable, skip command registration", {
94
+ command: "/openclaw-reflection",
95
+ });
96
+ return;
97
+ }
98
+
99
+ api.registerCommand("/openclaw-reflection", async () => {
100
+ if (!auditLog) {
101
+ return "write_guardian audit log unavailable: workspace is not configured.";
102
+ }
103
+
104
+ const entries = await auditLog.readRecent(10);
105
+ return formatWriteGuardianAudit(entries);
106
+ });
107
+
108
+ logger.info("PluginLifecycle", "Registered plugin command", {
109
+ command: "/openclaw-reflection",
110
+ });
111
+ }
112
+
57
113
  function getErrorMessage(error: unknown): string {
58
114
  if (error instanceof Error) {
59
115
  return error.message;
@@ -206,6 +262,7 @@ export default function activate(api: PluginAPI): void {
206
262
 
207
263
  let memoryGate: MemoryGateAnalyzer | undefined;
208
264
  let writeGuardian: WriteGuardian | undefined;
265
+ let writeGuardianAuditLog: WriteGuardianAuditLog | undefined;
209
266
 
210
267
  if (config.memoryGate.enabled && llmService) {
211
268
  memoryGate = new MemoryGateAnalyzer(llmService, logger);
@@ -217,7 +274,13 @@ export default function activate(api: PluginAPI): void {
217
274
  }
218
275
 
219
276
  if (llmService && workspaceDir) {
220
- writeGuardian = new WriteGuardian({ workspaceDir }, logger, llmService);
277
+ writeGuardianAuditLog = new WriteGuardianAuditLog(workspaceDir);
278
+ writeGuardian = new WriteGuardian(
279
+ { workspaceDir },
280
+ logger,
281
+ llmService,
282
+ writeGuardianAuditLog
283
+ );
221
284
  logger.info("PluginLifecycle", "write_guardian initialized", {
222
285
  workspaceDir,
223
286
  });
@@ -303,6 +366,8 @@ export default function activate(api: PluginAPI): void {
303
366
  }
304
367
  );
305
368
 
369
+ registerReflectionCommand(api, logger, writeGuardianAuditLog);
370
+
306
371
  gatewayLogger.info("[Reflection] Message hooks registered");
307
372
  logger.info("PluginLifecycle", "Message hooks registered");
308
373
 
@@ -0,0 +1,71 @@
1
+ import * as fs from "node:fs";
2
+ import * as fsp from "node:fs/promises";
3
+ import * as path from "node:path";
4
+
5
/**
 * One audit record describing a single write_guardian write attempt and its
 * outcome. Entries are persisted one-per-line (JSONL) by WriteGuardianAuditLog.
 */
export interface WriteGuardianAuditEntry {
  // ISO-8601 timestamp assigned when the entry is appended.
  timestamp: string;
  // The memory-gate decision that triggered this write attempt.
  decision: string;
  // Curated memory file targeted by the write, when one was resolved.
  targetFile?: string;
  // Final outcome of the write attempt.
  status: "written" | "refused" | "failed" | "skipped";
  // Explanation for refused/failed/skipped outcomes, when available.
  reason?: string;
  // The candidate fact that write_guardian evaluated, if any.
  candidateFact?: string;
}
13
+
14
+ function normalizeError(error: unknown): string {
15
+ if (error instanceof Error) {
16
+ return error.message;
17
+ }
18
+
19
+ return String(error);
20
+ }
21
+
22
+ export class WriteGuardianAuditLog {
23
+ private readonly filePath: string;
24
+
25
+ constructor(workspaceDir: string) {
26
+ const logDir = path.join(workspaceDir, ".openclaw-reflection");
27
+ this.filePath = path.join(logDir, "write-guardian.log.jsonl");
28
+
29
+ if (!fs.existsSync(logDir)) {
30
+ fs.mkdirSync(logDir, { recursive: true });
31
+ }
32
+ }
33
+
34
+ async append(entry: Omit<WriteGuardianAuditEntry, "timestamp">): Promise<void> {
35
+ const serialized = JSON.stringify({
36
+ timestamp: new Date().toISOString(),
37
+ ...entry,
38
+ });
39
+
40
+ await fsp.appendFile(this.filePath, `${serialized}\n`, "utf8");
41
+ }
42
+
43
+ async readRecent(limit: number): Promise<WriteGuardianAuditEntry[]> {
44
+ try {
45
+ const content = await fsp.readFile(this.filePath, "utf8");
46
+ const lines = content
47
+ .split("\n")
48
+ .map((line) => line.trim())
49
+ .filter((line) => line.length > 0);
50
+
51
+ const parsed = lines
52
+ .map((line) => {
53
+ try {
54
+ return JSON.parse(line) as WriteGuardianAuditEntry;
55
+ } catch {
56
+ return null;
57
+ }
58
+ })
59
+ .filter((entry): entry is WriteGuardianAuditEntry => entry !== null);
60
+
61
+ return parsed.slice(-limit).reverse();
62
+ } catch (error) {
63
+ const errorMessage = normalizeError(error);
64
+ if (errorMessage.includes("ENOENT")) {
65
+ return [];
66
+ }
67
+
68
+ throw error;
69
+ }
70
+ }
71
+ }
@@ -1,6 +1,7 @@
1
1
  import * as path from "path";
2
2
  import type { AgentTool, LLMService, MemoryGateOutput, Logger } from "../types.js";
3
3
  import { readFile, writeFileWithLock } from "../utils/file-utils.js";
4
+ import { WriteGuardianAuditLog } from "./audit-log.js";
4
5
 
5
6
  type UpdateDecision =
6
7
  | "UPDATE_MEMORY"
@@ -93,16 +94,25 @@ export class WriteGuardian {
93
94
  private config: WriteGuardianConfig;
94
95
  private logger: Logger;
95
96
  private llmService: LLMService;
96
-
97
- constructor(config: WriteGuardianConfig, logger: Logger, llmService: LLMService) {
97
+ private auditLog?: WriteGuardianAuditLog;
98
+
99
  /**
   * @param config     write_guardian configuration (workspace location)
   * @param logger     structured plugin logger
   * @param llmService LLM backend used by the guardian
   * @param auditLog   optional JSONL audit sink; when omitted, write
   *                   attempts are simply not audited
   */
  constructor(
    config: WriteGuardianConfig,
    logger: Logger,
    llmService: LLMService,
    auditLog?: WriteGuardianAuditLog
  ) {
    this.config = config;
    this.logger = logger;
    this.llmService = llmService;
    this.auditLog = auditLog;
  }
102
110
 
103
111
  async write(output: MemoryGateOutput): Promise<WriteGuardianWriteResult> {
104
112
  if (!isUpdateDecision(output.decision)) {
105
- return { status: "skipped", reason: "not an update decision" };
113
+ const result = { status: "skipped", reason: "not an update decision" } as const;
114
+ await this.recordAudit(output, result);
115
+ return result;
106
116
  }
107
117
 
108
118
  const candidateFact = output.candidateFact?.trim();
@@ -111,7 +121,9 @@ export class WriteGuardian {
111
121
  decision: output.decision,
112
122
  reason: output.reason,
113
123
  });
114
- return { status: "skipped", reason: "missing candidate fact" };
124
+ const result = { status: "skipped", reason: "missing candidate fact" } as const;
125
+ await this.recordAudit(output, result);
126
+ return result;
115
127
  }
116
128
 
117
129
  const targetFile = TARGET_FILES[output.decision];
@@ -141,14 +153,18 @@ export class WriteGuardian {
141
153
  filePath,
142
154
  reason,
143
155
  });
144
- return { status: "refused", reason };
156
+ const writeResult = { status: "refused", reason } as const;
157
+ await this.recordAudit(output, writeResult, targetFile);
158
+ return writeResult;
145
159
  }
146
160
 
147
161
  this.logger.info("WriteGuardian", "write_guardian rewrote target file", {
148
162
  decision: output.decision,
149
163
  filePath,
150
164
  });
151
- return { status: "written" };
165
+ const writeResult = { status: "written" } as const;
166
+ await this.recordAudit(output, writeResult, targetFile);
167
+ return writeResult;
152
168
  } catch (error) {
153
169
  const reason = getErrorMessage(error);
154
170
  this.logger.error("WriteGuardian", "write_guardian execution failed", {
@@ -156,7 +172,33 @@ export class WriteGuardian {
156
172
  filePath,
157
173
  reason,
158
174
  });
159
- return { status: "failed", reason };
175
+ const writeResult = { status: "failed", reason } as const;
176
+ await this.recordAudit(output, writeResult, targetFile);
177
+ return writeResult;
178
+ }
179
+ }
180
+
181
+ private async recordAudit(
182
+ output: MemoryGateOutput,
183
+ result: WriteGuardianWriteResult,
184
+ targetFile?: CuratedFilename
185
+ ): Promise<void> {
186
+ if (!this.auditLog) {
187
+ return;
188
+ }
189
+
190
+ try {
191
+ await this.auditLog.append({
192
+ decision: output.decision,
193
+ targetFile,
194
+ status: result.status,
195
+ reason: result.reason,
196
+ candidateFact: output.candidateFact,
197
+ });
198
+ } catch (error) {
199
+ this.logger.warn("WriteGuardian", "Failed to append write_guardian audit log", {
200
+ reason: getErrorMessage(error),
201
+ });
160
202
  }
161
203
  }
162
204