@parkgogogo/openclaw-reflection 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,446 @@
1
+ import os from "node:os";
2
+ import path from "node:path";
3
+ import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
4
+
5
+ import { FileCurator } from "../file-curator/index.js";
6
+ import { LLMService } from "../llm/service.js";
7
+ import { MemoryGateAnalyzer } from "../memory-gate/analyzer.js";
8
+ import type {
9
+ AgentStep,
10
+ LLMService as LLMServiceContract,
11
+ Logger,
12
+ MemoryGateOutput,
13
+ } from "../types.js";
14
+
15
/**
 * One benchmark scenario shared by the memory-gate and writer-guardian
 * suites. Only `scenario_id`, `title`, and `notes` are always present; the
 * remaining fields are populated depending on `task_type`.
 */
export interface SharedScenario {
  scenario_id: string;
  task_type?: "memory_gate" | "writer_guardian";
  title: string;
  // Conversation history preceding the current turn (memory-gate scenarios).
  recent_messages?: Array<{
    role: "user" | "agent";
    message: string;
  }>;
  current_user_message?: string;
  current_agent_reply?: string;
  // Pre-computed gate output that seeds writer-guardian scenarios.
  gate_decision?: MemoryGateOutput["decision"];
  gate_reason?: string;
  candidate_fact?: string;
  // Curated file the writer guardian is allowed to touch (writer scenarios).
  target_file?: "MEMORY.md" | "USER.md" | "SOUL.md" | "IDENTITY.md" | "TOOLS.md";
  current_file_content?: string;
  notes: string;
}

/** Expected outcome for a single memory-gate scenario. */
export interface MemoryGateBenchmarkCase {
  scenario_id: string;
  expected_decision: MemoryGateOutput["decision"];
  expected_candidate_fact?: string;
  // Alternative phrasings that also count as an exact candidate-fact match.
  allowed_candidate_fact_variants?: string[];
  severity: "core" | "boundary";
  tags: string[];
}

/** Expected outcome for a single writer-guardian scenario. */
export interface WriterGuardianBenchmarkCase {
  scenario_id: string;
  expected_should_write: boolean;
  expected_outcome_type: string;
  // Each inner array is one acceptable ordered tool-call sequence.
  allowed_tool_traces: string[][];
  expected_content_contains?: string[];
  expected_content_not_contains?: string[];
  tags: string[];
}

/** Per-case verdict produced by `evaluateMemoryGateBenchmark`. */
export interface MemoryGateCaseResult {
  scenarioId: string;
  pass: boolean;
  decisionPass: boolean;
  candidatePass: boolean;
  // True when the semantic judge (not exact matching) settled the candidate fact.
  judgeUsed: boolean;
  actualDecision: MemoryGateOutput["decision"];
  expectedDecision: MemoryGateOutput["decision"];
  actualCandidateFact?: string;
  expectedCandidateFact?: string;
  // Populated only when executeCase threw for this scenario.
  error?: string;
}

/** Per-case verdict produced by `evaluateWriterGuardianBenchmark`. */
export interface WriterGuardianCaseResult {
  scenarioId: string;
  pass: boolean;
  shouldWritePass: boolean;
  toolTracePass: boolean;
  contentPass: boolean;
  actualShouldWrite: boolean;
  actualToolTrace: string[];
  targetFile: string;
  // Populated only when executeCase threw for this scenario.
  error?: string;
}

/** Aggregate pass/total counts for one benchmark run. */
export interface BenchmarkSummary {
  total: number;
  passed: number;
}

/**
 * Semantic fallback used when a candidate fact does not match exactly.
 * Implementations decide whether two facts are equivalent in meaning.
 */
export interface Judge {
  compareCandidateFact(input: {
    expected: string;
    actual: string;
    variants: string[];
  }): Promise<{ equivalent: boolean; reason: string }>;
}
89
+
90
+ function createNoopLogger(): Logger {
91
+ return {
92
+ debug() {},
93
+ info() {},
94
+ warn() {},
95
+ error() {},
96
+ };
97
+ }
98
+
99
+ function getErrorMessage(error: unknown): string {
100
+ if (error instanceof Error) {
101
+ return error.message;
102
+ }
103
+
104
+ return String(error);
105
+ }
106
+
107
+ function withScenarioLogger(baseLogger: Logger, scenarioId: string): Logger {
108
+ return {
109
+ debug(component, event, details) {
110
+ baseLogger.debug(component, event, details, scenarioId);
111
+ },
112
+ info(component, event, details) {
113
+ baseLogger.info(component, event, details, scenarioId);
114
+ },
115
+ warn(component, event, details) {
116
+ baseLogger.warn(component, event, details, scenarioId);
117
+ },
118
+ error(component, event, details) {
119
+ baseLogger.error(component, event, details, scenarioId);
120
+ },
121
+ };
122
+ }
123
+
124
+ function normalizeText(value: string): string {
125
+ return value.trim().replace(/\s+/g, " ").toLowerCase();
126
+ }
127
+
128
+ function buildScenarioMap(scenarios: SharedScenario[]): Map<string, SharedScenario> {
129
+ return new Map(scenarios.map((scenario) => [scenario.scenario_id, scenario]));
130
+ }
131
+
132
+ function arraysEqual(left: string[], right: string[]): boolean {
133
+ return left.length === right.length && left.every((item, index) => item === right[index]);
134
+ }
135
+
136
+ function normalizeFileContent(content: string): string {
137
+ const normalized = content.replace(/\r\n/g, "\n");
138
+ return normalized.endsWith("\n") ? normalized : `${normalized}\n`;
139
+ }
140
+
141
+ export async function evaluateMemoryGateBenchmark(input: {
142
+ scenarios: SharedScenario[];
143
+ benchmarkCases: MemoryGateBenchmarkCase[];
144
+ executeCase: (scenario: SharedScenario) => Promise<MemoryGateOutput>;
145
+ judge?: Judge;
146
+ logger?: Logger;
147
+ }): Promise<{ summary: BenchmarkSummary; results: MemoryGateCaseResult[] }> {
148
+ const scenarioMap = buildScenarioMap(input.scenarios);
149
+ const results: MemoryGateCaseResult[] = [];
150
+ const logger = input.logger ?? createNoopLogger();
151
+
152
+ for (const benchmarkCase of input.benchmarkCases) {
153
+ const scenario = scenarioMap.get(benchmarkCase.scenario_id);
154
+ if (!scenario) {
155
+ throw new Error(`Missing shared scenario: ${benchmarkCase.scenario_id}`);
156
+ }
157
+
158
+ try {
159
+ logger.info("EvalRunner", "Starting memory gate case", {
160
+ scenarioId: benchmarkCase.scenario_id,
161
+ expectedDecision: benchmarkCase.expected_decision,
162
+ });
163
+
164
+ const actual = await input.executeCase(scenario);
165
+ const decisionPass = actual.decision === benchmarkCase.expected_decision;
166
+ let candidatePass = true;
167
+ let judgeUsed = false;
168
+
169
+ if (benchmarkCase.expected_decision !== "NO_WRITE") {
170
+ const expectedFact = benchmarkCase.expected_candidate_fact ?? "";
171
+ const actualFact = actual.candidateFact ?? "";
172
+ const variants = benchmarkCase.allowed_candidate_fact_variants ?? [];
173
+ const exactMatches =
174
+ normalizeText(actualFact) === normalizeText(expectedFact) ||
175
+ variants.some((variant) => normalizeText(actualFact) === normalizeText(variant));
176
+
177
+ candidatePass = exactMatches;
178
+
179
+ if (!candidatePass && input.judge && actualFact.trim() !== "" && expectedFact.trim() !== "") {
180
+ const judged = await input.judge.compareCandidateFact({
181
+ expected: expectedFact,
182
+ actual: actualFact,
183
+ variants,
184
+ });
185
+ candidatePass = judged.equivalent;
186
+ judgeUsed = true;
187
+ }
188
+ }
189
+
190
+ const pass = decisionPass && candidatePass;
191
+ results.push({
192
+ scenarioId: benchmarkCase.scenario_id,
193
+ pass,
194
+ decisionPass,
195
+ candidatePass,
196
+ judgeUsed,
197
+ actualDecision: actual.decision,
198
+ expectedDecision: benchmarkCase.expected_decision,
199
+ actualCandidateFact: actual.candidateFact,
200
+ expectedCandidateFact: benchmarkCase.expected_candidate_fact,
201
+ });
202
+ logger.info("EvalRunner", "Completed memory gate case", {
203
+ scenarioId: benchmarkCase.scenario_id,
204
+ pass,
205
+ decisionPass,
206
+ candidatePass,
207
+ judgeUsed,
208
+ actualDecision: actual.decision,
209
+ });
210
+ } catch (error) {
211
+ const reason = getErrorMessage(error);
212
+ results.push({
213
+ scenarioId: benchmarkCase.scenario_id,
214
+ pass: false,
215
+ decisionPass: false,
216
+ candidatePass: false,
217
+ judgeUsed: false,
218
+ actualDecision: "NO_WRITE",
219
+ expectedDecision: benchmarkCase.expected_decision,
220
+ expectedCandidateFact: benchmarkCase.expected_candidate_fact,
221
+ error: reason,
222
+ });
223
+ logger.error("EvalRunner", "Memory gate case failed", {
224
+ scenarioId: benchmarkCase.scenario_id,
225
+ reason,
226
+ });
227
+ }
228
+ }
229
+
230
+ return {
231
+ summary: {
232
+ total: results.length,
233
+ passed: results.filter((result) => result.pass).length,
234
+ },
235
+ results,
236
+ };
237
+ }
238
+
239
+ export async function evaluateWriterGuardianBenchmark(input: {
240
+ scenarios: SharedScenario[];
241
+ benchmarkCases: WriterGuardianBenchmarkCase[];
242
+ executeCase: (scenario: SharedScenario) => Promise<{
243
+ shouldWrite: boolean;
244
+ toolTrace: string[];
245
+ finalContent: string;
246
+ }>;
247
+ logger?: Logger;
248
+ }): Promise<{ summary: BenchmarkSummary; results: WriterGuardianCaseResult[] }> {
249
+ const scenarioMap = buildScenarioMap(input.scenarios);
250
+ const results: WriterGuardianCaseResult[] = [];
251
+ const logger = input.logger ?? createNoopLogger();
252
+
253
+ for (const benchmarkCase of input.benchmarkCases) {
254
+ const scenario = scenarioMap.get(benchmarkCase.scenario_id);
255
+ if (!scenario) {
256
+ throw new Error(`Missing shared scenario: ${benchmarkCase.scenario_id}`);
257
+ }
258
+
259
+ if (!scenario.target_file || typeof scenario.current_file_content !== "string") {
260
+ throw new Error(`Writer scenario is missing target_file or current_file_content: ${scenario.scenario_id}`);
261
+ }
262
+
263
+ try {
264
+ logger.info("EvalRunner", "Starting writer guardian case", {
265
+ scenarioId: benchmarkCase.scenario_id,
266
+ targetFile: scenario.target_file,
267
+ expectedShouldWrite: benchmarkCase.expected_should_write,
268
+ });
269
+
270
+ const actual = await input.executeCase(scenario);
271
+ const initialContent = normalizeFileContent(scenario.current_file_content);
272
+ const normalizedFinal = normalizeFileContent(actual.finalContent);
273
+ const shouldWritePass =
274
+ actual.shouldWrite === benchmarkCase.expected_should_write &&
275
+ (benchmarkCase.expected_should_write ? true : normalizedFinal === initialContent);
276
+ const toolTracePass = benchmarkCase.allowed_tool_traces.some((trace) =>
277
+ arraysEqual(trace, actual.toolTrace)
278
+ );
279
+ const expectedContains = benchmarkCase.expected_content_contains ?? [];
280
+ const expectedNotContains = benchmarkCase.expected_content_not_contains ?? [];
281
+ const contentPass =
282
+ expectedContains.every((snippet) => normalizedFinal.includes(snippet)) &&
283
+ expectedNotContains.every((snippet) => !normalizedFinal.includes(snippet));
284
+ const pass = shouldWritePass && toolTracePass && contentPass;
285
+
286
+ results.push({
287
+ scenarioId: benchmarkCase.scenario_id,
288
+ pass,
289
+ shouldWritePass,
290
+ toolTracePass,
291
+ contentPass,
292
+ actualShouldWrite: actual.shouldWrite,
293
+ actualToolTrace: actual.toolTrace,
294
+ targetFile: scenario.target_file,
295
+ });
296
+ logger.info("EvalRunner", "Completed writer guardian case", {
297
+ scenarioId: benchmarkCase.scenario_id,
298
+ pass,
299
+ shouldWritePass,
300
+ toolTracePass,
301
+ contentPass,
302
+ actualShouldWrite: actual.shouldWrite,
303
+ actualToolTrace: actual.toolTrace,
304
+ });
305
+ } catch (error) {
306
+ const reason = getErrorMessage(error);
307
+ results.push({
308
+ scenarioId: benchmarkCase.scenario_id,
309
+ pass: false,
310
+ shouldWritePass: false,
311
+ toolTracePass: false,
312
+ contentPass: false,
313
+ actualShouldWrite: false,
314
+ actualToolTrace: [],
315
+ targetFile: scenario.target_file,
316
+ error: reason,
317
+ });
318
+ logger.error("EvalRunner", "Writer guardian case failed", {
319
+ scenarioId: benchmarkCase.scenario_id,
320
+ targetFile: scenario.target_file,
321
+ reason,
322
+ });
323
+ }
324
+ }
325
+
326
+ return {
327
+ summary: {
328
+ total: results.length,
329
+ passed: results.filter((result) => result.pass).length,
330
+ },
331
+ results,
332
+ };
333
+ }
334
+
335
+ export async function runMemoryGateCase(input: {
336
+ scenario: SharedScenario;
337
+ llmService: LLMServiceContract;
338
+ logger?: Logger;
339
+ }): Promise<MemoryGateOutput> {
340
+ if (
341
+ !input.scenario.current_user_message ||
342
+ typeof input.scenario.current_agent_reply !== "string"
343
+ ) {
344
+ throw new Error(`Memory gate scenario is missing current turn fields: ${input.scenario.scenario_id}`);
345
+ }
346
+
347
+ const analyzer = new MemoryGateAnalyzer(
348
+ input.llmService,
349
+ withScenarioLogger(input.logger ?? createNoopLogger(), input.scenario.scenario_id)
350
+ );
351
+ const recentMessages = (input.scenario.recent_messages ?? []).map((message, index) => ({
352
+ ...message,
353
+ timestamp: 1_700_000_000_000 + index * 1000,
354
+ }));
355
+
356
+ return analyzer.analyze({
357
+ recentMessages,
358
+ currentUserMessage: input.scenario.current_user_message,
359
+ currentAgentReply: input.scenario.current_agent_reply,
360
+ });
361
+ }
362
+
363
+ export async function runWriterGuardianCase(input: {
364
+ scenario: SharedScenario;
365
+ llmService: LLMServiceContract;
366
+ logger?: Logger;
367
+ }): Promise<{ shouldWrite: boolean; toolTrace: string[]; finalContent: string }> {
368
+ const scenario = input.scenario;
369
+ if (
370
+ !scenario.target_file ||
371
+ !scenario.gate_decision ||
372
+ !scenario.gate_reason ||
373
+ !scenario.candidate_fact ||
374
+ typeof scenario.current_file_content !== "string"
375
+ ) {
376
+ throw new Error(`Writer guardian scenario is missing required fields: ${scenario.scenario_id}`);
377
+ }
378
+
379
+ const workspaceDir = await mkdtemp(path.join(os.tmpdir(), "reflection-eval-"));
380
+ const logger = withScenarioLogger(
381
+ input.logger ?? createNoopLogger(),
382
+ scenario.scenario_id
383
+ );
384
+ const filePath = path.join(workspaceDir, scenario.target_file);
385
+ const originalContent = normalizeFileContent(scenario.current_file_content);
386
+ await writeFile(filePath, originalContent, "utf8");
387
+
388
+ let lastSteps: AgentStep[] = [];
389
+ const recordingService: LLMServiceContract = {
390
+ generateObject: (params) => input.llmService.generateObject(params),
391
+ runAgent: async (params) => {
392
+ const result = await input.llmService.runAgent(params);
393
+ lastSteps = result.steps;
394
+ return result;
395
+ },
396
+ };
397
+
398
+ try {
399
+ const curator = new FileCurator({ workspaceDir }, logger, recordingService);
400
+ await curator.write({
401
+ decision: scenario.gate_decision,
402
+ reason: scenario.gate_reason,
403
+ candidateFact: scenario.candidate_fact,
404
+ });
405
+ const finalContent = normalizeFileContent((await readFile(filePath, "utf8")) ?? originalContent);
406
+ const toolTrace = lastSteps
407
+ .filter((step) => step.type === "tool" && typeof step.toolName === "string")
408
+ .map((step) => step.toolName as string);
409
+ const shouldWrite = toolTrace.includes("write") || finalContent !== originalContent;
410
+
411
+ return {
412
+ shouldWrite,
413
+ toolTrace,
414
+ finalContent,
415
+ };
416
+ } finally {
417
+ await rm(workspaceDir, { recursive: true, force: true });
418
+ }
419
+ }
420
+
421
+ export function createJudge(llmService: LLMService): Judge {
422
+ return {
423
+ async compareCandidateFact(input) {
424
+ return llmService.generateObject({
425
+ systemPrompt:
426
+ "You judge whether two candidate memory facts are semantically equivalent. Output JSON only.",
427
+ userPrompt: [
428
+ `Expected fact: ${input.expected}`,
429
+ `Actual fact: ${input.actual}`,
430
+ `Allowed variants: ${input.variants.join(" | ") || "(none)"}`,
431
+ "",
432
+ "Return whether the actual fact is an acceptable semantic match.",
433
+ ].join("\n"),
434
+ schema: {
435
+ type: "object",
436
+ additionalProperties: false,
437
+ required: ["equivalent", "reason"],
438
+ properties: {
439
+ equivalent: { type: "boolean" },
440
+ reason: { type: "string" },
441
+ },
442
+ },
443
+ });
444
+ },
445
+ };
446
+ }
@@ -0,0 +1,204 @@
1
+ import * as path from "path";
2
+ import type { AgentTool, LLMService, MemoryGateOutput, Logger } from "../types.js";
3
+ import { readFile, writeFileWithLock } from "../utils/file-utils.js";
4
+
5
// The subset of Memory Gate decisions that request a file update.
type UpdateDecision =
  | "UPDATE_MEMORY"
  | "UPDATE_USER"
  | "UPDATE_SOUL"
  | "UPDATE_IDENTITY"
  | "UPDATE_TOOLS";

// Curated workspace files the writer guardian is allowed to rewrite.
type CuratedFilename =
  | "MEMORY.md"
  | "USER.md"
  | "SOUL.md"
  | "IDENTITY.md"
  | "TOOLS.md";
18
+
19
interface FileCuratorConfig {
  // Directory containing the curated markdown files (joined with the target filename).
  workspaceDir: string;
}

/** Outcome of one guardian write attempt. */
export interface FileCuratorWriteResult {
  status: "written" | "refused" | "failed" | "skipped";
  // Human-readable explanation; absent only for the "written" status.
  reason?: string;
}
27
+
28
// System prompt for the writer-guardian agent. This text is runtime behavior
// sent to the LLM verbatim — do not edit casually; the benchmark suite
// depends on the guardian following these exact rules.
const FILE_CURATOR_SYSTEM_PROMPT = `You are the assistant's Writer Guardian.

Your job:
- Decide whether the candidate fact should update the target memory file
- Use the read tool if you need the current file content
- Use the write tool only if the target file truly should change
- If the target file should not change, finish without calling write
- If you write, preserve the candidate fact explicitly unless the exact wording is already present

You are a guardian, not an eager writer.
When in doubt, refuse.

File meanings:
- MEMORY.md: curated long-term memory. Keep durable decisions, lessons learned, shared context, and important private context. Reject fleeting chatter, short-lived project chatter, user profile facts, identity metadata, and assistant principles.
- USER.md: about your human. Keep stable preferences, collaboration style, and helpful personal context. Do not turn this into a dossier. Reject project chatter in USER.md, one-off tactics, temporary moods, and surveillance-style detail.
- SOUL.md: the assistant's enduring principles, boundaries, continuity rules, and general voice. General write-policy or disclosure-policy rules can belong here. Reject temporary tone shifts, project tactics, user profile facts, and identity metadata.
- IDENTITY.md: Identity metadata only. Keep name, creature, vibe, emoji, avatar, or equivalent identity metadata. If the candidate fact is an explicit metadata change, write it and replace existing metadata when needed. Reject anything that is not identity metadata.
- TOOLS.md: environment-specific tool context only. Keep local aliases, endpoints, room or device names, preferred TTS voices, and other local mappings that help the assistant use tools correctly in this workspace. Reject reusable procedures that belong in a skill, runtime tool availability claims, user facts, identity metadata, and general long-term memory.

Hard constraints:
- Only reason about the target file you were given
- Do not route to another file
- Do not read or infer from other files
- If you refuse, finish without calling write
- If you write, overwrite the full target file content
- Preserve useful existing structure unless there is a strong reason to reorganize`;

// Maps each UPDATE_* decision to the single file it may modify.
const TARGET_FILES: Record<UpdateDecision, CuratedFilename> = {
  UPDATE_MEMORY: "MEMORY.md",
  UPDATE_USER: "USER.md",
  UPDATE_SOUL: "SOUL.md",
  UPDATE_IDENTITY: "IDENTITY.md",
  UPDATE_TOOLS: "TOOLS.md",
};
62
+
63
+ function isUpdateDecision(
64
+ decision: MemoryGateOutput["decision"]
65
+ ): decision is UpdateDecision {
66
+ return (
67
+ decision === "UPDATE_MEMORY" ||
68
+ decision === "UPDATE_USER" ||
69
+ decision === "UPDATE_SOUL" ||
70
+ decision === "UPDATE_IDENTITY" ||
71
+ decision === "UPDATE_TOOLS"
72
+ );
73
+ }
74
+
75
+ function getErrorMessage(error: unknown): string {
76
+ if (error instanceof Error) {
77
+ return error.message;
78
+ }
79
+
80
+ return String(error);
81
+ }
82
+
83
+ function getDefaultContent(targetFile: CuratedFilename): string {
84
+ return `# ${targetFile.replace(/\.md$/, "")}\n`;
85
+ }
86
+
87
+ function normalizeFileContent(content: string): string {
88
+ const normalized = content.replace(/\r\n/g, "\n");
89
+ return normalized.endsWith("\n") ? normalized : `${normalized}\n`;
90
+ }
91
+
92
/**
 * Writer Guardian: takes a Memory Gate UPDATE_* decision and runs an LLM
 * agent that decides whether — and how — to rewrite the single target
 * memory file. The agent is given exactly two tools (read/write) scoped to
 * that one file, so it cannot touch anything else in the workspace.
 */
export class FileCurator {
  private config: FileCuratorConfig;
  private logger: Logger;
  private llmService: LLMService;

  constructor(config: FileCuratorConfig, logger: Logger, llmService: LLMService) {
    this.config = config;
    this.logger = logger;
    this.llmService = llmService;
  }

  /**
   * Attempts to apply a gate decision to its target file.
   *
   * Returns "skipped" for non-update decisions or a missing candidate fact,
   * "refused" when the agent finished without calling write, "written" when
   * it did write, and "failed" when the agent run threw. Never rethrows.
   */
  async write(output: MemoryGateOutput): Promise<FileCuratorWriteResult> {
    if (!isUpdateDecision(output.decision)) {
      return { status: "skipped", reason: "not an update decision" };
    }

    // An UPDATE_* decision without a fact has nothing to persist.
    const candidateFact = output.candidateFact?.trim();
    if (!candidateFact) {
      this.logger.warn("FileCurator", "Skip UPDATE_* without candidate fact", {
        decision: output.decision,
        reason: output.reason,
      });
      return { status: "skipped", reason: "missing candidate fact" };
    }

    const targetFile = TARGET_FILES[output.decision];
    const filePath = path.join(this.config.workspaceDir, targetFile);

    const tools = this.createTools(filePath, targetFile);

    try {
      // maxSteps 4 bounds the agent loop: typically read -> think -> write.
      const result = await this.llmService.runAgent({
        systemPrompt: FILE_CURATOR_SYSTEM_PROMPT,
        userPrompt: [
          `Memory Gate decision: ${output.decision}`,
          `Reason from gate: ${output.reason}`,
          `Candidate fact: ${candidateFact}`,
          `Target file: ${targetFile}`,
          "",
          "Decide whether this target file should change. Use read first if you need current content. If the file should change, call write with the full next file content. Otherwise finish without write.",
        ].join("\n"),
        tools,
        maxSteps: 4,
      });

      // Finishing without a write call is the guardian's refusal signal.
      if (!result.didWrite) {
        const reason = result.finalMessage ?? "Writer guardian finished without write";
        this.logger.info("FileCurator", "Guardian refused update", {
          decision: output.decision,
          filePath,
          reason,
        });
        return { status: "refused", reason };
      }

      this.logger.info("FileCurator", "Writer guardian rewrote target file", {
        decision: output.decision,
        filePath,
      });
      return { status: "written" };
    } catch (error) {
      const reason = getErrorMessage(error);
      this.logger.error("FileCurator", "Writer guardian execution failed", {
        decision: output.decision,
        filePath,
        reason,
      });
      return { status: "failed", reason };
    }
  }

  /**
   * Builds the two agent tools, both closed over the single target file:
   * - read: returns the raw file, or a default heading when it is missing
   * - write: validates the payload and overwrites the file (normalized EOLs)
   */
  private createTools(filePath: string, targetFile: CuratedFilename): AgentTool[] {
    return [
      {
        name: "read",
        description: `Read the current raw content of ${targetFile}`,
        inputSchema: {
          type: "object",
          properties: {},
          additionalProperties: false,
        },
        // NOTE(review): this readFile is the project util — assumed to resolve
        // nullish for a missing file (hence the ?? fallback); confirm.
        execute: async () => (await readFile(filePath)) ?? getDefaultContent(targetFile),
      },
      {
        name: "write",
        description: `Overwrite ${targetFile} with the provided full content`,
        inputSchema: {
          type: "object",
          properties: {
            content: { type: "string" },
          },
          required: ["content"],
          additionalProperties: false,
        },
        execute: async (input) => {
          // Tool input arrives untyped from the agent; validate defensively.
          if (
            typeof input !== "object" ||
            input === null ||
            typeof (input as { content?: unknown }).content !== "string"
          ) {
            throw new Error("write tool requires string content");
          }

          await writeFileWithLock(
            filePath,
            normalizeFileContent((input as { content: string }).content)
          );
          return "ok";
        },
      },
    ];
  }
}