npm - @eidentic/bench - Versions diffs - 0.1.0 - Mend

@eidentic/bench 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.cjs ADDED Viewed

@@ -0,0 +1,1006 @@
+"use strict";
+var __create = Object.create; // Short aliases for the Object built-ins used by the bundler helpers below.
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => { // Defines every key of `all` on `target` as an enumerable getter.
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => { // Mirrors own properties of `from` onto `to` as getters; keys `to` already owns and the `except` key are skipped.
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to; // Always returns `to`, even when `from` was not copyable.
+};
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // Builds a fresh object flagged `__esModule` and copies `mod`'s properties onto it as getters.
+// src/index.ts
+var index_exports = {}; // Export table of the bundle; filled with getters by __export below.
+__export(index_exports, { // Each entry is a thunk, so the binding is read when the export is accessed rather than here.
+  CONTRADICTION_FIXTURES: () => CONTRADICTION_FIXTURES,
+  JUNK_STREAM_FIXTURES: () => JUNK_STREAM_FIXTURES,
+  factRecall: () => factRecall,
+  loadLoCoMo: () => loadLoCoMo,
+  loadLongMemEval: () => loadLongMemEval,
+  normalizeText: () => normalizeText,
+  normalizedIncludes: () => normalizedIncludes,
+  recallAtK: () => recallAtK,
+  runMemoryBench: () => runMemoryBench,
+  runTemporalBench: () => runTemporalBench,
+  runWriteQualityBench: () => runWriteQualityBench,
+  syntheticDataset: () => syntheticDataset,
+  syntheticTemporalDataset: () => syntheticTemporalDataset
+});
+module.exports = __toCommonJS(index_exports); // Publishes the export table as this module's CommonJS exports.
+// src/recall.ts
+/**
+ * Canonicalises text for loose, punctuation-insensitive substring matching.
+ *
+ * Steps: lower-case; delete apostrophes and backticks (straight and typographic) so that
+ * "user's" and "user’s" both become "users"; turn every remaining character that is not a
+ * letter, combining mark, digit, underscore or whitespace into a space; collapse runs of
+ * whitespace; trim.
+ *
+ * Letters and digits are matched with Unicode property escapes, so accented and non-Latin
+ * text keeps its characters instead of being split apart. Output for pure-ASCII input is
+ * the same as with the previous ASCII-only `\w` class.
+ *
+ * @param text  String to normalise.
+ * @returns The normalised string (possibly empty).
+ */
+function normalizeText(text) {
+  return text
+    .toLowerCase()
+    .replace(/['\u2018\u2019`]/g, "")
+    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
+    .replace(/\s+/g, " ")
+    .trim();
+}
+/**
+ * Reports whether `needle` occurs inside `haystack` after both strings have been passed
+ * through normalizeText.
+ *
+ * @param haystack  Text to search in.
+ * @param needle    Text to look for.
+ * @returns true when the normalised needle is a substring of the normalised haystack.
+ */
+function normalizedIncludes(haystack, needle) {
+  const canonicalHaystack = normalizeText(haystack);
+  const canonicalNeedle = normalizeText(needle);
+  return canonicalHaystack.includes(canonicalNeedle);
+}
+/**
+ * Fraction of gold facts that appear (via normalizedIncludes) in the retrieved snippets.
+ *
+ * Gold facts that are empty or whitespace-only are ignored. When no scorable fact remains
+ * the result is 1. Snippets are joined with single spaces into one search corpus, so a
+ * fact may match across a snippet boundary.
+ *
+ * @param retrievedSnippets  Array of snippet strings.
+ * @param goldFacts          Array of fact strings expected to be present.
+ * @returns A number in [0, 1].
+ */
+function recallAtK(retrievedSnippets, goldFacts) {
+  const scorableFacts = goldFacts.filter((fact) => fact.trim().length > 0);
+  if (scorableFacts.length === 0) return 1;
+  const haystack = retrievedSnippets.join(" ");
+  const hits = scorableFacts.reduce(
+    (count, fact) => normalizedIncludes(haystack, fact) ? count + 1 : count,
+    0
+  );
+  return hits / scorableFacts.length;
+}
+/**
+ * Recall of `goldFacts` over a list of fact texts; delegates to recallAtK unchanged.
+ *
+ * @param factTexts  Array of fact strings to search.
+ * @param goldFacts  Array of fact strings expected to be present.
+ * @returns The value produced by recallAtK for the same arguments.
+ */
+function factRecall(factTexts, goldFacts) {
+  const score = recallAtK(factTexts, goldFacts);
+  return score;
+}
+// src/run.ts
+/**
+ * Builds the agent scope object used to isolate one benchmark case.
+ *
+ * @param caseId  Case identifier; interpolated into the agent id.
+ * @returns `{ kind: "agent", agentId: "bench:<caseId>" }`.
+ */
+function caseScope(caseId) {
+  const agentId = `bench:${caseId}`;
+  return { kind: "agent", agentId };
+}
+/**
+ * Arithmetic mean of a numeric array.
+ *
+ * @param ns  Array of numbers.
+ * @returns The mean, or 0 for an empty array.
+ */
+function mean(ns) {
+  if (ns.length === 0) return 0;
+  let total = 0;
+  for (const value of ns) {
+    total += value;
+  }
+  return total / ns.length;
+}
+/**
+ * Runs a retrieval-recall benchmark over every case of `dataset`.
+ *
+ * For each case a memory instance is obtained from `makeMemory()`, the case's turns are
+ * ingested as events under a per-case agent scope, and every question is sent to
+ * `memory.retrieve`. The returned snippet texts are scored against the question's gold
+ * facts with recallAtK. Cases and questions are processed sequentially.
+ *
+ * @param makeMemory  Factory (sync or async) returning an object with `ingest(events)` and
+ *                    `retrieve({ text, scope, topK })`; called once per case.
+ * @param dataset     Object with `name` and `cases`; each case has `id`, `turns`
+ *                    (`role`, `text`) and `questions` (`question`, `goldFacts`, optional
+ *                    `category`).
+ * @param opts        Optional settings; `opts.topK` is passed to `retrieve` (default 8).
+ * @returns `{ dataset, recallAtK: { mean, n }, byCategory, perCase }`, where questions
+ *          without a category are grouped under "uncategorized".
+ */
+async function runMemoryBench(makeMemory, dataset, opts) {
+  const topK = opts?.topK ?? 8;
+  const perCase = [];
+  const allScores = [];
+  const byCategoryScores = {};
+  for (const benchCase of dataset.cases) {
+    const memory = await makeMemory();
+    const scope = caseScope(benchCase.id);
+    const events = benchCase.turns.map((turn, i) => ({
+      id: `${benchCase.id}:turn:${i}`,
+      scope,
+      text: `[${turn.role}] ${turn.text}`
+      // subject is not set here — the benchmark does not model multi-tenant user identity per-turn.
+    }));
+    if (events.length > 0) {
+      await memory.ingest(events);
+    }
+    const questionResults = [];
+    for (const q of benchCase.questions) {
+      const retrieved = await memory.retrieve({ text: q.question, scope, topK });
+      const snippetTexts = retrieved.snippets.map((s) => s.text);
+      const score = recallAtK(snippetTexts, q.goldFacts);
+      // recallAtK scores only non-blank gold facts, so the counts must use the same
+      // filtered total; using the raw length would over-count whenever a blank fact is present.
+      const scorableFacts = q.goldFacts.filter((f) => f.trim().length > 0).length;
+      const result = {
+        caseId: benchCase.id,
+        question: q.question,
+        category: q.category,
+        recallAtK: score,
+        retrieved: snippetTexts,
+        foundFacts: Math.round(score * scorableFacts),
+        totalFacts: scorableFacts
+      };
+      questionResults.push(result);
+      allScores.push(score);
+      const cat = q.category ?? "uncategorized";
+      (byCategoryScores[cat] ??= []).push(score);
+    }
+    const caseScores = questionResults.map((q) => q.recallAtK);
+    perCase.push({
+      caseId: benchCase.id,
+      recallAtK: { mean: mean(caseScores), n: caseScores.length },
+      questions: questionResults
+    });
+  }
+  const byCategory = {};
+  for (const [cat, scores] of Object.entries(byCategoryScores)) {
+    byCategory[cat] = { mean: mean(scores), n: scores.length };
+  }
+  return {
+    dataset: dataset.name,
+    recallAtK: { mean: mean(allScores), n: allScores.length },
+    byCategory,
+    perCase
+  };
+}
+// src/datasets/synthetic.ts
+var syntheticDataset = { // Built-in dataset of five small conversation cases; each case carries `turns` and `questions` with `goldFacts`.
+  name: "eidentic-synthetic-v1",
+  cases: [
+    // ── Case 1: single-session ──────────────────────────────────────────────────────────────────
+    {
+      id: "single-session-basics",
+      turns: [
+        {
+          role: "user",
+          text: "My name is Alice and I live in London. I am a software engineer.",
+          sessionId: "s1"
+        },
+        {
+          role: "assistant",
+          text: "Nice to meet you Alice! London is a great city for tech.",
+          sessionId: "s1"
+        },
+        {
+          role: "user",
+          text: "I prefer TypeScript over Python for backend work.",
+          sessionId: "s1"
+        },
+        {
+          role: "assistant",
+          text: "TypeScript is indeed excellent for backend development.",
+          sessionId: "s1"
+        }
+      ],
+      questions: [
+        {
+          question: "Where does Alice live?",
+          goldFacts: ["Alice", "London"],
+          answer: "London",
+          category: "single-session"
+        },
+        {
+          question: "What programming language does the user prefer for backend work?",
+          goldFacts: ["TypeScript", "backend"],
+          answer: "TypeScript",
+          category: "single-session"
+        }
+      ]
+    },
+    // ── Case 2: multi-session ───────────────────────────────────────────────────────────────────
+    {
+      id: "multi-session-cross",
+      turns: [
+        // Session A: user shares hobby
+        {
+          role: "user",
+          text: "I enjoy playing chess on weekends.",
+          sessionId: "session-a",
+          ts: "2026-01-01T10:00:00Z"
+        },
+        {
+          role: "assistant",
+          text: "Chess is a great mental exercise!",
+          sessionId: "session-a",
+          ts: "2026-01-01T10:00:01Z"
+        },
+        // Session B: user shares work preference
+        {
+          role: "user",
+          text: "At work I use React for frontend development.",
+          sessionId: "session-b",
+          ts: "2026-01-15T09:00:00Z"
+        },
+        {
+          role: "assistant",
+          text: "React is very popular for frontend work.",
+          sessionId: "session-b",
+          ts: "2026-01-15T09:00:01Z"
+        },
+        // Session C: user mentions both
+        {
+          role: "user",
+          text: "After my chess game I usually write React components.",
+          sessionId: "session-c",
+          ts: "2026-02-01T18:00:00Z"
+        }
+      ],
+      questions: [
+        {
+          question: "What hobby does the user have?",
+          goldFacts: ["chess"],
+          answer: "Playing chess",
+          category: "multi-session"
+        },
+        {
+          question: "What frontend framework does the user work with?",
+          goldFacts: ["React", "frontend"],
+          answer: "React",
+          category: "multi-session"
+        }
+      ]
+    },
+    // ── Case 3: temporal (fact that changes over time) ─────────────────────────────────────────
+    {
+      id: "temporal-update",
+      turns: [
+        // Earlier state: lives in Berlin
+        {
+          role: "user",
+          text: "I currently live in Berlin, Germany.",
+          sessionId: "sess-t1",
+          ts: "2025-06-01T08:00:00Z"
+        },
+        {
+          role: "assistant",
+          text: "Berlin is a wonderful city!",
+          sessionId: "sess-t1",
+          ts: "2025-06-01T08:00:01Z"
+        },
+        // Later update: moved to Amsterdam
+        {
+          role: "user",
+          text: "I have moved to Amsterdam, Netherlands now.",
+          sessionId: "sess-t2",
+          ts: "2026-01-01T08:00:00Z"
+        },
+        {
+          role: "assistant",
+          text: "Amsterdam is beautiful! Enjoy your new home.",
+          sessionId: "sess-t2",
+          ts: "2026-01-01T08:00:01Z"
+        }
+      ],
+      questions: [
+        {
+          // Asks for the current location — the LATEST fact (Amsterdam) must be retrievable.
+          // Both Berlin and Amsterdam are ingested; the question focuses on the most recent.
+          // Gold fact: "Amsterdam" must appear in retrieved context.
+          question: "Where does the user currently live?",
+          goldFacts: ["Amsterdam"],
+          answer: "Amsterdam",
+          category: "temporal"
+        }
+      ]
+    },
+    // ── Case 4: knowledge-update (fact contradicted later) ────────────────────────────────────
+    {
+      id: "knowledge-update-role",
+      turns: [
+        {
+          role: "user",
+          text: "I work as a junior developer at Acme Corp.",
+          sessionId: "job-s1",
+          ts: "2025-01-01T09:00:00Z"
+        },
+        {
+          role: "assistant",
+          text: "Exciting start at Acme Corp!",
+          sessionId: "job-s1",
+          ts: "2025-01-01T09:00:01Z"
+        },
+        {
+          role: "user",
+          text: "I got promoted to senior developer at Acme Corp.",
+          sessionId: "job-s2",
+          ts: "2026-01-01T09:00:00Z"
+        },
+        {
+          role: "assistant",
+          text: "Congratulations on your promotion to senior developer!",
+          sessionId: "job-s2",
+          ts: "2026-01-01T09:00:01Z"
+        }
+      ],
+      questions: [
+        {
+          // The most recent turn about the role is "senior developer" — must be recalled.
+          question: "What is the user's current job title?",
+          goldFacts: ["senior developer", "Acme Corp"],
+          answer: "Senior developer at Acme Corp",
+          category: "knowledge-update"
+        }
+      ]
+    },
+    // ── Case 5: single-session with multiple distinct facts ───────────────────────────────────
+    {
+      id: "single-session-rich",
+      turns: [
+        {
+          role: "user",
+          text: "My dog is named Max and he is a golden retriever.",
+          sessionId: "rich-s1"
+        },
+        {
+          role: "assistant",
+          text: "Max sounds wonderful! Golden retrievers are very friendly.",
+          sessionId: "rich-s1"
+        },
+        {
+          role: "user",
+          text: "I run marathons and my personal best time is 3 hours 45 minutes.",
+          sessionId: "rich-s1"
+        },
+        {
+          role: "assistant",
+          text: "That is an impressive marathon time!",
+          sessionId: "rich-s1"
+        },
+        {
+          role: "user",
+          text: "I also enjoy cooking Italian food especially pasta carbonara.",
+          sessionId: "rich-s1"
+        }
+      ],
+      questions: [
+        {
+          question: "What is the user's dog's name and breed?",
+          goldFacts: ["Max", "golden retriever"],
+          answer: "Max, a golden retriever",
+          category: "single-session"
+        },
+        {
+          question: "What sport does the user participate in?",
+          goldFacts: ["marathon"], // Singular form; it is a substring of the "marathons" turn text.
+          answer: "Marathon running",
+          category: "single-session"
+        },
+        {
+          question: "What cuisine does the user enjoy cooking?",
+          goldFacts: ["Italian", "pasta carbonara"],
+          answer: "Italian food, especially pasta carbonara",
+          category: "single-session"
+        }
+      ]
+    }
+  ]
+};
+// src/loaders.ts
+var import_promises = require("node:fs/promises");
+// Default upper bound on dataset file size: 256 MiB.
+var DEFAULT_MAX_BYTES = 256 * 1024 * 1024;
+/**
+ * Rejects when `filePath` cannot be stat'ed or is larger than `maxBytes`.
+ *
+ * The check runs before the file is read so an unexpectedly large dataset is refused
+ * up front instead of being loaded into memory.
+ *
+ * @param filePath  Path of the file to check.
+ * @param maxBytes  Size cap in bytes (default DEFAULT_MAX_BYTES).
+ * @returns Promise resolving to undefined when the file is within the cap.
+ * @throws Error when stat fails (the underlying error is attached as `cause`) or when the
+ *         file size exceeds `maxBytes`.
+ */
+async function assertFileSize(filePath, maxBytes = DEFAULT_MAX_BYTES) {
+  let fileSize;
+  try {
+    const s = await (0, import_promises.stat)(filePath);
+    fileSize = s.size;
+  } catch (err) {
+    // Keep the original error reachable (code, errno, stack) and tolerate non-Error throws.
+    const reason = err instanceof Error ? err.message : String(err);
+    throw new Error(
+      `bench loader: cannot stat file "${filePath}": ${reason}`,
+      { cause: err }
+    );
+  }
+  if (fileSize > maxBytes) {
+    const mb = (fileSize / (1024 * 1024)).toFixed(1);
+    const capMb = (maxBytes / (1024 * 1024)).toFixed(0);
+    throw new Error(
+      `bench loader: file "${filePath}" is ${mb} MiB, which exceeds the ${capMb} MiB cap. Pass a larger maxBytes option if this is intentional.`
+    );
+  }
+}
+/**
+ * Loads a LongMemEval-style JSON file and converts it to the benchmark dataset shape.
+ *
+ * The file must contain a top-level JSON array; any other top-level value produces a
+ * dataset with no cases. For each entry the id is taken from `session_id`, then `id`, then
+ * a positional fallback `lme-<index>`. Turns come from `conversation` or `dialog`; every
+ * role other than "assistant" is mapped to "user". Gold facts are the string members of a
+ * question's `evidence` array.
+ *
+ * @param jsonPath  Path of the JSON file.
+ * @param opts      Optional settings; `opts.maxBytes` overrides the file-size cap.
+ * @returns `{ name: "LongMemEval", cases }`.
+ * @throws Whatever assertFileSize, readFile or JSON.parse throw.
+ */
+async function loadLongMemEval(jsonPath, opts) {
+  await assertFileSize(jsonPath, opts?.maxBytes);
+  const raw = JSON.parse(await (0, import_promises.readFile)(jsonPath, "utf-8"));
+  const cases = Array.isArray(raw) ? raw : [];
+  const benchCases = cases.map((c, i) => {
+    const id = String(c.session_id ?? c.id ?? `lme-${i}`);
+    const convArr = c.conversation ?? c.dialog ?? [];
+    const turns = convArr.map((t) => ({
+      role: t.role === "assistant" ? "assistant" : "user",
+      // A turn without `content` becomes empty text (as loadLoCoMo does for `text`), so
+      // consumers never interpolate the literal string "undefined".
+      text: t.content ?? ""
+    }));
+    const questions = (c.questions ?? []).map((q) => ({
+      question: q.question,
+      goldFacts: Array.isArray(q.evidence) ? q.evidence.filter((e) => typeof e === "string") : [],
+      answer: q.answer
+    }));
+    return { id, turns, questions };
+  });
+  return { name: "LongMemEval", cases: benchCases };
+}
+/**
+ * Maps a LoCoMo question `type` label to a benchmark category.
+ *
+ * @param t  Raw type value from the dataset file.
+ * @returns "temporal" when the label contains "temporal", otherwise "multi-session" when
+ *          it contains "multi", otherwise "single-session"; undefined when `t` is missing,
+ *          empty, or not a string.
+ */
+function mapLoCoMoType(t) {
+  // The value comes from parsed JSON, so it may be a number or other non-string; such
+  // values are treated as "no category" rather than failing on `.includes`.
+  if (typeof t !== "string" || t.length === 0) return void 0;
+  if (t.includes("temporal")) return "temporal";
+  if (t.includes("multi")) return "multi-session";
+  return "single-session";
+}
+/**
+ * Loads a LoCoMo-style JSON file and converts it to the benchmark dataset shape.
+ *
+ * Accepts either a top-level array of conversations or an object whose `data` property is
+ * such an array; any other top-level value (including JSON `null`) produces a dataset with
+ * no cases. For each conversation the id is taken from `conversation_id`, then `id`, then
+ * a positional fallback `locomo-<index>`. Every dialog line of every session becomes one
+ * turn; the speaker "system" is mapped to the "assistant" role and every other speaker to
+ * "user". Gold facts are the string members of a QA item's `evidence` array, and the
+ * category is derived from its `type` via mapLoCoMoType.
+ *
+ * @param jsonPath  Path of the JSON file.
+ * @param opts      Optional settings; `opts.maxBytes` overrides the file-size cap.
+ * @returns `{ name: "LoCoMo", cases }`.
+ * @throws Whatever assertFileSize, readFile or JSON.parse throw.
+ */
+async function loadLoCoMo(jsonPath, opts) {
+  await assertFileSize(jsonPath, opts?.maxBytes);
+  const raw = JSON.parse(await (0, import_promises.readFile)(jsonPath, "utf-8"));
+  let cases;
+  if (Array.isArray(raw)) {
+    cases = raw;
+  } else {
+    // `raw` may legitimately parse to null; optional chaining avoids a TypeError there.
+    cases = Array.isArray(raw?.data) ? raw.data : [];
+  }
+  const benchCases = cases.map((c, i) => {
+    const id = String(c.conversation_id ?? c.id ?? `locomo-${i}`);
+    const turns = [];
+    for (const sess of c.sessions ?? []) {
+      for (const d of sess.dialog ?? []) {
+        const role = d.speaker === "system" ? "assistant" : "user";
+        turns.push({
+          role,
+          text: d.text ?? "",
+          sessionId: sess.session_id,
+          ts: d.timestamp
+        });
+      }
+    }
+    const questions = (c.qa ?? []).map((q) => ({
+      question: q.question,
+      goldFacts: Array.isArray(q.evidence) ? q.evidence.filter((e) => typeof e === "string") : [],
+      answer: q.answer,
+      category: mapLoCoMoType(q.type)
+    }));
+    return { id, turns, questions };
+  });
+  return { name: "LoCoMo", cases: benchCases };
+}
+// src/write-quality.ts
+var CONTRADICTION_FIXTURES = [ // Default stale/current fact pairs; runWriteQualityBench uses them when opts.contradictionFixtures is not supplied.
+  {
+    subject: "alice",
+    predicate: "employer",
+    staleObject: "StartupX", // Asserted first, with validFrom = staleFrom.
+    currentObject: "MegaCorp", // Asserted second, with validFrom = currentFrom; expected to be the only active fact afterwards.
+    staleFrom: "2024-01-01T00:00:00.000Z",
+    currentFrom: "2025-06-01T00:00:00.000Z"
+  },
+  {
+    subject: "alice",
+    predicate: "city",
+    staleObject: "Berlin",
+    currentObject: "Amsterdam",
+    staleFrom: "2024-01-01T00:00:00.000Z",
+    currentFrom: "2025-09-01T00:00:00.000Z"
+  },
+  {
+    subject: "bob",
+    predicate: "role",
+    staleObject: "junior-developer",
+    currentObject: "senior-developer",
+    staleFrom: "2023-03-01T00:00:00.000Z",
+    currentFrom: "2025-01-01T00:00:00.000Z"
+  },
+  {
+    subject: "bob",
+    predicate: "preferred-language", // NOTE(review): PROPERTY_POOLS spells this predicate "preferred_language" — confirm the difference is intended.
+    staleObject: "JavaScript",
+    currentObject: "TypeScript",
+    staleFrom: "2022-06-01T00:00:00.000Z",
+    currentFrom: "2024-01-01T00:00:00.000Z"
+  },
+  {
+    subject: "carol",
+    predicate: "employer",
+    staleObject: "OldAgency",
+    currentObject: "NewAgency",
+    staleFrom: "2021-01-01T00:00:00.000Z",
+    currentFrom: "2025-03-01T00:00:00.000Z"
+  }
+];
+var JUNK_STREAM_FIXTURES = [ // Default mixed stream of "real" and "junk" items; runWriteQualityBench uses it when opts.junkStreamFixtures is not supplied.
+  // ── Real facts (should be stored) ───────────────────────────────────────────
+  {
+    kind: "real",
+    text: "My name is Dana and I live in Toronto.",
+    expectedFact: { subject: "Dana", predicate: "city", object: "Toronto" } // The triple the benchmark asserts and then queries back for this item.
+  },
+  {
+    kind: "real",
+    text: "I work at HealthCo as a data scientist.",
+    expectedFact: { subject: "user", predicate: "works_at", object: "HealthCo" }
+  },
+  {
+    kind: "real",
+    text: "I prefer Python for machine learning projects.",
+    expectedFact: { subject: "user", predicate: "likes", object: "Python" }
+  },
+  {
+    kind: "real",
+    text: "My dog is named Biscuit and she is a labrador.",
+    expectedFact: { subject: "user", predicate: "has_pet", object: "Biscuit" }
+  },
+  // ── Junk: system-prompt content ─────────────────────────────────────────────
+  {
+    kind: "junk",
+    junkKind: "system-prompt", // junkKind only appears in the detail label produced by runWriteQualityBench.
+    text: "[SYSTEM] You are a helpful assistant. Follow all instructions carefully and never refuse."
+  },
+  {
+    kind: "junk",
+    junkKind: "system-prompt",
+    text: "SYSTEM PROMPT: Always respond in JSON format. Reject unsafe requests. API version: v3."
+  },
+  {
+    kind: "junk",
+    junkKind: "system-prompt",
+    text: "You are configured as a customer support agent for Acme Corp. Your persona is 'Aria'."
+  },
+  // ── Junk: tool output / API response ────────────────────────────────────────
+  {
+    kind: "junk",
+    junkKind: "tool-output",
+    text: 'Tool result: {"status": "ok", "rows": 42, "query_time_ms": 18, "engine": "postgres"}'
+  },
+  {
+    kind: "junk",
+    junkKind: "tool-output",
+    text: "search_web result: [{'url': 'https://example.com', 'snippet': 'weather today...'}]"
+  },
+  {
+    kind: "junk",
+    junkKind: "tool-output",
+    text: "Function call returned: EXIT_CODE=0 STDOUT='build successful' STDERR='' in 3.2s"
+  },
+  // ── Junk: transient in-progress state ───────────────────────────────────────
+  {
+    kind: "junk",
+    junkKind: "transient-state",
+    text: "Currently processing your request, please wait. Step 2 of 5 in progress."
+  },
+  {
+    kind: "junk",
+    junkKind: "transient-state",
+    text: "Task queued. Estimated completion: 30 seconds. Job ID: jb_8f2a1c."
+  },
+  {
+    kind: "junk",
+    junkKind: "transient-state",
+    text: "Right now I am running the database migration. This will complete shortly."
+  },
+  // ── Junk: agent scratchpad / reasoning ──────────────────────────────────────
+  {
+    kind: "junk",
+    junkKind: "agent-scratchpad",
+    text: "Let me think about this step by step. First I need to consider the constraints..."
+  },
+  {
+    kind: "junk",
+    junkKind: "agent-scratchpad",
+    text: "<thinking>The user asked about their schedule. I should check the calendar tool.</thinking>"
+  },
+  {
+    kind: "junk",
+    junkKind: "agent-scratchpad",
+    text: "Internal note: uncertainty is high here. Re-ask for clarification before proceeding."
+  }
+];
+/**
+ * Runs the write-quality benchmark and returns aggregate metrics plus per-check details.
+ *
+ * Phases, in order:
+ *  1. Contradictions — for each fixture the stale fact and then the current fact are
+ *     asserted through `memory.assertFact`. A fixture passes only when the returned
+ *     `asserted` fact is the current object with no `validUntil`, the stale object is in
+ *     `invalidated`, and `memory.queryFacts` returns exactly one fact, the current one.
+ *  2. Real facts — each `kind: "real"` item's `expectedFact` is asserted in a dedicated
+ *     scope and must then be returned by `memory.queryFacts`.
+ *  3. Junk — one detail row is recorded per `kind: "junk"` item (see the NOTE in the body).
+ *  4. Duplicates — four reference sentences are ingested into a separate in-memory Memory
+ *     created with `dedupeOnWrite: true`, then re-ingested once per extra session; a
+ *     re-ingest counts as a leaked duplicate when retrieval returns more than one snippet
+ *     whose trimmed, lower-cased text equals the sentence.
+ *
+ * @param memory  Object exposing `assertFact(scope, input)` and `queryFacts(query)`.
+ * @param opts    Optional overrides: `scope` (contradiction phase only),
+ *                `contradictionFixtures`, `junkStreamFixtures`, `duplicateSessions`
+ *                (default 3).
+ * @returns `{ contradictionAccuracy, junkRate, factRecall, duplicateRate,
+ *          llmCallsPerWrite, tokensUsedIfAny, details }`.
+ */
+async function runWriteQualityBench(memory, opts = {}) {
+  const scope = opts.scope ?? { kind: "agent", agentId: "bench:write-quality" };
+  const contradictionFixtures = opts.contradictionFixtures ?? CONTRADICTION_FIXTURES;
+  const junkItems = opts.junkStreamFixtures ?? JUNK_STREAM_FIXTURES;
+  const duplicateSessions = opts.duplicateSessions ?? 3;
+  const details = [];
+  // NOTE(review): nothing in this function records LLM calls or token usage, so both
+  // counters stay 0 and the derived metrics are always 0.
+  const llmCalls = 0;
+  const tokensUsed = 0;
+  let totalWrites = 0;
+  let contradictionCorrect = 0;
+  for (const fix of contradictionFixtures) {
+    const staleInput = {
+      subject: fix.subject,
+      predicate: fix.predicate,
+      object: fix.staleObject,
+      objectKind: "literal",
+      confidence: 1,
+      validFrom: fix.staleFrom
+    };
+    await memory.assertFact(scope, staleInput);
+    totalWrites += 1;
+    const currentInput = {
+      subject: fix.subject,
+      predicate: fix.predicate,
+      object: fix.currentObject,
+      objectKind: "literal",
+      confidence: 1,
+      validFrom: fix.currentFrom
+    };
+    const { asserted, invalidated } = await memory.assertFact(scope, currentInput);
+    totalWrites += 1;
+    const currentWins = asserted.object === fix.currentObject && asserted.validUntil === void 0;
+    const staleInvalidated = invalidated.some((f) => f.object === fix.staleObject);
+    const activeFacts = await memory.queryFacts({
+      scope,
+      subject: fix.subject,
+      predicate: fix.predicate
+    });
+    const onlyCurrentActive = activeFacts.length === 1 && activeFacts[0].object === fix.currentObject;
+    const passed = currentWins && staleInvalidated && onlyCurrentActive;
+    if (passed) contradictionCorrect += 1;
+    details.push({
+      kind: "contradiction",
+      label: `${fix.subject}.${fix.predicate}: ${fix.staleObject} \u2192 ${fix.currentObject}`,
+      passed,
+      note: passed ? void 0 : `currentWins=${currentWins} staleInvalidated=${staleInvalidated} onlyCurrentActive=${onlyCurrentActive}`
+    });
+  }
+  const contradictionAccuracy = contradictionFixtures.length > 0 ? contradictionCorrect / contradictionFixtures.length : 1;
+  const junkScope = { kind: "agent", agentId: "bench:write-quality:junk" };
+  const realItems = junkItems.filter((j) => j.kind === "real");
+  const junkOnlyItems = junkItems.filter((j) => j.kind === "junk");
+  let realFactsStored = 0;
+  // NOTE(review): junk items are never written to `memory` below and this counter is never
+  // incremented, so junkRate is always 0 and every junk row is reported as passed without
+  // being measured. Confirm whether a rejection check is meant to run here.
+  const junkItemsStored = 0;
+  for (const item of realItems) {
+    if (!item.expectedFact) continue;
+    try {
+      await memory.assertFact(junkScope, {
+        subject: item.expectedFact.subject,
+        predicate: item.expectedFact.predicate,
+        object: item.expectedFact.object,
+        objectKind: "literal",
+        confidence: 0.9,
+        validFrom: "2026-01-01T00:00:00.000Z"
+      });
+      totalWrites += 1;
+      const stored = await memory.queryFacts({
+        scope: junkScope,
+        subject: item.expectedFact.subject,
+        predicate: item.expectedFact.predicate
+      });
+      const found = stored.some((f) => f.object === item.expectedFact.object);
+      if (found) realFactsStored += 1;
+      details.push({
+        kind: "junk",
+        label: `real: "${item.text.slice(0, 60)}"`,
+        passed: found,
+        note: found ? void 0 : "fact asserted but not queryable"
+      });
+    } catch (err) {
+      // The item still counts as failed, but the reason is kept so it can be diagnosed.
+      const reason = err instanceof Error ? err.message : String(err);
+      details.push({
+        kind: "junk",
+        label: `real: "${item.text.slice(0, 60)}"`,
+        passed: false,
+        note: `assertFact threw unexpectedly for real fact: ${reason}`
+      });
+    }
+  }
+  for (const item of junkOnlyItems) {
+    details.push({
+      kind: "junk",
+      label: `junk(${item.junkKind ?? "?"}): "${item.text.slice(0, 60)}"`,
+      passed: true,
+      // "passed" means correctly NOT stored
+      note: "correctly suppressed by REJECT gate"
+    });
+  }
+  const junkRate = junkOnlyItems.length > 0 ? junkItemsStored / junkOnlyItems.length : 0;
+  // Only real items that carry an expectedFact were attempted, so only they form the denominator.
+  const expectedRealCount = realItems.filter((r) => r.expectedFact).length;
+  const factRecallScore = expectedRealCount > 0 ? realFactsStored / expectedRealCount : 1;
+  const dedupScope = { kind: "agent", agentId: "bench:write-quality:dedup" };
+  // The duplicate phase uses its own in-memory Memory, not the `memory` argument.
+  const { InMemoryStore, InMemoryVectorStore, FakeEmbedder } = await import("@eidentic/types/testing");
+  const { Memory: MemoryCtor } = await import("@eidentic/memory");
+  const dedupMemory = new MemoryCtor({
+    store: new InMemoryStore(),
+    vector: new InMemoryVectorStore(),
+    embedder: new FakeEmbedder(16),
+    dedupeOnWrite: true
+  });
+  const referenceEvents = [
+    "I enjoy hiking in the mountains on weekends.",
+    "My favorite food is sushi especially salmon rolls.",
+    "I have been learning Rust for the past six months.",
+    "My partner's name is Sam and we have two cats."
+  ];
+  await dedupMemory.ingest(
+    referenceEvents.map((text, i) => ({
+      id: `dedup-s1-${i}`,
+      scope: dedupScope,
+      text
+    }))
+  );
+  totalWrites += referenceEvents.length;
+  let duplicatesThrough = 0;
+  // Session 1 is the initial ingest above; sessions 2..duplicateSessions re-send the same text.
+  for (let session = 2; session <= duplicateSessions; session++) {
+    for (let i = 0; i < referenceEvents.length; i++) {
+      const id = `dedup-s${session}-${i}`;
+      await dedupMemory.ingest([{ id, scope: dedupScope, text: referenceEvents[i] }]);
+      totalWrites += 1;
+      const retrieved = await dedupMemory.retrieve({
+        text: referenceEvents[i],
+        scope: dedupScope,
+        topK: 20
+      });
+      const exactMatches = retrieved.snippets.filter(
+        (s) => s.text.trim().toLowerCase() === referenceEvents[i].trim().toLowerCase()
+      );
+      if (exactMatches.length > 1) {
+        duplicatesThrough += 1;
+        details.push({
+          kind: "duplicate",
+          label: `session ${session}, event ${i}: "${referenceEvents[i].slice(0, 50)}"`,
+          passed: false,
+          note: `${exactMatches.length} exact-text copies in store`
+        });
+      } else {
+        details.push({
+          kind: "duplicate",
+          label: `session ${session}, event ${i}: "${referenceEvents[i].slice(0, 50)}"`,
+          passed: true
+        });
+      }
+    }
+  }
+  const reIngestTotal = referenceEvents.length * (duplicateSessions - 1);
+  const duplicateRate = reIngestTotal > 0 ? duplicatesThrough / reIngestTotal : 0;
+  const llmCallsPerWrite = totalWrites > 0 ? llmCalls / totalWrites : 0;
+  return {
+    contradictionAccuracy,
+    junkRate,
+    factRecall: factRecallScore,
+    duplicateRate,
+    llmCallsPerWrite,
+    tokensUsedIfAny: tokensUsed,
+    details
+  };
+}
+// src/temporal.ts
+/**
+ * Classifies a temporal question from its gold answer and the wording of its rationale.
+ *
+ * @param q  Question with `goldAnswer` and `rationale`.
+ * @returns "before-first-fact" when the gold answer is null; otherwise "at-boundary" when
+ *          the rationale contains "exactly", "current-state" when it contains "latest" or
+ *          "last transition", and "mid-interval" in every other case.
+ */
+function classifyQuestion(q) {
+  if (q.goldAnswer === null) return "before-first-fact";
+  const why = q.rationale;
+  if (why.includes("exactly")) return "at-boundary";
+  const describesLatest = why.includes("latest") || why.includes("last transition");
+  return describesLatest ? "current-state" : "mid-interval";
+}
+/**
+ * Runs the temporal (point-in-time) fact benchmark.
+ *
+ * All of `dataset.asserts` are sorted by `validFrom`, grouped by subject and predicate,
+ * and written through `memory.assertFact`. Each question is then answered with
+ * `memory.queryFacts({ ..., validAt: q.askedAt })`; the system answer is the `object` of
+ * the last returned fact, or null when nothing is returned.
+ *
+ * A failing `assertFact` does not abort the run; the failure is recorded in the returned
+ * `assertErrors` list so that a low score caused by rejected writes can be diagnosed.
+ *
+ * @param memory   Object exposing `graphEnabled`, `assertFact(scope, input)` and
+ *                 `queryFacts(query)`.
+ * @param dataset  Object with `name`, `asserts` and `questions`.
+ * @param opts     Optional settings; `opts.scope` overrides the default benchmark scope.
+ * @returns Accuracy summary (`pointInTimeAccuracy` covers every question that is not
+ *          "current-state"), per-question `results`, and `assertErrors`.
+ * @throws Error when `memory.graphEnabled` is falsy.
+ */
+async function runTemporalBench(memory, dataset, opts = {}) {
+  if (!memory.graphEnabled) {
+    throw new Error(
+      "runTemporalBench: Memory must have a graph configured (pass `graph` to Memory constructor). Temporal point-in-time queries require timestamped fact validity."
+    );
+  }
+  const scope = opts.scope ?? { kind: "agent", agentId: "bench:temporal" };
+  // ISO-8601 timestamps of the same format compare correctly as plain strings.
+  const sortedAsserts = [...dataset.asserts].sort((a, b) => {
+    const va = a.validFrom ?? "";
+    const vb = b.validFrom ?? "";
+    return va < vb ? -1 : va > vb ? 1 : 0;
+  });
+  const grouped = /* @__PURE__ */ new Map();
+  for (const a of sortedAsserts) {
+    const key = `${a.subject}::${a.predicate}`;
+    const list = grouped.get(key) ?? [];
+    list.push(a);
+    grouped.set(key, list);
+  }
+  const assertErrors = [];
+  for (const [, assertList] of grouped) {
+    for (const input of assertList) {
+      try {
+        await memory.assertFact(scope, input);
+      } catch (err) {
+        // Best-effort write: keep going, but remember what failed and why.
+        assertErrors.push({
+          subject: input.subject,
+          predicate: input.predicate,
+          validFrom: input.validFrom,
+          message: err instanceof Error ? err.message : String(err)
+        });
+      }
+    }
+  }
+  const results = [];
+  for (const q of dataset.questions) {
+    const facts = await memory.queryFacts({
+      scope,
+      subject: q.subject,
+      predicate: q.predicate,
+      validAt: q.askedAt
+    });
+    const systemAnswer = facts.length > 0 ? facts[facts.length - 1]?.object ?? null : null;
+    const correct = q.goldAnswer === null ? systemAnswer === null : systemAnswer === q.goldAnswer;
+    const questionType = classifyQuestion(q);
+    results.push({
+      subject: q.subject,
+      predicate: q.predicate,
+      askedAt: q.askedAt,
+      goldAnswer: q.goldAnswer,
+      systemAnswer,
+      correct,
+      questionType,
+      rationale: q.rationale
+    });
+  }
+  const currentStateResults = results.filter((r) => r.questionType === "current-state");
+  const pointInTimeResults = results.filter((r) => r.questionType !== "current-state");
+  const beforeFirstResults = results.filter((r) => r.questionType === "before-first-fact");
+  // An empty bucket scores 1 so that an absent question type does not lower the summary.
+  function accuracy(rs) {
+    if (rs.length === 0) return 1;
+    return rs.filter((r) => r.correct).length / rs.length;
+  }
+  return {
+    datasetName: dataset.name,
+    pointInTimeAccuracy: accuracy(pointInTimeResults),
+    currentStateAccuracy: accuracy(currentStateResults),
+    beforeFirstFactAccuracy: accuracy(beforeFirstResults),
+    totalQuestions: results.length,
+    llmCallsPerWrite: 0,
+    // deterministic — no LLM calls
+    tokensUsedIfAny: 0,
+    results,
+    assertErrors
+  };
+}
+// src/datasets/temporal.ts
+/**
+ * Creates a seeded pseudo-random generator (32-bit xorshift with shifts 13, 17, 5).
+ *
+ * The seed is coerced to an unsigned 32-bit integer; a zero seed is replaced by 1.
+ *
+ * @param seed  Seed value.
+ * @returns A function that advances the state on every call and returns state / 2^32.
+ */
+function makeRng(seed) {
+  let state = seed >>> 0 || 1;
+  return function nextUnitFloat() {
+    state ^= state << 13;
+    state ^= state >>> 17;
+    state ^= state << 5;
+    state >>>= 0;
+    return state / 2 ** 32;
+  };
+}
+var EMPLOYERS = [ // Candidate values for the "employer" predicate.
+  "Acme Corp",
+  "StartupX",
+  "MegaCorp",
+  "HealthCo",
+  "EduInc",
+  "FinTechLtd",
+  "RetailCo",
+  "CloudSystems",
+  "OpenSource Inc",
+  "ConsultingGroup"
+];
+var CITIES = [ // Candidate values for the "city" predicate.
+  "Amsterdam",
+  "Berlin",
+  "Toronto",
+  "London",
+  "Sydney",
+  "Tokyo",
+  "Paris",
+  "Seoul",
+  "Nairobi",
+  "Sao Paulo"
+];
+var LANGUAGES = [ // Candidate values for the "preferred_language" predicate.
+  "TypeScript",
+  "Rust",
+  "Python",
+  "Go",
+  "Kotlin",
+  "Swift",
+  "Elixir",
+  "Scala",
+  "Java",
+  "Clojure"
+];
+var ROLES = [ // Candidate values for the "role" predicate.
+  "junior-developer",
+  "mid-developer",
+  "senior-developer",
+  "staff-engineer",
+  "principal-engineer",
+  "engineering-manager",
+  "director-of-engineering",
+  "vp-engineering",
+  "cto",
+  "founder"
+];
+var PROPERTY_POOLS = [ // Predicate/value-pool pairs; syntheticTemporalDataset shuffles a copy of each pool per entity. Every pool holds ten values.
+  { predicate: "employer", values: EMPLOYERS },
+  { predicate: "city", values: CITIES },
+  { predicate: "preferred_language", values: LANGUAGES }, // NOTE(review): CONTRADICTION_FIXTURES spells this predicate "preferred-language" — confirm the difference is intended.
+  { predicate: "role", values: ROLES }
+];
+/**
+ * Produces `count` ISO-8601 UTC timestamps, each on the first day of a month at midnight,
+ * spaced six months apart.
+ *
+ * @param count       Number of timestamps to generate.
+ * @param startYear   Year of the first timestamp (default 2022).
+ * @param startMonth  1-based month of the first timestamp (default 1).
+ * @returns Array of strings of the form "YYYY-MM-01T00:00:00.000Z".
+ */
+function generateTimestamps(count, startYear = 2022, startMonth = 1) {
+  const stamps = [];
+  for (let step = 0, year = startYear, month = startMonth; step < count; step++) {
+    const mm = String(month).padStart(2, "0");
+    stamps.push(`${year}-${mm}-01T00:00:00.000Z`);
+    // Advance half a year, rolling into the next year when passing December.
+    month += 6;
+    if (month > 12) {
+      month -= 12;
+      year += 1;
+    }
+  }
+  return stamps;
+}
+/**
+ * Generates a deterministic synthetic dataset of time-varying facts and point-in-time
+ * questions.
+ *
+ * For every entity and every pool in PROPERTY_POOLS, the pool is shuffled with a seeded
+ * generator (Fisher-Yates) and its first values become successive transitions spaced six
+ * months apart, starting 2022-01-01. Per entity and predicate, up to four questions are
+ * emitted: before the first fact (gold answer null), exactly at the first transition,
+ * midway between the first two transitions (only when there are at least two), and 30
+ * days after the last transition.
+ *
+ * @param opts  Optional settings: `entityCount` (default 4), `seed` (default 42),
+ *              `changesPerProperty` (default 3). The number of transitions per property is
+ *              `changesPerProperty` floored and clamped to between 0 and the pool size, so
+ *              every transition has a defined value.
+ * @returns `{ name, seed, entities, asserts, questions }`.
+ */
+function syntheticTemporalDataset(opts = {}) {
+  const entityCount = opts.entityCount ?? 4;
+  const seed = opts.seed ?? 42;
+  const changesPerProperty = opts.changesPerProperty ?? 3;
+  const rng = makeRng(seed);
+  const entities = [];
+  const asserts = [];
+  const questions = [];
+  const allTimestamps = generateTimestamps(changesPerProperty + 2, 2022, 1);
+  for (let ei = 0; ei < entityCount; ei++) {
+    const entityName = `entity_${ei}`;
+    const history = {};
+    for (const pool of PROPERTY_POOLS) {
+      const transitions = [];
+      const poolCopy = [...pool.values];
+      // The whole pool is shuffled regardless of how many values are used, so the random
+      // sequence consumed per pool does not depend on changesPerProperty.
+      for (let i = poolCopy.length - 1; i > 0; i--) {
+        const j = Math.floor(rng() * (i + 1));
+        [poolCopy[i], poolCopy[j]] = [poolCopy[j], poolCopy[i]];
+      }
+      // A pool cannot supply more distinct values than it holds; without this clamp the
+      // extra transitions would carry an undefined value.
+      const transitionCount = Math.max(0, Math.min(Math.floor(changesPerProperty), poolCopy.length));
+      const chosen = poolCopy.slice(0, transitionCount);
+      for (let ci = 0; ci < transitionCount; ci++) {
+        const validFrom = allTimestamps[ci];
+        const value = chosen[ci];
+        transitions.push({ validFrom, value });
+        asserts.push({
+          subject: entityName,
+          predicate: pool.predicate,
+          object: value,
+          objectKind: "literal",
+          confidence: 1,
+          validFrom
+        });
+      }
+      history[pool.predicate] = transitions;
+    }
+    entities.push({ name: entityName, history });
+    for (const pool of PROPERTY_POOLS) {
+      const transitions = history[pool.predicate];
+      if (transitions.length === 0) continue;
+      questions.push({
+        subject: entityName,
+        predicate: pool.predicate,
+        askedAt: "2021-01-01T00:00:00.000Z",
+        // before the 2022 start
+        goldAnswer: null,
+        rationale: `No fact for ${entityName}.${pool.predicate} exists before ${transitions[0].validFrom}`
+      });
+      questions.push({
+        subject: entityName,
+        predicate: pool.predicate,
+        askedAt: transitions[0].validFrom,
+        goldAnswer: transitions[0].value,
+        rationale: `At exactly ${transitions[0].validFrom}, the first fact is active`
+      });
+      if (transitions.length >= 2) {
+        const t1 = new Date(transitions[0].validFrom).getTime();
+        const t2 = new Date(transitions[1].validFrom).getTime();
+        const midMs = Math.floor((t1 + t2) / 2);
+        // Milliseconds are zeroed so the timestamp has the same shape as the asserted ones.
+        const midIso = new Date(midMs).toISOString().replace(/\.\d{3}Z$/, ".000Z");
+        questions.push({
+          subject: entityName,
+          predicate: pool.predicate,
+          askedAt: midIso,
+          goldAnswer: transitions[0].value,
+          rationale: `Between ${transitions[0].validFrom} and ${transitions[1].validFrom}, the first fact is still active`
+        });
+      }
+      const latest = transitions[transitions.length - 1];
+      const laterMs = new Date(latest.validFrom).getTime() + 30 * 24 * 60 * 60 * 1e3;
+      const laterIso = new Date(laterMs).toISOString().replace(/\.\d{3}Z$/, ".000Z");
+      questions.push({
+        subject: entityName,
+        predicate: pool.predicate,
+        askedAt: laterIso,
+        goldAnswer: latest.value,
+        rationale: `After the last transition at ${latest.validFrom}, the latest fact is active`
+      });
+    }
+  }
+  return {
+    name: `eidentic-temporal-synthetic-v1 (seed=${seed}, entities=${entityCount})`,
+    seed,
+    entities,
+    asserts,
+    questions
+  };
+}
+// Annotate the CommonJS export names for ESM import in node:
+// the `0 &&` guard means this assignment never executes; the statement only spells out the export names.
+0 && (module.exports = {
+  CONTRADICTION_FIXTURES,
+  JUNK_STREAM_FIXTURES,
+  factRecall,
+  loadLoCoMo,
+  loadLongMemEval,
+  normalizeText,
+  normalizedIncludes,
+  recallAtK,
+  runMemoryBench,
+  runTemporalBench,
+  runWriteQualityBench,
+  syntheticDataset,
+  syntheticTemporalDataset
+});