npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.5.8 → 0.5.9 - Mend

@pentatonic-ai/ai-agent-sdk 0.5.8 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/package.json +1 -1
package/packages/doctor/__tests__/checks.test.js +357 -0
package/packages/doctor/src/checks/claude-code.js +100 -0
package/packages/doctor/src/checks/data-flow.js +252 -0
package/packages/doctor/src/index.js +2 -0
package/packages/doctor/src/runner.js +7 -3
package/packages/memory/src/__tests__/api-contract.test.js +151 -0
package/packages/memory/src/ingest.js +40 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.5.8",
+  "version": "0.5.9",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",

package/packages/doctor/__tests__/checks.test.js CHANGED Viewed

@@ -1,5 +1,7 @@
 import { universalChecks } from "../src/checks/universal.js";
 import { hostedTesChecks } from "../src/checks/hosted-tes.js";
+import { dataFlowChecks } from "../src/checks/data-flow.js";
+import { claudeCodeChecks } from "../src/checks/claude-code.js";
 import { platformChecks } from "../src/checks/platform.js";
 // fetch mocking — we don't want any real network in unit tests.
@@ -185,3 +187,358 @@ describe("platform checks", () => {
     expect(r.msg).toMatch(/no models loaded/);
   });
 });
+describe("data-flow checks", () => {
+  beforeEach(() => {
+    process.env.TES_ENDPOINT = "https://example.test";
+    process.env.TES_API_KEY = "tes_test_key";
+    process.env.TES_CLIENT_ID = "test-client";
+  });
+  afterEach(() => {
+    delete process.env.TES_ENDPOINT;
+    delete process.env.TES_API_KEY;
+    delete process.env.TES_CLIENT_ID;
+    delete process.env.PENTATONIC_DOCTOR_PROBE_QUERY;
+  });
+  // Replaces globalThis.fetch with a recorder. Each call is logged as
+  // { url, headers, body } (body JSON-decoded, or null when absent) and
+  // then answered by `handler`, so tests can assert on the exact request
+  // doctor sends as well as on how it handles the response.
+  function captureFetch(handler) {
+    const recorded = [];
+    const recordingFetch = async (url, opts) => {
+      const rawBody = opts?.body;
+      const body = rawBody ? JSON.parse(rawBody) : null;
+      recorded.push({ url, headers: opts?.headers || {}, body });
+      return handler(url, opts);
+    };
+    globalThis.fetch = recordingFetch;
+    return recorded;
+  }
+  it("registers the three expected probes", () => {
+    const names = dataFlowChecks().map((c) => c.name);
+    expect(names).toContain("TES event stream has data");
+    expect(names).toContain("MEMORY_CREATED events for client");
+    expect(names).toContain("semanticSearchMemories returns hits");
+  });
+  // --- event stream check ---
+  it("event stream: sends GraphQL query with `limit:1` (not `first:1`)", async () => {
+    const calls = captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { events: { totalCount: 5 } } }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "TES event stream has data"
+    );
+    await c.run();
+    expect(calls).toHaveLength(1);
+    expect(calls[0].body.query).toMatch(/events\(\s*limit:\s*1\s*\)/);
+    expect(calls[0].body.query).not.toMatch(/first\s*:/);
+    expect(calls[0].body.query).toMatch(/totalCount/);
+  });
+  it("event stream: warns when totalCount is 0", async () => {
+    captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { events: { totalCount: 0 } } }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "TES event stream has data"
+    );
+    const r = await c.run();
+    expect(r.ok).toBe(false);
+    expect(r.msg).toMatch(/0 events yet/);
+  });
+  it("event stream: passes with a positive count", async () => {
+    captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { events: { totalCount: 42 } } }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "TES event stream has data"
+    );
+    const r = await c.run();
+    expect(r.ok).toBe(true);
+    expect(r.detail.totalCount).toBe(42);
+  });
+  // --- memory-created check ---
+  it("memory-created: filter uses eventType + StringFilterInput wrapper", async () => {
+    const calls = captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { events: { totalCount: 3 } } }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "MEMORY_CREATED events for client"
+    );
+    await c.run();
+    const { query, variables } = calls[0].body;
+    // Schema requires eventType (not "kind") with a StringFilterInput
+    // wrapper, and clientId likewise as a filter wrapper.
+    expect(query).toMatch(/eventType:\s*\{\s*eq:\s*\$eventType\s*\}/);
+    expect(query).toMatch(/clientId:\s*\{\s*eq:\s*\$client\s*\}/);
+    expect(query).not.toMatch(/\bkind\b/);
+    expect(variables.eventType).toBe("MEMORY_CREATED");
+    expect(variables.client).toBe("test-client");
+  });
+  it("memory-created: flags the client id in the warning", async () => {
+    captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { events: { totalCount: 0 } } }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "MEMORY_CREATED events for client"
+    );
+    const r = await c.run();
+    expect(r.ok).toBe(false);
+    expect(r.msg).toMatch(/test-client/);
+  });
+  // --- semantic search check ---
+  it("semantic search: sends required clientId arg + selects similarity (not score)", async () => {
+    const calls = captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({
+        data: { semanticSearchMemories: [{ id: "m1", similarity: 0.8 }] },
+      }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "semanticSearchMemories returns hits"
+    );
+    await c.run();
+    const { query, variables } = calls[0].body;
+    // clientId is required by the schema; doctor must send it.
+    expect(query).toMatch(/clientId:\s*\$clientId/);
+    expect(variables.clientId).toBe("test-client");
+    // Result type exposes `similarity`, not `score`.
+    expect(query).toMatch(/similarity/);
+    expect(query).not.toMatch(/\bscore\b/);
+  });
+  it("semantic search: warns on 0 hits", async () => {
+    captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { semanticSearchMemories: [] } }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "semanticSearchMemories returns hits"
+    );
+    const r = await c.run();
+    expect(r.ok).toBe(false);
+    expect(r.msg).toMatch(/0 hits/);
+  });
+  it("semantic search: passes with hits", async () => {
+    captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({
+        data: { semanticSearchMemories: [{ id: "m1", similarity: 0.8 }] },
+      }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "semanticSearchMemories returns hits"
+    );
+    const r = await c.run();
+    expect(r.ok).toBe(true);
+    expect(r.detail.hits).toBe(1);
+  });
+  it("semantic search: 'cannot query field' skips gracefully", async () => {
+    captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({
+        errors: [{ message: 'Cannot query field "semanticSearchMemories" on type "Query"' }],
+      }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "semanticSearchMemories returns hits"
+    );
+    const r = await c.run();
+    expect(r.ok).toBe(true);
+    expect(r.msg).toMatch(/skipped/);
+  });
+  it("semantic search: schema-arg mismatches surface as errors, NOT silent skips", async () => {
+    // E.g. a missing required arg — error mentions the field name but
+    // is NOT the "Cannot query field" wording. Doctor must report,
+    // not pretend the deployment doesn't expose the field.
+    captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({
+        errors: [
+          {
+            message:
+              'Field "semanticSearchMemories" argument "clientId" of type "String!" is required',
+          },
+        ],
+      }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "semanticSearchMemories returns hits"
+    );
+    const r = await c.run();
+    expect(r.ok).toBe(false);
+    expect(r.msg).not.toMatch(/skipped/);
+    expect(r.msg).toMatch(/required/);
+  });
+  it("PENTATONIC_DOCTOR_PROBE_QUERY overrides the default probe text", async () => {
+    process.env.PENTATONIC_DOCTOR_PROBE_QUERY = "custom probe text";
+    const calls = captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { semanticSearchMemories: [] } }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "semanticSearchMemories returns hits"
+    );
+    await c.run();
+    expect(calls[0].body.variables.q).toBe("custom probe text");
+  });
+  // --- auth header branching ---
+  it("uses Authorization: Bearer for tes_-prefixed keys", async () => {
+    const calls = captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { events: { totalCount: 1 } } }),
+    }));
+    process.env.TES_API_KEY = "tes_user_abc";
+    const c = dataFlowChecks().find(
+      (x) => x.name === "TES event stream has data"
+    );
+    await c.run();
+    expect(calls[0].headers.Authorization).toBe("Bearer tes_user_abc");
+    expect(calls[0].headers["x-service-key"]).toBeUndefined();
+  });
+  it("uses x-service-key for non-tes_ keys (internal service tokens)", async () => {
+    const calls = captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { events: { totalCount: 1 } } }),
+    }));
+    process.env.TES_API_KEY = "internal_svc_xyz";
+    const c = dataFlowChecks().find(
+      (x) => x.name === "TES event stream has data"
+    );
+    await c.run();
+    expect(calls[0].headers["x-service-key"]).toBe("internal_svc_xyz");
+    expect(calls[0].headers.Authorization).toBeUndefined();
+  });
+  it("sends x-client-id on every request", async () => {
+    const calls = captureFetch(async () => ({
+      ok: true,
+      status: 200,
+      json: async () => ({ data: { events: { totalCount: 1 } } }),
+    }));
+    const c = dataFlowChecks().find(
+      (x) => x.name === "TES event stream has data"
+    );
+    await c.run();
+    expect(calls[0].headers["x-client-id"]).toBe("test-client");
+  });
+  it("all three report missing env clearly", async () => {
+    delete process.env.TES_CLIENT_ID;
+    for (const c of dataFlowChecks()) {
+      const r = await c.run();
+      expect(r.ok).toBe(false);
+      expect(r.msg).toMatch(/TES_ENDPOINT|required/);
+    }
+  });
+});
+describe("Claude Code plugin check", () => {
+  it("reports installed + version when manifest is present at ~/.claude", async () => {
+    const [check] = claudeCodeChecks({
+      fileExists: (p) => p === "/home/fake/.claude/plugins/marketplaces/pentatonic-ai/.claude-plugin/plugin.json",
+      readFile: () =>
+        JSON.stringify({ name: "tes-memory", version: "0.5.3" }),
+      homedir: () => "/home/fake",
+      env: {},
+    });
+    const r = await check.run();
+    expect(r.ok).toBe(true);
+    expect(r.msg).toMatch(/tes-memory v0\.5\.3 installed/);
+    expect(r.detail.version).toBe("0.5.3");
+    expect(r.detail.path).toMatch(/\.claude\/plugins/);
+  });
+  it("falls through to ~/.claude-pentatonic when ~/.claude is empty", async () => {
+    const pentatonicPath =
+      "/home/fake/.claude-pentatonic/plugins/marketplaces/pentatonic-ai/.claude-plugin/plugin.json";
+    const [check] = claudeCodeChecks({
+      fileExists: (p) => p === pentatonicPath,
+      readFile: () =>
+        JSON.stringify({ name: "tes-memory", version: "0.5.3" }),
+      homedir: () => "/home/fake",
+      env: {},
+    });
+    const r = await check.run();
+    expect(r.ok).toBe(true);
+    expect(r.detail.path).toBe(pentatonicPath);
+  });
+  it("respects CLAUDE_CONFIG_DIR override (highest precedence)", async () => {
+    const overridePath =
+      "/custom/cfg/plugins/marketplaces/pentatonic-ai/.claude-plugin/plugin.json";
+    const [check] = claudeCodeChecks({
+      fileExists: (p) => p === overridePath,
+      readFile: () =>
+        JSON.stringify({ name: "tes-memory", version: "9.9.9" }),
+      homedir: () => "/home/fake",
+      env: { CLAUDE_CONFIG_DIR: "/custom/cfg" },
+    });
+    const r = await check.run();
+    expect(r.ok).toBe(true);
+    expect(r.detail.path).toBe(overridePath);
+    expect(r.detail.version).toBe("9.9.9");
+  });
+  it("reports the install command + all candidate paths when none exist", async () => {
+    const [check] = claudeCodeChecks({
+      fileExists: () => false,
+      homedir: () => "/home/fake",
+      env: { CLAUDE_CONFIG_DIR: "/custom/cfg" },
+    });
+    const r = await check.run();
+    expect(r.ok).toBe(false);
+    expect(r.msg).toMatch(/plugin install tes-memory/);
+    expect(r.detail.candidates).toEqual(
+      expect.arrayContaining([
+        expect.stringContaining("/custom/cfg/plugins"),
+        expect.stringContaining("/home/fake/.claude/plugins"),
+        expect.stringContaining("/home/fake/.claude-pentatonic/plugins"),
+      ])
+    );
+  });
+  it("handles corrupt manifest json without throwing", async () => {
+    const [check] = claudeCodeChecks({
+      fileExists: () => true,
+      readFile: () => "{ not json",
+      homedir: () => "/home/fake",
+      env: {},
+    });
+    const r = await check.run();
+    expect(r.ok).toBe(false);
+    expect(r.msg).toMatch(/unreadable/);
+  });
+});

package/packages/doctor/src/checks/claude-code.js ADDED Viewed

@@ -0,0 +1,100 @@
+/**
+ * Claude Code plugin installation check.
+ *
+ * The SDK ships a Claude Code plugin (`tes-memory@pentatonic-ai`) that
+ * wires UserPromptSubmit / Stop hooks so CHAT_TURN + MEMORY_CREATED
+ * events actually get emitted. It's entirely possible for the server
+ * side to be healthy (TES reachable, key valid) while the client side
+ * is silently uninstalled — the hooks never fire and the event stream
+ * stays empty. This check tells users whether the plugin is present
+ * and what version they're on, so upstream feedback ("why am I not
+ * seeing memories?") lands faster.
+ *
+ * Resolution order mirrors `hooks/scripts/shared.js:loadConfig` — three
+ * candidate roots, first match wins:
+ *
+ *   1. $CLAUDE_CONFIG_DIR (explicit override, highest precedence)
+ *   2. ~/.claude              (default Claude Code install)
+ *   3. ~/.claude-pentatonic   (Pentatonic-branded variant)
+ *
+ * The check is universal-ish: it only reports positively when the
+ * plugin file is found. If the user isn't on Claude Code at all, the
+ * plugin absence is reported as info, not a failure.
+ */
+import { existsSync as realExistsSync, readFileSync as realReadFileSync } from "fs";
+import { join } from "path";
+import { homedir as realHomedir } from "os";
+import { SEVERITY } from "../index.js";
+// Path segments from a Claude config root down to the plugin manifest.
+const PLUGIN_REL_PATH = [
+  "plugins",
+  "marketplaces",
+  "pentatonic-ai",
+  ".claude-plugin",
+  "plugin.json",
+];
+/**
+ * Lists every location where the plugin manifest may live, most specific
+ * first, so the caller can stop at the first path that exists.
+ *
+ * @param {string} home - Home directory used for the two default roots.
+ * @param {object} [env] - Environment map; only CLAUDE_CONFIG_DIR is read.
+ * @returns {string[]} Manifest paths: the CLAUDE_CONFIG_DIR root (only when
+ *   it is set to a non-empty value), then ~/.claude, then
+ *   ~/.claude-pentatonic.
+ */
+function candidateManifestPaths(home, env) {
+  const override = env?.CLAUDE_CONFIG_DIR;
+  const defaultRoots = [".claude", ".claude-pentatonic"].map((dir) =>
+    join(home, dir)
+  );
+  const orderedRoots = override ? [override, ...defaultRoots] : defaultRoots;
+  return orderedRoots.map((base) => join(base, ...PLUGIN_REL_PATH));
+}
+/**
+ * Builds the doctor check that looks for the tes-memory plugin manifest.
+ *
+ * Every dependency on the host (filesystem, home directory, environment)
+ * is injectable so the check can be exercised without touching disk.
+ *
+ * @param {object} [seams] - Optional overrides; each falls back to the real
+ *   implementation when omitted.
+ * @param {(path: string) => boolean} [seams.fileExists] - Existence probe.
+ * @param {(path: string) => string} [seams.readFile] - Returns the manifest
+ *   text for a path.
+ * @param {() => string} [seams.homedir] - Resolves the home directory; it is
+ *   called once, when the check is built.
+ * @param {object} [seams.env] - Environment map (defaults to process.env).
+ * @returns {{name: string, severity: string, run: () => Promise<object>}}
+ *   INFO-severity check. `run()` resolves to `{ ok, msg, detail }`:
+ *   - no candidate path exists: not ok, `detail.candidates` lists the paths
+ *     that were tried and `msg` carries the install command;
+ *   - manifest found and parsed: ok, `detail` holds name, version and path;
+ *   - manifest unreadable, not JSON, or not a JSON object: not ok,
+ *     `detail.path` points at the offending file.
+ */
+function checkClaudeCodePluginInstalled({
+  fileExists,
+  readFile,
+  homedir,
+  env,
+} = {}) {
+  const exists = fileExists || realExistsSync;
+  const read = readFile || ((p) => realReadFileSync(p, "utf8"));
+  const resolveHome = typeof homedir === "function" ? homedir : realHomedir;
+  const resolveEnv = env || process.env;
+  const home = resolveHome();
+  return {
+    name: "tes-memory Claude Code plugin installed",
+    severity: SEVERITY.INFO,
+    run: async () => {
+      const candidates = candidateManifestPaths(home, resolveEnv);
+      const found = candidates.find((p) => exists(p));
+      if (!found) {
+        return {
+          ok: false,
+          msg:
+            "tes-memory plugin not found — run: /plugin marketplace add Pentatonic-Ltd/ai-agent-sdk && /plugin install tes-memory@pentatonic-ai",
+          detail: { candidates },
+        };
+      }
+      try {
+        const manifest = JSON.parse(read(found));
+        // JSON.parse also accepts `null`, arrays and bare scalars. None of
+        // those is a plugin manifest, so report them as unreadable rather
+        // than claiming "tes-memory v? installed".
+        const isObject =
+          manifest !== null &&
+          typeof manifest === "object" &&
+          !Array.isArray(manifest);
+        if (!isObject) {
+          throw new Error("manifest is not a JSON object");
+        }
+        // Missing or non-string fields fall back to placeholders so a sparse
+        // manifest still counts as installed.
+        const version = typeof manifest.version === "string" ? manifest.version : "?";
+        const name = typeof manifest.name === "string" ? manifest.name : "tes-memory";
+        return {
+          ok: true,
+          msg: `${name} v${version} installed`,
+          detail: { name, version, path: found },
+        };
+      } catch (err) {
+        return {
+          ok: false,
+          msg: `plugin manifest unreadable: ${err.message}`,
+          detail: { path: found },
+        };
+      }
+    },
+  };
+}
+/**
+ * Returns the Claude Code checks as a list, ready to be spread into the
+ * doctor's check array.
+ *
+ * @param {object} [seams] - Test seams handed straight to the
+ *   plugin-installed check.
+ * @returns {object[]} A one-element array holding that check.
+ */
+export function claudeCodeChecks(seams = {}) {
+  const pluginCheck = checkClaudeCodePluginInstalled(seams);
+  return [pluginCheck];
+}

package/packages/doctor/src/checks/data-flow.js ADDED Viewed

@@ -0,0 +1,252 @@
+/**
+ * Hosted TES data-flow checks.
+ *
+ * The existing hosted-tes checks prove the TES server is up and the API
+ * key is accepted. They don't prove data is actually flowing end-to-end —
+ * you can have a green doctor pass while the Claude Code hook is silently
+ * dropping events, or while vector retrieval is returning nothing at the
+ * configured minScore.
+ *
+ * These checks close that gap with three real-data probes against the
+ * same GraphQL endpoint the SDK already uses at runtime:
+ *
+ *   - "TES event stream has data"            — events table has rows at all
+ *   - "MEMORY_CREATED events for client"     — memory events exist for this client
+ *   - "semanticSearchMemories returns hits"  — a broad probe query retrieves > 0
+ *
+ * All three are WARNINGs by default: a green liveness check + a "0 events"
+ * warning is more informative than pretending liveness implies correctness,
+ * but an empty stream on a fresh install is legitimate and shouldn't fail
+ * the overall doctor pass.
+ *
+ * GraphQL shapes match TES's deployed schema (verified against
+ * thing-event-system/functions/api/graphql/domains/event/schema.js and
+ * thing-event-system/modules/deep-memory/graphql/memory/schema.js):
+ *
+ *   events(filter: EventFilterInput, limit: Int, offset: Int): EventPage!
+ *   EventFilterInput { eventType: StringFilterInput, clientId: StringFilterInput, ... }
+ *   EventPage { totalCount: Int!, ... }
+ *
+ *   semanticSearchMemories(
+ *     clientId: String!,
+ *     query: String!,
+ *     userId: String,
+ *     limit: Int,
+ *     minScore: Float
+ *   ): [SemanticMemoryResult!]!
+ *   SemanticMemoryResult { id: String!, similarity: Float!, ... }
+ */
+import { SEVERITY } from "../index.js";
+/**
+ * fetch() with a deadline.
+ *
+ * @param {string} url - Request URL.
+ * @param {object} [opts] - fetch init options. Any `signal` supplied here is
+ *   replaced by the timeout signal.
+ * @param {number} [timeoutMs=10000] - Milliseconds before the request is
+ *   aborted.
+ * @returns {Promise<Response>} Whatever fetch resolves to.
+ */
+async function fetchWithTimeout(url, opts = {}, timeoutMs = 10_000) {
+  const request = { ...opts, signal: AbortSignal.timeout(timeoutMs) };
+  const response = await fetch(url, request);
+  return response;
+}
+/**
+ * Builds the request headers for a TES GraphQL call.
+ *
+ * Keys that start with `tes_` (end-user keys) travel as
+ * `Authorization: Bearer <key>`; any other non-empty key (internal/service
+ * keys) travels as `x-service-key`. The branching is kept in step with
+ * hooks/scripts/shared.js so doctor authenticates the way the SDK runtime
+ * does.
+ *
+ * @param {string} [apiKey] - TES API key; no auth header is added when empty.
+ * @param {string} clientId - Value for the `x-client-id` header.
+ * @returns {object} Header map with content type, client id and, when a key
+ *   is given, exactly one auth header.
+ */
+function authHeaders(apiKey, clientId) {
+  const base = {
+    "Content-Type": "application/json",
+    "x-client-id": clientId,
+  };
+  const isUserKey = apiKey?.startsWith("tes_");
+  if (isUserKey) {
+    return { ...base, Authorization: `Bearer ${apiKey}` };
+  }
+  return apiKey ? { ...base, "x-service-key": apiKey } : base;
+}
+/**
+ * POSTs a GraphQL operation to `<endpoint>/api/graphql` and returns the
+ * `data` member of the response.
+ *
+ * @param {string} endpoint - TES base URL; trailing slashes are ignored.
+ * @param {string} apiKey - Key forwarded to authHeaders().
+ * @param {string} clientId - Sent as the `x-client-id` header.
+ * @param {string} query - GraphQL document.
+ * @param {object} [variables] - Variables for the document.
+ * @returns {Promise<*>} The response's `data` value.
+ * @throws {Error} On a non-2xx status (message carries the status and up to
+ *   200 characters of the body), when the decoded body is not a JSON
+ *   object, or when the response lists GraphQL errors (the first error's
+ *   message is used). Whatever `res.json()` throws for a non-JSON body
+ *   propagates unchanged.
+ */
+async function graphql(endpoint, apiKey, clientId, query, variables) {
+  // Strip every trailing slash, not just one, so "https://host//" does not
+  // turn into "https://host//api/graphql".
+  const base = endpoint.replace(/\/+$/, "");
+  const res = await fetchWithTimeout(`${base}/api/graphql`, {
+    method: "POST",
+    headers: authHeaders(apiKey, clientId),
+    body: JSON.stringify({ query, variables }),
+  });
+  if (!res.ok) {
+    // Reading the error body is best-effort; the status alone is enough.
+    const text = await res.text().catch(() => "");
+    throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
+  }
+  const body = await res.json();
+  // A JSON `null` or scalar is valid JSON but not a GraphQL response. Say so
+  // instead of failing with a TypeError (null) or quietly returning no data
+  // (scalar) on the property reads below.
+  if (body === null || typeof body !== "object") {
+    throw new Error("malformed GraphQL response: expected a JSON object");
+  }
+  if (body.errors?.length) {
+    throw new Error(body.errors[0].message || "graphql error");
+  }
+  return body.data;
+}
+/**
+ * Reads the hosted-TES connection settings from the environment.
+ *
+ * @returns {{endpoint: string, apiKey: string, clientId: string} |
+ *   {missing: true, reason: string}} The three settings when all of
+ *   TES_ENDPOINT, TES_API_KEY and TES_CLIENT_ID are non-empty; otherwise a
+ *   marker object whose `reason` names the required variables.
+ */
+function requireHostedEnv() {
+  const {
+    TES_ENDPOINT: endpoint,
+    TES_API_KEY: apiKey,
+    TES_CLIENT_ID: clientId,
+  } = process.env;
+  const complete = Boolean(endpoint && apiKey && clientId);
+  if (complete) {
+    return { endpoint, apiKey, clientId };
+  }
+  return {
+    missing: true,
+    reason: "TES_ENDPOINT / TES_API_KEY / TES_CLIENT_ID required",
+  };
+}
+/**
+ * Builds the check that asks whether the TES event stream holds any event
+ * at all.
+ *
+ * @returns {{name: string, severity: string, run: () => Promise<object>}}
+ *   WARNING-severity check. `run()` resolves to `{ ok, msg, detail? }`:
+ *   ok when `events.totalCount` is positive; not ok when it is zero, when
+ *   the TES_* environment is incomplete, or when the request fails (the
+ *   error message becomes `msg`).
+ */
+function checkEventStreamHasData() {
+  const run = async () => {
+    const hosted = requireHostedEnv();
+    if (hosted.missing) return { ok: false, msg: hosted.reason };
+    try {
+      // Only the total matters, so request a single row to keep the
+      // response small.
+      const result = await graphql(
+        hosted.endpoint,
+        hosted.apiKey,
+        hosted.clientId,
+        `query DoctorEventCount { events(limit: 1) { totalCount } }`
+      );
+      const totalCount = result?.events?.totalCount ?? 0;
+      if (!(totalCount > 0)) {
+        return {
+          ok: false,
+          msg: "0 events yet — send one prompt to your agent and re-run",
+          detail: { totalCount: 0 },
+        };
+      }
+      return {
+        ok: true,
+        msg: `${totalCount} event(s) in stream`,
+        detail: { totalCount },
+      };
+    } catch (err) {
+      return { ok: false, msg: err.message };
+    }
+  };
+  return {
+    name: "TES event stream has data",
+    severity: SEVERITY.WARNING,
+    run,
+  };
+}
+/**
+ * Builds the check that counts MEMORY_CREATED events recorded for the
+ * configured client id.
+ *
+ * @returns {{name: string, severity: string, run: () => Promise<object>}}
+ *   WARNING-severity check. `run()` resolves to `{ ok, msg, detail? }`:
+ *   ok when the filtered `events.totalCount` is positive; not ok when it is
+ *   zero, when the TES_* environment is incomplete, or when the request
+ *   fails (the error message becomes `msg`).
+ */
+function checkMemoryCreatedForClient() {
+  const run = async () => {
+    const hosted = requireHostedEnv();
+    if (hosted.missing) return { ok: false, msg: hosted.reason };
+    const { endpoint, apiKey, clientId } = hosted;
+    try {
+      // Both filters use the `{ eq: ... }` wrapper form; the client id is
+      // sent as a variable as well as in the request headers.
+      const result = await graphql(
+        endpoint,
+        apiKey,
+        clientId,
+        `query DoctorMemCount($eventType: String!, $client: String!) {
+             events(
+               limit: 1,
+               filter: {
+                 eventType: { eq: $eventType }
+                 clientId: { eq: $client }
+               }
+             ) {
+               totalCount
+             }
+           }`,
+        { eventType: "MEMORY_CREATED", client: clientId }
+      );
+      const totalCount = result?.events?.totalCount ?? 0;
+      if (!(totalCount > 0)) {
+        return {
+          ok: false,
+          msg: `no MEMORY_CREATED events for ${clientId} yet — hook may not be writing memories`,
+          detail: { totalCount: 0, clientId },
+        };
+      }
+      return {
+        ok: true,
+        msg: `${totalCount} MEMORY_CREATED event(s) for ${clientId}`,
+        detail: { totalCount, clientId },
+      };
+    } catch (err) {
+      return { ok: false, msg: err.message };
+    }
+  };
+  return {
+    name: "MEMORY_CREATED events for client",
+    severity: SEVERITY.WARNING,
+    run,
+  };
+}
+// Match TES's "Cannot query field 'X'" error wording precisely so a
+// schema-arg mismatch doesn't masquerade as "deployment doesn't expose
+// the field" — that would silently hide real errors.
+const FIELD_NOT_FOUND_RE =
+  /cannot query field "?semanticSearchMemories"?/i;
+/**
+ * Builds the check that runs one semanticSearchMemories probe and reports
+ * whether it returned anything.
+ *
+ * The probe text comes from PENTATONIC_DOCTOR_PROBE_QUERY (default
+ * "heartbeat"); minScore is fixed at 0.1 and at most 5 results are
+ * requested.
+ *
+ * @returns {{name: string, severity: string, run: () => Promise<object>}}
+ *   WARNING-severity check. `run()` resolves to `{ ok, msg, detail? }`:
+ *   - one or more hits: ok, `detail` holds query, minScore and hit count;
+ *   - zero hits: not ok, with a hint on what to try next;
+ *   - the schema has no semanticSearchMemories field: ok, marked "skipped";
+ *   - incomplete TES_* environment or any other error: not ok, the reason
+ *     or error message becomes `msg`.
+ */
+function checkSemanticSearchReturnsHits() {
+  return {
+    name: "semanticSearchMemories returns hits",
+    severity: SEVERITY.WARNING,
+    run: async () => {
+      const env = requireHostedEnv();
+      if (env.missing) return { ok: false, msg: env.reason };
+      try {
+        // A broad probe query. Low minScore (0.1) because the point of this
+        // check is "does retrieval work at all", not "does retrieval rank
+        // well". A follow-up tuning warning can be a separate check later.
+        const query = process.env.PENTATONIC_DOCTOR_PROBE_QUERY || "heartbeat";
+        const minScore = 0.1;
+        const data = await graphql(
+          env.endpoint,
+          env.apiKey,
+          env.clientId,
+          `query DoctorSearch($clientId: String!, $q: String!, $minScore: Float!) {
+             semanticSearchMemories(
+               clientId: $clientId,
+               query: $q,
+               minScore: $minScore,
+               limit: 5
+             ) {
+               id
+               similarity
+             }
+           }`,
+          { clientId: env.clientId, q: query, minScore }
+        );
+        const hits = data?.semanticSearchMemories ?? [];
+        if (hits.length > 0) {
+          return {
+            ok: true,
+            msg: `${hits.length} hit(s) for "${query}" at minScore=${minScore}`,
+            detail: { query, minScore, hits: hits.length },
+          };
+        }
+        // minScore is a constant here, so the hint must not suggest lowering
+        // it. The only knob the user has is the probe text.
+        return {
+          ok: false,
+          msg: `0 hits for "${query}" at minScore=${minScore} — store a memory first, or set PENTATONIC_DOCTOR_PROBE_QUERY to text close to one you have stored`,
+          detail: { query, minScore, hits: 0 },
+        };
+      } catch (err) {
+        // Only treat the precise "Cannot query field" error as
+        // "deployment doesn't expose this" — schema-arg mismatches and
+        // other graphql errors should surface, not be silently skipped.
+        if (FIELD_NOT_FOUND_RE.test(err.message)) {
+          return {
+            ok: true,
+            msg: "semanticSearchMemories not exposed by this deployment (skipped)",
+          };
+        }
+        return { ok: false, msg: err.message };
+      }
+    },
+  };
+}
+/**
+ * Returns the three data-flow probes in the order they should run: event
+ * stream, MEMORY_CREATED events for the client, then semantic search.
+ *
+ * @returns {object[]} Freshly built check objects.
+ */
+export function dataFlowChecks() {
+  const factories = [
+    checkEventStreamHasData,
+    checkMemoryCreatedForClient,
+    checkSemanticSearchReturnsHits,
+  ];
+  return factories.map((makeCheck) => makeCheck());
+}

package/packages/doctor/src/index.js CHANGED Viewed

@@ -24,7 +24,9 @@ export { renderHuman, renderJson } from "./output.js";
 export { universalChecks } from "./checks/universal.js";
 export { localMemoryChecks } from "./checks/local-memory.js";
 export { hostedTesChecks } from "./checks/hosted-tes.js";
+export { dataFlowChecks } from "./checks/data-flow.js";
 export { platformChecks } from "./checks/platform.js";
+export { claudeCodeChecks } from "./checks/claude-code.js";
 export const SEVERITY = Object.freeze({
   CRITICAL: "critical",

package/packages/doctor/src/runner.js CHANGED Viewed

@@ -21,7 +21,9 @@ import { detectPaths, PATHS } from "./detect.js";
 import { universalChecks } from "./checks/universal.js";
 import { localMemoryChecks } from "./checks/local-memory.js";
 import { hostedTesChecks } from "./checks/hosted-tes.js";
+import { dataFlowChecks } from "./checks/data-flow.js";
 import { platformChecks } from "./checks/platform.js";
+import { claudeCodeChecks } from "./checks/claude-code.js";
 import { loadPlugins } from "./plugins.js";
 import { SEVERITY } from "./index.js";
@@ -32,7 +34,8 @@ function pathChecks(path) {
     case PATHS.LOCAL:
       return localMemoryChecks();
     case PATHS.HOSTED:
-      return hostedTesChecks();
+      // Liveness (hostedTesChecks) + end-to-end data-flow probes.
+      return [...hostedTesChecks(), ...dataFlowChecks()];
     case PATHS.PLATFORM:
       return platformChecks();
     default:
@@ -91,8 +94,9 @@ export async function runDoctor(opts = {}) {
   const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
   const paths = detectPaths(opts);
-  // Universal checks always run.
-  const checks = [...universalChecks()];
+  // Universal checks always run. claudeCodeChecks is also universal —
+  // the plugin may be present regardless of which install path is in use.
+  const checks = [...universalChecks(), ...claudeCodeChecks()];
   for (const p of paths) {
     checks.push(...pathChecks(p));
   }

package/packages/memory/src/__tests__/api-contract.test.js CHANGED Viewed

@@ -581,3 +581,154 @@ describe("ingest options contract", () => {
     expect(registered.length).toBe(0);
   });
 });
+// --- Ingest dedup ---
+describe("ingest dedup option", () => {
+  // In-memory stand-in for the `db(sql, params)` function ingest() calls.
+  // `state.existing` seeds rows ([{ id, client_id, content }, ...]) for the
+  // dedup lookup. Returns the db function, a log of every call, and the
+  // rows captured from INSERT statements.
+  function makeMockDb(state = {}) {
+    const seedRows = state.existing || [];
+    const calls = [];
+    const inserted = [];
+    // Same match rule as the dedup query: same tenant, and either the exact
+    // content or the legacy "[<timestamp>] <content>" form.
+    const isDuplicate = (row, clientId, content) => {
+      if (row.client_id !== clientId) return false;
+      return row.content === content || row.content.endsWith(`] ${content}`);
+    };
+    async function db(sql, params) {
+      calls.push({ sql, params });
+      // Layer lookup always succeeds with a fixed layer.
+      if (sql.includes("SELECT id FROM memory_layers")) {
+        return { rows: [{ id: "layer-1" }] };
+      }
+      // Dedup pre-check.
+      if (sql.includes("SELECT id FROM memory_nodes")) {
+        const [clientId, content] = params;
+        const hit = seedRows.find((row) => isDuplicate(row, clientId, content));
+        return { rows: hit ? [{ id: hit.id }] : [] };
+      }
+      // Insert path: remember what would have been written.
+      if (sql.startsWith("INSERT INTO memory_nodes")) {
+        const id = params[0];
+        const client_id = params[1];
+        const content = params[3];
+        inserted.push({ id, client_id, content });
+      }
+      return { rows: [] };
+    }
+    return { db, calls, inserted };
+  }
+  const mockAi = { embed: async () => null };
+  const mockLlm = { chat: async () => "[]" };
+  it("inserts a fresh row when no duplicate exists", async () => {
+    const { db, inserted } = makeMockDb({ existing: [] });
+    const out = await ingest(db, mockAi, mockLlm, "fresh content", {
+      clientId: "c",
+      dedup: true,
+    });
+    expect(out.deduped).toBeUndefined();
+    expect(out.id.startsWith("mem_")).toBe(true);
+    expect(inserted).toHaveLength(1);
+    expect(inserted[0].content).toBe("fresh content");
+  });
+  it("returns the existing row's id when raw content matches", async () => {
+    const { db, inserted } = makeMockDb({
+      existing: [
+        { id: "mem_existing", client_id: "c", content: "duplicate content" },
+      ],
+    });
+    const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
+      clientId: "c",
+      dedup: true,
+    });
+    expect(out.deduped).toBe(true);
+    expect(out.id).toBe("mem_existing");
+    expect(out.content).toBe("duplicate content");
+    expect(inserted).toHaveLength(0); // no insert happened
+  });
+  it("matches legacy timestamp-prefixed rows (`[<iso>] <content>`)", async () => {
+    const { db, inserted } = makeMockDb({
+      existing: [
+        {
+          id: "mem_legacy",
+          client_id: "c",
+          content: "[2026-04-26T10:00:00Z] duplicate content",
+        },
+      ],
+    });
+    const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
+      clientId: "c",
+      dedup: true,
+    });
+    expect(out.deduped).toBe(true);
+    expect(out.id).toBe("mem_legacy");
+    expect(inserted).toHaveLength(0);
+  });
+  it("dedup off (default) still inserts on duplicate content", async () => {
+    const { db, inserted } = makeMockDb({
+      existing: [
+        { id: "mem_existing", client_id: "c", content: "duplicate content" },
+      ],
+    });
+    const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
+      clientId: "c",
+      // dedup omitted — defaults to false
+    });
+    expect(out.deduped).toBeUndefined();
+    expect(inserted).toHaveLength(1);
+    expect(inserted[0].id).not.toBe("mem_existing");
+  });
+  it("scopes dedup to the given clientId (cross-tenant collisions don't dedup)", async () => {
+    const { db, inserted } = makeMockDb({
+      existing: [
+        { id: "mem_other", client_id: "other", content: "duplicate content" },
+      ],
+    });
+    const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
+      clientId: "c", // different tenant
+      dedup: true,
+    });
+    expect(out.deduped).toBeUndefined();
+    expect(inserted).toHaveLength(1);
+    expect(inserted[0].client_id).toBe("c");
+  });
+  it("dedup check failure falls through to insert (best-effort semantics)", async () => {
+    let dupCheckSql = null;
+    const flakyDb = async (sql, params) => {
+      if (sql.includes("SELECT id FROM memory_layers")) {
+        return { rows: [{ id: "layer-1" }] };
+      }
+      if (sql.includes("SELECT id FROM memory_nodes")) {
+        dupCheckSql = sql;
+        throw new Error("DB unreachable");
+      }
+      return { rows: [] };
+    };
+    const out = await ingest(flakyDb, mockAi, mockLlm, "content", {
+      clientId: "c",
+      dedup: true,
+    });
+    expect(dupCheckSql).toContain("memory_nodes");
+    expect(out.deduped).toBeUndefined();
+    expect(out.id.startsWith("mem_")).toBe(true);
+  });
+});

package/packages/memory/src/ingest.js CHANGED Viewed

@@ -21,7 +21,17 @@ import { distill } from "./distill.js";
  *   tasks (e.g. Cloudflare Worker ctx.waitUntil). If provided, the distill
  *   background task is handed to it so the host keeps it alive past return.
  *   Without it, distill is fire-and-forget (fine for Node/browser).
- * @returns {Promise<{id: string, content: string, layerId: string}>}
+ * @param {boolean} [opts.dedup=false] - Skip ingest if a memory_node with
+ *   byte-equal content already exists for this `client_id`. Use for
+ *   retry-safe pipelines where the same logical event may be processed
+ *   twice (queue retries, consumer fan-out). Returns the existing row's
+ *   id with `{deduped: true}` instead of inserting. Strict equality —
+ *   not a semantic similarity match. Best-effort: if the SELECT itself
+ *   fails, ingest proceeds (worst case: duplicate row, identical to
+ *   `dedup:false` behaviour). The eventual structural fix is a
+ *   `UNIQUE(client_id, content_hash)` constraint at the schema level;
+ *   this option is the bridge.
+ * @returns {Promise<{id: string, content: string, layerId: string, deduped?: boolean}>}
  */
 export async function ingest(db, ai, llm, content, opts = {}) {
   const clientId = opts.clientId;
@@ -41,6 +51,35 @@ export async function ingest(db, ai, llm, content, opts = {}) {
   }
   const layerId = layerResult.rows[0].id;
+  // Optional dedup: skip the insert (and all the embedding/HyDE/distill
+  // work that would follow) if a row with byte-equal content already
+  // exists for this tenant. The OR-LIKE branch matches against the
+  // legacy `[<iso>] <content>` form so callers that wrote with a
+  // timestamp prefix dedup correctly until the legacy corpus ages out.
+  if (opts.dedup) {
+    try {
+      const dupCheck = await db(
+        `SELECT id FROM memory_nodes
+           WHERE client_id = $1
+             AND (content = $2 OR content LIKE '%] ' || $2)
+           LIMIT 1`,
+        [clientId, content]
+      );
+      if (dupCheck.rows?.length) {
+        log(`dedup: matched existing memory ${dupCheck.rows[0].id}`);
+        return {
+          id: dupCheck.rows[0].id,
+          content,
+          layerId,
+          deduped: true,
+        };
+      }
+    } catch (err) {
+      log(`dedup check failed (proceeding with insert): ${err.message}`);
+    }
+  }
   const memoryId = `mem_${crypto.randomUUID()}`;
   // Insert memory node