npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.5.7 → 0.5.9 - Mend

@pentatonic-ai/ai-agent-sdk 0.5.7 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/index.cjs +244 -8
package/dist/index.js +244 -8
package/package.json +2 -2
package/packages/doctor/__tests__/checks.test.js +357 -0
package/packages/doctor/src/checks/claude-code.js +100 -0
package/packages/doctor/src/checks/data-flow.js +252 -0
package/packages/doctor/src/index.js +2 -0
package/packages/doctor/src/runner.js +7 -3
package/packages/memory/src/__tests__/api-contract.test.js +151 -0
package/packages/memory/src/hosted.js +7 -0
package/packages/memory/src/ingest.js +40 -1
package/packages/memory/src/inject.js +83 -0
package/src/client.js +20 -2
package/src/wrapper.js +129 -6

package/packages/doctor/src/checks/data-flow.js ADDED Viewed

@@ -0,0 +1,252 @@
+/**
+ * Hosted TES data-flow checks.
+ *
+ * The existing hosted-tes checks prove the TES server is up and the API
+ * key is accepted. They don't prove data is actually flowing end-to-end —
+ * you can have a green doctor pass while the Claude Code hook is silently
+ * dropping events, or while vector retrieval is returning nothing at the
+ * configured minScore.
+ *
+ * These checks close that gap with three real-data probes against the
+ * same GraphQL endpoint the SDK already uses at runtime:
+ *
+ *   - "TES event stream has data"            — events table has rows at all
+ *   - "MEMORY_CREATED events for client"     — memory events exist for this client
+ *   - "semanticSearchMemories returns hits"  — a broad probe query retrieves > 0
+ *
+ * All three are WARNINGs by default: a green liveness check + a "0 events"
+ * warning is more informative than pretending liveness implies correctness,
+ * but an empty stream on a fresh install is legitimate and shouldn't fail
+ * the overall doctor pass.
+ *
+ * GraphQL shapes match TES's deployed schema (verified against
+ * thing-event-system/functions/api/graphql/domains/event/schema.js and
+ * thing-event-system/modules/deep-memory/graphql/memory/schema.js):
+ *
+ *   events(filter: EventFilterInput, limit: Int, offset: Int): EventPage!
+ *   EventFilterInput { eventType: StringFilterInput, clientId: StringFilterInput, ... }
+ *   EventPage { totalCount: Int!, ... }
+ *
+ *   semanticSearchMemories(
+ *     clientId: String!,
+ *     query: String!,
+ *     userId: String,
+ *     limit: Int,
+ *     minScore: Float
+ *   ): [SemanticMemoryResult!]!
+ *   SemanticMemoryResult { id: String!, similarity: Float!, ... }
+ */
+import { SEVERITY } from "../index.js";
+/**
+ * `fetch` with a deadline: the request is aborted once `timeoutMs` elapses.
+ *
+ * @param {string} url - Request URL.
+ * @param {object} [opts] - Extra `fetch` init options; any `signal` passed
+ *   here is replaced by the timeout signal.
+ * @param {number} [timeoutMs=10000] - Deadline in milliseconds.
+ * @returns {Promise<Response>} Whatever `fetch` resolves to.
+ */
+async function fetchWithTimeout(url, opts = {}, timeoutMs = 10_000) {
+  const init = { ...opts, signal: AbortSignal.timeout(timeoutMs) };
+  const response = await fetch(url, init);
+  return response;
+}
+/**
+ * Build the HTTP headers for a TES GraphQL request.
+ *
+ * Keys starting with `tes_` are sent as `Authorization: Bearer <key>`;
+ * any other non-empty key is sent as `x-service-key: <key>`. With no key,
+ * only the content-type and client-id headers are returned. The branching
+ * is meant to mirror hooks/scripts/shared.js so doctor authenticates the
+ * same way the SDK runtime does.
+ *
+ * @param {string} [apiKey] - TES API key (end-user or service key).
+ * @param {string} clientId - Value for the `x-client-id` header.
+ * @returns {object} Header map for `fetch`.
+ */
+function authHeaders(apiKey, clientId) {
+  let credential = {};
+  if (apiKey?.startsWith("tes_")) {
+    credential = { Authorization: `Bearer ${apiKey}` };
+  } else if (apiKey) {
+    credential = { "x-service-key": apiKey };
+  }
+  return {
+    "Content-Type": "application/json",
+    "x-client-id": clientId,
+    ...credential,
+  };
+}
+/**
+ * POST a GraphQL operation to `<endpoint>/api/graphql` and return its `data`.
+ *
+ * @param {string} endpoint - TES base URL; one trailing slash is tolerated.
+ * @param {string} apiKey - Key passed to `authHeaders`.
+ * @param {string} clientId - Client id passed to `authHeaders`.
+ * @param {string} query - GraphQL document.
+ * @param {object} [variables] - GraphQL variables.
+ * @returns {Promise<object>} The `data` member of the response body.
+ * @throws {Error} `HTTP <status>` (plus up to 200 chars of the response
+ *   text) on a non-2xx response, or the first GraphQL error's message when
+ *   the body carries `errors`.
+ */
+async function graphql(endpoint, apiKey, clientId, query, variables) {
+  const url = `${endpoint.replace(/\/$/, "")}/api/graphql`;
+  const res = await fetchWithTimeout(url, {
+    method: "POST",
+    headers: authHeaders(apiKey, clientId),
+    body: JSON.stringify({ query, variables }),
+  });
+  if (!res.ok) {
+    // Reading the error body is best-effort; a failed read degrades to "".
+    const text = await res.text().catch(() => "");
+    const suffix = text ? `: ${text.slice(0, 200)}` : "";
+    throw new Error(`HTTP ${res.status}${suffix}`);
+  }
+  const payload = await res.json();
+  if (!payload.errors?.length) {
+    return payload.data;
+  }
+  throw new Error(payload.errors[0].message || "graphql error");
+}
+/**
+ * Read the hosted-TES connection settings from the environment.
+ *
+ * @returns {{endpoint: string, apiKey: string, clientId: string} |
+ *   {missing: true, reason: string}} The three settings when all of
+ *   TES_ENDPOINT, TES_API_KEY and TES_CLIENT_ID are non-empty; otherwise a
+ *   `missing` marker with a human-readable reason.
+ */
+function requireHostedEnv() {
+  const {
+    TES_ENDPOINT: endpoint,
+    TES_API_KEY: apiKey,
+    TES_CLIENT_ID: clientId,
+  } = process.env;
+  const configured = Boolean(endpoint && apiKey && clientId);
+  if (configured) {
+    return { endpoint, apiKey, clientId };
+  }
+  return {
+    missing: true,
+    reason: "TES_ENDPOINT / TES_API_KEY / TES_CLIENT_ID required",
+  };
+}
+/**
+ * Doctor check: does the TES event stream contain any events at all?
+ *
+ * @returns {{name: string, severity: string, run: Function}} Check
+ *   descriptor. `run()` resolves to `{ok, msg, detail?}` and never rejects
+ *   on a GraphQL/HTTP failure — the error message becomes `msg`.
+ */
+function checkEventStreamHasData() {
+  const failed = (msg) => ({ ok: false, msg });
+  async function run() {
+    const env = requireHostedEnv();
+    if (env.missing) return failed(env.reason);
+    let total;
+    try {
+      // `limit: 1` keeps the payload tiny — only the total is of interest.
+      const data = await graphql(
+        env.endpoint,
+        env.apiKey,
+        env.clientId,
+        `query DoctorEventCount { events(limit: 1) { totalCount } }`
+      );
+      total = data?.events?.totalCount ?? 0;
+    } catch (err) {
+      return failed(err.message);
+    }
+    if (total > 0) {
+      return {
+        ok: true,
+        msg: `${total} event(s) in stream`,
+        detail: { totalCount: total },
+      };
+    }
+    return {
+      ok: false,
+      msg: "0 events yet — send one prompt to your agent and re-run",
+      detail: { totalCount: 0 },
+    };
+  }
+  return {
+    name: "TES event stream has data",
+    severity: SEVERITY.WARNING,
+    run,
+  };
+}
+/**
+ * Doctor check: are there MEMORY_CREATED events for the configured client?
+ *
+ * Counts events filtered by `eventType = MEMORY_CREATED` and
+ * `clientId = TES_CLIENT_ID`.
+ *
+ * @returns {{name: string, severity: string, run: Function}} Check
+ *   descriptor. `run()` resolves to `{ok, msg, detail?}`; GraphQL/HTTP
+ *   failures are reported through `msg` rather than thrown.
+ */
+function checkMemoryCreatedForClient() {
+  const failed = (msg) => ({ ok: false, msg });
+  async function run() {
+    const env = requireHostedEnv();
+    if (env.missing) return failed(env.reason);
+    let total;
+    try {
+      const data = await graphql(
+        env.endpoint,
+        env.apiKey,
+        env.clientId,
+          `query DoctorMemCount($eventType: String!, $client: String!) {
+             events(
+               limit: 1,
+               filter: {
+                 eventType: { eq: $eventType }
+                 clientId: { eq: $client }
+               }
+             ) {
+               totalCount
+             }
+           }`,
+        { eventType: "MEMORY_CREATED", client: env.clientId }
+      );
+      total = data?.events?.totalCount ?? 0;
+    } catch (err) {
+      return failed(err.message);
+    }
+    if (total > 0) {
+      return {
+        ok: true,
+        msg: `${total} MEMORY_CREATED event(s) for ${env.clientId}`,
+        detail: { totalCount: total, clientId: env.clientId },
+      };
+    }
+    return {
+      ok: false,
+      msg: `no MEMORY_CREATED events for ${env.clientId} yet — hook may not be writing memories`,
+      detail: { totalCount: 0, clientId: env.clientId },
+    };
+  }
+  return {
+    name: "MEMORY_CREATED events for client",
+    severity: SEVERITY.WARNING,
+    run,
+  };
+}
+// Match TES's "Cannot query field 'X'" error wording precisely so a
+// schema-arg mismatch doesn't masquerade as "deployment doesn't expose
+// the field" — that would silently hide real errors.
+const FIELD_NOT_FOUND_RE =
+  /cannot query field "?semanticSearchMemories"?/i;
+// Probe threshold used when PENTATONIC_DOCTOR_MIN_SCORE is unset or invalid.
+const DEFAULT_PROBE_MIN_SCORE = 0.1;
+/**
+ * Resolve the similarity threshold for the semantic-search probe.
+ *
+ * @returns {number} PENTATONIC_DOCTOR_MIN_SCORE parsed as a finite number,
+ *   or DEFAULT_PROBE_MIN_SCORE when the variable is unset, blank, or not
+ *   a finite number.
+ */
+function resolveProbeMinScore() {
+  const raw = process.env.PENTATONIC_DOCTOR_MIN_SCORE;
+  if (raw === undefined || raw.trim() === "") return DEFAULT_PROBE_MIN_SCORE;
+  const parsed = Number(raw);
+  return Number.isFinite(parsed) ? parsed : DEFAULT_PROBE_MIN_SCORE;
+}
+/**
+ * Doctor check: does `semanticSearchMemories` return at least one hit for
+ * a broad probe query?
+ *
+ * Environment knobs:
+ *   - PENTATONIC_DOCTOR_PROBE_QUERY — probe text (default "heartbeat").
+ *   - PENTATONIC_DOCTOR_MIN_SCORE   — similarity threshold (default 0.1).
+ *
+ * @returns {{name: string, severity: string, run: Function}} Check
+ *   descriptor. `run()` resolves to `{ok, msg, detail?}`. A "Cannot query
+ *   field semanticSearchMemories" error is reported as a passing "skipped"
+ *   result; every other error is reported as a failure with its message.
+ */
+function checkSemanticSearchReturnsHits() {
+  return {
+    name: "semanticSearchMemories returns hits",
+    severity: SEVERITY.WARNING,
+    run: async () => {
+      const env = requireHostedEnv();
+      if (env.missing) return { ok: false, msg: env.reason };
+      try {
+        // A broad probe query. The default minScore is low (0.1) because
+        // the point of this check is "does retrieval work at all", not
+        // "does retrieval rank well".
+        const query = process.env.PENTATONIC_DOCTOR_PROBE_QUERY || "heartbeat";
+        const minScore = resolveProbeMinScore();
+        const data = await graphql(
+          env.endpoint,
+          env.apiKey,
+          env.clientId,
+          `query DoctorSearch($clientId: String!, $q: String!, $minScore: Float!) {
+             semanticSearchMemories(
+               clientId: $clientId,
+               query: $q,
+               minScore: $minScore,
+               limit: 5
+             ) {
+               id
+               similarity
+             }
+           }`,
+          { clientId: env.clientId, q: query, minScore }
+        );
+        const hits = data?.semanticSearchMemories ?? [];
+        if (hits.length > 0) {
+          return {
+            ok: true,
+            msg: `${hits.length} hit(s) for "${query}" at minScore=${minScore}`,
+            detail: { query, minScore, hits: hits.length },
+          };
+        }
+        // Name only knobs the user can actually turn from the doctor CLI.
+        return {
+          ok: false,
+          msg: `0 hits for "${query}" at minScore=${minScore} — set PENTATONIC_DOCTOR_PROBE_QUERY to text you know was ingested, or lower PENTATONIC_DOCTOR_MIN_SCORE`,
+          detail: { query, minScore, hits: 0 },
+        };
+      } catch (err) {
+        // Only treat the precise "Cannot query field" error as
+        // "deployment doesn't expose this" — schema-arg mismatches and
+        // other graphql errors should surface, not be silently skipped.
+        if (FIELD_NOT_FOUND_RE.test(err.message)) {
+          return {
+            ok: true,
+            msg: "semanticSearchMemories not exposed by this deployment (skipped)",
+          };
+        }
+        return { ok: false, msg: err.message };
+      }
+    },
+  };
+}
+/**
+ * All hosted data-flow checks, in the order they are reported: event
+ * stream, MEMORY_CREATED events for the client, then semantic search.
+ *
+ * @returns {Array<{name: string, severity: string, run: Function}>}
+ */
+export function dataFlowChecks() {
+  const factories = [
+    checkEventStreamHasData,
+    checkMemoryCreatedForClient,
+    checkSemanticSearchReturnsHits,
+  ];
+  return factories.map((makeCheck) => makeCheck());
+}

package/packages/doctor/src/index.js CHANGED Viewed

@@ -24,7 +24,9 @@ export { renderHuman, renderJson } from "./output.js";
 export { universalChecks } from "./checks/universal.js";
 export { localMemoryChecks } from "./checks/local-memory.js";
 export { hostedTesChecks } from "./checks/hosted-tes.js";
+export { dataFlowChecks } from "./checks/data-flow.js";
 export { platformChecks } from "./checks/platform.js";
+export { claudeCodeChecks } from "./checks/claude-code.js";
 export const SEVERITY = Object.freeze({
   CRITICAL: "critical",

package/packages/doctor/src/runner.js CHANGED Viewed

@@ -21,7 +21,9 @@ import { detectPaths, PATHS } from "./detect.js";
 import { universalChecks } from "./checks/universal.js";
 import { localMemoryChecks } from "./checks/local-memory.js";
 import { hostedTesChecks } from "./checks/hosted-tes.js";
+import { dataFlowChecks } from "./checks/data-flow.js";
 import { platformChecks } from "./checks/platform.js";
+import { claudeCodeChecks } from "./checks/claude-code.js";
 import { loadPlugins } from "./plugins.js";
 import { SEVERITY } from "./index.js";
@@ -32,7 +34,8 @@ function pathChecks(path) {
     case PATHS.LOCAL:
       return localMemoryChecks();
     case PATHS.HOSTED:
-      return hostedTesChecks();
+      // Liveness (hostedTesChecks) + end-to-end data-flow probes.
+      return [...hostedTesChecks(), ...dataFlowChecks()];
     case PATHS.PLATFORM:
       return platformChecks();
     default:
@@ -91,8 +94,9 @@ export async function runDoctor(opts = {}) {
   const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
   const paths = detectPaths(opts);
-  // Universal checks always run.
-  const checks = [...universalChecks()];
+  // Universal checks always run. claudeCodeChecks is also universal —
+  // the plugin may be present regardless of which install path is in use.
+  const checks = [...universalChecks(), ...claudeCodeChecks()];
   for (const p of paths) {
     checks.push(...pathChecks(p));
   }

package/packages/memory/src/__tests__/api-contract.test.js CHANGED Viewed

@@ -581,3 +581,154 @@ describe("ingest options contract", () => {
     expect(registered.length).toBe(0);
   });
 });
+// --- Ingest dedup ---
+describe("ingest dedup option", () => {
+  /**
+   * Build an in-memory stand-in for the `db(sql, params)` function.
+   *
+   * Routes on SQL text: the memory_layers lookup always yields "layer-1",
+   * the memory_nodes lookup is answered from `state.existing`, and
+   * memory_nodes inserts are captured in `inserted`. Every call is also
+   * recorded in `calls`.
+   *
+   * @param {{existing?: Array<{id: string, client_id: string, content: string}>}} [state]
+   * @returns {{db: Function, calls: Array, inserted: Array}}
+   */
+  function makeMockDb(state = {}) {
+    const seededRows = state.existing || []; // [{ id, client_id, content }, ...]
+    const calls = [];
+    const inserted = [];
+    const noRows = () => ({ rows: [] });
+    // Raw equality, or the legacy timestamp-prefixed `[<iso>] <content>` form.
+    const findDuplicate = (clientId, content) =>
+      seededRows.find((row) => {
+        if (row.client_id !== clientId) return false;
+        return row.content === content || row.content.endsWith(`] ${content}`);
+      });
+    async function db(sql, params) {
+      calls.push({ sql, params });
+      if (sql.includes("SELECT id FROM memory_layers")) {
+        return { rows: [{ id: "layer-1" }] };
+      }
+      // Dedup pre-check
+      if (sql.includes("SELECT id FROM memory_nodes")) {
+        const [clientId, content] = params;
+        const hit = findDuplicate(clientId, content);
+        return hit ? { rows: [{ id: hit.id }] } : noRows();
+      }
+      // Insert path: record the row, then fall through to the empty result.
+      if (sql.startsWith("INSERT INTO memory_nodes")) {
+        inserted.push({
+          id: params[0],
+          client_id: params[1],
+          content: params[3],
+        });
+      }
+      return noRows();
+    }
+    return { db, calls, inserted };
+  }
+  const mockAi = { embed: async () => null };
+  const mockLlm = { chat: async () => "[]" };
+  it("inserts a fresh row when no duplicate exists", async () => {
+    const { db, inserted } = makeMockDb({ existing: [] });
+    const out = await ingest(db, mockAi, mockLlm, "fresh content", {
+      clientId: "c",
+      dedup: true,
+    });
+    expect(out.deduped).toBeUndefined();
+    expect(out.id.startsWith("mem_")).toBe(true);
+    expect(inserted).toHaveLength(1);
+    expect(inserted[0].content).toBe("fresh content");
+  });
+  it("returns the existing row's id when raw content matches", async () => {
+    const { db, inserted } = makeMockDb({
+      existing: [
+        { id: "mem_existing", client_id: "c", content: "duplicate content" },
+      ],
+    });
+    const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
+      clientId: "c",
+      dedup: true,
+    });
+    expect(out.deduped).toBe(true);
+    expect(out.id).toBe("mem_existing");
+    expect(out.content).toBe("duplicate content");
+    expect(inserted).toHaveLength(0); // no insert happened
+  });
+  it("matches legacy timestamp-prefixed rows (`[<iso>] <content>`)", async () => {
+    const { db, inserted } = makeMockDb({
+      existing: [
+        {
+          id: "mem_legacy",
+          client_id: "c",
+          content: "[2026-04-26T10:00:00Z] duplicate content",
+        },
+      ],
+    });
+    const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
+      clientId: "c",
+      dedup: true,
+    });
+    expect(out.deduped).toBe(true);
+    expect(out.id).toBe("mem_legacy");
+    expect(inserted).toHaveLength(0);
+  });
+  it("dedup off (default) still inserts on duplicate content", async () => {
+    const { db, inserted } = makeMockDb({
+      existing: [
+        { id: "mem_existing", client_id: "c", content: "duplicate content" },
+      ],
+    });
+    const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
+      clientId: "c",
+      // dedup omitted — defaults to false
+    });
+    expect(out.deduped).toBeUndefined();
+    expect(inserted).toHaveLength(1);
+    expect(inserted[0].id).not.toBe("mem_existing");
+  });
+  it("scopes dedup to the given clientId (cross-tenant collisions don't dedup)", async () => {
+    const { db, inserted } = makeMockDb({
+      existing: [
+        { id: "mem_other", client_id: "other", content: "duplicate content" },
+      ],
+    });
+    const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
+      clientId: "c", // different tenant
+      dedup: true,
+    });
+    expect(out.deduped).toBeUndefined();
+    expect(inserted).toHaveLength(1);
+    expect(inserted[0].client_id).toBe("c");
+  });
+  it("dedup check failure falls through to insert (best-effort semantics)", async () => {
+    let dupCheckSql = null;
+    const flakyDb = async (sql, params) => {
+      if (sql.includes("SELECT id FROM memory_layers")) {
+        return { rows: [{ id: "layer-1" }] };
+      }
+      if (sql.includes("SELECT id FROM memory_nodes")) {
+        dupCheckSql = sql;
+        throw new Error("DB unreachable");
+      }
+      return { rows: [] };
+    };
+    const out = await ingest(flakyDb, mockAi, mockLlm, "content", {
+      clientId: "c",
+      dedup: true,
+    });
+    expect(dupCheckSql).toContain("memory_nodes");
+    expect(out.deduped).toBeUndefined();
+    expect(out.id.startsWith("mem_")).toBe(true);
+  });
+});

package/packages/memory/src/hosted.js CHANGED Viewed

@@ -370,3 +370,10 @@ function shortenReason(msg) {
     .replace(/[^a-z0-9]+/g, "_")
     .slice(0, 60);
 }
+// Re-export the system-message injector so callers that import the
+// hosted module get the full memory-augmentation surface in one place.
+// Keeping the implementation in `./inject.js` lets non-hosted consumers
+// (e.g. a future "augment a request body" helper that doesn't talk to
+// TES) reuse it without pulling in the GraphQL surface.
+export { injectMemories } from "./inject.js";

package/packages/memory/src/ingest.js CHANGED Viewed

@@ -21,7 +21,17 @@ import { distill } from "./distill.js";
  *   tasks (e.g. Cloudflare Worker ctx.waitUntil). If provided, the distill
  *   background task is handed to it so the host keeps it alive past return.
  *   Without it, distill is fire-and-forget (fine for Node/browser).
- * @returns {Promise<{id: string, content: string, layerId: string}>}
+ * @param {boolean} [opts.dedup=false] - Skip ingest if a memory_node with
+ *   byte-equal content already exists for this `client_id`, or if a
+ *   legacy row stored as `[<iso>] <content>` ends with `] ` followed by
+ *   the same content. Use for retry-safe pipelines where the same logical
+ *   event may be processed twice (queue retries, consumer fan-out).
+ *   Returns the existing row's id with `{deduped: true}` instead of
+ *   inserting. This is a textual match, not a semantic similarity match.
+ *   Best-effort: if the SELECT itself fails, ingest proceeds (worst case:
+ *   duplicate row, identical to `dedup:false` behaviour). The eventual
+ *   structural fix is a `UNIQUE(client_id, content_hash)` constraint at
+ *   the schema level; this option is the bridge.
+ * @returns {Promise<{id: string, content: string, layerId: string, deduped?: boolean}>}
  */
 export async function ingest(db, ai, llm, content, opts = {}) {
   const clientId = opts.clientId;
@@ -41,6 +51,35 @@ export async function ingest(db, ai, llm, content, opts = {}) {
   }
   const layerId = layerResult.rows[0].id;
+  // Optional dedup: skip the insert (and all the embedding/HyDE/distill
+  // work that would follow) if a row with byte-equal content already
+  // exists for this tenant. The OR-LIKE branch matches against the
+  // legacy `[<iso>] <content>` form so callers that wrote with a
+  // timestamp prefix dedup correctly until the legacy corpus ages out.
+  if (opts.dedup) {
+    try {
+      // The content is data, not a pattern. Escape the LIKE metacharacters
+      // (`%`, `_`) and the escape character itself, and declare an explicit
+      // ESCAPE so backslashes in the content are literal too. Otherwise
+      // content such as "100% done" could wildcard-match an unrelated row
+      // and be wrongly reported as a duplicate.
+      const likeLiteral = String(content).replace(/[!%_]/g, "!$&");
+      const dupCheck = await db(
+        `SELECT id FROM memory_nodes
+           WHERE client_id = $1
+             AND (content = $2 OR content LIKE ('%] ' || $3) ESCAPE '!')
+           LIMIT 1`,
+        [clientId, content, likeLiteral]
+      );
+      if (dupCheck.rows?.length) {
+        log(`dedup: matched existing memory ${dupCheck.rows[0].id}`);
+        return {
+          id: dupCheck.rows[0].id,
+          content,
+          layerId,
+          deduped: true,
+        };
+      }
+    } catch (err) {
+      // Best-effort: a failed pre-check must not block the ingest itself.
+      log(`dedup check failed (proceeding with insert): ${err.message}`);
+    }
+  }
   const memoryId = `mem_${crypto.randomUUID()}`;
   // Insert memory node

package/packages/memory/src/inject.js ADDED Viewed

@@ -0,0 +1,83 @@
+/**
+ * Memory injection — formats retrieved memories as a system-message preamble
+ * and merges them into the upstream request body.
+ *
+ * Why a preamble (not a separate user-turn or tool-result):
+ *   - Customer's existing system prompt is preserved; the memory preamble
+ *     is prepended to it, never substituted for it.
+ *   - Anthropic and OpenAI both treat system content as cache-friendly.
+ *   - No conversation-history mutation — replays remain reproducible.
+ *
+ * Format:
+ *   <tes:context>
+ *     [1] (similarity 0.82) memory text...
+ *     [2] (similarity 0.71) memory text...
+ *   </tes:context>
+ *
+ * The XML-ish wrapper makes it trivial for the model to ignore on demand
+ * and trivial for an evaluator to strip when measuring quality deltas.
+ */
+// Upper bound on how many characters of one memory go into the preamble.
+const MAX_CHARS_PER_MEMORY = 1200;
+/**
+ * Merge retrieved memories into an upstream request body as a
+ * system-message preamble.
+ *
+ * @param {object} body - Upstream request body. Returned unchanged (same
+ *   reference) when there are no memories.
+ * @param {Array<{id, content, similarity}>} memories - Retrieved memories.
+ * @param {"anthropic"|"openai"} provider - "anthropic" selects the
+ *   Anthropic `system` field; any other value uses the OpenAI
+ *   `messages` form.
+ * @returns {object} The body with the preamble injected.
+ */
+export function injectMemories(body, memories, provider) {
+  const hasMemories = Boolean(memories) && memories.length !== 0;
+  if (!hasMemories) return body;
+  const inject = provider === "anthropic" ? injectAnthropic : injectOpenAI;
+  return inject(body, formatPreamble(memories));
+}
+/**
+ * Render memories as a `<tes:context>` block, one numbered line each.
+ *
+ * Each line reads `[n] (similarity S) text`: `S` is the similarity to two
+ * decimals, or "?" when it is not a number, and `text` is the memory
+ * content cut to MAX_CHARS_PER_MEMORY characters (empty when missing).
+ *
+ * @param {Array<{content?: string, similarity?: number}>} memories
+ * @returns {string} Newline-joined preamble.
+ */
+function formatPreamble(memories) {
+  const entries = memories.map((memory, index) => {
+    const score =
+      typeof memory.similarity === "number"
+        ? memory.similarity.toFixed(2)
+        : "?";
+    const text = (memory.content || "").slice(0, MAX_CHARS_PER_MEMORY);
+    return `[${index + 1}] (similarity ${score}) ${text}`;
+  });
+  return ["<tes:context>", ...entries, "</tes:context>"].join("\n");
+}
+/**
+ * Put the preamble in front of an Anthropic request's `system` field.
+ *
+ * `system` may be a string or an array of content blocks; the shape the
+ * caller sent is kept. A string gets the preamble plus a blank line
+ * prefixed, an array gets a leading text block, and any other value is
+ * replaced by the preamble alone.
+ *
+ * @param {object} body - Request body (not modified).
+ * @param {string} preamble - Formatted memory preamble.
+ * @returns {object} Shallow copy of `body` with the new `system`.
+ */
+function injectAnthropic(body, preamble) {
+  const { system } = body;
+  let merged = preamble;
+  if (typeof system === "string") {
+    merged = `${preamble}\n\n${system}`;
+  } else if (Array.isArray(system)) {
+    merged = [{ type: "text", text: preamble }, ...system];
+  }
+  return { ...body, system: merged };
+}
+/**
+ * Combine the preamble with an existing OpenAI system-message `content`.
+ *
+ * @param {string} preamble - Formatted memory preamble.
+ * @param {*} existing - Current `content` of the system message.
+ * @returns {string|Array} Strings get the preamble plus a blank line
+ *   prefixed; arrays of content parts keep their shape and gain a leading
+ *   text part; null/undefined becomes the preamble alone; anything else
+ *   falls back to the JSON-stringified value after the preamble.
+ */
+function mergeOpenAISystemContent(preamble, existing) {
+  if (typeof existing === "string") return `${preamble}\n\n${existing}`;
+  if (Array.isArray(existing)) {
+    // Stringifying an array of content parts would hand the model raw
+    // JSON in place of the customer's prompt, so keep the parts intact.
+    return [{ type: "text", text: preamble }, ...existing];
+  }
+  if (existing == null) return preamble;
+  return `${preamble}\n\n${JSON.stringify(existing)}`;
+}
+/**
+ * Put the preamble in front of an OpenAI-style request's system prompt.
+ *
+ * OpenAI carries the system prompt as the first message with
+ * role:'system'. If one exists the preamble is merged into it; otherwise
+ * a fresh system message is inserted at index 0.
+ *
+ * @param {object} body - Request body (not modified; `messages` is copied).
+ * @param {string} preamble - Formatted memory preamble.
+ * @returns {object} Shallow copy of `body` with the new `messages` array.
+ */
+function injectOpenAI(body, preamble) {
+  const messages = Array.isArray(body.messages) ? [...body.messages] : [];
+  const first = messages[0];
+  if (first?.role === "system") {
+    messages[0] = {
+      ...first,
+      content: mergeOpenAISystemContent(preamble, first.content),
+    };
+  } else {
+    messages.unshift({ role: "system", content: preamble });
+  }
+  return { ...body, messages };
+}

package/src/client.js CHANGED Viewed

@@ -56,8 +56,26 @@ export class TESClient {
     return new Session(this._config, opts);
   }
-  wrap(client, { sessionId, userId, metadata, autoEmit = true, waitUntil } = {}) {
+  wrap(
+    client,
+    {
+      sessionId,
+      userId,
+      metadata,
+      autoEmit = true,
+      waitUntil,
+      memory,
+      memoryOpts,
+    } = {}
+  ) {
     const config = userId ? { ...this._config, userId } : this._config;
-    return wrapClient(config, client, { sessionId, metadata, autoEmit, waitUntil });
+    return wrapClient(config, client, {
+      sessionId,
+      metadata,
+      autoEmit,
+      waitUntil,
+      memory,
+      memoryOpts,
+    });
   }
 }