@pentatonic-ai/ai-agent-sdk 0.5.8 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.5.8",
3
+ "version": "0.5.9",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -1,5 +1,7 @@
1
1
  import { universalChecks } from "../src/checks/universal.js";
2
2
  import { hostedTesChecks } from "../src/checks/hosted-tes.js";
3
+ import { dataFlowChecks } from "../src/checks/data-flow.js";
4
+ import { claudeCodeChecks } from "../src/checks/claude-code.js";
3
5
  import { platformChecks } from "../src/checks/platform.js";
4
6
 
5
7
  // fetch mocking — we don't want any real network in unit tests.
@@ -185,3 +187,358 @@ describe("platform checks", () => {
185
187
  expect(r.msg).toMatch(/no models loaded/);
186
188
  });
187
189
  });
190
+
191
describe("data-flow checks", () => {
  const EVENT_STREAM = "TES event stream has data";
  const MEMORY_CREATED = "MEMORY_CREATED events for client";
  const SEMANTIC_SEARCH = "semanticSearchMemories returns hits";

  // Whatever `fetch` was installed before a test ran. captureFetch()
  // overwrites the global, so it has to be put back afterwards or the
  // stub leaks into every suite that runs later in the same process.
  let fetchBeforeTest;

  beforeEach(() => {
    fetchBeforeTest = globalThis.fetch;
    process.env.TES_ENDPOINT = "https://example.test";
    process.env.TES_API_KEY = "tes_test_key";
    process.env.TES_CLIENT_ID = "test-client";
  });

  afterEach(() => {
    globalThis.fetch = fetchBeforeTest;
    delete process.env.TES_ENDPOINT;
    delete process.env.TES_API_KEY;
    delete process.env.TES_CLIENT_ID;
    delete process.env.PENTATONIC_DOCTOR_PROBE_QUERY;
  });

  // Capture the request bodies so tests can assert on the GraphQL
  // shape doctor sends — not just the response handling.
  function captureFetch(handler) {
    const calls = [];
    globalThis.fetch = async (url, opts) => {
      const body = opts?.body ? JSON.parse(opts.body) : null;
      calls.push({ url, headers: opts?.headers || {}, body });
      return handler(url, opts);
    };
    return calls;
  }

  // Fetch handler that answers every request with HTTP 200 + `payload`.
  const respondWith = (payload) => async () => ({
    ok: true,
    status: 200,
    json: async () => payload,
  });

  // Fetch handler for an `events` page reporting `totalCount` rows.
  const eventsPage = (totalCount) =>
    respondWith({ data: { events: { totalCount } } });

  // Fetch handler for a semanticSearchMemories result list.
  const searchHits = (hits) =>
    respondWith({ data: { semanticSearchMemories: hits } });

  // Look a check up by its registered name (fresh instance per call).
  const findCheck = (name) => dataFlowChecks().find((x) => x.name === name);

  it("registers the three expected probes", () => {
    const names = dataFlowChecks().map((c) => c.name);
    expect(names).toContain(EVENT_STREAM);
    expect(names).toContain(MEMORY_CREATED);
    expect(names).toContain(SEMANTIC_SEARCH);
  });

  // --- event stream check ---

  it("event stream: sends GraphQL query with `limit:1` (not `first:1`)", async () => {
    const calls = captureFetch(eventsPage(5));
    await findCheck(EVENT_STREAM).run();
    expect(calls).toHaveLength(1);
    expect(calls[0].body.query).toMatch(/events\(\s*limit:\s*1\s*\)/);
    expect(calls[0].body.query).not.toMatch(/first\s*:/);
    expect(calls[0].body.query).toMatch(/totalCount/);
  });

  it("event stream: warns when totalCount is 0", async () => {
    captureFetch(eventsPage(0));
    const r = await findCheck(EVENT_STREAM).run();
    expect(r.ok).toBe(false);
    expect(r.msg).toMatch(/0 events yet/);
  });

  it("event stream: passes with a positive count", async () => {
    captureFetch(eventsPage(42));
    const r = await findCheck(EVENT_STREAM).run();
    expect(r.ok).toBe(true);
    expect(r.detail.totalCount).toBe(42);
  });

  // --- memory-created check ---

  it("memory-created: filter uses eventType + StringFilterInput wrapper", async () => {
    const calls = captureFetch(eventsPage(3));
    await findCheck(MEMORY_CREATED).run();
    const { query, variables } = calls[0].body;
    // Schema requires eventType (not "kind") with a StringFilterInput
    // wrapper, and clientId likewise as a filter wrapper.
    expect(query).toMatch(/eventType:\s*\{\s*eq:\s*\$eventType\s*\}/);
    expect(query).toMatch(/clientId:\s*\{\s*eq:\s*\$client\s*\}/);
    expect(query).not.toMatch(/\bkind\b/);
    expect(variables.eventType).toBe("MEMORY_CREATED");
    expect(variables.client).toBe("test-client");
  });

  it("memory-created: flags the client id in the warning", async () => {
    captureFetch(eventsPage(0));
    const r = await findCheck(MEMORY_CREATED).run();
    expect(r.ok).toBe(false);
    expect(r.msg).toMatch(/test-client/);
  });

  // --- semantic search check ---

  it("semantic search: sends required clientId arg + selects similarity (not score)", async () => {
    const calls = captureFetch(searchHits([{ id: "m1", similarity: 0.8 }]));
    await findCheck(SEMANTIC_SEARCH).run();
    const { query, variables } = calls[0].body;
    // clientId is required by the schema; doctor must send it.
    expect(query).toMatch(/clientId:\s*\$clientId/);
    expect(variables.clientId).toBe("test-client");
    // Result type exposes `similarity`, not `score`.
    expect(query).toMatch(/similarity/);
    expect(query).not.toMatch(/\bscore\b/);
  });

  it("semantic search: warns on 0 hits", async () => {
    captureFetch(searchHits([]));
    const r = await findCheck(SEMANTIC_SEARCH).run();
    expect(r.ok).toBe(false);
    expect(r.msg).toMatch(/0 hits/);
  });

  it("semantic search: passes with hits", async () => {
    captureFetch(searchHits([{ id: "m1", similarity: 0.8 }]));
    const r = await findCheck(SEMANTIC_SEARCH).run();
    expect(r.ok).toBe(true);
    expect(r.detail.hits).toBe(1);
  });

  it("semantic search: 'cannot query field' skips gracefully", async () => {
    captureFetch(
      respondWith({
        errors: [{ message: 'Cannot query field "semanticSearchMemories" on type "Query"' }],
      })
    );
    const r = await findCheck(SEMANTIC_SEARCH).run();
    expect(r.ok).toBe(true);
    expect(r.msg).toMatch(/skipped/);
  });

  it("semantic search: schema-arg mismatches surface as errors, NOT silent skips", async () => {
    // E.g. a missing required arg — error mentions the field name but
    // is NOT the "Cannot query field" wording. Doctor must report,
    // not pretend the deployment doesn't expose the field.
    captureFetch(
      respondWith({
        errors: [
          {
            message:
              'Field "semanticSearchMemories" argument "clientId" of type "String!" is required',
          },
        ],
      })
    );
    const r = await findCheck(SEMANTIC_SEARCH).run();
    expect(r.ok).toBe(false);
    expect(r.msg).not.toMatch(/skipped/);
    expect(r.msg).toMatch(/required/);
  });

  it("PENTATONIC_DOCTOR_PROBE_QUERY overrides the default probe text", async () => {
    process.env.PENTATONIC_DOCTOR_PROBE_QUERY = "custom probe text";
    const calls = captureFetch(searchHits([]));
    await findCheck(SEMANTIC_SEARCH).run();
    expect(calls[0].body.variables.q).toBe("custom probe text");
  });

  // --- auth header branching ---

  it("uses Authorization: Bearer for tes_-prefixed keys", async () => {
    const calls = captureFetch(eventsPage(1));
    process.env.TES_API_KEY = "tes_user_abc";
    await findCheck(EVENT_STREAM).run();
    expect(calls[0].headers.Authorization).toBe("Bearer tes_user_abc");
    expect(calls[0].headers["x-service-key"]).toBeUndefined();
  });

  it("uses x-service-key for non-tes_ keys (internal service tokens)", async () => {
    const calls = captureFetch(eventsPage(1));
    process.env.TES_API_KEY = "internal_svc_xyz";
    await findCheck(EVENT_STREAM).run();
    expect(calls[0].headers["x-service-key"]).toBe("internal_svc_xyz");
    expect(calls[0].headers.Authorization).toBeUndefined();
  });

  it("sends x-client-id on every request", async () => {
    const calls = captureFetch(eventsPage(1));
    await findCheck(EVENT_STREAM).run();
    expect(calls[0].headers["x-client-id"]).toBe("test-client");
  });

  it("all three report missing env clearly", async () => {
    delete process.env.TES_CLIENT_ID;
    for (const c of dataFlowChecks()) {
      const r = await c.run();
      expect(r.ok).toBe(false);
      expect(r.msg).toMatch(/TES_ENDPOINT|required/);
    }
  });
});
467
+
468
describe("Claude Code plugin check", () => {
  const FAKE_HOME = "/home/fake";
  // Path of the manifest relative to a Claude config root.
  const MANIFEST_TAIL =
    "plugins/marketplaces/pentatonic-ai/.claude-plugin/plugin.json";

  // Serialized manifest for the tes-memory plugin at `version`.
  const manifestJson = (version) =>
    JSON.stringify({ name: "tes-memory", version });

  // Build the single plugin check with a fake home dir and an empty env
  // unless the test overrides those seams.
  function buildCheck(seams) {
    const [check] = claudeCodeChecks({
      homedir: () => FAKE_HOME,
      env: {},
      ...seams,
    });
    return check;
  }

  it("reports installed + version when manifest is present at ~/.claude", async () => {
    const defaultPath = `${FAKE_HOME}/.claude/${MANIFEST_TAIL}`;
    const check = buildCheck({
      fileExists: (p) => p === defaultPath,
      readFile: () => manifestJson("0.5.3"),
    });
    const r = await check.run();
    expect(r.ok).toBe(true);
    expect(r.msg).toMatch(/tes-memory v0\.5\.3 installed/);
    expect(r.detail.version).toBe("0.5.3");
    expect(r.detail.path).toMatch(/\.claude\/plugins/);
  });

  it("falls through to ~/.claude-pentatonic when ~/.claude is empty", async () => {
    const pentatonicPath = `${FAKE_HOME}/.claude-pentatonic/${MANIFEST_TAIL}`;
    const check = buildCheck({
      fileExists: (p) => p === pentatonicPath,
      readFile: () => manifestJson("0.5.3"),
    });
    const r = await check.run();
    expect(r.ok).toBe(true);
    expect(r.detail.path).toBe(pentatonicPath);
  });

  it("respects CLAUDE_CONFIG_DIR override (highest precedence)", async () => {
    const overridePath = `/custom/cfg/${MANIFEST_TAIL}`;
    const check = buildCheck({
      fileExists: (p) => p === overridePath,
      readFile: () => manifestJson("9.9.9"),
      env: { CLAUDE_CONFIG_DIR: "/custom/cfg" },
    });
    const r = await check.run();
    expect(r.ok).toBe(true);
    expect(r.detail.path).toBe(overridePath);
    expect(r.detail.version).toBe("9.9.9");
  });

  it("reports the install command + all candidate paths when none exist", async () => {
    const check = buildCheck({
      fileExists: () => false,
      env: { CLAUDE_CONFIG_DIR: "/custom/cfg" },
    });
    const r = await check.run();
    expect(r.ok).toBe(false);
    expect(r.msg).toMatch(/plugin install tes-memory/);
    const expectedRoots = [
      "/custom/cfg/plugins",
      "/home/fake/.claude/plugins",
      "/home/fake/.claude-pentatonic/plugins",
    ];
    expect(r.detail.candidates).toEqual(
      expect.arrayContaining(
        expectedRoots.map((root) => expect.stringContaining(root))
      )
    );
  });

  it("handles corrupt manifest json without throwing", async () => {
    const check = buildCheck({
      fileExists: () => true,
      readFile: () => "{ not json",
    });
    const r = await check.run();
    expect(r.ok).toBe(false);
    expect(r.msg).toMatch(/unreadable/);
  });
});
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Claude Code plugin installation check.
3
+ *
4
+ * The SDK ships a Claude Code plugin (`tes-memory@pentatonic-ai`) that
5
+ * wires UserPromptSubmit / Stop hooks so CHAT_TURN + MEMORY_CREATED
6
+ * events actually get emitted. It's entirely possible for the server
7
+ * side to be healthy (TES reachable, key valid) while the client side
8
+ * is silently uninstalled — the hooks never fire and the event stream
9
+ * stays empty. This check tells users whether the plugin is present
10
+ * and what version they're on, so upstream feedback ("why am I not
11
+ * seeing memories?") lands faster.
12
+ *
13
+ * Resolution order mirrors `hooks/scripts/shared.js:loadConfig` — three
14
+ * candidate roots, first match wins:
15
+ *
16
+ * 1. $CLAUDE_CONFIG_DIR (explicit override, highest precedence)
17
+ * 2. ~/.claude (default Claude Code install)
18
+ * 3. ~/.claude-pentatonic (Pentatonic-branded variant)
19
+ *
20
+ * The check is universal-ish: it only reports positively when the
21
+ * plugin file is found. If the user isn't on Claude Code at all, the
22
+ * plugin absence is reported as info, not a failure.
23
+ */
24
+
25
+ import { existsSync as realExistsSync, readFileSync as realReadFileSync } from "fs";
26
+ import { join } from "path";
27
+ import { homedir as realHomedir } from "os";
28
+
29
+ import { SEVERITY } from "../index.js";
30
+
31
// Segments of the plugin manifest path below a Claude config root.
const PLUGIN_REL_PATH = [
  "plugins",
  "marketplaces",
  "pentatonic-ai",
  ".claude-plugin",
  "plugin.json",
];

/**
 * List every manifest location to probe, in precedence order (the
 * caller takes the first one that exists):
 *
 *   1. `env.CLAUDE_CONFIG_DIR`, only when set to a non-empty value
 *   2. `<home>/.claude`
 *   3. `<home>/.claude-pentatonic`
 *
 * @param {string} home - Home directory used for the two default roots.
 * @param {object} [env] - Environment map; may be null/undefined.
 * @returns {string[]} Candidate plugin.json paths, highest precedence first.
 */
function candidateManifestPaths(home, env) {
  const overrideRoot = env?.CLAUDE_CONFIG_DIR;
  const searchRoots = [
    ...(overrideRoot ? [overrideRoot] : []),
    join(home, ".claude"),
    join(home, ".claude-pentatonic"),
  ];
  return searchRoots.map((configRoot) => join(configRoot, ...PLUGIN_REL_PATH));
}
51
+
52
/**
 * Build the doctor check that reports whether the tes-memory Claude Code
 * plugin manifest can be located and parsed.
 *
 * All seams are optional and exist so tests can avoid the real
 * filesystem, home directory and environment.
 *
 * @param {object} [seams]
 * @param {(path: string) => boolean} [seams.fileExists] - Defaults to `existsSync`.
 * @param {(path: string) => string} [seams.readFile] - Defaults to a utf8 `readFileSync`.
 * @param {() => string} [seams.homedir] - Defaults to `os.homedir`.
 * @param {object} [seams.env] - Defaults to `process.env`.
 * @returns {object} Check descriptor: `name`, `severity` (SEVERITY.INFO)
 *   and an async `run()` resolving to `{ ok, msg, detail }`.
 */
function checkClaudeCodePluginInstalled({
  fileExists,
  readFile,
  homedir,
  env,
} = {}) {
  const exists = fileExists || realExistsSync;
  const read = readFile || ((p) => realReadFileSync(p, "utf8"));
  const resolveHome = typeof homedir === "function" ? homedir : realHomedir;
  const resolveEnv = env || process.env;
  const home = resolveHome();

  return {
    name: "tes-memory Claude Code plugin installed",
    severity: SEVERITY.INFO,
    run: async () => {
      const candidates = candidateManifestPaths(home, resolveEnv);
      const found = candidates.find((p) => exists(p));
      if (!found) {
        return {
          ok: false,
          msg:
            "tes-memory plugin not found — run: /plugin marketplace add Pentatonic-Ltd/ai-agent-sdk && /plugin install tes-memory@pentatonic-ai",
          detail: { candidates },
        };
      }
      try {
        const manifest = JSON.parse(read(found));
        // JSON.parse happily returns primitives, arrays and null. None of
        // those is a plugin manifest, so reject them here instead of
        // reporting a bogus "tes-memory v? installed".
        const isObject =
          manifest !== null &&
          typeof manifest === "object" &&
          !Array.isArray(manifest);
        if (!isObject) {
          throw new Error("manifest is not a JSON object");
        }
        const version = typeof manifest.version === "string" ? manifest.version : "?";
        const name = typeof manifest.name === "string" ? manifest.name : "tes-memory";
        return {
          ok: true,
          msg: `${name} v${version} installed`,
          detail: { name, version, path: found },
        };
      } catch (err) {
        // Read failures, invalid JSON and non-object manifests all land
        // here; the check reports them rather than throwing.
        return {
          ok: false,
          msg: `plugin manifest unreadable: ${err.message}`,
          detail: { path: found },
        };
      }
    },
  };
}
97
+
98
/**
 * Checks contributed by the Claude Code integration — currently just
 * the plugin-installed check.
 *
 * @param {object} [seams] - Optional test seams, forwarded unchanged to
 *   the plugin-installed check.
 * @returns {object[]} One-element array holding that check.
 */
export function claudeCodeChecks(seams = {}) {
  const pluginCheck = checkClaudeCodePluginInstalled(seams);
  return [pluginCheck];
}
@@ -0,0 +1,252 @@
1
+ /**
2
+ * Hosted TES data-flow checks.
3
+ *
4
+ * The existing hosted-tes checks prove the TES server is up and the API
5
+ * key is accepted. They don't prove data is actually flowing end-to-end —
6
+ * you can have a green doctor pass while the Claude Code hook is silently
7
+ * dropping events, or while vector retrieval is returning nothing at the
8
+ * configured minScore.
9
+ *
10
+ * These checks close that gap with three real-data probes against the
11
+ * same GraphQL endpoint the SDK already uses at runtime:
12
+ *
13
+ * - "TES event stream has data" — events table has rows at all
14
+ * - "MEMORY_CREATED events for client" — memory events exist for this client
15
+ * - "semanticSearchMemories returns hits" — a broad probe query retrieves > 0
16
+ *
17
+ * All three are WARNINGs by default: a green liveness check + a "0 events"
18
+ * warning is more informative than pretending liveness implies correctness,
19
+ * but an empty stream on a fresh install is legitimate and shouldn't fail
20
+ * the overall doctor pass.
21
+ *
22
+ * GraphQL shapes match TES's deployed schema (verified against
23
+ * thing-event-system/functions/api/graphql/domains/event/schema.js and
24
+ * thing-event-system/modules/deep-memory/graphql/memory/schema.js):
25
+ *
26
+ * events(filter: EventFilterInput, limit: Int, offset: Int): EventPage!
27
+ * EventFilterInput { eventType: StringFilterInput, clientId: StringFilterInput, ... }
28
+ * EventPage { totalCount: Int!, ... }
29
+ *
30
+ * semanticSearchMemories(
31
+ * clientId: String!,
32
+ * query: String!,
33
+ * userId: String,
34
+ * limit: Int,
35
+ * minScore: Float
36
+ * ): [SemanticMemoryResult!]!
37
+ * SemanticMemoryResult { id: String!, similarity: Float!, ... }
38
+ */
39
+
40
+ import { SEVERITY } from "../index.js";
41
+
42
/**
 * Call the global `fetch` with an abort signal that fires after
 * `timeoutMs`. The timeout signal is written last, so it replaces any
 * `signal` already present in `opts`.
 *
 * @param {string} url - Request URL.
 * @param {object} [opts] - Extra fetch init options.
 * @param {number} [timeoutMs=10000] - Milliseconds before the request aborts.
 * @returns {Promise<Response>} Whatever `fetch` resolves to.
 */
async function fetchWithTimeout(url, opts = {}, timeoutMs = 10_000) {
  const signal = AbortSignal.timeout(timeoutMs);
  const init = Object.assign({}, opts, { signal });
  return await fetch(url, init);
}
48
+
49
/**
 * Build the request headers for a TES GraphQL call.
 *
 * Keys prefixed with `tes_` (end-user keys) go out as
 * `Authorization: Bearer <key>`; any other non-empty key is sent as
 * `x-service-key` (internal/service keys). This mirrors the branching
 * in hooks/scripts/shared.js so doctor authenticates the same way the
 * SDK runtime does. With no key, only the common headers are returned.
 *
 * @param {string} [apiKey] - TES API key.
 * @param {string} clientId - Value for the `x-client-id` header.
 * @returns {object} Header map.
 */
function authHeaders(apiKey, clientId) {
  const common = {
    "Content-Type": "application/json",
    "x-client-id": clientId,
  };
  if (apiKey?.startsWith("tes_")) {
    return { ...common, Authorization: `Bearer ${apiKey}` };
  }
  return apiKey ? { ...common, "x-service-key": apiKey } : common;
}
67
+
68
/**
 * POST a GraphQL request to `<endpoint>/api/graphql` and return the
 * `data` member of the response.
 *
 * @param {string} endpoint - TES base URL; trailing slashes are ignored.
 * @param {string} apiKey - Key passed to `authHeaders`.
 * @param {string} clientId - Client id passed to `authHeaders`.
 * @param {string} query - GraphQL document.
 * @param {object} [variables] - GraphQL variables.
 * @returns {Promise<object|undefined>} The response's `data` value.
 * @throws {Error} On a non-2xx status (message carries the status and up
 *   to 200 characters of the body), on a response body that is not a
 *   JSON object, or when the response lists GraphQL errors (message is
 *   the first error's message).
 */
async function graphql(endpoint, apiKey, clientId, query, variables) {
  // Strip *all* trailing slashes: "https://host//" must not become
  // "https://host//api/graphql".
  const url = `${endpoint.replace(/\/+$/, "")}/api/graphql`;
  const res = await fetchWithTimeout(url, {
    method: "POST",
    headers: authHeaders(apiKey, clientId),
    body: JSON.stringify({ query, variables }),
  });
  if (!res.ok) {
    // The body is only diagnostic context; a failure to read it must not
    // mask the HTTP status.
    const text = await res.text().catch(() => "");
    throw new Error(`HTTP ${res.status}${text ? `: ${text.slice(0, 200)}` : ""}`);
  }
  const body = await res.json();
  // A literal `null` (or any non-object) is valid JSON but not a GraphQL
  // response; fail with a readable message instead of a TypeError.
  if (body === null || typeof body !== "object") {
    throw new Error("invalid GraphQL response body");
  }
  if (body.errors?.length) {
    throw new Error(body.errors[0]?.message || "graphql error");
  }
  return body.data;
}
87
+
88
/**
 * Read the hosted-TES connection settings from the environment.
 *
 * @returns {object} `{ endpoint, apiKey, clientId }` when TES_ENDPOINT,
 *   TES_API_KEY and TES_CLIENT_ID are all set to non-empty values;
 *   otherwise `{ missing: true, reason }`.
 */
function requireHostedEnv() {
  const {
    TES_ENDPOINT: endpoint,
    TES_API_KEY: apiKey,
    TES_CLIENT_ID: clientId,
  } = process.env;

  const allPresent = Boolean(endpoint && apiKey && clientId);
  if (allPresent) {
    return { endpoint, apiKey, clientId };
  }
  return {
    missing: true,
    reason: "TES_ENDPOINT / TES_API_KEY / TES_CLIENT_ID required",
  };
}
100
+
101
/**
 * Doctor check: does the TES event stream contain any events at all?
 *
 * `run()` resolves to `{ ok, msg, detail? }` and never rejects for a
 * failed request — missing env and request errors come back as
 * `ok: false` with the reason in `msg`.
 *
 * @returns {object} Check descriptor (`name`, `severity`, async `run`).
 */
function checkEventStreamHasData() {
  const probe = async () => {
    const hosted = requireHostedEnv();
    if (hosted.missing) {
      return { ok: false, msg: hosted.reason };
    }
    const { endpoint, apiKey, clientId } = hosted;
    try {
      // `limit: 1` keeps the payload tiny — only the total matters here.
      const result = await graphql(
        endpoint,
        apiKey,
        clientId,
        `query DoctorEventCount { events(limit: 1) { totalCount } }`
      );
      const totalCount = result?.events?.totalCount ?? 0;
      return totalCount > 0
        ? {
            ok: true,
            msg: `${totalCount} event(s) in stream`,
            detail: { totalCount },
          }
        : {
            ok: false,
            msg: "0 events yet — send one prompt to your agent and re-run",
            detail: { totalCount: 0 },
          };
    } catch (err) {
      return { ok: false, msg: err.message };
    }
  };

  return {
    name: "TES event stream has data",
    severity: SEVERITY.WARNING,
    run: probe,
  };
}
135
+
136
/**
 * Doctor check: are there MEMORY_CREATED events for the configured
 * client id?
 *
 * The query filters on both `eventType` and `clientId`, each wrapped in
 * an `{ eq: ... }` filter object. `run()` resolves to
 * `{ ok, msg, detail? }`; missing env and request errors are reported
 * as `ok: false` with the reason in `msg`.
 *
 * @returns {object} Check descriptor (`name`, `severity`, async `run`).
 */
function checkMemoryCreatedForClient() {
  const probe = async () => {
    const hosted = requireHostedEnv();
    if (hosted.missing) {
      return { ok: false, msg: hosted.reason };
    }
    const { endpoint, apiKey, clientId } = hosted;
    try {
      const variables = { eventType: "MEMORY_CREATED", client: clientId };
      const result = await graphql(
        endpoint,
        apiKey,
        clientId,
        `query DoctorMemCount($eventType: String!, $client: String!) {
          events(
            limit: 1,
            filter: {
              eventType: { eq: $eventType }
              clientId: { eq: $client }
            }
          ) {
            totalCount
          }
        }`,
        variables
      );
      const totalCount = result?.events?.totalCount ?? 0;
      if (!(totalCount > 0)) {
        return {
          ok: false,
          msg: `no MEMORY_CREATED events for ${clientId} yet — hook may not be writing memories`,
          detail: { totalCount: 0, clientId },
        };
      }
      return {
        ok: true,
        msg: `${totalCount} MEMORY_CREATED event(s) for ${clientId}`,
        detail: { totalCount, clientId },
      };
    } catch (err) {
      return { ok: false, msg: err.message };
    }
  };

  return {
    name: "MEMORY_CREATED events for client",
    severity: SEVERITY.WARNING,
    run: probe,
  };
}
180
+
181
// Matches only the "Cannot query field ... semanticSearchMemories"
// wording (case-insensitive, quotes optional). Other errors that merely
// mention the field — e.g. a missing required argument — must NOT match,
// otherwise a real schema mismatch would be reported as "field not
// exposed" and silently hidden.
const FIELD_NOT_FOUND_RE = /cannot query field "?semanticSearchMemories"?/i;

/**
 * Doctor check: does `semanticSearchMemories` return at least one hit
 * for a broad probe query?
 *
 * The probe text comes from PENTATONIC_DOCTOR_PROBE_QUERY and falls
 * back to "heartbeat"; minScore is fixed at 0.1 and at most 5 results
 * are requested. `run()` resolves to `{ ok, msg, detail? }`. A
 * "Cannot query field" error is reported as a passing, skipped check;
 * every other error is reported as `ok: false`.
 *
 * @returns {object} Check descriptor (`name`, `severity`, async `run`).
 */
function checkSemanticSearchReturnsHits() {
  const probe = async () => {
    const hosted = requireHostedEnv();
    if (hosted.missing) {
      return { ok: false, msg: hosted.reason };
    }
    const { endpoint, apiKey, clientId } = hosted;
    try {
      // Deliberately permissive threshold: this check asks "does
      // retrieval work at all", not "does retrieval rank well".
      const probeText = process.env.PENTATONIC_DOCTOR_PROBE_QUERY || "heartbeat";
      const minScore = 0.1;
      const result = await graphql(
        endpoint,
        apiKey,
        clientId,
        `query DoctorSearch($clientId: String!, $q: String!, $minScore: Float!) {
          semanticSearchMemories(
            clientId: $clientId,
            query: $q,
            minScore: $minScore,
            limit: 5
          ) {
            id
            similarity
          }
        }`,
        { clientId, q: probeText, minScore }
      );
      const matches = result?.semanticSearchMemories ?? [];
      const hitCount = matches.length;
      if (!(hitCount > 0)) {
        return {
          ok: false,
          msg: `0 hits for "${probeText}" at minScore=${minScore} — try lowering minScore or PENTATONIC_DOCTOR_PROBE_QUERY`,
          detail: { query: probeText, minScore, hits: 0 },
        };
      }
      return {
        ok: true,
        msg: `${hitCount} hit(s) for "${probeText}" at minScore=${minScore}`,
        detail: { query: probeText, minScore, hits: hitCount },
      };
    } catch (err) {
      // Anything other than the exact "field not found" wording is a
      // real failure and is surfaced as such.
      const fieldMissing = FIELD_NOT_FOUND_RE.test(err.message);
      if (!fieldMissing) {
        return { ok: false, msg: err.message };
      }
      return {
        ok: true,
        msg: "semanticSearchMemories not exposed by this deployment (skipped)",
      };
    }
  };

  return {
    name: "semanticSearchMemories returns hits",
    severity: SEVERITY.WARNING,
    run: probe,
  };
}
245
+
246
/**
 * The hosted-TES data-flow probes, in the order they are reported:
 * event stream, MEMORY_CREATED events, semantic search.
 *
 * @returns {object[]} Freshly built check descriptors.
 */
export function dataFlowChecks() {
  const factories = [
    checkEventStreamHasData,
    checkMemoryCreatedForClient,
    checkSemanticSearchReturnsHits,
  ];
  return factories.map((makeCheck) => makeCheck());
}
@@ -24,7 +24,9 @@ export { renderHuman, renderJson } from "./output.js";
24
24
  export { universalChecks } from "./checks/universal.js";
25
25
  export { localMemoryChecks } from "./checks/local-memory.js";
26
26
  export { hostedTesChecks } from "./checks/hosted-tes.js";
27
+ export { dataFlowChecks } from "./checks/data-flow.js";
27
28
  export { platformChecks } from "./checks/platform.js";
29
+ export { claudeCodeChecks } from "./checks/claude-code.js";
28
30
 
29
31
  export const SEVERITY = Object.freeze({
30
32
  CRITICAL: "critical",
@@ -21,7 +21,9 @@ import { detectPaths, PATHS } from "./detect.js";
21
21
  import { universalChecks } from "./checks/universal.js";
22
22
  import { localMemoryChecks } from "./checks/local-memory.js";
23
23
  import { hostedTesChecks } from "./checks/hosted-tes.js";
24
+ import { dataFlowChecks } from "./checks/data-flow.js";
24
25
  import { platformChecks } from "./checks/platform.js";
26
+ import { claudeCodeChecks } from "./checks/claude-code.js";
25
27
  import { loadPlugins } from "./plugins.js";
26
28
  import { SEVERITY } from "./index.js";
27
29
 
@@ -32,7 +34,8 @@ function pathChecks(path) {
32
34
  case PATHS.LOCAL:
33
35
  return localMemoryChecks();
34
36
  case PATHS.HOSTED:
35
- return hostedTesChecks();
37
+ // Liveness (hostedTesChecks) + end-to-end data-flow probes.
38
+ return [...hostedTesChecks(), ...dataFlowChecks()];
36
39
  case PATHS.PLATFORM:
37
40
  return platformChecks();
38
41
  default:
@@ -91,8 +94,9 @@ export async function runDoctor(opts = {}) {
91
94
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
92
95
  const paths = detectPaths(opts);
93
96
 
94
- // Universal checks always run.
95
- const checks = [...universalChecks()];
97
+ // Universal checks always run. claudeCodeChecks is also universal —
98
+ // the plugin may be present regardless of which install path is in use.
99
+ const checks = [...universalChecks(), ...claudeCodeChecks()];
96
100
  for (const p of paths) {
97
101
  checks.push(...pathChecks(p));
98
102
  }
@@ -581,3 +581,154 @@ describe("ingest options contract", () => {
581
581
  expect(registered.length).toBe(0);
582
582
  });
583
583
  });
584
+
585
+ // --- Ingest dedup ---
586
+
587
describe("ingest dedup option", () => {
  // In-memory stand-in for the SQL client. `state.existing` seeds the
  // rows the dedup pre-check can see: [{ id, client_id, content }, ...].
  // Returns the db function plus the recorded calls and inserted rows.
  function makeMockDb(state = {}) {
    const existing = state.existing || [];
    const calls = [];
    const inserted = [];

    // Mirrors the dedup query: same tenant, and either byte-equal content
    // or the legacy timestamp-prefixed form ending in `] <content>`.
    const findDuplicate = (clientId, content) =>
      existing.find((row) => {
        if (row.client_id !== clientId) return false;
        return row.content === content || row.content.endsWith(`] ${content}`);
      });

    async function db(sql, params) {
      calls.push({ sql, params });

      if (sql.includes("SELECT id FROM memory_layers")) {
        return { rows: [{ id: "layer-1" }] };
      }

      // Dedup pre-check (raw + LIKE legacy form)
      if (sql.includes("SELECT id FROM memory_nodes")) {
        const [clientId, content] = params;
        const hit = findDuplicate(clientId, content);
        return { rows: hit ? [{ id: hit.id }] : [] };
      }

      // Insert path: params are positional — id, client_id, <unused>, content.
      if (sql.startsWith("INSERT INTO memory_nodes")) {
        const [id, client_id, , content] = params;
        inserted.push({ id, client_id, content });
      }

      return { rows: [] };
    }

    return { db, calls, inserted };
  }

  const mockAi = { embed: async () => null };
  const mockLlm = { chat: async () => "[]" };

  // Run ingest against `db` with the shared AI/LLM stubs.
  const ingestWith = (db, content, opts) =>
    ingest(db, mockAi, mockLlm, content, opts);

  it("inserts a fresh row when no duplicate exists", async () => {
    const { db, inserted } = makeMockDb({ existing: [] });

    const out = await ingestWith(db, "fresh content", {
      clientId: "c",
      dedup: true,
    });

    expect(out.deduped).toBeUndefined();
    expect(out.id.startsWith("mem_")).toBe(true);
    expect(inserted).toHaveLength(1);
    expect(inserted[0].content).toBe("fresh content");
  });

  it("returns the existing row's id when raw content matches", async () => {
    const seed = { id: "mem_existing", client_id: "c", content: "duplicate content" };
    const { db, inserted } = makeMockDb({ existing: [seed] });

    const out = await ingestWith(db, "duplicate content", {
      clientId: "c",
      dedup: true,
    });

    expect(out.deduped).toBe(true);
    expect(out.id).toBe("mem_existing");
    expect(out.content).toBe("duplicate content");
    expect(inserted).toHaveLength(0); // no insert happened
  });

  it("matches legacy timestamp-prefixed rows (`[<iso>] <content>`)", async () => {
    const seed = {
      id: "mem_legacy",
      client_id: "c",
      content: "[2026-04-26T10:00:00Z] duplicate content",
    };
    const { db, inserted } = makeMockDb({ existing: [seed] });

    const out = await ingestWith(db, "duplicate content", {
      clientId: "c",
      dedup: true,
    });

    expect(out.deduped).toBe(true);
    expect(out.id).toBe("mem_legacy");
    expect(inserted).toHaveLength(0);
  });

  it("dedup off (default) still inserts on duplicate content", async () => {
    const seed = { id: "mem_existing", client_id: "c", content: "duplicate content" };
    const { db, inserted } = makeMockDb({ existing: [seed] });

    const out = await ingestWith(db, "duplicate content", {
      clientId: "c",
      // dedup omitted — defaults to false
    });

    expect(out.deduped).toBeUndefined();
    expect(inserted).toHaveLength(1);
    expect(inserted[0].id).not.toBe("mem_existing");
  });

  it("scopes dedup to the given clientId (cross-tenant collisions don't dedup)", async () => {
    const seed = { id: "mem_other", client_id: "other", content: "duplicate content" };
    const { db, inserted } = makeMockDb({ existing: [seed] });

    const out = await ingestWith(db, "duplicate content", {
      clientId: "c", // different tenant
      dedup: true,
    });

    expect(out.deduped).toBeUndefined();
    expect(inserted).toHaveLength(1);
    expect(inserted[0].client_id).toBe("c");
  });

  it("dedup check failure falls through to insert (best-effort semantics)", async () => {
    let dupCheckSql = null;
    // Layer lookup succeeds, the dedup SELECT throws, everything else
    // (including the insert) returns no rows.
    const flakyDb = async (sql) => {
      if (sql.includes("SELECT id FROM memory_layers")) {
        return { rows: [{ id: "layer-1" }] };
      }
      if (sql.includes("SELECT id FROM memory_nodes")) {
        dupCheckSql = sql;
        throw new Error("DB unreachable");
      }
      return { rows: [] };
    };

    const out = await ingestWith(flakyDb, "content", {
      clientId: "c",
      dedup: true,
    });

    expect(dupCheckSql).toContain("memory_nodes");
    expect(out.deduped).toBeUndefined();
    expect(out.id.startsWith("mem_")).toBe(true);
  });
});
@@ -21,7 +21,17 @@ import { distill } from "./distill.js";
21
21
  * tasks (e.g. Cloudflare Worker ctx.waitUntil). If provided, the distill
22
22
  * background task is handed to it so the host keeps it alive past return.
23
23
  * Without it, distill is fire-and-forget (fine for Node/browser).
24
- * @returns {Promise<{id: string, content: string, layerId: string}>}
24
+ * @param {boolean} [opts.dedup=false] - Skip ingest if a memory_node with
25
+ * byte-equal content already exists for this `client_id`. Use for
26
+ * retry-safe pipelines where the same logical event may be processed
27
+ * twice (queue retries, consumer fan-out). Returns the existing row's
28
+ * id with `{deduped: true}` instead of inserting. Strict equality —
29
+ * not a semantic similarity match (rows stored in the legacy `[<iso>] <content>` form also count as duplicates). Best-effort: if the SELECT itself
30
+ * fails, ingest proceeds (worst case: duplicate row, identical to
31
+ * `dedup:false` behaviour). The eventual structural fix is a
32
+ * `UNIQUE(client_id, content_hash)` constraint at the schema level;
33
+ * this option is the bridge.
34
+ * @returns {Promise<{id: string, content: string, layerId: string, deduped?: boolean}>}
25
35
  */
26
36
  export async function ingest(db, ai, llm, content, opts = {}) {
27
37
  const clientId = opts.clientId;
@@ -41,6 +51,35 @@ export async function ingest(db, ai, llm, content, opts = {}) {
41
51
  }
42
52
 
43
53
  const layerId = layerResult.rows[0].id;
54
+
55
+ // Optional dedup: skip the insert (and all the embedding/HyDE/distill
56
+ // work that would follow) if a row with byte-equal content already
57
+ // exists for this tenant. The OR-LIKE branch matches against the
58
+ // legacy `[<iso>] <content>` form so callers that wrote with a
59
+ // timestamp prefix dedup correctly until the legacy corpus ages out.
60
+ if (opts.dedup) {
61
+ try {
62
+ const dupCheck = await db(
63
+ `SELECT id FROM memory_nodes
64
+ WHERE client_id = $1
65
+ AND (content = $2 OR content LIKE '%] ' || $2)
66
+ LIMIT 1`,
67
+ [clientId, content]
68
+ );
69
+ if (dupCheck.rows?.length) {
70
+ log(`dedup: matched existing memory ${dupCheck.rows[0].id}`);
71
+ return {
72
+ id: dupCheck.rows[0].id,
73
+ content,
74
+ layerId,
75
+ deduped: true,
76
+ };
77
+ }
78
+ } catch (err) {
79
+ log(`dedup check failed (proceeding with insert): ${err.message}`);
80
+ }
81
+ }
82
+
44
83
  const memoryId = `mem_${crypto.randomUUID()}`;
45
84
 
46
85
  // Insert memory node