@pentatonic-ai/ai-agent-sdk 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -73,6 +73,8 @@ That's it. The plugin hooks automatically search memories on every prompt and st
  - **Distilled memory** -- a background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. Default-on; the raw turn is still preserved.
  - **Decay and consolidation** -- memories fade over time; frequently accessed ones get promoted

+ > **Store latency note (v0.5.4+):** on the local memory server, `store_memory` now awaits distillation before returning instead of running it fire-and-forget. This fixed a bug where distillation was being killed mid-flight (atoms never got embeddings, so they were unreachable by semantic search), but it means stores now take as long as your configured LLM takes to produce atoms — typically 5–30s on `llama3.2:3b`, up to the `chat()` timeout ceiling (60s default, overridable via `opts.timeout`). Cloudflare Worker deployments pass `ctx.waitUntil` and still return fast. Set `opts.distill: false` on the ingest call if you want the old fast-return behaviour at the cost of no atoms.
+
  ### Change models

  ```bash
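
The opt-outs in that note look like this in practice. A minimal sketch against the `ingest(db, ai, llm, content, opts)` signature visible in the hunks below; the import path and the `db`/`ai`/`llm` clients are placeholders, and whether `opts.timeout` is forwarded from ingest through distill to `chat()` is an assumption based on the README note:

```js
// Hypothetical wiring; only the opts shapes are taken from this diff.
import { ingest } from "@pentatonic-ai/ai-agent-sdk";

// Old fast-return behaviour: skip distillation, so no atoms are produced.
await ingest(db, ai, llm, "Phil: I'll have a cortado.", {
  clientId: "client-1",
  distill: false,
});

// New v0.5.4 default: with no waitUntil, ingest awaits distillation inline,
// so this call can take as long as the configured LLM needs, up to the
// chat() timeout ceiling.
const memory = await ingest(db, ai, llm, "Phil: I'll have a cortado.", {
  clientId: "client-1",
  timeout: 90_000, // assumed to reach chat() via distill, per the README note
});
```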
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@pentatonic-ai/ai-agent-sdk",
-   "version": "0.5.3",
+   "version": "0.5.4",
    "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
    "type": "module",
    "main": "./dist/index.cjs",
@@ -366,6 +366,97 @@ describe("search options contract", () => {
      const out = await search(mockDb, mockAi, "q", { clientId: "c", minScore: 0 });
      expect(out[0].source_id).toBe("raw-1");
    });
+
+   it("hydrateAtomSources: true fetches and appends source raws for matched atoms", async () => {
+     const matchedRows = [
+       { id: "atom-1", client_id: "c", layer_id: "l", content: "Caroline went to support group",
+         confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: "raw-1" },
+     ];
+     const hydratedRaw = {
+       id: "raw-1", client_id: "c", layer_id: "l", source_id: null,
+       content: "[Date: 8 May 2023] Caroline: I went to the LGBTQ support group...",
+       confidence: 1, decay_rate: 0.05, access_count: 0,
+     };
+     const mockDb = async (sql, params) => {
+       if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
+       if (sql.includes("final_score")) return { rows: matchedRows };
+       if (sql.includes("id = ANY") && Array.isArray(params?.[0]) && params[0].includes("raw-1")) {
+         return { rows: [hydratedRaw] };
+       }
+       return { rows: [] };
+     };
+     const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
+
+     const out = await search(mockDb, mockAi, "q", {
+       clientId: "c",
+       minScore: 0,
+       dedupeBySource: false,
+       hydrateAtomSources: true,
+     });
+
+     expect(out.length).toBe(2);
+     expect(out.map((r) => r.id).sort()).toEqual(["atom-1", "raw-1"]);
+     const raw = out.find((r) => r.id === "raw-1");
+     expect(raw.content).toContain("8 May 2023");
+   });
+
+   it("hydrateAtomSources: false is a no-op (default)", async () => {
+     const rows = [
+       { id: "atom-1", client_id: "c", layer_id: "l", content: "atom",
+         confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: "raw-1" },
+     ];
+     let hydrateCalled = false;
+     const mockDb = async (sql) => {
+       if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
+       if (sql.includes("final_score")) return { rows };
+       if (sql.includes("SELECT * FROM memory_nodes WHERE id = ANY")) {
+         hydrateCalled = true;
+         return { rows: [] };
+       }
+       return { rows: [] };
+     };
+     const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
+
+     await search(mockDb, mockAi, "q", {
+       clientId: "c",
+       minScore: 0,
+       dedupeBySource: false,
+     });
+
+     expect(hydrateCalled).toBe(false);
+   });
+ });
+
+ describe("ingest default behavior", () => {
+   it("awaits distill when no waitUntil is passed (fixes fire-and-forget in local dev)", async () => {
+     let distillStarted = false;
+     let distillFinished = false;
+     const mockDb = async (sql) => {
+       if (sql.includes("SELECT id FROM memory_layers")) {
+         return { rows: [{ id: "layer-1" }] };
+       }
+       return { rows: [] };
+     };
+     const mockAi = { embed: async () => null };
+     const mockLlm = {
+       chat: async () => {
+         distillStarted = true;
+         // Simulate LLM latency
+         await new Promise((r) => setTimeout(r, 20));
+         distillFinished = true;
+         return "[]";
+       },
+     };
+
+     await ingest(mockDb, mockAi, mockLlm, "some content", {
+       clientId: "c",
+       // no waitUntil — distill must be awaited inline
+     });
+
+     // After ingest returns, distill must have finished
+     expect(distillStarted).toBe(true);
+     expect(distillFinished).toBe(true);
+   });
  });

  // --- Ingest options contract ---
@@ -89,6 +89,9 @@ export function createAIClient(config) {
  * @param {object} [opts]
  * @param {number} [opts.maxTokens=150]
  * @param {number} [opts.temperature=0.7]
+ * @param {number} [opts.timeout=60000] - Defaults to 60s. Longer chunks plus
+ * smaller/local models routinely exceed 15s; 60s keeps distill/HyDE
+ * reliable on prod-class content while still catching genuine hangs.
  * @returns {Promise<string>} The assistant's response text
  */
  async chat(messages, opts = {}) {
@@ -102,7 +105,7 @@ export function createAIClient(config) {
        max_tokens: opts.maxTokens || 150,
        temperature: opts.temperature ?? 0.7,
      }),
-     signal: AbortSignal.timeout(opts.timeout || 15000),
+     signal: AbortSignal.timeout(opts.timeout || 60000),
    });

    if (!res.ok) return "";
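
Callers that want a tighter ceiling than the new 60s default can still set one per call; `opts.timeout` feeds `AbortSignal.timeout` directly, so an overrun rejects the underlying fetch with a `TimeoutError`. A sketch, assuming only the `chat(messages, opts)` shape shown above; the client config and `distillMessages` are placeholders:

```js
const ai = createAIClient(config); // config shape not shown in this diff

// Interactive path: fail fast instead of riding out the 60s default.
const quick = await ai
  .chat([{ role: "user", content: "One-word mood for this note?" }], {
    maxTokens: 20,
    timeout: 10_000, // 10s AbortSignal.timeout ceiling for this call only
  })
  .catch(() => ""); // mirrors the client's empty-string-on-failure style

// Distill path over long chunks: give a slow local model extra headroom.
// distillMessages: your prepared [{ role, content }] array.
const atoms = await ai.chat(distillMessages, { timeout: 120_000 });
```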
@@ -87,14 +87,20 @@ export async function ingest(db, ai, llm, content, opts = {}) {
      log(`HyDE failed for ${memoryId}: ${err.message}`);
    }

-   // Distill atomic facts in the background — only for raw ingestions
-   // (skip if this call is already storing a distilled atom or user opted out).
+   // Distill atomic facts — only for raw ingestions (skip if this call is
+   // already storing a distilled atom or user opted out).
+   //
+   // On Cloudflare Workers: caller passes `waitUntil` so distill runs past
+   // the handler return (without waitUntil the runtime kills unreferenced
+   // promises). On Node / local dev / test: we await inline so distill
+   // actually completes before ingest() returns.
    if (opts.distill !== false && !opts.sourceId) {
      const distillPromise = distill(db, ai, llm, memoryId, content, {
        ...opts,
        logger: log,
      }).catch((err) => log(`distill failed for ${memoryId}: ${err.message}`));
      if (typeof opts.waitUntil === "function") opts.waitUntil(distillPromise);
+     else await distillPromise;
    }

    return { id: memoryId, content, layerId };
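
On Cloudflare Workers the fast-return path survives because the handler hands `waitUntil` to ingest. A sketch of that wiring under the signature above; the handler body and the `db`/`ai`/`llm` setup are illustrative, and binding `waitUntil` to its context is a general Workers precaution rather than something this diff shows:

```js
export default {
  async fetch(request, env, ctx) {
    const { content, clientId } = await request.json();

    // With waitUntil supplied, distill is scheduled past the response
    // instead of being awaited inline, so the store returns fast.
    const memory = await ingest(db, ai, llm, content, {
      clientId,
      waitUntil: ctx.waitUntil.bind(ctx), // bound: waitUntil needs its ExecutionContext
    });

    return Response.json({ id: memory.id, layerId: memory.layerId });
  },
};
```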
@@ -34,6 +34,11 @@ const DEFAULT_WEIGHTS = {
  * @param {boolean} [opts.dedupeBySource=true] - When an atom matches,
  * drop its raw source memory from the results (atoms are already
  * distillations of the source, so returning both is redundant).
+ * @param {boolean} [opts.hydrateAtomSources=false] - Opt-in. For each
+ * matched atom, also fetch its source raw by id and append to results
+ * (deduped by id). Useful when downstream needs full-detail context
+ * (dates, names, quotes) that atoms decontextualize away. Not
+ * meaningful when dedupeBySource is also on.
  * @param {Function} [opts.logger] - Optional logger
  * @returns {Promise<Array>} Scored memory results
  */
@@ -159,6 +164,28 @@ export async function search(db, ai, query, opts = {}) {
    }
  }

+   // Hydrate: for each matched atom, also fetch and append its source raw
+   // (deduped by id). Atoms often drop specifics (dates, names); surfacing
+   // the raw gives the LLM consumer full context alongside the focused atom.
+   // Opt-in via opts.hydrateAtomSources. Not meaningful with dedupeBySource.
+   if (opts.hydrateAtomSources === true) {
+     const existingIds = new Set(filtered.map((r) => r.id));
+     const missingSourceIds = [
+       ...new Set(
+         filtered
+           .filter((r) => r.source_id && !existingIds.has(r.source_id))
+           .map((r) => r.source_id)
+       ),
+     ];
+     if (missingSourceIds.length > 0) {
+       const hydrated = await db(
+         `SELECT * FROM memory_nodes WHERE id = ANY($1)`,
+         [missingSourceIds]
+       );
+       filtered = filtered.concat(hydrated.rows || []);
+     }
+   }
+
    // Increment access counts
    const ids = filtered.map((r) => r.id);
    if (ids.length) {
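
Taken together with the `dedupeBySource` default documented above, the new flag only changes results when deduping is off. A usage sketch, with `db` and `ai` standing in for real clients:

```js
const results = await search(db, ai, "what does Phil drink?", {
  clientId: "client-1",
  dedupeBySource: false,    // per the JSDoc above, hydration is not meaningful with dedupe on
  hydrateAtomSources: true, // append each matched atom's source raw, deduped by id
});

// Matched atoms arrive scored; hydrated raws are appended afterwards by a
// plain SELECT, so they carry node fields but no computed final_score.
for (const row of results) {
  const label = row.final_score != null ? row.final_score.toFixed(2) : "raw";
  console.log(`[${label}] ${row.content}`);
}
```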
@@ -347,7 +347,7 @@ async function main() {
    const health = {
      status: "ok",
      client: CLIENT_ID,
-     version: "0.5.3",
+     version: "0.5.4",
      search: "text",
      db: false,
      ollama: false,