@pentatonic-ai/ai-agent-sdk 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -73,6 +73,8 @@ That's it. The plugin hooks automatically search memories on every prompt and st
|
|
|
73
73
|
- **Distilled memory** -- a background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. Default-on; the raw turn is still preserved.
|
|
74
74
|
- **Decay and consolidation** -- memories fade over time; frequently accessed ones get promoted
|
|
75
75
|
|
|
76
|
+
> **Store latency note (v0.5.4+):** on the local memory server, `store_memory` now awaits distillation before returning instead of running it fire-and-forget. This fixed a bug where distillation was being killed mid-flight (atoms never got embeddings, so they were unreachable by semantic search), but it means stores now take as long as your configured LLM takes to produce atoms — typically 5–30s on `llama3.2:3b`, up to the `chat()` timeout ceiling (60s default, overridable via `opts.timeout`). Cloudflare Worker deployments pass `ctx.waitUntil` and still return fast. Set `opts.distill: false` on the ingest call if you want the old fast-return behaviour at the cost of no atoms.
|
|
77
|
+
|
|
76
78
|
### Change models
|
|
77
79
|
|
|
78
80
|
```bash
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.5.2",
|
|
3
|
+
"version": "0.5.4",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -283,6 +283,180 @@ describe("search options contract", () => {
|
|
|
283
283
|
|
|
284
284
|
expect(Array.isArray(results)).toBe(true);
|
|
285
285
|
});
|
|
286
|
+
|
|
287
|
+
it("SQL includes atomBoost and verbosityPenalty terms", async () => {
|
|
288
|
+
const seenSqls = [];
|
|
289
|
+
const mockDb = async (sql) => {
|
|
290
|
+
seenSqls.push(sql);
|
|
291
|
+
if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
|
|
292
|
+
return { rows: [] };
|
|
293
|
+
};
|
|
294
|
+
const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
|
|
295
|
+
|
|
296
|
+
await search(mockDb, mockAi, "q", { clientId: "c" });
|
|
297
|
+
|
|
298
|
+
const scoringSql = seenSqls.find((s) => s.includes("final_score"));
|
|
299
|
+
expect(scoringSql).toBeDefined();
|
|
300
|
+
expect(scoringSql).toMatch(/source_id IS NOT NULL/);
|
|
301
|
+
expect(scoringSql).toMatch(/length\(mn\.content\)/);
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
it("dedupeBySource drops raw rows whose id is a source of a matched atom", async () => {
|
|
305
|
+
const rows = [
|
|
306
|
+
{ id: "raw-1", client_id: "c", layer_id: "l", content: "long raw turn",
|
|
307
|
+
confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: null },
|
|
308
|
+
{ id: "atom-1", client_id: "c", layer_id: "l", content: "Phil owns a Subaru",
|
|
309
|
+
confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.8, source_id: "raw-1" },
|
|
310
|
+
];
|
|
311
|
+
let searchCallCount = 0;
|
|
312
|
+
const mockDb = async (sql) => {
|
|
313
|
+
if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
|
|
314
|
+
if (sql.includes("final_score")) {
|
|
315
|
+
searchCallCount++;
|
|
316
|
+
return { rows };
|
|
317
|
+
}
|
|
318
|
+
return { rows: [] };
|
|
319
|
+
};
|
|
320
|
+
const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
|
|
321
|
+
|
|
322
|
+
const out = await search(mockDb, mockAi, "q", { clientId: "c", minScore: 0 });
|
|
323
|
+
|
|
324
|
+
expect(searchCallCount).toBe(1);
|
|
325
|
+
expect(out.length).toBe(1);
|
|
326
|
+
expect(out[0].id).toBe("atom-1");
|
|
327
|
+
expect(out[0].source_id).toBe("raw-1");
|
|
328
|
+
});
|
|
329
|
+
|
|
330
|
+
it("dedupeBySource: false keeps both atom and its raw source", async () => {
|
|
331
|
+
const rows = [
|
|
332
|
+
{ id: "raw-1", client_id: "c", layer_id: "l", content: "long",
|
|
333
|
+
confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: null },
|
|
334
|
+
{ id: "atom-1", client_id: "c", layer_id: "l", content: "short",
|
|
335
|
+
confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.8, source_id: "raw-1" },
|
|
336
|
+
];
|
|
337
|
+
const mockDb = async (sql) => {
|
|
338
|
+
if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
|
|
339
|
+
if (sql.includes("final_score")) return { rows };
|
|
340
|
+
return { rows: [] };
|
|
341
|
+
};
|
|
342
|
+
const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
|
|
343
|
+
|
|
344
|
+
const out = await search(mockDb, mockAi, "q", {
|
|
345
|
+
clientId: "c",
|
|
346
|
+
minScore: 0,
|
|
347
|
+
dedupeBySource: false,
|
|
348
|
+
});
|
|
349
|
+
|
|
350
|
+
expect(out.length).toBe(2);
|
|
351
|
+
expect(out.map((r) => r.id).sort()).toEqual(["atom-1", "raw-1"]);
|
|
352
|
+
});
|
|
353
|
+
|
|
354
|
+
it("search results include source_id (null for raw, set for atoms)", async () => {
|
|
355
|
+
const rows = [
|
|
356
|
+
{ id: "atom-1", client_id: "c", layer_id: "l", content: "atom",
|
|
357
|
+
confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: "raw-1" },
|
|
358
|
+
];
|
|
359
|
+
const mockDb = async (sql) => {
|
|
360
|
+
if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
|
|
361
|
+
if (sql.includes("final_score")) return { rows };
|
|
362
|
+
return { rows: [] };
|
|
363
|
+
};
|
|
364
|
+
const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
|
|
365
|
+
|
|
366
|
+
const out = await search(mockDb, mockAi, "q", { clientId: "c", minScore: 0 });
|
|
367
|
+
expect(out[0].source_id).toBe("raw-1");
|
|
368
|
+
});
|
|
369
|
+
|
|
370
|
+
it("hydrateAtomSources: true fetches and appends source raws for matched atoms", async () => {
|
|
371
|
+
const matchedRows = [
|
|
372
|
+
{ id: "atom-1", client_id: "c", layer_id: "l", content: "Caroline went to support group",
|
|
373
|
+
confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: "raw-1" },
|
|
374
|
+
];
|
|
375
|
+
const hydratedRaw = {
|
|
376
|
+
id: "raw-1", client_id: "c", layer_id: "l", source_id: null,
|
|
377
|
+
content: "[Date: 8 May 2023] Caroline: I went to the LGBTQ support group...",
|
|
378
|
+
confidence: 1, decay_rate: 0.05, access_count: 0,
|
|
379
|
+
};
|
|
380
|
+
const mockDb = async (sql, params) => {
|
|
381
|
+
if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
|
|
382
|
+
if (sql.includes("final_score")) return { rows: matchedRows };
|
|
383
|
+
if (sql.includes("id = ANY") && Array.isArray(params?.[0]) && params[0].includes("raw-1")) {
|
|
384
|
+
return { rows: [hydratedRaw] };
|
|
385
|
+
}
|
|
386
|
+
return { rows: [] };
|
|
387
|
+
};
|
|
388
|
+
const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
|
|
389
|
+
|
|
390
|
+
const out = await search(mockDb, mockAi, "q", {
|
|
391
|
+
clientId: "c",
|
|
392
|
+
minScore: 0,
|
|
393
|
+
dedupeBySource: false,
|
|
394
|
+
hydrateAtomSources: true,
|
|
395
|
+
});
|
|
396
|
+
|
|
397
|
+
expect(out.length).toBe(2);
|
|
398
|
+
expect(out.map((r) => r.id).sort()).toEqual(["atom-1", "raw-1"]);
|
|
399
|
+
const raw = out.find((r) => r.id === "raw-1");
|
|
400
|
+
expect(raw.content).toContain("8 May 2023");
|
|
401
|
+
});
|
|
402
|
+
|
|
403
|
+
it("hydrateAtomSources: false is a no-op (default)", async () => {
|
|
404
|
+
const rows = [
|
|
405
|
+
{ id: "atom-1", client_id: "c", layer_id: "l", content: "atom",
|
|
406
|
+
confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: "raw-1" },
|
|
407
|
+
];
|
|
408
|
+
let hydrateCalled = false;
|
|
409
|
+
const mockDb = async (sql) => {
|
|
410
|
+
if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
|
|
411
|
+
if (sql.includes("final_score")) return { rows };
|
|
412
|
+
if (sql.includes("SELECT * FROM memory_nodes WHERE id = ANY")) {
|
|
413
|
+
hydrateCalled = true;
|
|
414
|
+
return { rows: [] };
|
|
415
|
+
}
|
|
416
|
+
return { rows: [] };
|
|
417
|
+
};
|
|
418
|
+
const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
|
|
419
|
+
|
|
420
|
+
await search(mockDb, mockAi, "q", {
|
|
421
|
+
clientId: "c",
|
|
422
|
+
minScore: 0,
|
|
423
|
+
dedupeBySource: false,
|
|
424
|
+
});
|
|
425
|
+
|
|
426
|
+
expect(hydrateCalled).toBe(false);
|
|
427
|
+
});
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
describe("ingest default behavior", () => {
|
|
431
|
+
it("awaits distill when no waitUntil is passed (fixes fire-and-forget in local dev)", async () => {
|
|
432
|
+
let distillStarted = false;
|
|
433
|
+
let distillFinished = false;
|
|
434
|
+
const mockDb = async (sql) => {
|
|
435
|
+
if (sql.includes("SELECT id FROM memory_layers")) {
|
|
436
|
+
return { rows: [{ id: "layer-1" }] };
|
|
437
|
+
}
|
|
438
|
+
return { rows: [] };
|
|
439
|
+
};
|
|
440
|
+
const mockAi = { embed: async () => null };
|
|
441
|
+
const mockLlm = {
|
|
442
|
+
chat: async () => {
|
|
443
|
+
distillStarted = true;
|
|
444
|
+
// Simulate LLM latency
|
|
445
|
+
await new Promise((r) => setTimeout(r, 20));
|
|
446
|
+
distillFinished = true;
|
|
447
|
+
return "[]";
|
|
448
|
+
},
|
|
449
|
+
};
|
|
450
|
+
|
|
451
|
+
await ingest(mockDb, mockAi, mockLlm, "some content", {
|
|
452
|
+
clientId: "c",
|
|
453
|
+
// no waitUntil — distill must be awaited inline
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
// After ingest returns, distill must have finished
|
|
457
|
+
expect(distillStarted).toBe(true);
|
|
458
|
+
expect(distillFinished).toBe(true);
|
|
459
|
+
});
|
|
286
460
|
});
|
|
287
461
|
|
|
288
462
|
// --- Ingest options contract ---
|
|
@@ -89,6 +89,9 @@ export function createAIClient(config) {
|
|
|
89
89
|
* @param {object} [opts]
|
|
90
90
|
* @param {number} [opts.maxTokens=150]
|
|
91
91
|
* @param {number} [opts.temperature=0.7]
|
|
92
|
+
* @param {number} [opts.timeout=60000] - Request timeout in ms (default 60s). Longer chunks and
|
|
93
|
+
* smaller/local models routinely exceed 15s; 60s keeps distill/HyDE
|
|
94
|
+
* reliable on prod-class content while still aborting genuine hangs.
|
|
92
95
|
* @returns {Promise<string>} The assistant's response text
|
|
93
96
|
*/
|
|
94
97
|
async chat(messages, opts = {}) {
|
|
@@ -102,7 +105,7 @@ export function createAIClient(config) {
|
|
|
102
105
|
max_tokens: opts.maxTokens || 150,
|
|
103
106
|
temperature: opts.temperature ?? 0.7,
|
|
104
107
|
}),
|
|
105
|
-
signal: AbortSignal.timeout(opts.timeout ||
|
|
108
|
+
signal: AbortSignal.timeout(opts.timeout || 60000),
|
|
106
109
|
});
|
|
107
110
|
|
|
108
111
|
if (!res.ok) return "";
|
|
@@ -87,14 +87,20 @@ export async function ingest(db, ai, llm, content, opts = {}) {
|
|
|
87
87
|
log(`HyDE failed for ${memoryId}: ${err.message}`);
|
|
88
88
|
}
|
|
89
89
|
|
|
90
|
-
// Distill atomic facts
|
|
91
|
-
//
|
|
90
|
+
// Distill atomic facts — only for raw ingestions (skip if this call is
|
|
91
|
+
// already storing a distilled atom or user opted out).
|
|
92
|
+
//
|
|
93
|
+
// On Cloudflare Workers: caller passes `waitUntil` so distill runs past
|
|
94
|
+
// the handler return (without waitUntil the runtime kills unreferenced
|
|
95
|
+
// promises). On Node / local dev / test: we await inline so distill
|
|
96
|
+
// actually completes before ingest() returns.
|
|
92
97
|
if (opts.distill !== false && !opts.sourceId) {
|
|
93
98
|
const distillPromise = distill(db, ai, llm, memoryId, content, {
|
|
94
99
|
...opts,
|
|
95
100
|
logger: log,
|
|
96
101
|
}).catch((err) => log(`distill failed for ${memoryId}: ${err.message}`));
|
|
97
102
|
if (typeof opts.waitUntil === "function") opts.waitUntil(distillPromise);
|
|
103
|
+
else await distillPromise;
|
|
98
104
|
}
|
|
99
105
|
|
|
100
106
|
return { id: memoryId, content, layerId };
|
|
@@ -10,6 +10,11 @@ const DEFAULT_WEIGHTS = {
|
|
|
10
10
|
relevance: 0.6,
|
|
11
11
|
recency: 0.25,
|
|
12
12
|
frequency: 0.15,
|
|
13
|
+
// Boost distilled atoms — they're high signal per token by design.
|
|
14
|
+
atomBoost: 0.15,
|
|
15
|
+
// Penalty on verbose raw turns. Short focused memories rank higher.
|
|
16
|
+
// Atoms are exempt (penalty skipped when source_id IS NOT NULL).
|
|
17
|
+
verbosityPenalty: 0.1,
|
|
13
18
|
};
|
|
14
19
|
|
|
15
20
|
/**
|
|
@@ -25,6 +30,15 @@ const DEFAULT_WEIGHTS = {
|
|
|
25
30
|
* @param {number} [opts.minScore=0.5] - Minimum score threshold
|
|
26
31
|
* @param {string} [opts.userId] - Optional user scope
|
|
27
32
|
* @param {object} [opts.weights] - Override scoring weights
|
|
33
|
+
* (relevance, recency, frequency, atomBoost, verbosityPenalty)
|
|
34
|
+
* @param {boolean} [opts.dedupeBySource=true] - When an atom matches,
|
|
35
|
+
* drop its raw source memory from the results (atoms are already
|
|
36
|
+
* distillations of the source, so returning both is redundant).
|
|
37
|
+
* @param {boolean} [opts.hydrateAtomSources=false] - Opt-in. For each
|
|
38
|
+
* matched atom, also fetch its source raw by id and append to results
|
|
39
|
+
* (deduped by id). Useful when downstream needs full-detail context
|
|
40
|
+
* (dates, names, quotes) that atoms decontextualize away. Hydration
|
|
41
|
+
* re-adds any source that dedupeBySource dropped, so pair it with dedupeBySource: false.
|
|
28
42
|
* @param {Function} [opts.logger] - Optional logger
|
|
29
43
|
* @returns {Promise<Array>} Scored memory results
|
|
30
44
|
*/
|
|
@@ -107,7 +121,19 @@ export async function search(db, ai, query, opts = {}) {
|
|
|
107
121
|
${w.recency} * exp(
|
|
108
122
|
-0.01 * EXTRACT(EPOCH FROM NOW() - COALESCE(mn.last_accessed, mn.created_at)) / 3600
|
|
109
123
|
) +
|
|
110
|
-
${w.frequency} * (ln(mn.access_count + 1) / ln(ma.val + 1))
|
|
124
|
+
${w.frequency} * (ln(mn.access_count + 1) / ln(ma.val + 1)) +
|
|
125
|
+
${w.atomBoost} * (CASE WHEN mn.source_id IS NOT NULL THEN 1 ELSE 0 END) -
|
|
126
|
+
${w.verbosityPenalty} * (
|
|
127
|
+
CASE WHEN mn.source_id IS NULL THEN
|
|
128
|
+
LEAST(
|
|
129
|
+
GREATEST(
|
|
130
|
+
(ln(length(mn.content) + 1) - ln(200)) / (ln(10000) - ln(200)),
|
|
131
|
+
0
|
|
132
|
+
),
|
|
133
|
+
1
|
|
134
|
+
)
|
|
135
|
+
ELSE 0 END
|
|
136
|
+
)
|
|
111
137
|
) AS final_score
|
|
112
138
|
FROM memory_nodes mn
|
|
113
139
|
CROSS JOIN max_ac ma
|
|
@@ -123,10 +149,43 @@ export async function search(db, ai, query, opts = {}) {
|
|
|
123
149
|
|
|
124
150
|
const result = await db(sql, params);
|
|
125
151
|
|
|
126
|
-
|
|
152
|
+
let filtered = (result.rows || []).filter(
|
|
127
153
|
(r) => parseFloat(r.final_score) >= threshold
|
|
128
154
|
);
|
|
129
155
|
|
|
156
|
+
// De-dupe: when an atom matches, drop its raw source from the set.
|
|
157
|
+
// Default on; set opts.dedupeBySource: false to keep both.
|
|
158
|
+
if (opts.dedupeBySource !== false) {
|
|
159
|
+
const atomSources = new Set(
|
|
160
|
+
filtered.filter((r) => r.source_id).map((r) => r.source_id)
|
|
161
|
+
);
|
|
162
|
+
if (atomSources.size > 0) {
|
|
163
|
+
filtered = filtered.filter((r) => !atomSources.has(r.id));
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// Hydrate: for each matched atom, also fetch and append its source raw
|
|
168
|
+
// (deduped by id). Atoms often drop specifics (dates, names); surfacing
|
|
169
|
+
// the raw gives the LLM consumer full context alongside the focused atom.
|
|
170
|
+
// Opt-in via opts.hydrateAtomSources. Re-adds sources dropped by dedupeBySource, so pair with dedupeBySource: false.
|
|
171
|
+
if (opts.hydrateAtomSources === true) {
|
|
172
|
+
const existingIds = new Set(filtered.map((r) => r.id));
|
|
173
|
+
const missingSourceIds = [
|
|
174
|
+
...new Set(
|
|
175
|
+
filtered
|
|
176
|
+
.filter((r) => r.source_id && !existingIds.has(r.source_id))
|
|
177
|
+
.map((r) => r.source_id)
|
|
178
|
+
),
|
|
179
|
+
];
|
|
180
|
+
if (missingSourceIds.length > 0) {
|
|
181
|
+
const hydrated = await db(
|
|
182
|
+
`SELECT * FROM memory_nodes WHERE id = ANY($1)`,
|
|
183
|
+
[missingSourceIds]
|
|
184
|
+
);
|
|
185
|
+
filtered = filtered.concat(hydrated.rows || []);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
|
|
130
189
|
// Increment access counts
|
|
131
190
|
const ids = filtered.map((r) => r.id);
|
|
132
191
|
if (ids.length) {
|
|
@@ -182,6 +241,7 @@ function mapRow(row) {
|
|
|
182
241
|
client_id: row.client_id,
|
|
183
242
|
user_id: row.user_id || null,
|
|
184
243
|
layer_id: row.layer_id,
|
|
244
|
+
source_id: row.source_id || null,
|
|
185
245
|
content: row.content,
|
|
186
246
|
metadata:
|
|
187
247
|
typeof row.metadata === "string"
|