@pentatonic-ai/ai-agent-sdk 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/packages/memory/openclaw-plugin/__tests__/indicator.test.js +9 -10
- package/packages/memory/openclaw-plugin/index.js +1 -3
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory/src/__tests__/api-contract.test.js +56 -0
- package/packages/memory/src/ai.js +52 -25
- package/packages/memory/src/distill.js +29 -4
- package/packages/memory/src/openclaw/index.js +1 -3
- package/packages/memory/src/server.js +1 -1
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.5.4",
+  "version": "0.5.6",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",
package/packages/memory/openclaw-plugin/__tests__/indicator.test.js
CHANGED

@@ -77,7 +77,7 @@ describe("memory-used indicator — hosted mode", () => {
     });

     expect(result.systemPromptAddition).toMatch(/🧠/);
-    expect(result.systemPromptAddition).toMatch(/
+    expect(result.systemPromptAddition).toMatch(/Matched 2 memories from Pentatonic Memory/);
     expect(result.systemPromptAddition).toMatch(/append exactly this footer/);
   });

@@ -90,8 +90,8 @@ describe("memory-used indicator — hosted mode", () => {
       messages: [{ role: "user", content: "query" }],
     });

-    expect(result.systemPromptAddition).toMatch(/
-    expect(result.systemPromptAddition).not.toMatch(/
+    expect(result.systemPromptAddition).toMatch(/Matched 1 memory from Pentatonic Memory/);
+    expect(result.systemPromptAddition).not.toMatch(/Matched 1 memories/);
   });

   it("omits the indicator instruction when show_memory_indicator is false", async () => {

@@ -123,10 +123,10 @@ describe("memory-used indicator — hosted mode", () => {
     expect(result.systemPromptAddition).toBeUndefined();
   });

-  it("instructs the LLM to
-    //
-    //
-    //
+  it("always instructs the LLM to append the footer when memories were retrieved", async () => {
+    // Removed the "omit if irrelevant" escape hatch so users always get
+    // a visible signal when memory was consulted — even when retrieval
+    // was poor. Surfaces retrieval quality instead of hiding it.
     mockFetch([{ id: "m1", content: "Phil likes cheese", similarity: 0.4 }]);
     const engine = makeEngine();

@@ -135,8 +135,7 @@ describe("memory-used indicator — hosted mode", () => {
       messages: [{ role: "user", content: "query" }],
     });

-    expect(result.systemPromptAddition).toMatch(
-
-    );
+    expect(result.systemPromptAddition).not.toMatch(/omit the footer/);
+    expect(result.systemPromptAddition).toMatch(/append exactly this footer/);
   });
 });
package/packages/memory/openclaw-plugin/index.js
CHANGED

@@ -656,9 +656,7 @@ export default {
         "",
         `After your reply, on a new line, append exactly this footer (no other prefix, no trailing content):`,
         `—`,
-        `🧠
-        "",
-        `If the memories above were not relevant to your reply, omit the footer.`,
+        `🧠 _Matched ${results.length} memor${results.length === 1 ? "y" : "ies"} from Pentatonic Memory_`,
       ]
       : [];
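For reference, the new footer template pluralizes inline by splitting the word at its shared stem, which is what the updated tests above assert. A minimal standalone sketch (the footer helper is illustrative, not part of the package):

// Illustrative only: same template expression as the plugin's footer line.
const footer = (n) =>
  `🧠 _Matched ${n} memor${n === 1 ? "y" : "ies"} from Pentatonic Memory_`;

footer(1); // "🧠 _Matched 1 memory from Pentatonic Memory_"
footer(2); // "🧠 _Matched 2 memories from Pentatonic Memory_"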
package/packages/memory/openclaw-plugin/openclaw.plugin.json
CHANGED

@@ -2,7 +2,7 @@
   "id": "pentatonic-memory",
   "name": "Pentatonic Memory",
   "description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
-  "version": "0.5.
+  "version": "0.5.2",
   "kind": "context-engine",
   "configSchema": {
     "type": "object",
package/packages/memory/openclaw-plugin/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/openclaw-memory-plugin",
-  "version": "0.8.
+  "version": "0.8.2",
   "description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
   "type": "module",
   "main": "index.js",
package/packages/memory/src/__tests__/api-contract.test.js
CHANGED

@@ -263,6 +263,62 @@ describe("createAIClient", () => {
     await client.chat([{ role: "user", content: "q" }]);
     expect(hitUrl).toBe("http://localhost:11434/v1/chat/completions");
   });
+
+  it("embedBatch sends all inputs in one HTTP call", async () => {
+    let callCount = 0;
+    let lastBody;
+    globalThis.fetch = async (_url, opts) => {
+      callCount++;
+      lastBody = JSON.parse(opts.body);
+      return {
+        ok: true,
+        json: async () => ({
+          data: lastBody.input.map((_, i) => ({
+            embedding: [0.1, 0.2, 0.3],
+            index: i,
+          })),
+        }),
+      };
+    };
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "m",
+    });
+    const out = await client.embedBatch(["a", "b", "c"], "passage");
+    expect(callCount).toBe(1);
+    expect(lastBody.input).toEqual(["a", "b", "c"]);
+    expect(out.length).toBe(3);
+    expect(out.every((r) => r.embedding.length === 3)).toBe(true);
+  });
+
+  it("embedBatch returns nulls on non-2xx without throwing", async () => {
+    globalThis.fetch = async () => ({ ok: false, json: async () => ({}) });
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "m",
+    });
+    const out = await client.embedBatch(["a", "b"]);
+    expect(out).toEqual([null, null]);
+  });
+
+  it("embedBatch parses Ollama/Pentatonic-style {embeddings: [[...]]} response", async () => {
+    globalThis.fetch = async () => ({
+      ok: true,
+      json: async () => ({
+        embeddings: [
+          [0.1, 0.2],
+          [0.3, 0.4],
+        ],
+      }),
+    });
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "m",
+    });
+    const out = await client.embedBatch(["x", "y"]);
+    expect(out[0].embedding).toEqual([0.1, 0.2]);
+    expect(out[1].embedding).toEqual([0.3, 0.4]);
+  });
 });

 // --- Search options contract ---
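The three new tests pin down the embedBatch contract: one HTTP call for N inputs, null placeholders instead of throws on failure, and both provider response shapes. As a hedged reference, a standalone sketch of the shape normalization they exercise (extractVectors is an illustrative name, not the package's export):

// OpenAI-compatible response: { data: [{ embedding, index }, ...] }
// Ollama / Pentatonic gateway response: { embeddings: [[...], [...]] }
function extractVectors(data) {
  return data.data?.map((d) => d.embedding) || data.embeddings || [];
}

extractVectors({ data: [{ embedding: [0.1, 0.2], index: 0 }] }); // [[0.1, 0.2]]
extractVectors({ embeddings: [[0.1, 0.2], [0.3, 0.4]] });        // [[0.1, 0.2], [0.3, 0.4]]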
package/packages/memory/src/ai.js
CHANGED

@@ -45,6 +45,40 @@ export function createAIClient(config) {
   const chatPath = stripLeading(config.chatPath || "chat/completions");
   const baseUrl = stripTrailing(config.url);

+  /**
+   * Send an embedding request with N inputs. Shared by embed() and
+   * embedBatch(). Returns an array of { embedding, dimensions, model } or
+   * nulls (one per input, preserving order).
+   */
+  async function rawEmbed(texts, inputType) {
+    if (!texts.length) return [];
+    try {
+      const res = await fetch(`${baseUrl}/${embeddingPath}`, {
+        method: "POST",
+        headers,
+        body: JSON.stringify({
+          input: texts.map((t) => (t ?? "").substring(0, 8192)),
+          model: config.model,
+          input_type: inputType,
+        }),
+        signal: AbortSignal.timeout(30000),
+      });
+      if (!res.ok) return texts.map(() => null);
+      const data = await res.json();
+      // OpenAI-compat: data.data = [{embedding, index}, ...]
+      // Pentatonic gateway / Ollama: data.embeddings = [[...], [...], ...]
+      const vectors =
+        data.data?.map((d) => d.embedding) || data.embeddings || [];
+      return texts.map((_, i) => {
+        const embedding = vectors[i];
+        if (!embedding) return null;
+        return { embedding, dimensions: embedding.length, model: config.model };
+      });
+    } catch {
+      return texts.map(() => null);
+    }
+  }
+
   return {
     /**
      * Generate an embedding vector for text.

@@ -54,32 +88,25 @@ export function createAIClient(config) {
      * @returns {Promise<{embedding: number[], dimensions: number, model: string} | null>}
      */
     async embed(text, inputType = "passage") {
-
-
-
-        headers,
-        body: JSON.stringify({
-          input: [text.substring(0, 8192)],
-          model: config.model,
-          input_type: inputType,
-        }),
-        signal: AbortSignal.timeout(30000),
-      });
-
-      if (!res.ok) return null;
-
-      const data = await res.json();
-      const embedding = data.data?.[0]?.embedding || data.embeddings?.[0];
-      if (!embedding) return null;
+      const results = await rawEmbed([text], inputType);
+      return results[0];
+    },

-
-
-
-
-
-
-
+    /**
+     * Generate embeddings for N texts in a single HTTP round-trip. Returns
+     * an array the same length as the input; each entry is either the
+     * embedding object or null on failure.
+     *
+     * Batching matters under load — one call instead of N cuts GPU overhead
+     * and downstream queueing. Used by distill() to embed all atoms from a
+     * raw memory in one shot rather than N serial calls.
+     *
+     * @param {string[]} texts
+     * @param {string} [inputType="passage"]
+     * @returns {Promise<Array<{embedding: number[], dimensions: number, model: string} | null>>}
+     */
+    async embedBatch(texts, inputType = "passage") {
+      return rawEmbed(texts, inputType);
     },

     /**
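With this refactor, embed() becomes a one-element wrapper over the same rawEmbed path as embedBatch(). A hypothetical usage sketch (the URL and model values are placeholders mirroring the tests):

// Sketch only: assumes an ESM module with top-level await.
const ai = createAIClient({ url: "http://localhost:11434/v1", model: "m" });

// Single text: resolves to { embedding, dimensions, model } or null.
const one = await ai.embed("hello world", "query");

// N texts in one HTTP round-trip: resolves to an array of the same
// length, with null in any slot that failed.
const many = await ai.embedBatch(["fact one", "fact two"], "passage");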
package/packages/memory/src/distill.js
CHANGED

@@ -104,8 +104,27 @@ export async function distill(db, ai, llm, sourceId, content, opts = {}) {
   }
   const layerId = layerResult.rows[0].id;

+  // Batch-embed all atoms in one HTTP call. Under load this is a big
+  // win over N serial embed calls — one GPU forward pass instead of N,
+  // less downstream queueing.
+  let embeddings;
+  if (ai.embedBatch) {
+    try {
+      embeddings = await ai.embedBatch(facts, "passage");
+    } catch (err) {
+      log(`distill: batch embed failed: ${err.message}`);
+      embeddings = facts.map(() => null);
+    }
+  } else {
+    // Older AI clients without embedBatch — fall through to per-atom embed
+    // inside the loop below. Kept for backwards compat with any custom
+    // client passed into createMemorySystem.
+    embeddings = null;
+  }
+
   const stored = [];
-  for (
+  for (let i = 0; i < facts.length; i++) {
+    const fact = facts[i];
     try {
       const atomId = `mem_${crypto.randomUUID()}`;

@@ -124,9 +143,13 @@ export async function distill(db, ai, llm, sourceId, content, opts = {}) {
         ]
       );

-      //
+      // Attach embedding — from the batch when available, else fall back
+      // to a per-atom call.
       try {
-
+        let embResult = embeddings ? embeddings[i] : null;
+        if (!embResult && !embeddings) {
+          embResult = await ai.embed(fact, "passage");
+        }
         if (embResult?.embedding) {
           await db(
             `UPDATE memory_nodes SET embedding = $1, updated_at = NOW() WHERE id = $2`,

@@ -137,7 +160,9 @@ export async function distill(db, ai, llm, sourceId, content, opts = {}) {
         log(`distill: embedding failed for ${atomId}: ${err.message}`);
       }

-      // HyDE (2 queries for atoms — they're already focused)
+      // HyDE (2 queries for atoms — they're already focused).
+      // Still per-atom — chat completions don't share a batch surface
+      // across providers the way embeddings do.
       try {
         const queries = await generateHypotheticalQueries(llm, fact);
         const trimmed = queries.slice(0, 2);
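The ai.embedBatch check above is plain capability detection, so custom clients that predate the batch API keep working unchanged. The same pattern in isolation (a sketch; embedAll is a hypothetical name, not package code):

// Prefer the batch API when the client exposes it; otherwise fall back
// to N serial embed() calls that produce the same output shape.
async function embedAll(ai, texts) {
  if (typeof ai.embedBatch === "function") {
    return ai.embedBatch(texts, "passage"); // one HTTP round-trip
  }
  const out = [];
  for (const t of texts) out.push(await ai.embed(t, "passage"));
  return out;
}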
package/packages/memory/src/openclaw/index.js
CHANGED

@@ -452,9 +452,7 @@ function createHostedContextEngine(config, opts = {}) {
         "",
         `After your reply, on a new line, append exactly this footer (no other prefix, no trailing content):`,
         `—`,
-        `🧠
-        "",
-        `If the memories above were not relevant to your reply, omit the footer.`,
+        `🧠 _Matched ${results.length} memor${results.length === 1 ? "y" : "ies"} from Pentatonic Memory_`,
       ].join("\n")
       : "";