@pentatonic-ai/ai-agent-sdk 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/package.json +1 -1
- package/packages/memory/openclaw-plugin/__tests__/query-expansion.test.js +193 -0
- package/packages/memory/openclaw-plugin/index.js +41 -2
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +1 -1
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory/src/__tests__/api-contract.test.js +119 -0
- package/packages/memory/src/ai.js +25 -2
- package/packages/memory/src/ingest.js +9 -3
- package/packages/memory/src/server.js +6 -1
package/README.md
CHANGED
|
@@ -70,6 +70,7 @@ That's it. The plugin hooks automatically search memories on every prompt and st
|
|
|
70
70
|
- **Automatic memory** -- every conversation turn is stored with embeddings and HyDE query expansion
|
|
71
71
|
- **Semantic search** -- multi-signal retrieval combining vector similarity, BM25 full-text, recency decay, and access frequency
|
|
72
72
|
- **Memory layers** -- episodic (recent), semantic (consolidated), procedural (how-to), working (temporary)
|
|
73
|
+
- **Distilled memory** -- a background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. Default-on; the raw turn is still preserved.
|
|
73
74
|
- **Decay and consolidation** -- memories fade over time; frequently accessed ones get promoted
|
|
74
75
|
|
|
75
76
|
### Change models
|
|
@@ -195,7 +196,7 @@ openclaw pentatonic-memory local
|
|
|
195
196
|
|
|
196
197
|
OpenClaw's context engine hooks fire on every lifecycle event:
|
|
197
198
|
|
|
198
|
-
- **Ingest** -- every user and assistant message is stored with embeddings and HyDE query expansion
|
|
199
|
+
- **Ingest** -- every user and assistant message is stored with embeddings and HyDE query expansion, then distilled into atomic facts in the background (see [Distilled memory](#what-you-get))
|
|
199
200
|
- **Assemble** -- relevant memories are injected as system prompt context before every model run
|
|
200
201
|
- **Compact** -- decay cycle runs when the context window fills
|
|
201
202
|
- **After turn** -- high-access memories get consolidated to the semantic layer
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.2",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query expansion fallback tests.
|
|
3
|
+
*
|
|
4
|
+
* When the raw user prompt returns no memories, the plugin retries once
|
|
5
|
+
* with a keyword-distilled form. This recovers matches for verbose
|
|
6
|
+
* natural-language prompts that fall below the semantic threshold.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import plugin, { extractSearchKeywords } from "../index.js";
|
|
10
|
+
|
|
11
|
+
const realFetch = globalThis.fetch;
|
|
12
|
+
|
|
13
|
+
afterEach(() => {
|
|
14
|
+
globalThis.fetch = realFetch;
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
function makeEngine(extraConfig = {}) {
|
|
18
|
+
let factory;
|
|
19
|
+
plugin.register({
|
|
20
|
+
pluginConfig: {
|
|
21
|
+
tes_endpoint: "https://x.test",
|
|
22
|
+
tes_client_id: "c",
|
|
23
|
+
tes_api_key: "tes_c_xyz",
|
|
24
|
+
...extraConfig,
|
|
25
|
+
},
|
|
26
|
+
registerTool: () => {},
|
|
27
|
+
registerContextEngine: (_name, fn) => {
|
|
28
|
+
factory = fn;
|
|
29
|
+
},
|
|
30
|
+
});
|
|
31
|
+
if (!factory) throw new Error("plugin did not register a context engine");
|
|
32
|
+
return factory();
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
describe("extractSearchKeywords", () => {
|
|
36
|
+
it("strips stopwords from verbose prompts", () => {
|
|
37
|
+
const out = extractSearchKeywords(
|
|
38
|
+
"when I was working in the thing-event-system, I copied migrations, what were they?"
|
|
39
|
+
);
|
|
40
|
+
expect(out).toMatch(/thing-event-system/);
|
|
41
|
+
expect(out).toMatch(/migrations/);
|
|
42
|
+
expect(out).not.toMatch(/\bwhen\b/);
|
|
43
|
+
expect(out).not.toMatch(/\bwhat\b/);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
it("preserves hyphenated compounds", () => {
|
|
47
|
+
expect(extractSearchKeywords("where is thing-event-system?")).toMatch(
|
|
48
|
+
/thing-event-system/
|
|
49
|
+
);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it("returns null when the distilled form equals the input", () => {
|
|
53
|
+
expect(extractSearchKeywords("deep-memory migrations")).toBeNull();
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
it("returns null when the prompt is only stopwords", () => {
|
|
57
|
+
expect(extractSearchKeywords("what were they?")).toBeNull();
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it("returns null for non-string input", () => {
|
|
61
|
+
expect(extractSearchKeywords(null)).toBeNull();
|
|
62
|
+
expect(extractSearchKeywords(undefined)).toBeNull();
|
|
63
|
+
});
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
describe("assemble — keyword retry fallback (hosted mode)", () => {
|
|
67
|
+
it("retries with distilled keywords when raw prompt misses", async () => {
|
|
68
|
+
const queries = [];
|
|
69
|
+
globalThis.fetch = async (_url, init) => {
|
|
70
|
+
const body = JSON.parse(init.body);
|
|
71
|
+
const q = body.variables.query;
|
|
72
|
+
queries.push(q);
|
|
73
|
+
const isFirst = queries.length === 1;
|
|
74
|
+
return {
|
|
75
|
+
ok: true,
|
|
76
|
+
status: 200,
|
|
77
|
+
json: async () => ({
|
|
78
|
+
data: {
|
|
79
|
+
semanticSearchMemories: isFirst
|
|
80
|
+
? []
|
|
81
|
+
: [{ id: "m1", content: "matched on retry", similarity: 0.7 }],
|
|
82
|
+
},
|
|
83
|
+
}),
|
|
84
|
+
};
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
const engine = makeEngine();
|
|
88
|
+
const result = await engine.assemble({
|
|
89
|
+
sessionId: "s",
|
|
90
|
+
messages: [
|
|
91
|
+
{
|
|
92
|
+
role: "user",
|
|
93
|
+
content:
|
|
94
|
+
"when I was working in the thing-event-system, what were those migration changes again?",
|
|
95
|
+
},
|
|
96
|
+
],
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
expect(queries).toHaveLength(2);
|
|
100
|
+
expect(queries[0]).toMatch(/when I was working/);
|
|
101
|
+
expect(queries[1]).not.toMatch(/\bwhen\b/);
|
|
102
|
+
expect(queries[1]).toMatch(/thing-event-system/);
|
|
103
|
+
expect(queries[1]).toMatch(/migration/);
|
|
104
|
+
expect(result.systemPromptAddition).toMatch(/matched on retry/);
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
it("does not retry when the raw prompt already returns results", async () => {
|
|
108
|
+
const queries = [];
|
|
109
|
+
globalThis.fetch = async (_url, init) => {
|
|
110
|
+
queries.push(JSON.parse(init.body).variables.query);
|
|
111
|
+
return {
|
|
112
|
+
ok: true,
|
|
113
|
+
status: 200,
|
|
114
|
+
json: async () => ({
|
|
115
|
+
data: {
|
|
116
|
+
semanticSearchMemories: [
|
|
117
|
+
{ id: "m1", content: "hit", similarity: 0.9 },
|
|
118
|
+
],
|
|
119
|
+
},
|
|
120
|
+
}),
|
|
121
|
+
};
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
const engine = makeEngine();
|
|
125
|
+
await engine.assemble({
|
|
126
|
+
sessionId: "s",
|
|
127
|
+
messages: [{ role: "user", content: "thing-event-system migrations" }],
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
expect(queries).toHaveLength(1);
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
it("does not retry when distilled query equals the raw query", async () => {
|
|
134
|
+
const queries = [];
|
|
135
|
+
globalThis.fetch = async (_url, init) => {
|
|
136
|
+
queries.push(JSON.parse(init.body).variables.query);
|
|
137
|
+
return {
|
|
138
|
+
ok: true,
|
|
139
|
+
status: 200,
|
|
140
|
+
json: async () => ({ data: { semanticSearchMemories: [] } }),
|
|
141
|
+
};
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
const engine = makeEngine();
|
|
145
|
+
await engine.assemble({
|
|
146
|
+
sessionId: "s",
|
|
147
|
+
messages: [{ role: "user", content: "deep-memory migrations" }],
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
expect(queries).toHaveLength(1);
|
|
151
|
+
});
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
describe("assemble — keyword retry fallback (local mode)", () => {
|
|
155
|
+
it("retries via /search endpoint when raw query returns nothing", async () => {
|
|
156
|
+
const queries = [];
|
|
157
|
+
globalThis.fetch = async (_url, init) => {
|
|
158
|
+
const body = JSON.parse(init.body);
|
|
159
|
+
queries.push(body.query);
|
|
160
|
+
const isFirst = queries.length === 1;
|
|
161
|
+
return {
|
|
162
|
+
ok: true,
|
|
163
|
+
status: 200,
|
|
164
|
+
json: async () => ({
|
|
165
|
+
results: isFirst
|
|
166
|
+
? []
|
|
167
|
+
: [{ id: "m1", content: "local hit", similarity: 0.6 }],
|
|
168
|
+
}),
|
|
169
|
+
};
|
|
170
|
+
};
|
|
171
|
+
|
|
172
|
+
let factory;
|
|
173
|
+
plugin.register({
|
|
174
|
+
pluginConfig: {}, // no tes_* creds → local mode
|
|
175
|
+
registerTool: () => {},
|
|
176
|
+
registerContextEngine: (_name, fn) => {
|
|
177
|
+
factory = fn;
|
|
178
|
+
},
|
|
179
|
+
});
|
|
180
|
+
const engine = factory();
|
|
181
|
+
|
|
182
|
+
const result = await engine.assemble({
|
|
183
|
+
sessionId: "s",
|
|
184
|
+
messages: [
|
|
185
|
+
{ role: "user", content: "what were the migration changes again?" },
|
|
186
|
+
],
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
expect(queries).toHaveLength(2);
|
|
190
|
+
expect(queries[1]).toMatch(/migration/);
|
|
191
|
+
expect(result.systemPromptAddition).toMatch(/local hit/);
|
|
192
|
+
});
|
|
193
|
+
});
|
|
@@ -88,6 +88,33 @@ const stats = {
|
|
|
88
88
|
setupPrompted: false,
|
|
89
89
|
};
|
|
90
90
|
|
|
91
|
+
// --- Query keyword extraction ---
|
|
92
|
+
// Natural-language prompts ("what were those changes again?") often fall
|
|
93
|
+
// below the semantic threshold even when relevant memories exist. We
|
|
94
|
+
// drop stopwords and retry with the keyword-distilled form.
|
|
95
|
+
const STOPWORDS = new Set([
|
|
96
|
+
"a", "am", "an", "and", "are", "as", "at", "be", "been", "but", "by",
|
|
97
|
+
"can", "did", "do", "does", "for", "from", "had", "has", "have", "he",
|
|
98
|
+
"her", "him", "his", "how", "i", "if", "in", "into", "is", "it", "its",
|
|
99
|
+
"just", "like", "made", "me", "my", "need", "needed", "of", "on", "or",
|
|
100
|
+
"our", "out", "over", "she", "so", "some", "than", "that", "the",
|
|
101
|
+
"their", "them", "then", "there", "these", "they", "this", "those",
|
|
102
|
+
"to", "up", "us", "was", "we", "went", "were", "what", "when", "where",
|
|
103
|
+
"which", "who", "why", "will", "with", "would", "you", "your",
|
|
104
|
+
]);
|
|
105
|
+
|
|
106
|
+
export function extractSearchKeywords(query) {
|
|
107
|
+
if (typeof query !== "string") return null;
|
|
108
|
+
const tokens = query
|
|
109
|
+
.toLowerCase()
|
|
110
|
+
.split(/[^a-z0-9-]+/)
|
|
111
|
+
.filter((t) => t.length >= 2 && !STOPWORDS.has(t));
|
|
112
|
+
if (tokens.length === 0) return null;
|
|
113
|
+
const distilled = tokens.join(" ");
|
|
114
|
+
if (distilled === query.toLowerCase().trim()) return null;
|
|
115
|
+
return distilled;
|
|
116
|
+
}
|
|
117
|
+
|
|
91
118
|
// --- Local mode: HTTP to memory server ---
|
|
92
119
|
|
|
93
120
|
async function localSearch(baseUrl, query, limit = 5, minScore = 0.3) {
|
|
@@ -432,11 +459,23 @@ export default {
|
|
|
432
459
|
|
|
433
460
|
stats.mode = hosted ? "hosted" : "local";
|
|
434
461
|
|
|
435
|
-
// Unified search/store that routes to local or hosted
|
|
436
|
-
|
|
462
|
+
// Unified search/store that routes to local or hosted.
|
|
463
|
+
// If the raw query returns nothing, retry once with the
|
|
464
|
+
// keyword-distilled form — natural-language prompts frequently
|
|
465
|
+
// miss the semantic threshold even when matches exist.
|
|
466
|
+
const searchBackend = hosted
|
|
437
467
|
? (query, limit, score) => hostedSearch(config, query, limit, score)
|
|
438
468
|
: (query, limit, score) => localSearch(baseUrl, query, limit, score);
|
|
439
469
|
|
|
470
|
+
const search = async (query, limit, score) => {
|
|
471
|
+
const first = await searchBackend(query, limit, score);
|
|
472
|
+
if (first.length > 0) return first;
|
|
473
|
+
const keywords = extractSearchKeywords(query);
|
|
474
|
+
if (!keywords) return first;
|
|
475
|
+
log(`search: retry "${query.substring(0, 40)}" → "${keywords}"`);
|
|
476
|
+
return searchBackend(keywords, limit, score);
|
|
477
|
+
};
|
|
478
|
+
|
|
440
479
|
const store = hosted
|
|
441
480
|
? (content, metadata) => hostedStore(config, content, metadata)
|
|
442
481
|
: (content, metadata) => localStore(baseUrl, content, metadata);
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "pentatonic-memory",
|
|
3
3
|
"name": "Pentatonic Memory",
|
|
4
4
|
"description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
|
|
5
|
-
"version": "0.5.
|
|
5
|
+
"version": "0.5.1",
|
|
6
6
|
"kind": "context-engine",
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/openclaw-memory-plugin",
|
|
3
|
-
"version": "0.8.
|
|
3
|
+
"version": "0.8.1",
|
|
4
4
|
"description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -168,6 +168,11 @@ describe("named exports", () => {
|
|
|
168
168
|
// --- AI client ---
|
|
169
169
|
|
|
170
170
|
describe("createAIClient", () => {
|
|
171
|
+
const realFetch = globalThis.fetch;
|
|
172
|
+
afterEach(() => {
|
|
173
|
+
globalThis.fetch = realFetch;
|
|
174
|
+
});
|
|
175
|
+
|
|
171
176
|
it("returns an object with embed() and chat()", () => {
|
|
172
177
|
const client = createAIClient({
|
|
173
178
|
url: "http://localhost:11434/v1",
|
|
@@ -185,6 +190,79 @@ describe("createAIClient", () => {
|
|
|
185
190
|
});
|
|
186
191
|
expect(client).toBeDefined();
|
|
187
192
|
});
|
|
193
|
+
|
|
194
|
+
it("hits /embeddings by default (OpenAI spec)", async () => {
|
|
195
|
+
let hitUrl;
|
|
196
|
+
globalThis.fetch = async (url) => {
|
|
197
|
+
hitUrl = url;
|
|
198
|
+
return { ok: true, json: async () => ({ data: [{ embedding: [0.1, 0.2] }] }) };
|
|
199
|
+
};
|
|
200
|
+
const client = createAIClient({
|
|
201
|
+
url: "http://localhost:11434/v1",
|
|
202
|
+
model: "test",
|
|
203
|
+
});
|
|
204
|
+
await client.embed("hello");
|
|
205
|
+
expect(hitUrl).toBe("http://localhost:11434/v1/embeddings");
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
it("uses embeddingPath override (e.g. Pentatonic AI Gateway)", async () => {
|
|
209
|
+
let hitUrl;
|
|
210
|
+
globalThis.fetch = async (url) => {
|
|
211
|
+
hitUrl = url;
|
|
212
|
+
return { ok: true, json: async () => ({ data: [{ embedding: [0.1] }] }) };
|
|
213
|
+
};
|
|
214
|
+
const client = createAIClient({
|
|
215
|
+
url: "https://lambda-gateway.pentatonic.com/v1",
|
|
216
|
+
model: "NV-Embed-v2",
|
|
217
|
+
embeddingPath: "embed",
|
|
218
|
+
});
|
|
219
|
+
await client.embed("hello");
|
|
220
|
+
expect(hitUrl).toBe("https://lambda-gateway.pentatonic.com/v1/embed");
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
it("normalises leading slashes and trailing base-url slashes", async () => {
|
|
224
|
+
let hitUrl;
|
|
225
|
+
globalThis.fetch = async (url) => {
|
|
226
|
+
hitUrl = url;
|
|
227
|
+
return { ok: true, json: async () => ({ data: [{ embedding: [0.1] }] }) };
|
|
228
|
+
};
|
|
229
|
+
const client = createAIClient({
|
|
230
|
+
url: "https://gateway.test/v1/",
|
|
231
|
+
model: "m",
|
|
232
|
+
embeddingPath: "/embed",
|
|
233
|
+
});
|
|
234
|
+
await client.embed("hi");
|
|
235
|
+
expect(hitUrl).toBe("https://gateway.test/v1/embed");
|
|
236
|
+
});
|
|
237
|
+
|
|
238
|
+
it("chatPath override applies to chat() too", async () => {
|
|
239
|
+
let hitUrl;
|
|
240
|
+
globalThis.fetch = async (url) => {
|
|
241
|
+
hitUrl = url;
|
|
242
|
+
return { ok: true, json: async () => ({ choices: [{ message: { content: "hi" } }] }) };
|
|
243
|
+
};
|
|
244
|
+
const client = createAIClient({
|
|
245
|
+
url: "https://gateway.test/v1",
|
|
246
|
+
model: "m",
|
|
247
|
+
chatPath: "chat",
|
|
248
|
+
});
|
|
249
|
+
await client.chat([{ role: "user", content: "q" }]);
|
|
250
|
+
expect(hitUrl).toBe("https://gateway.test/v1/chat");
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
it("chat defaults to /chat/completions", async () => {
|
|
254
|
+
let hitUrl;
|
|
255
|
+
globalThis.fetch = async (url) => {
|
|
256
|
+
hitUrl = url;
|
|
257
|
+
return { ok: true, json: async () => ({ choices: [{ message: { content: "hi" } }] }) };
|
|
258
|
+
};
|
|
259
|
+
const client = createAIClient({
|
|
260
|
+
url: "http://localhost:11434/v1",
|
|
261
|
+
model: "m",
|
|
262
|
+
});
|
|
263
|
+
await client.chat([{ role: "user", content: "q" }]);
|
|
264
|
+
expect(hitUrl).toBe("http://localhost:11434/v1/chat/completions");
|
|
265
|
+
});
|
|
188
266
|
});
|
|
189
267
|
|
|
190
268
|
// --- Search options contract ---
|
|
@@ -231,4 +309,45 @@ describe("ingest options contract", () => {
|
|
|
231
309
|
expect(result).toHaveProperty("content");
|
|
232
310
|
expect(result).toHaveProperty("layerId");
|
|
233
311
|
});
|
|
312
|
+
|
|
313
|
+
it("hands the distill background promise to opts.waitUntil when provided", async () => {
|
|
314
|
+
const mockDb = async (sql) => {
|
|
315
|
+
if (sql.includes("SELECT id FROM memory_layers")) {
|
|
316
|
+
return { rows: [{ id: "layer-1" }] };
|
|
317
|
+
}
|
|
318
|
+
return { rows: [] };
|
|
319
|
+
};
|
|
320
|
+
const mockAi = { embed: async () => null };
|
|
321
|
+
const mockLlm = { chat: async () => "[]" };
|
|
322
|
+
|
|
323
|
+
const registered = [];
|
|
324
|
+
await ingest(mockDb, mockAi, mockLlm, "test content", {
|
|
325
|
+
clientId: "test-client",
|
|
326
|
+
waitUntil: (p) => registered.push(p),
|
|
327
|
+
});
|
|
328
|
+
|
|
329
|
+
expect(registered.length).toBe(1);
|
|
330
|
+
expect(typeof registered[0].then).toBe("function");
|
|
331
|
+
await registered[0]; // should resolve cleanly
|
|
332
|
+
});
|
|
333
|
+
|
|
334
|
+
it("does not call waitUntil when distill is skipped", async () => {
|
|
335
|
+
const mockDb = async (sql) => {
|
|
336
|
+
if (sql.includes("SELECT id FROM memory_layers")) {
|
|
337
|
+
return { rows: [{ id: "layer-1" }] };
|
|
338
|
+
}
|
|
339
|
+
return { rows: [] };
|
|
340
|
+
};
|
|
341
|
+
const mockAi = { embed: async () => null };
|
|
342
|
+
const mockLlm = { chat: async () => "[]" };
|
|
343
|
+
|
|
344
|
+
const registered = [];
|
|
345
|
+
await ingest(mockDb, mockAi, mockLlm, "test content", {
|
|
346
|
+
clientId: "test-client",
|
|
347
|
+
distill: false,
|
|
348
|
+
waitUntil: (p) => registered.push(p),
|
|
349
|
+
});
|
|
350
|
+
|
|
351
|
+
expect(registered.length).toBe(0);
|
|
352
|
+
});
|
|
234
353
|
});
|
|
@@ -8,10 +8,16 @@
|
|
|
8
8
|
/**
|
|
9
9
|
* Create an AI client from config.
|
|
10
10
|
*
|
|
11
|
+
* Defaults to OpenAI-standard paths (`/embeddings`, `/chat/completions`).
|
|
12
|
+
* Override with `embeddingPath` / `chatPath` for gateways that use
|
|
13
|
+
* different routes — e.g. Pentatonic AI Gateway exposes `/embed`.
|
|
14
|
+
*
|
|
11
15
|
* @param {object} config
|
|
12
16
|
* @param {string} config.url - Base URL (e.g. "http://ollama:11434/v1")
|
|
13
17
|
* @param {string} config.model - Model name
|
|
14
18
|
* @param {string} [config.apiKey] - Optional API key
|
|
19
|
+
* @param {string} [config.embeddingPath="embeddings"] - Path appended to url
|
|
20
|
+
* @param {string} [config.chatPath="chat/completions"] - Path appended to url
|
|
15
21
|
* @param {number} [config.dimensions] - Expected embedding dimensions
|
|
16
22
|
* @returns {object} Client with embed() and chat() methods
|
|
17
23
|
*/
|
|
@@ -22,6 +28,23 @@ export function createAIClient(config) {
|
|
|
22
28
|
headers["X-API-Key"] = config.apiKey;
|
|
23
29
|
}
|
|
24
30
|
|
|
31
|
+
// Strip leading slashes so callers can use "embed" or "/embed"
|
|
32
|
+
// interchangeably. Base url may or may not have a trailing slash.
|
|
33
|
+
// Plain loops (not regex) to avoid polynomial-regex scanner flags.
|
|
34
|
+
const stripLeading = (s) => {
|
|
35
|
+
let i = 0;
|
|
36
|
+
while (i < s.length && s[i] === "/") i++;
|
|
37
|
+
return i === 0 ? s : s.slice(i);
|
|
38
|
+
};
|
|
39
|
+
const stripTrailing = (s) => {
|
|
40
|
+
let i = s.length;
|
|
41
|
+
while (i > 0 && s[i - 1] === "/") i--;
|
|
42
|
+
return i === s.length ? s : s.slice(0, i);
|
|
43
|
+
};
|
|
44
|
+
const embeddingPath = stripLeading(config.embeddingPath || "embeddings");
|
|
45
|
+
const chatPath = stripLeading(config.chatPath || "chat/completions");
|
|
46
|
+
const baseUrl = stripTrailing(config.url);
|
|
47
|
+
|
|
25
48
|
return {
|
|
26
49
|
/**
|
|
27
50
|
* Generate an embedding vector for text.
|
|
@@ -32,7 +55,7 @@ export function createAIClient(config) {
|
|
|
32
55
|
*/
|
|
33
56
|
async embed(text, inputType = "passage") {
|
|
34
57
|
try {
|
|
35
|
-
const res = await fetch(`${config.url}/embeddings`, {
|
|
58
|
+
const res = await fetch(`${baseUrl}/${embeddingPath}`, {
|
|
36
59
|
method: "POST",
|
|
37
60
|
headers,
|
|
38
61
|
body: JSON.stringify({
|
|
@@ -70,7 +93,7 @@ export function createAIClient(config) {
|
|
|
70
93
|
*/
|
|
71
94
|
async chat(messages, opts = {}) {
|
|
72
95
|
try {
|
|
73
|
-
const res = await fetch(`${config.url}/chat/completions`, {
|
|
96
|
+
const res = await fetch(`${baseUrl}/${chatPath}`, {
|
|
74
97
|
method: "POST",
|
|
75
98
|
headers,
|
|
76
99
|
body: JSON.stringify({
|
|
@@ -17,6 +17,10 @@ import { distill } from "./distill.js";
|
|
|
17
17
|
* @param {string} [opts.layerType="episodic"] - Target layer
|
|
18
18
|
* @param {object} [opts.metadata] - Additional metadata
|
|
19
19
|
* @param {Function} [opts.logger] - Optional logger
|
|
20
|
+
* @param {Function} [opts.waitUntil] - Platform hook to register background
|
|
21
|
+
* tasks (e.g. Cloudflare Worker ctx.waitUntil). If provided, the distill
|
|
22
|
+
* background task is handed to it so the host keeps it alive past return.
|
|
23
|
+
* Without it, distill is fire-and-forget (fine for Node/browser).
|
|
20
24
|
* @returns {Promise<{id: string, content: string, layerId: string}>}
|
|
21
25
|
*/
|
|
22
26
|
export async function ingest(db, ai, llm, content, opts = {}) {
|
|
@@ -86,9 +90,11 @@ export async function ingest(db, ai, llm, content, opts = {}) {
|
|
|
86
90
|
// Distill atomic facts in the background — only for raw ingestions
|
|
87
91
|
// (skip if this call is already storing a distilled atom or user opted out).
|
|
88
92
|
if (opts.distill !== false && !opts.sourceId) {
|
|
89
|
-
distill(db, ai, llm, memoryId, content, {
|
|
90
|
-
|
|
91
|
-
|
|
93
|
+
const distillPromise = distill(db, ai, llm, memoryId, content, {
|
|
94
|
+
...opts,
|
|
95
|
+
logger: log,
|
|
96
|
+
}).catch((err) => log(`distill failed for ${memoryId}: ${err.message}`));
|
|
97
|
+
if (typeof opts.waitUntil === "function") opts.waitUntil(distillPromise);
|
|
92
98
|
}
|
|
93
99
|
|
|
94
100
|
return { id: memoryId, content, layerId };
|
|
@@ -13,6 +13,9 @@
|
|
|
13
13
|
* LLM_URL — OpenAI-compatible chat endpoint (required)
|
|
14
14
|
* LLM_MODEL — Chat model name for HyDE (required)
|
|
15
15
|
* API_KEY — API key for embedding/LLM endpoints (optional)
|
|
16
|
+
* EMBEDDING_PATH — Path appended to EMBEDDING_URL (default: "embeddings").
|
|
17
|
+
* Set to "embed" for the Pentatonic AI Gateway.
|
|
18
|
+
* CHAT_PATH — Path appended to LLM_URL (default: "chat/completions")
|
|
16
19
|
* CLIENT_ID — Client ID for memory scoping (default: "default")
|
|
17
20
|
* PORT — HTTP port for SSE transport (default: 3333)
|
|
18
21
|
*/
|
|
@@ -46,11 +49,13 @@ function createMemory() {
|
|
|
46
49
|
url: process.env.EMBEDDING_URL,
|
|
47
50
|
model: process.env.EMBEDDING_MODEL,
|
|
48
51
|
apiKey: process.env.API_KEY,
|
|
52
|
+
embeddingPath: process.env.EMBEDDING_PATH,
|
|
49
53
|
},
|
|
50
54
|
llm: {
|
|
51
55
|
url: process.env.LLM_URL,
|
|
52
56
|
model: process.env.LLM_MODEL,
|
|
53
57
|
apiKey: process.env.API_KEY,
|
|
58
|
+
chatPath: process.env.CHAT_PATH,
|
|
54
59
|
},
|
|
55
60
|
logger: (msg) => process.stderr.write(`[memory] ${msg}\n`),
|
|
56
61
|
});
|
|
@@ -342,7 +347,7 @@ async function main() {
|
|
|
342
347
|
const health = {
|
|
343
348
|
status: "ok",
|
|
344
349
|
client: CLIENT_ID,
|
|
345
|
-
version: "0.5.
|
|
350
|
+
version: "0.5.2",
|
|
346
351
|
search: "text",
|
|
347
352
|
db: false,
|
|
348
353
|
ollama: false,
|