@pentatonic-ai/ai-agent-sdk 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -70,6 +70,7 @@ That's it. The plugin hooks automatically search memories on every prompt and st
 - **Automatic memory** -- every conversation turn is stored with embeddings and HyDE query expansion
 - **Semantic search** -- multi-signal retrieval combining vector similarity, BM25 full-text, recency decay, and access frequency
 - **Memory layers** -- episodic (recent), semantic (consolidated), procedural (how-to), working (temporary)
+- **Distilled memory** -- a background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. On by default; the raw turn is still preserved.
 - **Decay and consolidation** -- memories fade over time; frequently accessed ones get promoted
 
 ### Change models
@@ -195,7 +196,7 @@ openclaw pentatonic-memory local
 
 OpenClaw's context engine hooks fire on every lifecycle event:
 
-- **Ingest** -- every user and assistant message is stored with embeddings and HyDE query expansion
+- **Ingest** -- every user and assistant message is stored with embeddings and HyDE query expansion, then distilled into atomic facts in the background (see [Distilled memory](#what-you-get))
 - **Assemble** -- relevant memories are injected as system prompt context before every model run
 - **Compact** -- decay cycle runs when the context window fills
 - **After turn** -- high-access memories get consolidated to the semantic layer
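
The bullets above compress a lot of behaviour. As a hedged sketch, here is how one raw turn flows through the `ingest` entry point that appears later in this diff; the import path is hypothetical, and the mock `db`/`ai`/`llm` handles copy the shapes the SDK's own test fixtures use, so this illustrates the linking behaviour rather than a documented API.

import { ingest } from "@pentatonic-ai/ai-agent-sdk"; // hypothetical import path

// Minimal mocks, same shape as the SDK test fixtures further down.
const db = async (sql) =>
  sql.includes("SELECT id FROM memory_layers")
    ? { rows: [{ id: "layer-1" }] }
    : { rows: [] };
const ai = { embed: async () => null };
const llm = { chat: async () => "[]" }; // the distill pass reads the LLM reply

const raw = await ingest(db, ai, llm,
  "Phil drinks cortados, likes rye bread, and climbs on weekends.",
  { clientId: "demo" });
// In the background, distill() stores one semantic-layer node per atomic
// fact (e.g. "Phil drinks cortados"), each linked back to raw.id via
// sourceId, which is why "what does Phil drink?" can hit a single clean fact.
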
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.5.0",
+  "version": "0.5.2",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",
@@ -0,0 +1,193 @@
+/**
+ * Query expansion fallback tests.
+ *
+ * When the raw user prompt returns no memories, the plugin retries once
+ * with a keyword-distilled form. This recovers matches for verbose
+ * natural-language prompts that fall below the semantic threshold.
+ */
+
+import plugin, { extractSearchKeywords } from "../index.js";
+
+const realFetch = globalThis.fetch;
+
+afterEach(() => {
+  globalThis.fetch = realFetch;
+});
+
+function makeEngine(extraConfig = {}) {
+  let factory;
+  plugin.register({
+    pluginConfig: {
+      tes_endpoint: "https://x.test",
+      tes_client_id: "c",
+      tes_api_key: "tes_c_xyz",
+      ...extraConfig,
+    },
+    registerTool: () => {},
+    registerContextEngine: (_name, fn) => {
+      factory = fn;
+    },
+  });
+  if (!factory) throw new Error("plugin did not register a context engine");
+  return factory();
+}
+
+describe("extractSearchKeywords", () => {
+  it("strips stopwords from verbose prompts", () => {
+    const out = extractSearchKeywords(
+      "when I was working in the thing-event-system, I copied migrations, what were they?"
+    );
+    expect(out).toMatch(/thing-event-system/);
+    expect(out).toMatch(/migrations/);
+    expect(out).not.toMatch(/\bwhen\b/);
+    expect(out).not.toMatch(/\bwhat\b/);
+  });
+
+  it("preserves hyphenated compounds", () => {
+    expect(extractSearchKeywords("where is thing-event-system?")).toMatch(
+      /thing-event-system/
+    );
+  });
+
+  it("returns null when the distilled form equals the input", () => {
+    expect(extractSearchKeywords("deep-memory migrations")).toBeNull();
+  });
+
+  it("returns null when the prompt is only stopwords", () => {
+    expect(extractSearchKeywords("what were they?")).toBeNull();
+  });
+
+  it("returns null for non-string input", () => {
+    expect(extractSearchKeywords(null)).toBeNull();
+    expect(extractSearchKeywords(undefined)).toBeNull();
+  });
+});
+
+describe("assemble — keyword retry fallback (hosted mode)", () => {
+  it("retries with distilled keywords when raw prompt misses", async () => {
+    const queries = [];
+    globalThis.fetch = async (_url, init) => {
+      const body = JSON.parse(init.body);
+      const q = body.variables.query;
+      queries.push(q);
+      const isFirst = queries.length === 1;
+      return {
+        ok: true,
+        status: 200,
+        json: async () => ({
+          data: {
+            semanticSearchMemories: isFirst
+              ? []
+              : [{ id: "m1", content: "matched on retry", similarity: 0.7 }],
+          },
+        }),
+      };
+    };
+
+    const engine = makeEngine();
+    const result = await engine.assemble({
+      sessionId: "s",
+      messages: [
+        {
+          role: "user",
+          content:
+            "when I was working in the thing-event-system, what were those migration changes again?",
+        },
+      ],
+    });
+
+    expect(queries).toHaveLength(2);
+    expect(queries[0]).toMatch(/when I was working/);
+    expect(queries[1]).not.toMatch(/\bwhen\b/);
+    expect(queries[1]).toMatch(/thing-event-system/);
+    expect(queries[1]).toMatch(/migration/);
+    expect(result.systemPromptAddition).toMatch(/matched on retry/);
+  });
+
+  it("does not retry when the raw prompt already returns results", async () => {
+    const queries = [];
+    globalThis.fetch = async (_url, init) => {
+      queries.push(JSON.parse(init.body).variables.query);
+      return {
+        ok: true,
+        status: 200,
+        json: async () => ({
+          data: {
+            semanticSearchMemories: [
+              { id: "m1", content: "hit", similarity: 0.9 },
+            ],
+          },
+        }),
+      };
+    };
+
+    const engine = makeEngine();
+    await engine.assemble({
+      sessionId: "s",
+      messages: [{ role: "user", content: "thing-event-system migrations" }],
+    });
+
+    expect(queries).toHaveLength(1);
+  });
+
+  it("does not retry when distilled query equals the raw query", async () => {
+    const queries = [];
+    globalThis.fetch = async (_url, init) => {
+      queries.push(JSON.parse(init.body).variables.query);
+      return {
+        ok: true,
+        status: 200,
+        json: async () => ({ data: { semanticSearchMemories: [] } }),
+      };
+    };
+
+    const engine = makeEngine();
+    await engine.assemble({
+      sessionId: "s",
+      messages: [{ role: "user", content: "deep-memory migrations" }],
+    });
+
+    expect(queries).toHaveLength(1);
+  });
+});
+
+describe("assemble — keyword retry fallback (local mode)", () => {
+  it("retries via /search endpoint when raw query returns nothing", async () => {
+    const queries = [];
+    globalThis.fetch = async (_url, init) => {
+      const body = JSON.parse(init.body);
+      queries.push(body.query);
+      const isFirst = queries.length === 1;
+      return {
+        ok: true,
+        status: 200,
+        json: async () => ({
+          results: isFirst
+            ? []
+            : [{ id: "m1", content: "local hit", similarity: 0.6 }],
+        }),
+      };
+    };
+
+    let factory;
+    plugin.register({
+      pluginConfig: {}, // no tes_* creds → local mode
+      registerTool: () => {},
+      registerContextEngine: (_name, fn) => {
+        factory = fn;
+      },
+    });
+    const engine = factory();
+
+    const result = await engine.assemble({
+      sessionId: "s",
+      messages: [
+        { role: "user", content: "what were the migration changes again?" },
+      ],
+    });
+
+    expect(queries).toHaveLength(2);
+    expect(queries[1]).toMatch(/migration/);
+    expect(result.systemPromptAddition).toMatch(/local hit/);
+  });
+});
@@ -88,6 +88,33 @@ const stats = {
   setupPrompted: false,
 };
 
+// --- Query keyword extraction ---
+// Natural-language prompts ("what were those changes again?") often fall
+// below the semantic threshold even when relevant memories exist. We
+// drop stopwords and retry with the keyword-distilled form.
+const STOPWORDS = new Set([
+  "a", "am", "an", "and", "are", "as", "at", "be", "been", "but", "by",
+  "can", "did", "do", "does", "for", "from", "had", "has", "have", "he",
+  "her", "him", "his", "how", "i", "if", "in", "into", "is", "it", "its",
+  "just", "like", "made", "me", "my", "need", "needed", "of", "on", "or",
+  "our", "out", "over", "she", "so", "some", "than", "that", "the",
+  "their", "them", "then", "there", "these", "they", "this", "those",
+  "to", "up", "us", "was", "we", "went", "were", "what", "when", "where",
+  "which", "who", "why", "will", "with", "would", "you", "your",
+]);
+
+export function extractSearchKeywords(query) {
+  if (typeof query !== "string") return null;
+  const tokens = query
+    .toLowerCase()
+    .split(/[^a-z0-9-]+/)
+    .filter((t) => t.length >= 2 && !STOPWORDS.has(t));
+  if (tokens.length === 0) return null;
+  const distilled = tokens.join(" ");
+  if (distilled === query.toLowerCase().trim()) return null;
+  return distilled;
+}
+
 // --- Local mode: HTTP to memory server ---
 
 async function localSearch(baseUrl, query, limit = 5, minScore = 0.3) {
@@ -432,11 +459,23 @@ export default {
 
     stats.mode = hosted ? "hosted" : "local";
 
-    // Unified search/store that routes to local or hosted
-    const search = hosted
+    // Unified search/store that routes to local or hosted.
+    // If the raw query returns nothing, retry once with the
+    // keyword-distilled form — natural-language prompts frequently
+    // miss the semantic threshold even when matches exist.
+    const searchBackend = hosted
       ? (query, limit, score) => hostedSearch(config, query, limit, score)
       : (query, limit, score) => localSearch(baseUrl, query, limit, score);
 
+    const search = async (query, limit, score) => {
+      const first = await searchBackend(query, limit, score);
+      if (first.length > 0) return first;
+      const keywords = extractSearchKeywords(query);
+      if (!keywords) return first;
+      log(`search: retry "${query.substring(0, 40)}" → "${keywords}"`);
+      return searchBackend(keywords, limit, score);
+    };
+
    const store = hosted
       ? (content, metadata) => hostedStore(config, content, metadata)
       : (content, metadata) => localStore(baseUrl, content, metadata);
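
For reference, the extraction above behaves like this (outputs computed by hand from the STOPWORDS set and the tokeniser; the import path follows the test file's "../index.js" and may differ for package consumers):

import { extractSearchKeywords } from "./index.js";

extractSearchKeywords(
  "when I was working in the thing-event-system, what were those migration changes again?"
);
// → "working thing-event-system migration changes again"

extractSearchKeywords("deep-memory migrations"); // → null (already distilled)
extractSearchKeywords("what were they?"); // → null (nothing but stopwords)
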
@@ -2,7 +2,7 @@
   "id": "pentatonic-memory",
   "name": "Pentatonic Memory",
   "description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "kind": "context-engine",
   "configSchema": {
     "type": "object",
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/openclaw-memory-plugin",
-  "version": "0.8.0",
+  "version": "0.8.1",
   "description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
   "type": "module",
   "main": "index.js",
@@ -168,6 +168,11 @@ describe("named exports", () => {
 // --- AI client ---
 
 describe("createAIClient", () => {
+  const realFetch = globalThis.fetch;
+  afterEach(() => {
+    globalThis.fetch = realFetch;
+  });
+
   it("returns an object with embed() and chat()", () => {
     const client = createAIClient({
       url: "http://localhost:11434/v1",
@@ -185,6 +190,79 @@ describe("createAIClient", () => {
     });
     expect(client).toBeDefined();
   });
+
+  it("hits /embeddings by default (OpenAI spec)", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ data: [{ embedding: [0.1, 0.2] }] }) };
+    };
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "test",
+    });
+    await client.embed("hello");
+    expect(hitUrl).toBe("http://localhost:11434/v1/embeddings");
+  });
+
+  it("uses embeddingPath override (e.g. Pentatonic AI Gateway)", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ data: [{ embedding: [0.1] }] }) };
+    };
+    const client = createAIClient({
+      url: "https://lambda-gateway.pentatonic.com/v1",
+      model: "NV-Embed-v2",
+      embeddingPath: "embed",
+    });
+    await client.embed("hello");
+    expect(hitUrl).toBe("https://lambda-gateway.pentatonic.com/v1/embed");
+  });
+
+  it("normalises leading slashes and trailing base-url slashes", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ data: [{ embedding: [0.1] }] }) };
+    };
+    const client = createAIClient({
+      url: "https://gateway.test/v1/",
+      model: "m",
+      embeddingPath: "/embed",
+    });
+    await client.embed("hi");
+    expect(hitUrl).toBe("https://gateway.test/v1/embed");
+  });
+
+  it("chatPath override applies to chat() too", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ choices: [{ message: { content: "hi" } }] }) };
+    };
+    const client = createAIClient({
+      url: "https://gateway.test/v1",
+      model: "m",
+      chatPath: "chat",
+    });
+    await client.chat([{ role: "user", content: "q" }]);
+    expect(hitUrl).toBe("https://gateway.test/v1/chat");
+  });
+
+  it("chat defaults to /chat/completions", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ choices: [{ message: { content: "hi" } }] }) };
+    };
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "m",
+    });
+    await client.chat([{ role: "user", content: "q" }]);
+    expect(hitUrl).toBe("http://localhost:11434/v1/chat/completions");
+  });
 });
 
 // --- Search options contract ---
@@ -231,4 +309,45 @@ describe("ingest options contract", () => {
     expect(result).toHaveProperty("content");
     expect(result).toHaveProperty("layerId");
   });
+
+  it("hands the distill background promise to opts.waitUntil when provided", async () => {
+    const mockDb = async (sql) => {
+      if (sql.includes("SELECT id FROM memory_layers")) {
+        return { rows: [{ id: "layer-1" }] };
+      }
+      return { rows: [] };
+    };
+    const mockAi = { embed: async () => null };
+    const mockLlm = { chat: async () => "[]" };
+
+    const registered = [];
+    await ingest(mockDb, mockAi, mockLlm, "test content", {
+      clientId: "test-client",
+      waitUntil: (p) => registered.push(p),
+    });
+
+    expect(registered.length).toBe(1);
+    expect(typeof registered[0].then).toBe("function");
+    await registered[0]; // should resolve cleanly
+  });
+
+  it("does not call waitUntil when distill is skipped", async () => {
+    const mockDb = async (sql) => {
+      if (sql.includes("SELECT id FROM memory_layers")) {
+        return { rows: [{ id: "layer-1" }] };
+      }
+      return { rows: [] };
+    };
+    const mockAi = { embed: async () => null };
+    const mockLlm = { chat: async () => "[]" };
+
+    const registered = [];
+    await ingest(mockDb, mockAi, mockLlm, "test content", {
+      clientId: "test-client",
+      distill: false,
+      waitUntil: (p) => registered.push(p),
+    });
+
+    expect(registered.length).toBe(0);
+  });
 });
@@ -8,10 +8,16 @@
 /**
  * Create an AI client from config.
  *
+ * Defaults to OpenAI-standard paths (`/embeddings`, `/chat/completions`).
+ * Override with `embeddingPath` / `chatPath` for gateways that use
+ * different routes — e.g. Pentatonic AI Gateway exposes `/embed`.
+ *
  * @param {object} config
  * @param {string} config.url - Base URL (e.g. "http://ollama:11434/v1")
  * @param {string} config.model - Model name
  * @param {string} [config.apiKey] - Optional API key
+ * @param {string} [config.embeddingPath="embeddings"] - Path appended to url
+ * @param {string} [config.chatPath="chat/completions"] - Path appended to url
  * @param {number} [config.dimensions] - Expected embedding dimensions
  * @returns {object} Client with embed() and chat() methods
  */
@@ -22,6 +28,23 @@ export function createAIClient(config) {
     headers["X-API-Key"] = config.apiKey;
   }
 
+  // Strip leading slashes so callers can use "embed" or "/embed"
+  // interchangeably. Base url may or may not have a trailing slash.
+  // Plain loops (not regex) to avoid polynomial-regex scanner flags.
+  const stripLeading = (s) => {
+    let i = 0;
+    while (i < s.length && s[i] === "/") i++;
+    return i === 0 ? s : s.slice(i);
+  };
+  const stripTrailing = (s) => {
+    let i = s.length;
+    while (i > 0 && s[i - 1] === "/") i--;
+    return i === s.length ? s : s.slice(0, i);
+  };
+  const embeddingPath = stripLeading(config.embeddingPath || "embeddings");
+  const chatPath = stripLeading(config.chatPath || "chat/completions");
+  const baseUrl = stripTrailing(config.url);
+
   return {
     /**
      * Generate an embedding vector for text.
@@ -32,7 +55,7 @@ export function createAIClient(config) {
      */
     async embed(text, inputType = "passage") {
       try {
-        const res = await fetch(`${config.url}/embeddings`, {
+        const res = await fetch(`${baseUrl}/${embeddingPath}`, {
           method: "POST",
           headers,
           body: JSON.stringify({
@@ -70,7 +93,7 @@ export function createAIClient(config) {
      */
     async chat(messages, opts = {}) {
       try {
-        const res = await fetch(`${config.url}/chat/completions`, {
+        const res = await fetch(`${baseUrl}/${chatPath}`, {
           method: "POST",
           headers,
           body: JSON.stringify({
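
A short usage sketch of the new path overrides, mirroring the test fixtures above (the relative import path is an assumption):

import { createAIClient } from "./ai-client.js";

// Defaults follow the OpenAI spec: {url}/embeddings and {url}/chat/completions.
const local = createAIClient({ url: "http://localhost:11434/v1", model: "m" });
await local.embed("hello"); // POSTs to http://localhost:11434/v1/embeddings

// Gateway route override; stray slashes on either side are normalised.
const gateway = createAIClient({
  url: "https://lambda-gateway.pentatonic.com/v1/",
  model: "NV-Embed-v2",
  embeddingPath: "/embed",
});
await gateway.embed("hello"); // POSTs to https://lambda-gateway.pentatonic.com/v1/embed
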
@@ -17,6 +17,10 @@ import { distill } from "./distill.js";
  * @param {string} [opts.layerType="episodic"] - Target layer
  * @param {object} [opts.metadata] - Additional metadata
  * @param {Function} [opts.logger] - Optional logger
+ * @param {Function} [opts.waitUntil] - Platform hook to register background
+ *   tasks (e.g. Cloudflare Worker ctx.waitUntil). If provided, the distill
+ *   background task is handed to it so the host keeps it alive past return.
+ *   Without it, distill is fire-and-forget (fine for Node/browser).
  * @returns {Promise<{id: string, content: string, layerId: string}>}
  */
 export async function ingest(db, ai, llm, content, opts = {}) {
@@ -86,9 +90,11 @@ export async function ingest(db, ai, llm, content, opts = {}) {
   // Distill atomic facts in the background — only for raw ingestions
   // (skip if this call is already storing a distilled atom or user opted out).
   if (opts.distill !== false && !opts.sourceId) {
-    distill(db, ai, llm, memoryId, content, { ...opts, logger: log }).catch(
-      (err) => log(`distill failed for ${memoryId}: ${err.message}`)
-    );
+    const distillPromise = distill(db, ai, llm, memoryId, content, {
+      ...opts,
+      logger: log,
+    }).catch((err) => log(`distill failed for ${memoryId}: ${err.message}`));
+    if (typeof opts.waitUntil === "function") opts.waitUntil(distillPromise);
   }
 
   return { id: memoryId, content, layerId };
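
A hedged sketch of the waitUntil wiring the JSDoc above describes, inside a Cloudflare Worker. The fetch handler shape and ctx.waitUntil are standard Workers API; the env.DB/env.AI/env.LLM bindings and the import path are placeholders, not documented SDK surface:

import { ingest } from "./ingest.js";

export default {
  async fetch(request, env, ctx) {
    const result = await ingest(env.DB, env.AI, env.LLM, await request.text(), {
      clientId: "worker-demo",
      // Hand the background distill promise to the platform so it keeps
      // running after the response returns; without this it is fire-and-forget.
      waitUntil: (p) => ctx.waitUntil(p),
    });
    return Response.json(result);
  },
};
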
@@ -13,6 +13,9 @@
  * LLM_URL — OpenAI-compatible chat endpoint (required)
  * LLM_MODEL — Chat model name for HyDE (required)
  * API_KEY — API key for embedding/LLM endpoints (optional)
+ * EMBEDDING_PATH — Path appended to EMBEDDING_URL (default: "embeddings").
+ *   Set to "embed" for the Pentatonic AI Gateway.
+ * CHAT_PATH — Path appended to LLM_URL (default: "chat/completions")
  * CLIENT_ID — Client ID for memory scoping (default: "default")
  * PORT — HTTP port for SSE transport (default: 3333)
  */
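
For example, pointing the server at the Pentatonic AI Gateway would look roughly like this (illustrative values; the gateway host is taken from the test fixtures above):

EMBEDDING_URL=https://lambda-gateway.pentatonic.com/v1
EMBEDDING_PATH=embed
CHAT_PATH=chat/completions   # the default, shown for completeness
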
@@ -46,11 +49,13 @@ function createMemory() {
       url: process.env.EMBEDDING_URL,
       model: process.env.EMBEDDING_MODEL,
       apiKey: process.env.API_KEY,
+      embeddingPath: process.env.EMBEDDING_PATH,
     },
     llm: {
       url: process.env.LLM_URL,
       model: process.env.LLM_MODEL,
       apiKey: process.env.API_KEY,
+      chatPath: process.env.CHAT_PATH,
     },
     logger: (msg) => process.stderr.write(`[memory] ${msg}\n`),
   });
@@ -342,7 +347,7 @@ async function main() {
   const health = {
     status: "ok",
     client: CLIENT_ID,
-    version: "0.5.0",
+    version: "0.5.2",
     search: "text",
     db: false,
     ollama: false,