@pentatonic-ai/ai-agent-sdk 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -283,6 +283,89 @@ describe("search options contract", () => {
283
283
 
284
284
  expect(Array.isArray(results)).toBe(true);
285
285
  });
286
+
287
+ it("SQL includes atomBoost and verbosityPenalty terms", async () => {
288
+ const seenSqls = [];
289
+ const mockDb = async (sql) => {
290
+ seenSqls.push(sql);
291
+ if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
292
+ return { rows: [] };
293
+ };
294
+ const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
295
+
296
+ await search(mockDb, mockAi, "q", { clientId: "c" });
297
+
298
+ const scoringSql = seenSqls.find((s) => s.includes("final_score"));
299
+ expect(scoringSql).toBeDefined();
300
+ expect(scoringSql).toMatch(/source_id IS NOT NULL/);
301
+ expect(scoringSql).toMatch(/length\(mn\.content\)/);
302
+ });
303
+
304
+ it("dedupeBySource drops raw rows whose id is a source of a matched atom", async () => {
305
+ const rows = [
306
+ { id: "raw-1", client_id: "c", layer_id: "l", content: "long raw turn",
307
+ confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: null },
308
+ { id: "atom-1", client_id: "c", layer_id: "l", content: "Phil owns a Subaru",
309
+ confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.8, source_id: "raw-1" },
310
+ ];
311
+ let searchCallCount = 0;
312
+ const mockDb = async (sql) => {
313
+ if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
314
+ if (sql.includes("final_score")) {
315
+ searchCallCount++;
316
+ return { rows };
317
+ }
318
+ return { rows: [] };
319
+ };
320
+ const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
321
+
322
+ const out = await search(mockDb, mockAi, "q", { clientId: "c", minScore: 0 });
323
+
324
+ expect(searchCallCount).toBe(1);
325
+ expect(out.length).toBe(1);
326
+ expect(out[0].id).toBe("atom-1");
327
+ expect(out[0].source_id).toBe("raw-1");
328
+ });
329
+
330
+ it("dedupeBySource: false keeps both atom and its raw source", async () => {
331
+ const rows = [
332
+ { id: "raw-1", client_id: "c", layer_id: "l", content: "long",
333
+ confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: null },
334
+ { id: "atom-1", client_id: "c", layer_id: "l", content: "short",
335
+ confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.8, source_id: "raw-1" },
336
+ ];
337
+ const mockDb = async (sql) => {
338
+ if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
339
+ if (sql.includes("final_score")) return { rows };
340
+ return { rows: [] };
341
+ };
342
+ const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
343
+
344
+ const out = await search(mockDb, mockAi, "q", {
345
+ clientId: "c",
346
+ minScore: 0,
347
+ dedupeBySource: false,
348
+ });
349
+
350
+ expect(out.length).toBe(2);
351
+ expect(out.map((r) => r.id).sort()).toEqual(["atom-1", "raw-1"]);
352
+ });
353
+
354
+ it("search results include source_id (null for raw, set for atoms)", async () => {
355
+ const rows = [
356
+ { id: "atom-1", client_id: "c", layer_id: "l", content: "atom",
357
+ confidence: 1, decay_rate: 0.05, access_count: 0, final_score: 0.9, source_id: "raw-1" },
358
+ ];
359
+ const mockDb = async (sql) => {
360
+ if (sql.includes("information_schema.columns")) return { rows: [{ "?column?": 1 }] };
361
+ if (sql.includes("final_score")) return { rows };
362
+ return { rows: [] };
363
+ };
364
+ const mockAi = { embed: async () => ({ embedding: [0.1], dimensions: 1, model: "t" }) };
365
+
366
+ const out = await search(mockDb, mockAi, "q", { clientId: "c", minScore: 0 });
367
+ expect(out[0].source_id).toBe("raw-1");
368
+ });
286
369
  });
287
370
 
288
371
  // --- Ingest options contract ---
@@ -10,6 +10,11 @@ const DEFAULT_WEIGHTS = {
10
10
  relevance: 0.6,
11
11
  recency: 0.25,
12
12
  frequency: 0.15,
13
+ // Boost distilled atoms — they're high signal per token by design.
14
+ atomBoost: 0.15,
15
+ // Penalty on verbose raw turns. Short focused memories rank higher.
16
+ // Atoms are exempt (penalty skipped when source_id IS NOT NULL).
17
+ verbosityPenalty: 0.1,
13
18
  };
14
19
 
15
20
  /**
@@ -25,6 +30,10 @@ const DEFAULT_WEIGHTS = {
25
30
  * @param {number} [opts.minScore=0.5] - Minimum score threshold
26
31
  * @param {string} [opts.userId] - Optional user scope
27
32
  * @param {object} [opts.weights] - Override scoring weights
33
+ * (relevance, recency, frequency, atomBoost, verbosityPenalty)
34
+ * @param {boolean} [opts.dedupeBySource=true] - When an atom matches,
35
+ * drop its raw source memory from the results (atoms are already
36
+ * distillations of the source, so returning both is redundant).
28
37
  * @param {Function} [opts.logger] - Optional logger
29
38
  * @returns {Promise<Array>} Scored memory results
30
39
  */
@@ -107,7 +116,19 @@ export async function search(db, ai, query, opts = {}) {
107
116
  ${w.recency} * exp(
108
117
  -0.01 * EXTRACT(EPOCH FROM NOW() - COALESCE(mn.last_accessed, mn.created_at)) / 3600
109
118
  ) +
110
- ${w.frequency} * (ln(mn.access_count + 1) / ln(ma.val + 1))
119
+ ${w.frequency} * (ln(mn.access_count + 1) / ln(ma.val + 1)) +
120
+ ${w.atomBoost} * (CASE WHEN mn.source_id IS NOT NULL THEN 1 ELSE 0 END) -
121
+ ${w.verbosityPenalty} * (
122
+ CASE WHEN mn.source_id IS NULL THEN
123
+ LEAST(
124
+ GREATEST(
125
+ (ln(length(mn.content) + 1) - ln(200)) / (ln(10000) - ln(200)),
126
+ 0
127
+ ),
128
+ 1
129
+ )
130
+ ELSE 0 END
131
+ )
111
132
  ) AS final_score
112
133
  FROM memory_nodes mn
113
134
  CROSS JOIN max_ac ma
@@ -123,10 +144,21 @@ export async function search(db, ai, query, opts = {}) {
123
144
 
124
145
  const result = await db(sql, params);
125
146
 
126
- const filtered = (result.rows || []).filter(
147
+ let filtered = (result.rows || []).filter(
127
148
  (r) => parseFloat(r.final_score) >= threshold
128
149
  );
129
150
 
151
+ // De-dupe: when an atom matches, drop its raw source from the set.
152
+ // Default on; set opts.dedupeBySource: false to keep both.
153
+ if (opts.dedupeBySource !== false) {
154
+ const atomSources = new Set(
155
+ filtered.filter((r) => r.source_id).map((r) => r.source_id)
156
+ );
157
+ if (atomSources.size > 0) {
158
+ filtered = filtered.filter((r) => !atomSources.has(r.id));
159
+ }
160
+ }
161
+
130
162
  // Increment access counts
131
163
  const ids = filtered.map((r) => r.id);
132
164
  if (ids.length) {
@@ -182,6 +214,7 @@ function mapRow(row) {
182
214
  client_id: row.client_id,
183
215
  user_id: row.user_id || null,
184
216
  layer_id: row.layer_id,
217
+ source_id: row.source_id || null,
185
218
  content: row.content,
186
219
  metadata:
187
220
  typeof row.metadata === "string"
@@ -347,7 +347,7 @@ async function main() {
347
347
  const health = {
348
348
  status: "ok",
349
349
  client: CLIENT_ID,
350
- version: "0.5.2",
350
+ version: "0.5.3",
351
351
  search: "text",
352
352
  db: false,
353
353
  ollama: false,