@pentatonic-ai/ai-agent-sdk 0.5.10 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.5.10",
|
|
3
|
+
"version": "0.5.11",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -587,23 +587,50 @@ describe("ingest options contract", () => {
|
|
|
587
587
|
describe("ingest dedup option", () => {
|
|
588
588
|
function makeMockDb(state = {}) {
|
|
589
589
|
const calls = [];
|
|
590
|
-
|
|
590
|
+
// Each existing row optionally carries created_at (Date). Defaults to
|
|
591
|
+
// "now" so legacy-window tests that don't care still match.
|
|
592
|
+
const existing = state.existing || [];
|
|
591
593
|
const inserted = [];
|
|
594
|
+
const now = state.now || new Date();
|
|
592
595
|
const db = async (sql, params) => {
|
|
593
596
|
calls.push({ sql, params });
|
|
594
597
|
if (sql.includes("SELECT id FROM memory_layers")) {
|
|
595
598
|
return { rows: [{ id: "layer-1" }] };
|
|
596
599
|
}
|
|
597
|
-
// Dedup pre-check
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
600
|
+
// Dedup pre-check. Two SQL shapes — one with the legacy LIKE branch
|
|
601
|
+
// (3 params: clientId, dedupKey, legacyDays as string), one without
|
|
602
|
+
// (2 params: clientId, dedupKey).
|
|
603
|
+
if (sql.includes("FROM memory_nodes")) {
|
|
604
|
+
const [clientId, content, legacyDaysStr] = params;
|
|
605
|
+
const legacyDays = legacyDaysStr ? Number(legacyDaysStr) : 0;
|
|
606
|
+
const cutoff = new Date(
|
|
607
|
+
now.getTime() - legacyDays * 24 * 60 * 60 * 1000
|
|
605
608
|
);
|
|
606
|
-
|
|
609
|
+
|
|
610
|
+
const exactMatch = existing.find(
|
|
611
|
+
(r) => r.client_id === clientId && r.content === content
|
|
612
|
+
);
|
|
613
|
+
if (exactMatch) {
|
|
614
|
+
return {
|
|
615
|
+
rows: [{ id: exactMatch.id, match_kind: "exact" }],
|
|
616
|
+
};
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
if (legacyDays > 0) {
|
|
620
|
+
const legacyMatch = existing.find(
|
|
621
|
+
(r) =>
|
|
622
|
+
r.client_id === clientId &&
|
|
623
|
+
r.content.endsWith(`] ${content}`) &&
|
|
624
|
+
(!r.created_at || r.created_at > cutoff)
|
|
625
|
+
);
|
|
626
|
+
if (legacyMatch) {
|
|
627
|
+
return {
|
|
628
|
+
rows: [{ id: legacyMatch.id, match_kind: "legacy" }],
|
|
629
|
+
};
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
return { rows: [] };
|
|
607
634
|
}
|
|
608
635
|
// Insert path
|
|
609
636
|
if (sql.startsWith("INSERT INTO memory_nodes")) {
|
|
@@ -636,7 +663,7 @@ describe("ingest dedup option", () => {
|
|
|
636
663
|
expect(inserted[0].content).toBe("fresh content");
|
|
637
664
|
});
|
|
638
665
|
|
|
639
|
-
it("returns the existing row's id when raw content matches", async () => {
|
|
666
|
+
it("returns the existing row's id when raw content matches (exact)", async () => {
|
|
640
667
|
const { db, inserted } = makeMockDb({
|
|
641
668
|
existing: [
|
|
642
669
|
{ id: "mem_existing", client_id: "c", content: "duplicate content" },
|
|
@@ -649,18 +676,21 @@ describe("ingest dedup option", () => {
|
|
|
649
676
|
});
|
|
650
677
|
|
|
651
678
|
expect(out.deduped).toBe(true);
|
|
679
|
+
expect(out.dedupMatchKind).toBe("exact");
|
|
652
680
|
expect(out.id).toBe("mem_existing");
|
|
653
681
|
expect(out.content).toBe("duplicate content");
|
|
654
682
|
expect(inserted).toHaveLength(0); // no insert happened
|
|
655
683
|
});
|
|
656
684
|
|
|
657
|
-
it("matches legacy timestamp-prefixed rows (`[<iso>] <content>`)", async () => {
|
|
685
|
+
it("matches legacy timestamp-prefixed rows (`[<iso>] <content>`) within the default window", async () => {
|
|
658
686
|
const { db, inserted } = makeMockDb({
|
|
659
687
|
existing: [
|
|
660
688
|
{
|
|
661
689
|
id: "mem_legacy",
|
|
662
690
|
client_id: "c",
|
|
663
691
|
content: "[2026-04-26T10:00:00Z] duplicate content",
|
|
692
|
+
// Default window is 7 days; created_at omitted → mocked as
|
|
693
|
+
// "now" so the row is in-window.
|
|
664
694
|
},
|
|
665
695
|
],
|
|
666
696
|
});
|
|
@@ -671,10 +701,89 @@ describe("ingest dedup option", () => {
|
|
|
671
701
|
});
|
|
672
702
|
|
|
673
703
|
expect(out.deduped).toBe(true);
|
|
704
|
+
expect(out.dedupMatchKind).toBe("legacy");
|
|
674
705
|
expect(out.id).toBe("mem_legacy");
|
|
675
706
|
expect(inserted).toHaveLength(0);
|
|
676
707
|
});
|
|
677
708
|
|
|
709
|
+
it("legacy LIKE branch ignores rows older than dedupLegacyWindowDays", async () => {
|
|
710
|
+
const now = new Date("2026-04-27T00:00:00Z");
|
|
711
|
+
const tenDaysAgo = new Date(now.getTime() - 10 * 24 * 60 * 60 * 1000);
|
|
712
|
+
const { db, inserted } = makeMockDb({
|
|
713
|
+
now,
|
|
714
|
+
existing: [
|
|
715
|
+
{
|
|
716
|
+
id: "mem_old",
|
|
717
|
+
client_id: "c",
|
|
718
|
+
content: "[2026-04-17T00:00:00Z] duplicate content",
|
|
719
|
+
created_at: tenDaysAgo,
|
|
720
|
+
},
|
|
721
|
+
],
|
|
722
|
+
});
|
|
723
|
+
|
|
724
|
+
const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
|
|
725
|
+
clientId: "c",
|
|
726
|
+
dedup: true,
|
|
727
|
+
// Default window 7 days — old row should NOT match
|
|
728
|
+
});
|
|
729
|
+
|
|
730
|
+
expect(out.deduped).toBeUndefined();
|
|
731
|
+
expect(inserted).toHaveLength(1);
|
|
732
|
+
});
|
|
733
|
+
|
|
734
|
+
it("dedupLegacyWindowDays: 0 disables the legacy LIKE branch", async () => {
|
|
735
|
+
const { db, inserted, calls } = makeMockDb({
|
|
736
|
+
existing: [
|
|
737
|
+
{
|
|
738
|
+
id: "mem_legacy",
|
|
739
|
+
client_id: "c",
|
|
740
|
+
content: "[2026-04-26T10:00:00Z] duplicate content",
|
|
741
|
+
},
|
|
742
|
+
],
|
|
743
|
+
});
|
|
744
|
+
|
|
745
|
+
const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
|
|
746
|
+
clientId: "c",
|
|
747
|
+
dedup: true,
|
|
748
|
+
dedupLegacyWindowDays: 0,
|
|
749
|
+
});
|
|
750
|
+
|
|
751
|
+
// Legacy match wouldn't fire — row inserts as fresh.
|
|
752
|
+
expect(out.deduped).toBeUndefined();
|
|
753
|
+
expect(inserted).toHaveLength(1);
|
|
754
|
+
|
|
755
|
+
// SQL shouldn't include the LIKE branch at all.
|
|
756
|
+
const dedupQuery = calls.find((c) => c.sql.includes("FROM memory_nodes"));
|
|
757
|
+
expect(dedupQuery.sql).not.toContain("LIKE");
|
|
758
|
+
expect(dedupQuery.params).toHaveLength(2); // [clientId, dedupKey] only
|
|
759
|
+
});
|
|
760
|
+
|
|
761
|
+
it("dedupLegacyWindowDays: 30 widens the window — old rows match", async () => {
|
|
762
|
+
const now = new Date("2026-04-27T00:00:00Z");
|
|
763
|
+
const tenDaysAgo = new Date(now.getTime() - 10 * 24 * 60 * 60 * 1000);
|
|
764
|
+
const { db, inserted } = makeMockDb({
|
|
765
|
+
now,
|
|
766
|
+
existing: [
|
|
767
|
+
{
|
|
768
|
+
id: "mem_old",
|
|
769
|
+
client_id: "c",
|
|
770
|
+
content: "[2026-04-17T00:00:00Z] duplicate content",
|
|
771
|
+
created_at: tenDaysAgo,
|
|
772
|
+
},
|
|
773
|
+
],
|
|
774
|
+
});
|
|
775
|
+
|
|
776
|
+
const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
|
|
777
|
+
clientId: "c",
|
|
778
|
+
dedup: true,
|
|
779
|
+
dedupLegacyWindowDays: 30,
|
|
780
|
+
});
|
|
781
|
+
|
|
782
|
+
expect(out.deduped).toBe(true);
|
|
783
|
+
expect(out.dedupMatchKind).toBe("legacy");
|
|
784
|
+
expect(inserted).toHaveLength(0);
|
|
785
|
+
});
|
|
786
|
+
|
|
678
787
|
it("dedup off (default) still inserts on duplicate content", async () => {
|
|
679
788
|
const { db, inserted } = makeMockDb({
|
|
680
789
|
existing: [
|
|
@@ -746,7 +855,7 @@ describe("ingest dedup option", () => {
|
|
|
746
855
|
if (sql.includes("SELECT id FROM memory_layers")) {
|
|
747
856
|
return { rows: [{ id: "layer-1" }] };
|
|
748
857
|
}
|
|
749
|
-
if (sql.includes("
|
|
858
|
+
if (sql.includes("FROM memory_nodes")) {
|
|
750
859
|
dupCheckSql = sql;
|
|
751
860
|
throw new Error("DB unreachable");
|
|
752
861
|
}
|
|
@@ -25,19 +25,29 @@ import { distill } from "./distill.js";
|
|
|
25
25
|
* byte-equal content already exists for this `client_id`. Use for
|
|
26
26
|
* retry-safe pipelines where the same logical event may be processed
|
|
27
27
|
* twice (queue retries, consumer fan-out). Returns the existing row's
|
|
28
|
-
* id with `{deduped: true}` instead
|
|
29
|
-
* not a semantic similarity match.
|
|
30
|
-
* fails, ingest proceeds (worst case:
|
|
31
|
-
* `dedup:false` behaviour). The eventual
|
|
32
|
-
* `UNIQUE(client_id, content_hash)` constraint at
|
|
33
|
-
* this option is the bridge.
|
|
28
|
+
* id with `{deduped: true, dedupMatchKind: "exact"|"legacy"}` instead
|
|
29
|
+
* of inserting. Strict equality — not a semantic similarity match.
|
|
30
|
+
* Best-effort: if the SELECT itself fails, ingest proceeds (worst case:
|
|
31
|
+
* duplicate row, identical to `dedup:false` behaviour). The eventual
|
|
32
|
+
* structural fix is a `UNIQUE(client_id, content_hash)` constraint at
|
|
33
|
+
* the schema level; this option is the bridge.
|
|
34
34
|
* @param {string} [opts.dedupContent] - Optional: the string to dedup
|
|
35
35
|
* against, when it differs from what gets stored. Use when callers
|
|
36
36
|
* wrap the stored content in a non-stable prefix (timestamps, run
|
|
37
37
|
* ids) — pass the raw form here so retries of the same logical event
|
|
38
38
|
* match across runs whose prefixes differ by a few ms. Defaults to
|
|
39
39
|
* `content`.
|
|
40
|
-
* @
|
|
40
|
+
* @param {number} [opts.dedupLegacyWindowDays=7] - How far back the
|
|
41
|
+
* `[<iso>] <content>` legacy-form `LIKE` match scans. Default 7 days.
|
|
42
|
+
* The leading-wildcard `LIKE` can't use a btree index, so without a
|
|
43
|
+
* bound it would scan the whole tenant partition on every ingest as
|
|
44
|
+
* the corpus grows. Real retries land within seconds, so 7 days is
|
|
45
|
+
* generous; the window only exists so the dedup check transitions
|
|
46
|
+
* cleanly when callers stop writing the legacy form. Set `0` to
|
|
47
|
+
* disable the legacy-form match entirely (recommended once the
|
|
48
|
+
* backfill script has run and no caller writes the prefix anymore —
|
|
49
|
+
* strict equality alone is then enough).
|
|
50
|
+
* @returns {Promise<{id: string, content: string, layerId: string, deduped?: boolean, dedupMatchKind?: "exact"|"legacy"}>}
|
|
41
51
|
*/
|
|
42
52
|
export async function ingest(db, ai, llm, content, opts = {}) {
|
|
43
53
|
const clientId = opts.clientId;
|
|
@@ -60,29 +70,62 @@ export async function ingest(db, ai, llm, content, opts = {}) {
|
|
|
60
70
|
|
|
61
71
|
// Optional dedup: skip the insert (and all the embedding/HyDE/distill
|
|
62
72
|
// work that would follow) if a row with byte-equal content already
|
|
63
|
-
// exists for this tenant.
|
|
64
|
-
//
|
|
65
|
-
//
|
|
66
|
-
//
|
|
67
|
-
//
|
|
73
|
+
// exists for this tenant.
|
|
74
|
+
//
|
|
75
|
+
// Two match strategies:
|
|
76
|
+
// - exact: content = $key (uses the (client_id, content) btree if
|
|
77
|
+
// one is present; degrades to a partition scan if not)
|
|
78
|
+
// - legacy: content LIKE '%] ' || $key for `[<iso>] <key>` rows
|
|
79
|
+
// that callers wrote with a timestamp prefix. Bounded by
|
|
80
|
+
// dedupLegacyWindowDays so the leading-wildcard scan
|
|
81
|
+
// stays cheap as the corpus grows.
|
|
82
|
+
//
|
|
83
|
+
// Caller can disable the legacy branch by setting
|
|
84
|
+
// dedupLegacyWindowDays: 0 — once the backfill has run and no caller
|
|
85
|
+
// writes the prefix anymore, strict equality alone is enough.
|
|
68
86
|
if (opts.dedup) {
|
|
69
87
|
const dedupKey =
|
|
70
88
|
typeof opts.dedupContent === "string" ? opts.dedupContent : content;
|
|
89
|
+
const legacyWindowDays =
|
|
90
|
+
opts.dedupLegacyWindowDays === undefined
|
|
91
|
+
? 7
|
|
92
|
+
: Number(opts.dedupLegacyWindowDays);
|
|
71
93
|
try {
|
|
72
|
-
const
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
94
|
+
const sql =
|
|
95
|
+
legacyWindowDays > 0
|
|
96
|
+
? `SELECT id,
|
|
97
|
+
CASE WHEN content = $2 THEN 'exact' ELSE 'legacy' END AS match_kind
|
|
98
|
+
FROM memory_nodes
|
|
99
|
+
WHERE client_id = $1
|
|
100
|
+
AND (
|
|
101
|
+
content = $2
|
|
102
|
+
OR (
|
|
103
|
+
content LIKE '%] ' || $2
|
|
104
|
+
AND created_at > NOW() - ($3 || ' days')::interval
|
|
105
|
+
)
|
|
106
|
+
)
|
|
107
|
+
LIMIT 1`
|
|
108
|
+
: `SELECT id, 'exact' AS match_kind
|
|
109
|
+
FROM memory_nodes
|
|
110
|
+
WHERE client_id = $1
|
|
111
|
+
AND content = $2
|
|
112
|
+
LIMIT 1`;
|
|
113
|
+
const params =
|
|
114
|
+
legacyWindowDays > 0
|
|
115
|
+
? [clientId, dedupKey, String(legacyWindowDays)]
|
|
116
|
+
: [clientId, dedupKey];
|
|
117
|
+
const dupCheck = await db(sql, params);
|
|
79
118
|
if (dupCheck.rows?.length) {
|
|
80
|
-
|
|
119
|
+
const matchKind = dupCheck.rows[0].match_kind || "exact";
|
|
120
|
+
log(
|
|
121
|
+
`dedup: matched existing memory ${dupCheck.rows[0].id} (${matchKind})`
|
|
122
|
+
);
|
|
81
123
|
return {
|
|
82
124
|
id: dupCheck.rows[0].id,
|
|
83
125
|
content,
|
|
84
126
|
layerId,
|
|
85
127
|
deduped: true,
|
|
128
|
+
dedupMatchKind: matchKind,
|
|
86
129
|
};
|
|
87
130
|
}
|
|
88
131
|
} catch (err) {
|