@pentatonic-ai/ai-agent-sdk 0.5.10 → 0.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.5.10",
3
+ "version": "0.5.11",
4
4
  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
@@ -587,23 +587,50 @@ describe("ingest options contract", () => {
587
587
  describe("ingest dedup option", () => {
588
588
  function makeMockDb(state = {}) {
589
589
  const calls = [];
590
- const existing = state.existing || []; // [{ id, client_id, content }, ...]
590
+ // Each existing row optionally carries created_at (Date). Defaults to
591
+ // "now" so legacy-window tests that don't care still match.
592
+ const existing = state.existing || [];
591
593
  const inserted = [];
594
+ const now = state.now || new Date();
592
595
  const db = async (sql, params) => {
593
596
  calls.push({ sql, params });
594
597
  if (sql.includes("SELECT id FROM memory_layers")) {
595
598
  return { rows: [{ id: "layer-1" }] };
596
599
  }
597
- // Dedup pre-check (raw + LIKE legacy form)
598
- if (sql.includes("SELECT id FROM memory_nodes")) {
599
- const [clientId, content] = params;
600
- const match = existing.find(
601
- (r) =>
602
- r.client_id === clientId &&
603
- (r.content === content ||
604
- r.content.endsWith(`] ${content}`)) // legacy timestamp-prefixed
600
+ // Dedup pre-check. Two SQL shapes — one with the legacy LIKE branch
601
+ // (3 params: clientId, dedupKey, legacyDays as string), one without
602
+ // (2 params: clientId, dedupKey).
603
+ if (sql.includes("FROM memory_nodes")) {
604
+ const [clientId, content, legacyDaysStr] = params;
605
+ const legacyDays = legacyDaysStr ? Number(legacyDaysStr) : 0;
606
+ const cutoff = new Date(
607
+ now.getTime() - legacyDays * 24 * 60 * 60 * 1000
605
608
  );
606
- return { rows: match ? [{ id: match.id }] : [] };
609
+
610
+ const exactMatch = existing.find(
611
+ (r) => r.client_id === clientId && r.content === content
612
+ );
613
+ if (exactMatch) {
614
+ return {
615
+ rows: [{ id: exactMatch.id, match_kind: "exact" }],
616
+ };
617
+ }
618
+
619
+ if (legacyDays > 0) {
620
+ const legacyMatch = existing.find(
621
+ (r) =>
622
+ r.client_id === clientId &&
623
+ r.content.endsWith(`] ${content}`) &&
624
+ (!r.created_at || r.created_at > cutoff)
625
+ );
626
+ if (legacyMatch) {
627
+ return {
628
+ rows: [{ id: legacyMatch.id, match_kind: "legacy" }],
629
+ };
630
+ }
631
+ }
632
+
633
+ return { rows: [] };
607
634
  }
608
635
  // Insert path
609
636
  if (sql.startsWith("INSERT INTO memory_nodes")) {
@@ -636,7 +663,7 @@ describe("ingest dedup option", () => {
636
663
  expect(inserted[0].content).toBe("fresh content");
637
664
  });
638
665
 
639
- it("returns the existing row's id when raw content matches", async () => {
666
+ it("returns the existing row's id when raw content matches (exact)", async () => {
640
667
  const { db, inserted } = makeMockDb({
641
668
  existing: [
642
669
  { id: "mem_existing", client_id: "c", content: "duplicate content" },
@@ -649,18 +676,21 @@ describe("ingest dedup option", () => {
649
676
  });
650
677
 
651
678
  expect(out.deduped).toBe(true);
679
+ expect(out.dedupMatchKind).toBe("exact");
652
680
  expect(out.id).toBe("mem_existing");
653
681
  expect(out.content).toBe("duplicate content");
654
682
  expect(inserted).toHaveLength(0); // no insert happened
655
683
  });
656
684
 
657
- it("matches legacy timestamp-prefixed rows (`[<iso>] <content>`)", async () => {
685
+ it("matches legacy timestamp-prefixed rows (`[<iso>] <content>`) within the default window", async () => {
658
686
  const { db, inserted } = makeMockDb({
659
687
  existing: [
660
688
  {
661
689
  id: "mem_legacy",
662
690
  client_id: "c",
663
691
  content: "[2026-04-26T10:00:00Z] duplicate content",
692
+ // Default window is 7 days; created_at omitted → mocked as
693
+ // "now" so the row is in-window.
664
694
  },
665
695
  ],
666
696
  });
@@ -671,10 +701,89 @@ describe("ingest dedup option", () => {
671
701
  });
672
702
 
673
703
  expect(out.deduped).toBe(true);
704
+ expect(out.dedupMatchKind).toBe("legacy");
674
705
  expect(out.id).toBe("mem_legacy");
675
706
  expect(inserted).toHaveLength(0);
676
707
  });
677
708
 
709
+ it("legacy LIKE branch ignores rows older than dedupLegacyWindowDays", async () => {
710
+ const now = new Date("2026-04-27T00:00:00Z");
711
+ const tenDaysAgo = new Date(now.getTime() - 10 * 24 * 60 * 60 * 1000);
712
+ const { db, inserted } = makeMockDb({
713
+ now,
714
+ existing: [
715
+ {
716
+ id: "mem_old",
717
+ client_id: "c",
718
+ content: "[2026-04-17T00:00:00Z] duplicate content",
719
+ created_at: tenDaysAgo,
720
+ },
721
+ ],
722
+ });
723
+
724
+ const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
725
+ clientId: "c",
726
+ dedup: true,
727
+ // Default window 7 days — old row should NOT match
728
+ });
729
+
730
+ expect(out.deduped).toBeUndefined();
731
+ expect(inserted).toHaveLength(1);
732
+ });
733
+
734
+ it("dedupLegacyWindowDays: 0 disables the legacy LIKE branch", async () => {
735
+ const { db, inserted, calls } = makeMockDb({
736
+ existing: [
737
+ {
738
+ id: "mem_legacy",
739
+ client_id: "c",
740
+ content: "[2026-04-26T10:00:00Z] duplicate content",
741
+ },
742
+ ],
743
+ });
744
+
745
+ const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
746
+ clientId: "c",
747
+ dedup: true,
748
+ dedupLegacyWindowDays: 0,
749
+ });
750
+
751
+ // Legacy match wouldn't fire — row inserts as fresh.
752
+ expect(out.deduped).toBeUndefined();
753
+ expect(inserted).toHaveLength(1);
754
+
755
+ // SQL shouldn't include the LIKE branch at all.
756
+ const dedupQuery = calls.find((c) => c.sql.includes("FROM memory_nodes"));
757
+ expect(dedupQuery.sql).not.toContain("LIKE");
758
+ expect(dedupQuery.params).toHaveLength(2); // [clientId, dedupKey] only
759
+ });
760
+
761
+ it("dedupLegacyWindowDays: 30 widens the window — old rows match", async () => {
762
+ const now = new Date("2026-04-27T00:00:00Z");
763
+ const tenDaysAgo = new Date(now.getTime() - 10 * 24 * 60 * 60 * 1000);
764
+ const { db, inserted } = makeMockDb({
765
+ now,
766
+ existing: [
767
+ {
768
+ id: "mem_old",
769
+ client_id: "c",
770
+ content: "[2026-04-17T00:00:00Z] duplicate content",
771
+ created_at: tenDaysAgo,
772
+ },
773
+ ],
774
+ });
775
+
776
+ const out = await ingest(db, mockAi, mockLlm, "duplicate content", {
777
+ clientId: "c",
778
+ dedup: true,
779
+ dedupLegacyWindowDays: 30,
780
+ });
781
+
782
+ expect(out.deduped).toBe(true);
783
+ expect(out.dedupMatchKind).toBe("legacy");
784
+ expect(inserted).toHaveLength(0);
785
+ });
786
+
678
787
  it("dedup off (default) still inserts on duplicate content", async () => {
679
788
  const { db, inserted } = makeMockDb({
680
789
  existing: [
@@ -746,7 +855,7 @@ describe("ingest dedup option", () => {
746
855
  if (sql.includes("SELECT id FROM memory_layers")) {
747
856
  return { rows: [{ id: "layer-1" }] };
748
857
  }
749
- if (sql.includes("SELECT id FROM memory_nodes")) {
858
+ if (sql.includes("FROM memory_nodes")) {
750
859
  dupCheckSql = sql;
751
860
  throw new Error("DB unreachable");
752
861
  }
@@ -25,19 +25,29 @@ import { distill } from "./distill.js";
25
25
  * byte-equal content already exists for this `client_id`. Use for
26
26
  * retry-safe pipelines where the same logical event may be processed
27
27
  * twice (queue retries, consumer fan-out). Returns the existing row's
28
- * id with `{deduped: true}` instead of inserting. Strict equality —
29
- * not a semantic similarity match. Best-effort: if the SELECT itself
30
- * fails, ingest proceeds (worst case: duplicate row, identical to
31
- * `dedup:false` behaviour). The eventual structural fix is a
32
- * `UNIQUE(client_id, content_hash)` constraint at the schema level;
33
- * this option is the bridge.
28
+ * id with `{deduped: true, dedupMatchKind: "exact"|"legacy"}` instead
29
+ * of inserting. Strict equality — not a semantic similarity match.
30
+ * Best-effort: if the SELECT itself fails, ingest proceeds (worst case:
31
+ * duplicate row, identical to `dedup:false` behaviour). The eventual
32
+ * structural fix is a `UNIQUE(client_id, content_hash)` constraint at
33
+ * the schema level; this option is the bridge.
34
34
  * @param {string} [opts.dedupContent] - Optional: the string to dedup
35
35
  * against, when it differs from what gets stored. Use when callers
36
36
  * wrap the stored content in a non-stable prefix (timestamps, run
37
37
  * ids) — pass the raw form here so retries of the same logical event
38
38
  * match across runs whose prefixes differ by a few ms. Defaults to
39
39
  * `content`.
40
- * @returns {Promise<{id: string, content: string, layerId: string, deduped?: boolean}>}
40
+ * @param {number} [opts.dedupLegacyWindowDays=7] - How far back the
41
+ * `[<iso>] <content>` legacy-form `LIKE` match scans. Default 7 days.
42
+ * The leading-wildcard `LIKE` can't use a btree index, so without a
43
+ * bound it would scan the whole tenant partition on every ingest as
44
+ * the corpus grows. Real retries land within seconds, so 7 days is
45
+ * generous; the window only exists so the dedup check transitions
46
+ * cleanly when callers stop writing the legacy form. Set `0` to
47
+ * disable the legacy-form match entirely (recommended once the
48
+ * backfill script has run and no caller writes the prefix anymore —
49
+ * strict equality alone is then enough).
50
+ * @returns {Promise<{id: string, content: string, layerId: string, deduped?: boolean, dedupMatchKind?: "exact"|"legacy"}>}
41
51
  */
42
52
  export async function ingest(db, ai, llm, content, opts = {}) {
43
53
  const clientId = opts.clientId;
@@ -60,29 +70,62 @@ export async function ingest(db, ai, llm, content, opts = {}) {
60
70
 
61
71
  // Optional dedup: skip the insert (and all the embedding/HyDE/distill
62
72
  // work that would follow) if a row with byte-equal content already
63
- // exists for this tenant. The dedup key is `opts.dedupContent` if
64
- // provided (use for callers that wrap the stored form in a non-stable
65
- // prefix like a timestamp), else `content`. The OR-LIKE branch matches
66
- // against legacy `[<iso>] <content>` rows so callers that wrote with a
67
- // timestamp prefix dedup correctly until the legacy corpus ages out.
73
+ // exists for this tenant.
74
+ //
75
+ // Two match strategies:
76
+ // - exact: content = $key (uses the (client_id, content) btree if
77
+ // one is present; degrades to a partition scan if not)
78
+ // - legacy: content LIKE '%] ' || $key for `[<iso>] <key>` rows
79
+ // that callers wrote with a timestamp prefix. Bounded by
80
+ // dedupLegacyWindowDays so the leading-wildcard scan
81
+ // stays cheap as the corpus grows.
82
+ //
83
+ // Caller can disable the legacy branch by setting
84
+ // dedupLegacyWindowDays: 0 — once the backfill has run and no caller
85
+ // writes the prefix anymore, strict equality alone is enough.
68
86
  if (opts.dedup) {
69
87
  const dedupKey =
70
88
  typeof opts.dedupContent === "string" ? opts.dedupContent : content;
89
+ const legacyWindowDays =
90
+ opts.dedupLegacyWindowDays === undefined
91
+ ? 7
92
+ : Number(opts.dedupLegacyWindowDays);
71
93
  try {
72
- const dupCheck = await db(
73
- `SELECT id FROM memory_nodes
74
- WHERE client_id = $1
75
- AND (content = $2 OR content LIKE '%] ' || $2)
76
- LIMIT 1`,
77
- [clientId, dedupKey]
78
- );
94
+ const sql =
95
+ legacyWindowDays > 0
96
+ ? `SELECT id,
97
+ CASE WHEN content = $2 THEN 'exact' ELSE 'legacy' END AS match_kind
98
+ FROM memory_nodes
99
+ WHERE client_id = $1
100
+ AND (
101
+ content = $2
102
+ OR (
103
+ content LIKE '%] ' || $2
104
+ AND created_at > NOW() - ($3 || ' days')::interval
105
+ )
106
+ )
107
+ LIMIT 1`
108
+ : `SELECT id, 'exact' AS match_kind
109
+ FROM memory_nodes
110
+ WHERE client_id = $1
111
+ AND content = $2
112
+ LIMIT 1`;
113
+ const params =
114
+ legacyWindowDays > 0
115
+ ? [clientId, dedupKey, String(legacyWindowDays)]
116
+ : [clientId, dedupKey];
117
+ const dupCheck = await db(sql, params);
79
118
  if (dupCheck.rows?.length) {
80
- log(`dedup: matched existing memory ${dupCheck.rows[0].id}`);
119
+ const matchKind = dupCheck.rows[0].match_kind || "exact";
120
+ log(
121
+ `dedup: matched existing memory ${dupCheck.rows[0].id} (${matchKind})`
122
+ );
81
123
  return {
82
124
  id: dupCheck.rows[0].id,
83
125
  content,
84
126
  layerId,
85
127
  deduped: true,
128
+ dedupMatchKind: matchKind,
86
129
  };
87
130
  }
88
131
  } catch (err) {