@framers/agentos-ext-topicality 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,23 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Framers
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+
@@ -0,0 +1,152 @@
1
+ /**
2
+ * @fileoverview TopicDriftTracker — session-level EMA drift detection for topicality guardrails.
3
+ *
4
+ * This module tracks whether a conversation session is gradually drifting away
5
+ * from its allowed topics by maintaining a per-session **running embedding**
6
+ * that is updated with each new message using an Exponential Moving Average
7
+ * (EMA).
8
+ *
9
+ * ### Why EMA?
10
+ * A simple "last-message" check is too noisy: a single off-topic message in an
11
+ * otherwise on-topic conversation should not trigger a hard block. EMA
12
+ * smooths the signal so that sustained drift is detected while brief tangents
13
+ * are tolerated.
14
+ *
15
+ * The update formula is:
16
+ * ```
17
+ * running[i] = alpha * message[i] + (1 - alpha) * running[i]
18
+ * ```
19
+ * A smaller `alpha` means the running vector changes slowly (long memory);
20
+ * a larger `alpha` means it reacts quickly to each new message.
21
+ *
22
+ * ### Drift decision
23
+ * After each EMA update the tracker checks whether the running embedding is
24
+ * "on-topic" by calling {@link TopicEmbeddingIndex.isOnTopicByVector}. If the
25
+ * check fails the `driftStreak` counter is incremented; if it passes the
26
+ * streak resets to zero. When `driftStreak >= driftStreakLimit` the result
27
+ * `driftLimitExceeded` is set to `true`.
28
+ *
29
+ * ### Session management
30
+ * Sessions are stored in an in-memory `Map<sessionId, TopicState>`. To
31
+ * prevent unbounded memory growth:
32
+ * - Stale sessions (inactive for > `sessionTimeoutMs`) are pruned lazily
33
+ * when `update()` sees a new session ID while `map.size >= maxSessions`.
34
+ * - Callers can force a full clear via {@link clear}.
35
+ *
36
+ * @module topicality/TopicDriftTracker
37
+ */
38
+ import type { TopicEmbeddingIndex } from './TopicEmbeddingIndex';
39
+ import { type DriftConfig, type DriftResult, type TopicState } from './types';
40
+ /**
41
+ * Tracks per-session topic drift using EMA-blended running embeddings.
42
+ *
43
+ * Instantiate once per agent process (not per conversation) since the tracker
44
+ * manages many concurrent sessions internally.
45
+ *
46
+ * @example
47
+ * ```ts
48
+ * const tracker = new TopicDriftTracker({ alpha: 0.4, driftStreakLimit: 2 });
49
+ *
50
+ * // In your message handler:
51
+ * const embedding = await embed(userMessage);
52
+ * const result = tracker.update('session-abc', embedding, allowedIndex);
53
+ *
54
+ * if (result.driftLimitExceeded) {
55
+ * // Take configured action: redirect, warn, or block.
56
+ * }
57
+ * ```
58
+ */
59
+ export declare class TopicDriftTracker {
60
+ /** Fully resolved drift configuration (defaults merged with caller overrides). */
61
+ private readonly config;
62
+ /**
63
+ * In-memory session store.
64
+ * Key: caller-supplied session ID (e.g. conversation UUID).
65
+ * Value: mutable {@link TopicState} for that session (mutated in place by `update`).
66
+ */
67
+ private readonly sessions;
68
+ /**
69
+ * Creates a new `TopicDriftTracker`.
70
+ *
71
+ * @param config - Optional partial override of {@link DEFAULT_DRIFT_CONFIG}.
72
+ * Any fields not provided fall back to their default values. Pass an
73
+ * empty object `{}` or omit entirely to use all defaults.
74
+ *
75
+ * @example
76
+ * ```ts
77
+ * // Use defaults
78
+ * const tracker = new TopicDriftTracker();
79
+ *
80
+ * // Override only alpha and driftStreakLimit
81
+ * const strictTracker = new TopicDriftTracker({ alpha: 0.5, driftStreakLimit: 2 });
82
+ * ```
83
+ */
84
+ constructor(config?: Partial<DriftConfig>);
85
+ /**
86
+ * Processes a new message embedding for the given session and returns the
87
+ * current drift assessment.
88
+ *
89
+ * ### Steps performed
90
+ * 1. **Retrieve or create** the session state. On the very first message
91
+ * the running embedding is initialised to a shallow copy of
92
+ * `messageEmbedding` (no EMA applied yet).
93
+ * 2. **Apply EMA** (from the second message onwards):
94
+ * `running[i] = alpha * message[i] + (1 - alpha) * running[i]`
95
+ * 3. **Check topic alignment** using
96
+ * `allowedIndex.isOnTopicByVector(running, driftThreshold)`, and read the nearest topic from `allowedIndex.matchByVector(running)`.
97
+ * 4. **Update streak** — increment `driftStreak` on off-topic, reset to 0
98
+ * on on-topic.
99
+ * 5. **Lazy-prune** — performed at the very start of the call, ahead of step 1:
100
+ * when `sessionId` is new and `map.size >= maxSessions`, stale sessions are
101
+ * removed (never during updates of existing sessions).
102
+ * 6. **Persist** the updated state and return a {@link DriftResult}.
103
+ *
104
+ * @param sessionId - Unique identifier for the conversation session.
105
+ * Typically a UUID or user ID. Must be consistent across messages in the
106
+ * same conversation.
107
+ * @param messageEmbedding - Pre-computed numeric embedding of the current
108
+ * message. Must have the same dimensionality as the topic centroids used
109
+ * by `allowedIndex`, and as earlier embeddings passed for the same session.
110
+ * @param allowedIndex - Built {@link TopicEmbeddingIndex} containing the
111
+ * allowed topics to check against. The tracker calls only the synchronous
112
+ * `isOnTopicByVector` and `matchByVector` (no async operations, no extra embedding calls).
113
+ * @returns A {@link DriftResult} describing whether the session is currently
114
+ * drifting and by how much. `nearestTopic` is `null` and `currentSimilarity` is `0` when the index returns no matches.
115
+ */
116
+ update(sessionId: string, messageEmbedding: number[], allowedIndex: TopicEmbeddingIndex): DriftResult;
117
+ /**
118
+ * Removes sessions that have been inactive for longer than `sessionTimeoutMs`.
119
+ *
120
+ * {@link update} calls this lazily when a new session arrives while the map
121
+ * already holds `maxSessions` or more entries; callers may also invoke it
122
+ * directly to trigger an immediate cleanup (e.g. in a scheduled maintenance job).
123
+ *
124
+ * Pruning is O(n) in the number of sessions. Only stale sessions are removed, so the map can still exceed `maxSessions` when none are stale.
125
+ */
126
+ pruneStale(): void;
127
+ /**
128
+ * Removes all sessions from the tracker unconditionally.
129
+ *
130
+ * Useful for graceful shutdown, testing teardown, or resetting the agent
131
+ * context between evaluation runs.
132
+ */
133
+ clear(): void;
134
+ /**
135
+ * Returns the current number of sessions held in the internal map
136
+ * (including stale ones that have not been pruned yet). Useful for observability and testing.
137
+ *
138
+ * @internal
139
+ */
140
+ get sessionCount(): number;
141
+ /**
142
+ * Returns a copy of the {@link TopicState} for the given session, or
143
+ * `undefined` if the session does not exist.
144
+ *
145
+ * Exposed for unit-testing state inspection. The returned object and its
146
+ * `runningEmbedding` array are fresh copies — mutating them does not affect the tracker's internal state.
147
+ *
148
+ * @internal
149
+ */
150
+ getState(sessionId: string): TopicState | undefined;
151
+ }
152
+ //# sourceMappingURL=TopicDriftTracker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TopicDriftTracker.d.ts","sourceRoot":"","sources":["../src/TopicDriftTracker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AACjE,OAAO,EAEL,KAAK,WAAW,EAChB,KAAK,WAAW,EAEhB,KAAK,UAAU,EAChB,MAAM,SAAS,CAAC;AAMjB;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,iBAAiB;IAC5B,kFAAkF;IAClF,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IAErC;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAsC;IAM/D;;;;;;;;;;;;;;;OAeG;gBACS,MAAM,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC;IAUzC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA8BG;IACH,MAAM,CACJ,SAAS,EAAE,MAAM,EACjB,gBAAgB,EAAE,MAAM,EAAE,EAC1B,YAAY,EAAE,mBAAmB,GAChC,WAAW;IAiGd;;;;;;;;OAQG;IACH,UAAU,IAAI,IAAI;IAelB;;;;;OAKG;IACH,KAAK,IAAI,IAAI;IAQb;;;;;OAKG;IACH,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED;;;;;;;;OAQG;IACH,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;CAMpD"}
@@ -0,0 +1,265 @@
1
+ /**
2
+ * @fileoverview TopicDriftTracker — session-level EMA drift detection for topicality guardrails.
3
+ *
4
+ * This module tracks whether a conversation session is gradually drifting away
5
+ * from its allowed topics by maintaining a per-session **running embedding**
6
+ * that is updated with each new message using an Exponential Moving Average
7
+ * (EMA).
8
+ *
9
+ * ### Why EMA?
10
+ * A simple "last-message" check is too noisy: a single off-topic message in an
11
+ * otherwise on-topic conversation should not trigger a hard block. EMA
12
+ * smooths the signal so that sustained drift is detected while brief tangents
13
+ * are tolerated.
14
+ *
15
+ * The update formula is:
16
+ * ```
17
+ * running[i] = alpha * message[i] + (1 - alpha) * running[i]
18
+ * ```
19
+ * A smaller `alpha` means the running vector changes slowly (long memory);
20
+ * a larger `alpha` means it reacts quickly to each new message.
21
+ *
22
+ * ### Drift decision
23
+ * After each EMA update the tracker checks whether the running embedding is
24
+ * "on-topic" by calling {@link TopicEmbeddingIndex.isOnTopicByVector}. If the
25
+ * check fails the `driftStreak` counter is incremented; if it passes the
26
+ * streak resets to zero. When `driftStreak >= driftStreakLimit` the result
27
+ * `driftLimitExceeded` is set to `true`.
28
+ *
29
+ * ### Session management
30
+ * Sessions are stored in an in-memory `Map<sessionId, TopicState>`. To
31
+ * prevent unbounded memory growth:
32
+ * - Stale sessions (inactive for > `sessionTimeoutMs`) are pruned lazily
33
+ * when `update()` sees a new session ID while `map.size >= maxSessions`.
34
+ * - Callers can force a full clear via {@link clear}.
35
+ *
36
+ * @module topicality/TopicDriftTracker
37
+ */
38
+ import { DEFAULT_DRIFT_CONFIG, } from './types';
39
+ // ---------------------------------------------------------------------------
40
+ // TopicDriftTracker
41
+ // ---------------------------------------------------------------------------
42
+ /**
43
+ * Tracks per-session topic drift using EMA-blended running embeddings.
44
+ *
45
+ * Instantiate once per agent process (not per conversation) since the tracker
46
+ * manages many concurrent sessions internally.
47
+ *
48
+ * @example
49
+ * ```ts
50
+ * const tracker = new TopicDriftTracker({ alpha: 0.4, driftStreakLimit: 2 });
51
+ *
52
+ * // In your message handler:
53
+ * const embedding = await embed(userMessage);
54
+ * const result = tracker.update('session-abc', embedding, allowedIndex);
55
+ *
56
+ * if (result.driftLimitExceeded) {
57
+ * // Take configured action: redirect, warn, or block.
58
+ * }
59
+ * ```
60
+ */
61
+ export class TopicDriftTracker {
62
+ /** Fully resolved drift configuration (defaults merged with caller overrides). */
63
+ config;
64
+ /**
65
+ * In-memory session store.
66
+ * Key: caller-supplied session ID (e.g. conversation UUID).
67
+ * Value: mutable {@link TopicState} for that session.
68
+ */
69
+ sessions = new Map();
70
+ // -------------------------------------------------------------------------
71
+ // Constructor
72
+ // -------------------------------------------------------------------------
73
+ /**
74
+ * Creates a new `TopicDriftTracker`.
75
+ *
76
+ * @param config - Optional partial override of {@link DEFAULT_DRIFT_CONFIG}.
77
+ * Any fields not provided fall back to their default values. Pass an
78
+ * empty object `{}` or omit entirely to use all defaults.
79
+ *
80
+ * @example
81
+ * ```ts
82
+ * // Use defaults
83
+ * const tracker = new TopicDriftTracker();
84
+ *
85
+ * // Override only alpha and streakLimit
86
+ * const strictTracker = new TopicDriftTracker({ alpha: 0.5, driftStreakLimit: 2 });
87
+ * ```
88
+ */
89
+ constructor(config) {
90
+ // Merge caller overrides with defaults — undefined fields are taken from
91
+ // DEFAULT_DRIFT_CONFIG, preserving all caller-supplied values exactly.
92
+ this.config = { ...DEFAULT_DRIFT_CONFIG, ...(config ?? {}) };
93
+ }
94
+ // -------------------------------------------------------------------------
95
+ // Public API — update
96
+ // -------------------------------------------------------------------------
97
+ /**
98
+ * Processes a new message embedding for the given session and returns the
99
+ * current drift assessment.
100
+ *
101
+ * ### Steps performed
102
+ * 1. **Retrieve or create** the session state. On the very first message
103
+ * the running embedding is initialised to a shallow copy of
104
+ * `messageEmbedding` (no EMA applied yet).
105
+ * 2. **Apply EMA** (from the second message onwards):
106
+ * `running[i] = alpha * message[i] + (1 - alpha) * running[i]`
107
+ * 3. **Check topic alignment** using
108
+ * `allowedIndex.isOnTopicByVector(running, driftThreshold)`.
109
+ * 4. **Update streak** — increment `driftStreak` on off-topic, reset to 0
110
+ * on on-topic.
111
+ * 5. **Lazy-prune** stale sessions when `map.size > maxSessions` before
112
+ * creating a new session (never during updates of existing sessions to
113
+ * avoid deleting the session we are currently updating).
114
+ * 6. **Persist** the updated state and return a {@link DriftResult}.
115
+ *
116
+ * @param sessionId - Unique identifier for the conversation session.
117
+ * Typically a UUID or user ID. Must be consistent across messages in the
118
+ * same conversation.
119
+ * @param messageEmbedding - Pre-computed numeric embedding of the current
120
+ * message. Must have the same dimensionality as the topic centroids used
121
+ * by `allowedIndex`.
122
+ * @param allowedIndex - Built {@link TopicEmbeddingIndex} containing the
123
+ * allowed topics to check against. The tracker calls only
124
+ * `isOnTopicByVector` (no async operations, no extra embedding calls).
125
+ * @returns A {@link DriftResult} describing whether the session is currently
126
+ * drifting and by how much.
127
+ */
128
+ update(sessionId, messageEmbedding, allowedIndex) {
129
+ const now = Date.now();
130
+ const isNewSession = !this.sessions.has(sessionId);
131
+ // Lazy prune: only trigger when a NEW session would push us over the limit.
132
+ // We do not prune during updates of existing sessions to avoid accidentally
133
+ // deleting a session that is currently being processed.
134
+ if (isNewSession && this.sessions.size >= this.config.maxSessions) {
135
+ this.pruneStale();
136
+ }
137
+ let state;
138
+ if (isNewSession) {
139
+ // First message in this session — initialise running embedding to a copy
140
+ // of the current message embedding. A copy prevents external mutation
141
+ // of the array from silently corrupting the tracker state.
142
+ state = {
143
+ runningEmbedding: [...messageEmbedding],
144
+ messageCount: 0, // will be incremented below
145
+ lastTopicScore: 0,
146
+ driftStreak: 0,
147
+ lastSeenAt: now,
148
+ };
149
+ }
150
+ else {
151
+ // Retrieve existing state — guaranteed non-null by the `has()` check above.
152
+ state = this.sessions.get(sessionId);
153
+ // Apply the EMA update in-place.
154
+ // running[i] = alpha * message[i] + (1 - alpha) * running[i]
155
+ const alpha = this.config.alpha;
156
+ const oneMinusAlpha = 1 - alpha;
157
+ for (let i = 0; i < state.runningEmbedding.length; i++) {
158
+ state.runningEmbedding[i] =
159
+ alpha * messageEmbedding[i] + oneMinusAlpha * state.runningEmbedding[i];
160
+ }
161
+ }
162
+ // Increment message counter and timestamp.
163
+ state.messageCount += 1;
164
+ state.lastSeenAt = now;
165
+ // -----------------------------------------------------------------------
166
+ // Topic alignment check
167
+ // -----------------------------------------------------------------------
168
+ // Check whether the (now-updated) running embedding is on-topic.
169
+ const onTopic = allowedIndex.isOnTopicByVector(state.runningEmbedding, this.config.driftThreshold);
170
+ // Retrieve the best-match details from the index for the result payload.
171
+ // matchByVector is synchronous and does not re-embed anything.
172
+ const topMatches = allowedIndex.matchByVector(state.runningEmbedding);
173
+ const nearestTopic = topMatches.length > 0 ? topMatches[0] : null;
174
+ const currentSimilarity = nearestTopic?.similarity ?? 0;
175
+ // Store the latest similarity score for observability.
176
+ state.lastTopicScore = currentSimilarity;
177
+ // -----------------------------------------------------------------------
178
+ // Drift streak management
179
+ // -----------------------------------------------------------------------
180
+ if (onTopic) {
181
+ // Good message — reset the drift counter.
182
+ state.driftStreak = 0;
183
+ }
184
+ else {
185
+ // Off-topic message — accumulate the streak.
186
+ state.driftStreak += 1;
187
+ }
188
+ const driftLimitExceeded = state.driftStreak >= this.config.driftStreakLimit;
189
+ // -----------------------------------------------------------------------
190
+ // Persist state and return result
191
+ // -----------------------------------------------------------------------
192
+ // Always write back (even for existing sessions, since we mutated in-place
193
+ // for the EMA; for new sessions we need to insert).
194
+ this.sessions.set(sessionId, state);
195
+ return {
196
+ onTopic,
197
+ currentSimilarity,
198
+ nearestTopic,
199
+ driftStreak: state.driftStreak,
200
+ driftLimitExceeded,
201
+ };
202
+ }
203
+ // -------------------------------------------------------------------------
204
+ // Public API — pruneStale
205
+ // -------------------------------------------------------------------------
206
+ /**
207
+ * Removes sessions that have been inactive for longer than `sessionTimeoutMs`.
208
+ *
209
+ * This is called lazily inside {@link update} when the session map exceeds
210
+ * `maxSessions`, but callers may invoke it directly to trigger an immediate
211
+ * cleanup (e.g. in a scheduled maintenance job).
212
+ *
213
+ * Pruning is O(n) in the number of active sessions.
214
+ */
215
+ pruneStale() {
216
+ const now = Date.now();
217
+ const timeoutMs = this.config.sessionTimeoutMs;
218
+ for (const [id, state] of this.sessions) {
219
+ if (now - state.lastSeenAt > timeoutMs) {
220
+ this.sessions.delete(id);
221
+ }
222
+ }
223
+ }
224
+ // -------------------------------------------------------------------------
225
+ // Public API — clear
226
+ // -------------------------------------------------------------------------
227
+ /**
228
+ * Removes all sessions from the tracker unconditionally.
229
+ *
230
+ * Useful for graceful shutdown, testing teardown, or resetting the agent
231
+ * context between evaluation runs.
232
+ */
233
+ clear() {
234
+ this.sessions.clear();
235
+ }
236
+ // -------------------------------------------------------------------------
237
+ // Internal helpers (exposed for testing via package-private pattern)
238
+ // -------------------------------------------------------------------------
239
+ /**
240
+ * Returns the current number of active sessions in the internal map.
241
+ * Useful for observability and testing.
242
+ *
243
+ * @internal
244
+ */
245
+ get sessionCount() {
246
+ return this.sessions.size;
247
+ }
248
+ /**
249
+ * Returns a copy of the {@link TopicState} for the given session, or
250
+ * `undefined` if the session does not exist.
251
+ *
252
+ * Exposed for unit-testing state inspection. The returned object is a
253
+ * shallow copy — mutating it does not affect the tracker's internal state.
254
+ *
255
+ * @internal
256
+ */
257
+ getState(sessionId) {
258
+ const state = this.sessions.get(sessionId);
259
+ if (!state)
260
+ return undefined;
261
+ // Shallow copy to prevent callers from accidentally mutating tracker state.
262
+ return { ...state, runningEmbedding: [...state.runningEmbedding] };
263
+ }
264
+ }
265
+ //# sourceMappingURL=TopicDriftTracker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TopicDriftTracker.js","sourceRoot":"","sources":["../src/TopicDriftTracker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAGH,OAAO,EACL,oBAAoB,GAKrB,MAAM,SAAS,CAAC;AAEjB,8EAA8E;AAC9E,oBAAoB;AACpB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,OAAO,iBAAiB;IAC5B,kFAAkF;IACjE,MAAM,CAAc;IAErC;;;;OAIG;IACc,QAAQ,GAA4B,IAAI,GAAG,EAAE,CAAC;IAE/D,4EAA4E;IAC5E,cAAc;IACd,4EAA4E;IAE5E;;;;;;;;;;;;;;;OAeG;IACH,YAAY,MAA6B;QACvC,yEAAyE;QACzE,uEAAuE;QACvE,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,oBAAoB,EAAE,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC,EAAE,CAAC;IAC/D,CAAC;IAED,4EAA4E;IAC5E,sBAAsB;IACtB,4EAA4E;IAE5E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA8BG;IACH,MAAM,CACJ,SAAiB,EACjB,gBAA0B,EAC1B,YAAiC;QAEjC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAEnD,4EAA4E;QAC5E,4EAA4E;QAC5E,wDAAwD;QACxD,IAAI,YAAY,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAClE,IAAI,CAAC,UAAU,EAAE,CAAC;QACpB,CAAC;QAED,IAAI,KAAiB,CAAC;QAEtB,IAAI,YAAY,EAAE,CAAC;YACjB,yEAAyE;YACzE,uEAAuE;YACvE,2DAA2D;YAC3D,KAAK,GAAG;gBACN,gBAAgB,EAAE,CAAC,GAAG,gBAAgB,CAAC;gBACvC,YAAY,EAAE,CAAC,EAAE,4BAA4B;gBAC7C,cAAc,EAAE,CAAC;gBACjB,WAAW,EAAE,CAAC;gBACd,UAAU,EAAE,GAAG;aAChB,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,4EAA4E;YAC5E,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAE,CAAC;YAEtC,iCAAiC;YACjC,6DAA6D;YAC7D,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;YAChC,MAAM,aAAa,GAAG,CAAC,GAAG,KAAK,CAAC;YAEhC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,gBAAgB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvD,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC;oBACvB,KAAK,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,aAAa,GAAG,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QAED,2CAA2C;QAC3C,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC;QACxB,KAAK,CAAC,UAAU,GAAG,GAAG,CAAC;QAEvB,0EAA0E;QAC1E,wBAAwB;QACxB,0EAA0E;QAE1E,iEAAiE;QACjE,MAAM,OAAO,GAAG,YAAY,CAAC,iBAAiB,CAC5C,KAAK,CAAC,gBAAgB,EACtB,IAAI,CAAC,MAAM,CAAC,cAAc,CAC3B,CAAC;QAEF,yEAAyE;QACzE,+DAA+D;QAC/D,MAAM,UAAU,GAAG,YAAY,CAA
C,aAAa,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QACtE,MAAM,YAAY,GAAsB,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACrF,MAAM,iBAAiB,GAAG,YAAY,EAAE,UAAU,IAAI,CAAC,CAAC;QAExD,uDAAuD;QACvD,KAAK,CAAC,cAAc,GAAG,iBAAiB,CAAC;QAEzC,0EAA0E;QAC1E,0BAA0B;QAC1B,0EAA0E;QAE1E,IAAI,OAAO,EAAE,CAAC;YACZ,0CAA0C;YAC1C,KAAK,CAAC,WAAW,GAAG,CAAC,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,6CAA6C;YAC7C,KAAK,CAAC,WAAW,IAAI,CAAC,CAAC;QACzB,CAAC;QAED,MAAM,kBAAkB,GAAG,KAAK,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC;QAE7E,0EAA0E;QAC1E,kCAAkC;QAClC,0EAA0E;QAE1E,2EAA2E;QAC3E,oDAAoD;QACpD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAEpC,OAAO;YACL,OAAO;YACP,iBAAiB;YACjB,YAAY;YACZ,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,kBAAkB;SACnB,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,0BAA0B;IAC1B,4EAA4E;IAE5E;;;;;;;;OAQG;IACH,UAAU;QACR,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC;QAE/C,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACxC,IAAI,GAAG,GAAG,KAAK,CAAC,UAAU,GAAG,SAAS,EAAE,CAAC;gBACvC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,qBAAqB;IACrB,4EAA4E;IAE5E;;;;;OAKG;IACH,KAAK;QACH,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC;IACxB,CAAC;IAED,4EAA4E;IAC5E,qEAAqE;IACrE,4EAA4E;IAE5E;;;;;OAKG;IACH,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;IAC5B,CAAC;IAED;;;;;;;;OAQG;IACH,QAAQ,CAAC,SAAiB;QACxB,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAC3C,IAAI,CAAC,KAAK;YAAE,OAAO,SAAS,CAAC;QAC7B,4EAA4E;QAC5E,OAAO,EAAE,GAAG,KAAK,EAAE,gBAAgB,EAAE,CAAC,GAAG,KAAK,CAAC,gBAAgB,CAAC,EAAE,CAAC;IACrE,CAAC;CACF"}
@@ -0,0 +1,160 @@
1
+ /**
2
+ * @fileoverview TopicEmbeddingIndex — semantic similarity lookup for topic guardrails.
3
+ *
4
+ * This module implements a lightweight in-memory embedding index that:
5
+ *
6
+ * 1. **Builds** per-topic centroid embeddings from descriptions + examples.
7
+ * 2. **Matches** an arbitrary embedding or text string against all topic centroids
8
+ * using cosine similarity.
9
+ * 3. **Answers** boolean on-topic queries at a configurable similarity threshold.
10
+ *
11
+ * ### How centroids are built
12
+ * For each {@link TopicDescriptor} the index concatenates:
13
+ * ```
14
+ * texts = [descriptor.description, ...descriptor.examples]
15
+ * ```
16
+ * All topics are embedded in a single batch call to `embeddingFn` to minimise
17
+ * round-trips. The centroid for a topic is the component-wise average (mean)
18
+ * of all its embedding vectors.
19
+ *
20
+ * ### Similarity scoring
21
+ * Raw cosine similarity can be negative when vectors point in opposite directions.
22
+ * `matchByVector` clamps scores to `Math.max(0, similarity)` so that all
23
+ * {@link TopicMatch} values represent non-negative relevance scores.
24
+ *
25
+ * @module topicality/TopicEmbeddingIndex
26
+ */
27
+ import type { TopicDescriptor, TopicMatch } from './types';
28
+ /**
29
+ * Semantic embedding index for topicality guardrail matching.
30
+ *
31
+ * The index is intentionally **lazy** — it holds no embeddings until
32
+ * {@link build} is called. This makes instantiation cheap and lets the
33
+ * caller defer the (potentially expensive) batch embedding call until the
34
+ * agent's first message.
35
+ *
36
+ * @example
37
+ * ```ts
38
+ * const index = new TopicEmbeddingIndex(async (texts) => {
39
+ * const res = await openai.embeddings.create({ model: 'text-embedding-3-small', input: texts });
40
+ * return res.data.map(d => d.embedding);
41
+ * });
42
+ *
43
+ * await index.build(TOPIC_PRESETS.customerSupport);
44
+ *
45
+ * const matches = await index.match('How do I cancel my subscription?');
46
+ * // → [{ topicId: 'billing', topicName: 'Billing & Payments', similarity: 0.82 }, ...]
47
+ *
48
+ * const onTopic = await index.isOnTopic('Tell me a joke', 0.35);
49
+ * // → false (a joke doesn't match any customer-support topic)
50
+ * ```
51
+ */
52
+ export declare class TopicEmbeddingIndex {
53
+ /**
54
+ * Caller-supplied batch embedding function.
55
+ * Invoked once per {@link build} call with the texts of all topics batched into a single array. NOTE(review): presumably also used by {@link match} / {@link isOnTopic} to embed their input — confirm against the implementation.
56
+ */
57
+ private readonly embeddingFn;
58
+ /**
59
+ * Internal store mapping `topicId → TopicEntry`.
60
+ * Populated by {@link build}; empty until then.
61
+ */
62
+ private readonly entries;
63
+ /** Whether {@link build} has been called and completed successfully. */
64
+ private built;
65
+ /**
66
+ * Creates a new `TopicEmbeddingIndex`.
67
+ *
68
+ * @param embeddingFn - Async function that converts an array of text strings
69
+ * into corresponding numeric embedding vectors. All returned vectors must
70
+ * share the same dimensionality. The function is called exactly **once**
71
+ * per {@link build} invocation with all texts for all topics batched
72
+ * together.
73
+ */
74
+ constructor(embeddingFn: (texts: string[]) => Promise<number[][]>);
75
+ /**
76
+ * Embeds all topic descriptions and examples, computes per-topic centroid
77
+ * embeddings, and stores them in the internal index.
78
+ *
79
+ * Calling `build()` a second time replaces the existing index entirely,
80
+ * allowing hot-reloading of topic configurations without recreating the
81
+ * instance.
82
+ *
83
+ * ### Centroid computation
84
+ * For each topic we collect `[description, ...examples]` as a list of
85
+ * strings, embed them all in one batch, then average the resulting vectors
86
+ * component-wise to produce a single representative centroid.
87
+ *
88
+ * All topics are embedded in a **single batch call** to minimise latency.
89
+ *
90
+ * @param topics - Array of {@link TopicDescriptor} objects to index.
91
+ * An empty array is valid — the index will simply return no matches.
92
+ * @returns A promise that resolves once all embeddings are computed and
93
+ * stored. Rejects if `embeddingFn` throws or returns vectors of
94
+ * mismatched length.
95
+ */
96
+ build(topics: TopicDescriptor[]): Promise<void>;
97
+ /**
98
+ * Computes similarity between a pre-computed embedding vector and all topic
99
+ * centroids **without** making any additional embedding calls.
100
+ *
101
+ * This is the hot path invoked by {@link TopicDriftTracker}, which maintains
102
+ * its own running embedding and never needs to re-embed.
103
+ *
104
+ * Results are clamped to `[0, 1]` (negative cosine → 0) and sorted
105
+ * descending by similarity.
106
+ *
107
+ * @param embedding - A numeric vector with the same dimensionality as the
108
+ * centroids produced during {@link build}.
109
+ * @returns Array of {@link TopicMatch} objects sorted by similarity
110
+ * descending. Returns an empty array if the index was not yet built or
111
+ * contains no topics.
112
+ */
113
+ matchByVector(embedding: number[]): TopicMatch[];
114
+ /**
115
+ * Embeds `text` and returns similarity scores against all topic centroids.
116
+ *
117
+ * This is a convenience wrapper that handles the embedding step. If you
118
+ * already have an embedding (e.g. from the drift tracker's running vector)
119
+ * prefer {@link matchByVector} to avoid a redundant embedding call.
120
+ *
121
+ * @param text - The user message or assistant output to evaluate.
122
+ * @returns A promise resolving to {@link TopicMatch}[] sorted by similarity descending.
123
+ */
124
+ match(text: string): Promise<TopicMatch[]>;
125
+ /**
126
+ * Returns `true` if the given embedding vector scores above `threshold`
127
+ * against **at least one** topic in the index.
128
+ *
129
+ * Uses {@link matchByVector} internally so no additional embedding call is
130
+ * made.
131
+ *
132
+ * @param embedding - Pre-computed numeric vector.
133
+ * @param threshold - Similarity threshold (in `[0, 1]`); a topic counts as a
134
+ * match only when its similarity is strictly greater than this value.
135
+ * @returns `true` if any topic centroid has similarity > threshold; otherwise `false`.
136
+ */
137
+ isOnTopicByVector(embedding: number[], threshold: number): boolean;
138
+ /**
139
+ * Embeds `text` and returns `true` if it scores above `threshold` against
140
+ * at least one allowed topic.
141
+ *
142
+ * @param text - The text to evaluate.
143
+ * @param threshold - Cosine-similarity threshold the text must score above to be considered on-topic.
144
+ * @returns A promise resolving to `true` if on-topic, `false` otherwise.
145
+ */
146
+ isOnTopic(text: string, threshold: number): Promise<boolean>;
147
+ /**
148
+ * Whether {@link build} has been called and completed successfully.
149
+ *
150
+ * Use this to guard against calling {@link match} or {@link matchByVector}
151
+ * before the index is ready.
152
+ *
153
+ * @example
154
+ * ```ts
155
+ * if (!index.isBuilt) await index.build(topics);
156
+ * ```
157
+ */
158
+ get isBuilt(): boolean;
159
+ }
160
+ //# sourceMappingURL=TopicEmbeddingIndex.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TopicEmbeddingIndex.d.ts","sourceRoot":"","sources":["../src/TopicEmbeddingIndex.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAGH,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAyB3D;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,mBAAmB;IAC9B;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAA2C;IAEvE;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAsC;IAE9D,wEAAwE;IACxE,OAAO,CAAC,KAAK,CAAkB;IAM/B;;;;;;;;OAQG;gBACS,WAAW,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAQjE;;;;;;;;;;;;;;;;;;;;OAoBG;IACG,KAAK,CAAC,MAAM,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAyDrD;;;;;;;;;;;;;;;OAeG;IACH,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,GAAG,UAAU,EAAE;IA8BhD;;;;;;;;;OASG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC;IAUhD;;;;;;;;;;;OAWG;IACH,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO;IAUlE;;;;;;;OAOG;IACG,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IASlE;;;;;;;;;;OAUG;IACH,IAAI,OAAO,IAAI,OAAO,CAErB;CACF"}