@framers/agentos-ext-topicality 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,521 @@
/**
 * @fileoverview IGuardrailService implementation for topicality enforcement.
 *
 * `TopicalityGuardrail` evaluates user input (and optionally agent output)
 * against configured allowed and forbidden topic sets using semantic
 * embedding similarity. It enforces three independent policy checks:
 *
 * 1. **Forbidden topics** — Messages that score above `forbiddenThreshold`
 *    against any forbidden topic are blocked (or flagged).
 * 2. **Off-topic detection** — Messages that score below `allowedThreshold`
 *    against *all* allowed topics are flagged (or blocked/redirected).
 * 3. **Session drift** — An EMA-based tracker flags sustained drift away
 *    from allowed topics across consecutive messages.
 *
 * ### Lazy initialisation
 * Embedding indices are built on the **first evaluation call**, not at
 * construction time. This keeps instantiation cheap and defers the
 * potentially expensive batch embedding call until the agent actually
 * receives its first message.
 *
 * ### Fail-open semantics
 * All evaluation methods wrap their logic in try/catch. If the embedding
 * function throws, or any other unexpected error occurs, the guardrail
 * logs a warning and returns `null` (pass) to avoid blocking legitimate
 * traffic due to infrastructure failures.
 *
 * @module topicality/TopicalityGuardrail
 */
29
+
30
+ import type {
31
+ GuardrailConfig,
32
+ GuardrailEvaluationResult,
33
+ GuardrailInputPayload,
34
+ GuardrailOutputPayload,
35
+ IGuardrailService,
36
+ } from '@framers/agentos';
37
+ import { GuardrailAction } from '@framers/agentos';
38
+ import type { ISharedServiceRegistry } from '@framers/agentos';
39
+ import type { TopicalityPackOptions } from './types';
40
+ import { DEFAULT_DRIFT_CONFIG } from './types';
41
+ import { TopicEmbeddingIndex } from './TopicEmbeddingIndex';
42
+ import { TopicDriftTracker } from './TopicDriftTracker';
43
+
// ---------------------------------------------------------------------------
// Reason codes emitted by this guardrail
// ---------------------------------------------------------------------------

/**
 * Machine-readable reason code attached to results produced when a message
 * matches a forbidden topic.
 * @internal
 */
const REASON_FORBIDDEN = 'TOPICALITY_FORBIDDEN';

/**
 * Machine-readable reason code attached to results produced when a message
 * does not match any allowed topic.
 * @internal
 */
const REASON_OFF_TOPIC = 'TOPICALITY_OFF_TOPIC';

/**
 * Machine-readable reason code attached to results produced when a session
 * shows sustained topic drift.
 * @internal
 */
const REASON_DRIFT = 'TOPICALITY_DRIFT';
65
+
// ---------------------------------------------------------------------------
// TopicalityGuardrail
// ---------------------------------------------------------------------------

/**
 * Guardrail that enforces topicality constraints via semantic embeddings.
 *
 * Implements {@link IGuardrailService} with Phase 2 (parallel) semantics:
 * `evaluateStreamingChunks: false` and `canSanitize: false`. The guardrail
 * never modifies content — it only blocks or flags.
 *
 * Every public evaluation method is fail-open: any error raised while
 * building indices, embedding text or matching topics is logged with
 * `console.warn` and converted into a `null` (pass) result.
 *
 * @example
 * ```ts
 * const guardrail = new TopicalityGuardrail(registry, {
 *   allowedTopics: TOPIC_PRESETS.customerSupport,
 *   forbiddenTopics: TOPIC_PRESETS.commonUnsafe,
 *   forbiddenAction: 'block',
 *   offTopicAction: 'flag',
 * }, embeddingFn);
 *
 * const result = await guardrail.evaluateInput(payload);
 * if (result?.action === GuardrailAction.BLOCK) {
 *   // Reject the message
 * }
 * ```
 */
export class TopicalityGuardrail implements IGuardrailService {
  // -------------------------------------------------------------------------
  // IGuardrailService config
  // -------------------------------------------------------------------------

  /**
   * Guardrail pipeline configuration.
   *
   * - `evaluateStreamingChunks: false` — topicality evaluation requires
   *   complete text, not partial deltas.
   * - `canSanitize: false` — this guardrail only blocks or flags; it never
   *   modifies content, so it runs in Phase 2 (parallel) of the pipeline.
   */
  public readonly config: GuardrailConfig = {
    evaluateStreamingChunks: false,
    canSanitize: false,
  };

  // -------------------------------------------------------------------------
  // Private state
  // -------------------------------------------------------------------------

  /** Shared service registry provided by the extension manager. */
  private readonly services: ISharedServiceRegistry;

  /** Resolved pack options with caller overrides. */
  private readonly options: TopicalityPackOptions;

  /** Caller-supplied or registry-backed embedding function. */
  private readonly embeddingFn: (texts: string[]) => Promise<number[][]>;

  /**
   * Embedding index for allowed topics. Lazily built on the first
   * evaluation call. `null` until built or if no allowed topics are
   * configured.
   */
  private allowedIndex: TopicEmbeddingIndex | null = null;

  /**
   * Embedding index for forbidden topics. Lazily built on the first
   * evaluation call. `null` until built or if no forbidden topics are
   * configured.
   */
  private forbiddenIndex: TopicEmbeddingIndex | null = null;

  /**
   * Session-level EMA drift tracker. Only instantiated when
   * `enableDriftDetection` is not `false` (i.e. enabled by default).
   * `null` otherwise.
   */
  private driftTracker: TopicDriftTracker | null = null;

  /**
   * Which side of the conversation to evaluate.
   * - `'input'`  — only user messages
   * - `'output'` — only agent responses
   * - `'both'`   — both directions
   */
  private readonly scope: 'input' | 'output' | 'both';

  /**
   * Minimum similarity to any allowed topic for the message to be
   * considered on-topic.
   */
  private readonly allowedThreshold: number;

  /**
   * Similarity above which a forbidden topic match triggers action.
   */
  private readonly forbiddenThreshold: number;

  /**
   * Whether the lazy initialisation of embedding indices has completed
   * successfully. Prevents redundant build calls.
   */
  private indicesBuilt = false;

  /**
   * In-flight index build shared by every caller that arrives before the
   * first build finishes. Reset to `null` when a build fails so that a later
   * evaluation can retry.
   */
  private indexBuildPromise: Promise<void> | null = null;

  // -------------------------------------------------------------------------
  // Constructor
  // -------------------------------------------------------------------------

  /**
   * Creates a new `TopicalityGuardrail`.
   *
   * @param services - Shared service registry for heavyweight resource sharing.
   * @param options - Pack-level configuration (topics, thresholds, actions).
   * @param embeddingFn - Optional explicit embedding function. When omitted,
   *   the guardrail falls back to requesting an EmbeddingManager from the
   *   shared service registry at evaluation time.
   */
  constructor(
    services: ISharedServiceRegistry,
    options: TopicalityPackOptions,
    embeddingFn?: (texts: string[]) => Promise<number[][]>,
  ) {
    this.services = services;
    this.options = options;

    // Prefer the explicit embedding function; otherwise resolve one lazily
    // through the shared service registry (requires `this.services` above).
    this.embeddingFn = embeddingFn ?? this.createRegistryEmbeddingFn();

    // Resolve scope and thresholds, applying defaults for omitted options.
    this.scope = options.guardrailScope ?? 'input';
    this.allowedThreshold = options.allowedThreshold ?? 0.35;
    this.forbiddenThreshold = options.forbiddenThreshold ?? 0.65;

    // Drift detection is on unless explicitly disabled with `false`.
    if (options.enableDriftDetection !== false) {
      this.driftTracker = new TopicDriftTracker({
        ...DEFAULT_DRIFT_CONFIG,
        ...(options.drift ?? {}),
      });
    }
  }

  /**
   * Clears any session-level drift-tracking state held by this guardrail.
   *
   * Called by the topicality pack's `onDeactivate` hook so long-lived agents
   * do not retain per-session EMA state after the pack is removed or the
   * agent shuts down. A no-op when drift detection is disabled.
   */
  clearSessionState(): void {
    this.driftTracker?.clear();
  }

  // -------------------------------------------------------------------------
  // IGuardrailService — evaluateInput
  // -------------------------------------------------------------------------

  /**
   * Evaluates a user input message against configured topic constraints.
   *
   * When `scope` is `'output'`, this method immediately returns `null`
   * because input evaluation is disabled.
   *
   * @param payload - The input payload containing the user message text and
   *   session context.
   * @returns A guardrail evaluation result (BLOCK or FLAG), or `null` if
   *   the message passes all topic checks, is empty/blank, or any error
   *   occurs (fail-open).
   */
  async evaluateInput(
    payload: GuardrailInputPayload,
  ): Promise<GuardrailEvaluationResult | null> {
    if (this.scope === 'output') {
      return null;
    }

    try {
      const text = payload.input.textInput;
      if (!text || text.trim().length === 0) {
        // No text to evaluate — pass through.
        return null;
      }

      // `await` here so rejections are handled by the fail-open catch below.
      return await this.evaluateText(text, payload.context.sessionId);
    } catch (error) {
      // Fail-open: log the error but let the message through.
      console.warn(
        '[TopicalityGuardrail] evaluateInput failed (fail-open):',
        error instanceof Error ? error.message : error,
      );
      return null;
    }
  }

  // -------------------------------------------------------------------------
  // IGuardrailService — evaluateOutput
  // -------------------------------------------------------------------------

  /**
   * Evaluates an agent output chunk against configured topic constraints.
   *
   * When `scope` is `'input'`, this method immediately returns `null`
   * because output evaluation is disabled.
   *
   * Text is taken from the chunk's `textDelta` field when it holds
   * non-blank text, otherwise from `finalResponseText`. A blank `textDelta`
   * therefore no longer hides a populated `finalResponseText`.
   *
   * @param payload - The output payload containing the response chunk and
   *   session context.
   * @returns A guardrail evaluation result (BLOCK or FLAG), or `null` if
   *   the output passes all topic checks, carries no text, or any error
   *   occurs (fail-open).
   */
  async evaluateOutput(
    payload: GuardrailOutputPayload,
  ): Promise<GuardrailEvaluationResult | null> {
    if (this.scope === 'input') {
      return null;
    }

    try {
      const text = TopicalityGuardrail.extractChunkText(payload.chunk);
      if (text.length === 0) {
        // Nothing to evaluate — pass through.
        return null;
      }

      // `await` here so rejections are handled by the fail-open catch below.
      return await this.evaluateText(text, payload.context.sessionId);
    } catch (error) {
      // Fail-open: log and pass through.
      console.warn(
        '[TopicalityGuardrail] evaluateOutput failed (fail-open):',
        error instanceof Error ? error.message : error,
      );
      return null;
    }
  }

  // -------------------------------------------------------------------------
  // Shared helpers
  // -------------------------------------------------------------------------

  /**
   * Pulls evaluable text out of a response chunk.
   *
   * Candidate fields are checked in the order `textDelta`,
   * `finalResponseText`; the first one that is a string containing
   * non-whitespace characters wins. Non-string values are ignored rather
   * than cast.
   *
   * @param chunk - The response chunk, treated as an opaque value.
   * @returns The selected text, or `''` when no field carries usable text.
   * @internal
   */
  private static extractChunkText(chunk: unknown): string {
    if (typeof chunk !== 'object' || chunk === null) {
      return '';
    }

    const fields = chunk as Record<string, unknown>;
    for (const key of ['textDelta', 'finalResponseText']) {
      const candidate = fields[key];
      if (typeof candidate === 'string' && candidate.trim().length > 0) {
        return candidate;
      }
    }
    return '';
  }

  /**
   * Shared pipeline for both directions: ensures the indices exist, embeds
   * the text once, and runs the topic checks on the resulting vector.
   *
   * @param text - Non-blank text to evaluate.
   * @param sessionId - Session identifier forwarded to drift tracking.
   * @returns The result of {@link evaluateEmbedding}.
   * @throws If index building or embedding fails, or if the embedding
   *   function returns no vector. Callers convert this into fail-open.
   * @internal
   */
  private async evaluateText(
    text: string,
    sessionId: string,
  ): Promise<GuardrailEvaluationResult | null> {
    // Lazy-build embedding indices on the first call.
    await this.ensureIndicesBuilt();

    // Embed once — the same vector is reused by every check.
    const [embedding] = await this.embeddingFn([text]);
    if (!embedding) {
      throw new Error('Embedding function returned no vector for the evaluated text.');
    }

    return this.evaluateEmbedding(embedding, sessionId);
  }

  // -------------------------------------------------------------------------
  // Core evaluation pipeline
  // -------------------------------------------------------------------------

  /**
   * Runs the three-stage topicality evaluation pipeline on a pre-computed
   * embedding vector.
   *
   * Evaluation order:
   * 1. Forbidden topic check (highest priority — immediate block/flag)
   * 2. Off-topic check against allowed topics
   * 3. Session drift check (only if drift detection is enabled and allowed
   *    topics are configured)
   *
   * Each stage returns as soon as it triggers, so the drift tracker is only
   * updated for messages that passed stages 1 and 2.
   *
   * @param embedding - Pre-computed embedding vector for the text.
   * @param sessionId - Session identifier for drift tracking.
   * @returns A {@link GuardrailEvaluationResult} if any check triggers, or
   *   `null` if all checks pass.
   *
   * @internal
   */
  private evaluateEmbedding(
    embedding: number[],
    sessionId: string,
  ): GuardrailEvaluationResult | null {
    // ------------------------------------------------------------------
    // Step 1: Check forbidden topics
    // ------------------------------------------------------------------
    if (this.forbiddenIndex) {
      // Matches are inspected in the order the index returns them; the first
      // one strictly above the threshold decides the result.
      for (const match of this.forbiddenIndex.matchByVector(embedding)) {
        if (match.similarity > this.forbiddenThreshold) {
          return {
            // 'flag' must be requested explicitly; anything else blocks.
            action:
              this.options.forbiddenAction === 'flag'
                ? GuardrailAction.FLAG
                : GuardrailAction.BLOCK,
            reason: `Message matches forbidden topic: ${match.topicName}`,
            reasonCode: REASON_FORBIDDEN,
            metadata: {
              matchedTopic: match.topicId,
              matchedTopicName: match.topicName,
              similarity: match.similarity,
            },
          };
        }
      }
    }

    // ------------------------------------------------------------------
    // Step 2: Check allowed topics (off-topic detection)
    // ------------------------------------------------------------------
    if (
      this.allowedIndex &&
      !this.allowedIndex.isOnTopicByVector(embedding, this.allowedThreshold)
    ) {
      // Report the first returned match for diagnostics, even though it is
      // below the threshold.
      const nearestTopic = this.allowedIndex.matchByVector(embedding)[0] ?? null;

      // Only 'block' escalates. 'redirect' currently produces the same FLAG
      // result as 'flag' (the default).
      const action =
        this.options.offTopicAction === 'block'
          ? GuardrailAction.BLOCK
          : GuardrailAction.FLAG;

      return {
        action,
        reason: nearestTopic
          ? `Message is off-topic. Nearest topic: ${nearestTopic.topicName} (similarity: ${nearestTopic.similarity.toFixed(3)})`
          : 'Message is off-topic. No matching topics found.',
        reasonCode: REASON_OFF_TOPIC,
        metadata: {
          nearestTopic: nearestTopic?.topicId ?? null,
          nearestTopicName: nearestTopic?.topicName ?? null,
          nearestSimilarity: nearestTopic?.similarity ?? 0,
        },
      };
    }

    // ------------------------------------------------------------------
    // Step 3: Check session drift (only when drift detection is enabled
    // and we have allowed topics to compare against)
    // ------------------------------------------------------------------
    if (this.driftTracker && this.allowedIndex) {
      const driftResult = this.driftTracker.update(
        sessionId,
        embedding,
        this.allowedIndex,
      );

      if (driftResult.driftLimitExceeded) {
        return {
          // Drift is always a FLAG — it represents a gradual trend, not
          // an immediate policy violation.
          action: GuardrailAction.FLAG,
          reason: `Session has drifted off-topic for ${driftResult.driftStreak} consecutive messages.`,
          reasonCode: REASON_DRIFT,
          metadata: {
            driftStreak: driftResult.driftStreak,
            currentSimilarity: driftResult.currentSimilarity,
            nearestTopic: driftResult.nearestTopic?.topicId ?? null,
            nearestTopicName: driftResult.nearestTopic?.topicName ?? null,
          },
        };
      }
    }

    // All checks passed — no action needed.
    return null;
  }

  // -------------------------------------------------------------------------
  // Lazy index building
  // -------------------------------------------------------------------------

  /**
   * Ensures that the allowed and forbidden embedding indices have been built.
   *
   * The first caller starts the build; callers that arrive while it is still
   * running await the same promise instead of starting their own. Once a
   * build has succeeded, subsequent calls are no-ops (guarded by the
   * `indicesBuilt` flag). If the build fails, the shared promise is
   * discarded so the next evaluation retries.
   *
   * @throws Whatever the underlying index build throws.
   * @internal
   */
  private async ensureIndicesBuilt(): Promise<void> {
    if (this.indicesBuilt) {
      return;
    }

    if (!this.indexBuildPromise) {
      this.indexBuildPromise = this.buildIndices();
    }

    const pending = this.indexBuildPromise;
    try {
      await pending;
    } catch (error) {
      // Several waiters may observe the same failure; only discard the
      // promise if it is still the one that failed, so a retry that has
      // already started is not clobbered.
      if (this.indexBuildPromise === pending) {
        this.indexBuildPromise = null;
      }
      throw error;
    }
  }

  /**
   * Builds both topic indices and publishes them together.
   *
   * Indices are constructed in locals and assigned to the instance only
   * after every configured index has finished building, so evaluation never
   * sees an index whose `build()` is still in progress.
   *
   * @internal
   */
  private async buildIndices(): Promise<void> {
    const { forbiddenTopics, allowedTopics } = this.options;

    // Build the forbidden-topic index if any forbidden topics are configured.
    let forbiddenIndex: TopicEmbeddingIndex | null = null;
    if (forbiddenTopics && forbiddenTopics.length > 0) {
      forbiddenIndex = new TopicEmbeddingIndex(this.embeddingFn);
      await forbiddenIndex.build(forbiddenTopics);
    }

    // Build the allowed-topic index if any allowed topics are configured.
    let allowedIndex: TopicEmbeddingIndex | null = null;
    if (allowedTopics && allowedTopics.length > 0) {
      allowedIndex = new TopicEmbeddingIndex(this.embeddingFn);
      await allowedIndex.build(allowedTopics);
    }

    this.forbiddenIndex = forbiddenIndex;
    this.allowedIndex = allowedIndex;
    this.indicesBuilt = true;
  }

  // -------------------------------------------------------------------------
  // Registry-based embedding fallback
  // -------------------------------------------------------------------------

  /**
   * Creates an embedding function that retrieves an EmbeddingManager from
   * the shared service registry at call time.
   *
   * This fallback is used when no explicit `embeddingFn` is provided to
   * the constructor. The factory passed to `getOrCreate` always throws, so
   * the returned function rejects whenever the registry has to invoke that
   * factory to produce the service.
   *
   * @returns An async embedding function.
   * @internal
   */
  private createRegistryEmbeddingFn(): (texts: string[]) => Promise<number[][]> {
    return async (texts: string[]): Promise<number[][]> => {
      // Look the EmbeddingManager up on every call rather than caching it.
      const em = await this.services.getOrCreate<{
        generateEmbeddings: (texts: string[]) => Promise<number[][]>;
      }>(
        'agentos:topicality:embedding-manager',
        async () => {
          throw new Error(
            'EmbeddingManager not available in shared service registry. ' +
              'Provide an explicit embeddingFn or register an EmbeddingManager.',
          );
        },
      );
      return em.generateEmbeddings(texts);
    };
  }
}