@framers/agentos-ext-ml-classifiers 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CHANGELOG.md +18 -0
  2. package/dist/MLClassifierGuardrail.d.ts +88 -117
  3. package/dist/MLClassifierGuardrail.d.ts.map +1 -1
  4. package/dist/MLClassifierGuardrail.js +255 -264
  5. package/dist/MLClassifierGuardrail.js.map +1 -1
  6. package/dist/classifiers/InjectionClassifier.d.ts +1 -1
  7. package/dist/classifiers/InjectionClassifier.d.ts.map +1 -1
  8. package/dist/classifiers/JailbreakClassifier.d.ts +1 -1
  9. package/dist/classifiers/JailbreakClassifier.d.ts.map +1 -1
  10. package/dist/classifiers/ToxicityClassifier.d.ts +1 -1
  11. package/dist/classifiers/ToxicityClassifier.d.ts.map +1 -1
  12. package/dist/classifiers/WorkerClassifierProxy.d.ts +1 -1
  13. package/dist/classifiers/WorkerClassifierProxy.d.ts.map +1 -1
  14. package/dist/index.d.ts +16 -90
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +33 -306
  17. package/dist/index.js.map +1 -1
  18. package/dist/keyword-classifier.d.ts +26 -0
  19. package/dist/keyword-classifier.d.ts.map +1 -0
  20. package/dist/keyword-classifier.js +113 -0
  21. package/dist/keyword-classifier.js.map +1 -0
  22. package/dist/llm-classifier.d.ts +27 -0
  23. package/dist/llm-classifier.d.ts.map +1 -0
  24. package/dist/llm-classifier.js +129 -0
  25. package/dist/llm-classifier.js.map +1 -0
  26. package/dist/tools/ClassifyContentTool.d.ts +53 -80
  27. package/dist/tools/ClassifyContentTool.d.ts.map +1 -1
  28. package/dist/tools/ClassifyContentTool.js +52 -103
  29. package/dist/tools/ClassifyContentTool.js.map +1 -1
  30. package/dist/types.d.ts +77 -277
  31. package/dist/types.d.ts.map +1 -1
  32. package/dist/types.js +9 -55
  33. package/dist/types.js.map +1 -1
  34. package/package.json +10 -16
  35. package/src/MLClassifierGuardrail.ts +279 -316
  36. package/src/index.ts +35 -339
  37. package/src/keyword-classifier.ts +130 -0
  38. package/src/llm-classifier.ts +163 -0
  39. package/src/tools/ClassifyContentTool.ts +75 -132
  40. package/src/types.ts +78 -325
  41. package/test/ClassifierOrchestrator.spec.ts +365 -0
  42. package/test/ClassifyContentTool.spec.ts +226 -0
  43. package/test/InjectionClassifier.spec.ts +263 -0
  44. package/test/JailbreakClassifier.spec.ts +295 -0
  45. package/test/MLClassifierGuardrail.spec.ts +486 -0
  46. package/test/SlidingWindowBuffer.spec.ts +391 -0
  47. package/test/ToxicityClassifier.spec.ts +268 -0
  48. package/test/WorkerClassifierProxy.spec.ts +303 -0
  49. package/test/index.spec.ts +431 -0
  50. package/tsconfig.json +20 -0
  51. package/vitest.config.ts +24 -0
package/src/index.ts CHANGED
@@ -1,81 +1,24 @@
1
1
  /**
2
- * @fileoverview Pack factory for the ML Classifier Guardrail Extension Pack.
2
+ * @file index.ts
3
+ * @description Pack factory for the ML Classifiers extension pack.
3
4
  *
4
- * Exports the main `createMLClassifierPack()` factory that assembles the
5
- * ML classifier guardrail and the `classify_content` tool into a single
6
- * {@link ExtensionPack} ready for registration with the AgentOS extension
7
- * manager.
5
+ * Exports a `createExtensionPack()` factory that assembles the ML classifier
6
+ * guardrail and the `classify_content` tool into a single {@link ExtensionPack}
7
+ * ready for registration with the AgentOS extension manager.
8
8
  *
9
- * Also exports a `createExtensionPack()` bridge function that conforms to
10
- * the AgentOS manifest factory convention, delegating to
11
- * `createMLClassifierPack()` with options extracted from the
12
- * {@link ExtensionPackContext}.
13
- *
14
- * ### Default behaviour (zero-config)
15
- * When called without arguments, all three built-in classifiers (toxicity,
16
- * prompt-injection, jailbreak) are active using their default model IDs and
17
- * the default threshold set:
18
- * - block at 0.90 confidence
19
- * - flag at 0.70 confidence
20
- * - warn (sanitize) at 0.40 confidence
21
- *
22
- * ### Activation lifecycle
23
- * Components are built eagerly at pack creation time for direct programmatic
24
- * use. When the extension manager activates the pack, `onActivate` rebuilds
25
- * all components with the manager's shared service registry so heavyweight
26
- * resources (ONNX/WASM model pipelines) are shared across the agent.
27
- *
28
- * ### Disabling classifiers
29
- * Individual classifiers can be disabled by omitting them from the
30
- * `options.classifiers` array. An empty array or `undefined` activates all
31
- * three built-in classifiers.
32
- *
33
- * @example
34
- * ```typescript
35
- * import { createMLClassifierPack } from './ml-classifiers';
36
- *
37
- * // All built-in classifiers at default thresholds:
38
- * const pack = createMLClassifierPack();
39
- *
40
- * // Toxicity only with custom block threshold:
41
- * const strictPack = createMLClassifierPack({
42
- * classifiers: ['toxicity'],
43
- * thresholds: { blockThreshold: 0.85 },
44
- * streamingMode: true,
45
- * guardrailScope: 'both',
46
- * });
47
- * ```
48
- *
49
- * @module agentos/extensions/packs/ml-classifiers
9
+ * @module ml-classifiers
50
10
  */
51
11
 
52
- import type { ISharedServiceRegistry } from '@framers/agentos';
53
- import { SharedServiceRegistry } from '@framers/agentos';
54
12
  import type { ExtensionPack, ExtensionPackContext } from '@framers/agentos';
55
- import type { ExtensionDescriptor, ExtensionLifecycleContext } from '@framers/agentos';
56
13
  import { EXTENSION_KIND_GUARDRAIL, EXTENSION_KIND_TOOL } from '@framers/agentos';
57
- import type { MLClassifierPackOptions } from './types';
58
- import { DEFAULT_THRESHOLDS } from './types';
14
+ import type { MLClassifierOptions } from './types';
59
15
  import { MLClassifierGuardrail } from './MLClassifierGuardrail';
60
- import { ClassifierOrchestrator } from './ClassifierOrchestrator';
61
- import { SlidingWindowBuffer } from './SlidingWindowBuffer';
62
16
  import { ClassifyContentTool } from './tools/ClassifyContentTool';
63
- import { ToxicityClassifier } from './classifiers/ToxicityClassifier';
64
- import { InjectionClassifier } from './classifiers/InjectionClassifier';
65
- import { JailbreakClassifier } from './classifiers/JailbreakClassifier';
66
- import type { IContentClassifier } from './IContentClassifier';
67
17
 
68
18
  // ---------------------------------------------------------------------------
69
- // Re-exports — allow single-import for consumers
19
+ // Re-exports
70
20
  // ---------------------------------------------------------------------------
71
21
 
72
- /**
73
- * Re-export all types from the ML classifier type definitions so consumers
74
- * can import everything from a single entry point:
75
- * ```ts
76
- * import { createMLClassifierPack, DEFAULT_THRESHOLDS } from './ml-classifiers';
77
- * ```
78
- */
79
22
  export * from './types';
80
23
 
81
24
  // ---------------------------------------------------------------------------
@@ -83,267 +26,34 @@ export * from './types';
83
26
  // ---------------------------------------------------------------------------
84
27
 
85
28
  /**
86
- * Create an {@link ExtensionPack} that bundles:
87
- * - The {@link MLClassifierGuardrail} guardrail (evaluates input & output).
88
- * - The {@link ClassifyContentTool} `classify_content` tool (on-demand analysis).
89
- *
90
- * The built-in classifiers that are instantiated depend on `options.classifiers`:
91
- * - `'toxicity'` → {@link ToxicityClassifier} (`unitary/toxic-bert`)
92
- * - `'injection'` → {@link InjectionClassifier} (`protectai/deberta-v3-small-prompt-injection-v2`)
93
- * - `'jailbreak'` → {@link JailbreakClassifier} (`meta-llama/PromptGuard-86M`)
94
- *
95
- * When `options.classifiers` is `undefined` or empty, **all three** are active.
96
- *
97
- * Additional classifiers supplied via `options.customClassifiers` are appended
98
- * to the active list and run in parallel alongside the built-in ones.
29
+ * Create an ExtensionPack that bundles the ML classifier guardrail with
30
+ * the `classify_content` tool.
99
31
  *
100
32
  * @param options - Optional pack-level configuration. All properties have
101
- * sensible defaults; see {@link MLClassifierPackOptions}.
102
- * @returns A fully-configured {@link ExtensionPack} with one guardrail
103
- * descriptor and one tool descriptor.
33
+ * sensible defaults; see {@link MLClassifierOptions}.
34
+ * @returns A fully-configured {@link ExtensionPack}.
104
35
  */
105
- export function createMLClassifierPack(options?: MLClassifierPackOptions): ExtensionPack {
106
- /**
107
- * Resolved options default to empty object so every sub-check can
108
- * safely use `opts.foo` without null-guarding the whole `options` reference.
109
- */
110
- const opts: MLClassifierPackOptions = options ?? {};
111
-
112
- // -------------------------------------------------------------------------
113
- // Mutable state — upgraded by onActivate with the extension manager's
114
- // shared service registry.
115
- // -------------------------------------------------------------------------
116
-
117
- const state = {
118
- /**
119
- * Service registry — starts as a standalone instance so the pack can be
120
- * used directly (without activation) in unit tests and scripts.
121
- * Replaced with the shared registry when `onActivate` is called by the
122
- * extension manager.
123
- */
124
- services: new SharedServiceRegistry() as ISharedServiceRegistry,
125
- };
126
-
127
- // -------------------------------------------------------------------------
128
- // Component instances — rebuilt by buildComponents()
129
- // -------------------------------------------------------------------------
130
-
131
- /**
132
- * The guardrail that evaluates user input and/or agent output streams
133
- * against all active ML classifiers.
134
- */
135
- let guardrail: MLClassifierGuardrail;
136
-
137
- /**
138
- * The on-demand classification tool exposed to agents and workflows.
139
- */
140
- let tool: ClassifyContentTool;
141
-
142
- /**
143
- * The orchestrator that runs all active classifiers in parallel and folds
144
- * their results into a single {@link ChunkEvaluation} via worst-wins
145
- * aggregation.
146
- */
147
- let orchestrator: ClassifierOrchestrator;
148
-
149
- /**
150
- * The sliding-window buffer used internally by the guardrail to evaluate
151
- * streamed output tokens incrementally.
152
- */
153
- let buffer: SlidingWindowBuffer;
154
-
155
- // -------------------------------------------------------------------------
156
- // buildComponents
157
- // -------------------------------------------------------------------------
158
-
159
- /**
160
- * (Re)construct all pack components using the current `state.services`.
161
- *
162
- * Called once at pack creation for direct programmatic use, and again
163
- * during `onActivate` to upgrade to the extension manager's shared
164
- * service registry (so ONNX/WASM pipelines are shared across the agent).
165
- *
166
- * ### Classifier selection
167
- * The active classifiers are determined by `opts.classifiers`:
168
- * - `undefined` or empty → all three built-in classifiers are created.
169
- * - Non-empty array → only the named classifiers are created.
170
- *
171
- * Any `opts.customClassifiers` are always appended to the list.
172
- */
173
- function buildComponents(): void {
174
- // ------------------------------------------------------------------
175
- // 1. Determine which built-in classifiers to instantiate.
176
- // ------------------------------------------------------------------
177
-
178
- /**
179
- * Determine whether a given built-in classifier name is enabled.
180
- *
181
- * When `opts.classifiers` is undefined or an empty array every built-in
182
- * classifier is considered enabled (zero-config default).
183
- *
184
- * @param name - One of `'toxicity'`, `'injection'`, or `'jailbreak'`.
185
- * @returns `true` when the classifier should be included.
186
- */
187
- function isBuiltInEnabled(name: 'toxicity' | 'injection' | 'jailbreak'): boolean {
188
- // No explicit list — enable all built-in classifiers.
189
- if (!opts.classifiers || opts.classifiers.length === 0) {
190
- return true;
191
- }
192
- return opts.classifiers.includes(name);
193
- }
194
-
195
- /** Array that will be populated with every active IContentClassifier. */
196
- const activeClassifiers: IContentClassifier[] = [];
197
-
198
- // Toxicity classifier — detects hateful, abusive, and toxic language.
199
- if (isBuiltInEnabled('toxicity')) {
200
- activeClassifiers.push(new ToxicityClassifier(state.services));
201
- }
202
-
203
- // Injection classifier — detects prompt-injection payloads.
204
- if (isBuiltInEnabled('injection')) {
205
- activeClassifiers.push(new InjectionClassifier(state.services));
206
- }
207
-
208
- // Jailbreak classifier — detects system-prompt override attempts.
209
- if (isBuiltInEnabled('jailbreak')) {
210
- activeClassifiers.push(new JailbreakClassifier(state.services));
211
- }
212
-
213
- // Append any caller-supplied custom classifiers.
214
- if (opts.customClassifiers && opts.customClassifiers.length > 0) {
215
- activeClassifiers.push(...opts.customClassifiers);
216
- }
217
-
218
- // ------------------------------------------------------------------
219
- // 2. Resolve pack-level thresholds (merge caller overrides on top of
220
- // the library defaults).
221
- // ------------------------------------------------------------------
222
-
223
- const thresholds = {
224
- ...DEFAULT_THRESHOLDS,
225
- ...opts.thresholds,
226
- };
227
-
228
- // ------------------------------------------------------------------
229
- // 3. Build the orchestrator with the resolved classifier list and
230
- // thresholds.
231
- // ------------------------------------------------------------------
232
- orchestrator = new ClassifierOrchestrator(activeClassifiers, thresholds);
233
-
234
- // ------------------------------------------------------------------
235
- // 4. Build the sliding-window buffer for streaming evaluation.
236
- // ------------------------------------------------------------------
237
- buffer = new SlidingWindowBuffer({
238
- chunkSize: opts.chunkSize,
239
- contextSize: opts.contextSize,
240
- maxEvaluations: opts.maxEvaluations,
241
- });
242
-
243
- // ------------------------------------------------------------------
244
- // 5. Build the guardrail, passing the shared registry and options.
245
- // The guardrail creates its own orchestrator internally from the
246
- // `classifiers` option — we pass the pre-built classifier instances
247
- // via the third constructor argument.
248
- // ------------------------------------------------------------------
249
- guardrail = new MLClassifierGuardrail(state.services, opts, activeClassifiers);
250
-
251
- // ------------------------------------------------------------------
252
- // 6. Build the on-demand classification tool backed by the orchestrator.
253
- // ------------------------------------------------------------------
254
- tool = new ClassifyContentTool(orchestrator);
255
- }
256
-
257
- // Initial build — makes the pack usable immediately without activation.
258
- buildComponents();
259
-
260
- // -------------------------------------------------------------------------
261
- // ExtensionPack shape
262
- // -------------------------------------------------------------------------
36
+ export function createMLClassifierGuardrail(options?: MLClassifierOptions): ExtensionPack {
37
+ const guardrail = new MLClassifierGuardrail(options);
38
+ const tool = new ClassifyContentTool(guardrail);
263
39
 
264
40
  return {
265
- /** Canonical pack name used in manifests and logs. */
266
41
  name: 'ml-classifiers',
267
-
268
- /** Semantic version of this pack implementation. */
269
42
  version: '1.0.0',
270
-
271
- /**
272
- * Descriptor getter — always returns the latest (possibly rebuilt)
273
- * component instances. Using a getter ensures that after `onActivate`
274
- * rebuilds the components, the descriptors array reflects the new
275
- * references rather than stale closures from the initial build.
276
- */
277
- get descriptors(): ExtensionDescriptor[] {
278
- return [
279
- {
280
- /**
281
- * Guardrail descriptor.
282
- *
283
- * Priority 5 places this guardrail after the PII redaction guardrail
284
- * (priority 10) so PII is stripped before ML classification.
285
- */
286
- id: 'ml-classifier-guardrail',
287
- kind: EXTENSION_KIND_GUARDRAIL,
288
- priority: 5,
289
- payload: guardrail,
290
- },
291
- {
292
- /**
293
- * On-demand classification tool descriptor.
294
- *
295
- * Priority 0 uses the default ordering — tools are typically
296
- * ordered by name rather than priority.
297
- */
298
- id: 'classify_content',
299
- kind: EXTENSION_KIND_TOOL,
300
- priority: 0,
301
- payload: tool,
302
- },
303
- ];
304
- },
305
-
306
- /**
307
- * Lifecycle hook called by the extension manager when the pack is
308
- * activated.
309
- *
310
- * Upgrades the internal service registry to the extension manager's
311
- * shared instance (so ONNX/WASM model weights are shared across all
312
- * extensions) then rebuilds all components to use the new registry.
313
- *
314
- * @param context - Activation context provided by the extension manager.
315
- */
316
- onActivate: (context: ExtensionLifecycleContext): void => {
317
- // Upgrade to the shared registry when the manager provides one.
318
- if (context.services) {
319
- state.services = context.services;
320
- }
321
-
322
- // Rebuild all components with the upgraded registry.
323
- buildComponents();
324
- },
325
-
326
- /**
327
- * Lifecycle hook called when the pack is deactivated or the agent shuts
328
- * down.
329
- *
330
- * Disposes the classifier orchestrator (which releases ONNX/WASM
331
- * resources for every registered classifier) and clears the sliding
332
- * window buffer to release per-stream state.
333
- */
334
- onDeactivate: async (): Promise<void> => {
335
- // Dispose all classifiers managed by the orchestrator.
336
- // orchestrator may be undefined if buildComponents() was never called
337
- // successfully (defensive guard).
338
- if (orchestrator) {
339
- await orchestrator.dispose();
340
- }
341
-
342
- // Clear any in-progress stream buffers.
343
- if (buffer) {
344
- buffer.clear();
345
- }
346
- },
43
+ descriptors: [
44
+ {
45
+ id: 'ml-classifier-guardrail',
46
+ kind: EXTENSION_KIND_GUARDRAIL,
47
+ priority: 5,
48
+ payload: guardrail,
49
+ },
50
+ {
51
+ id: 'classify_content',
52
+ kind: EXTENSION_KIND_TOOL,
53
+ priority: 0,
54
+ payload: tool,
55
+ },
56
+ ],
347
57
  };
348
58
  }
349
59
 
@@ -356,28 +66,14 @@ export function createMLClassifierPack(options?: MLClassifierPackOptions): Exten
356
66
  *
357
67
  * Conforms to the convention expected by the extension loader when resolving
358
68
  * packs from manifests. Extracts `options` from the {@link ExtensionPackContext}
359
- * and delegates to {@link createMLClassifierPack}.
69
+ * and delegates to {@link createMLClassifierGuardrail}.
360
70
  *
361
- * @param context - Manifest context containing optional pack options, secret
362
- * resolver, and shared service registry.
71
+ * @param context - Manifest context containing optional pack options.
363
72
  * @returns A fully-configured {@link ExtensionPack}.
364
- *
365
- * @example Manifest entry:
366
- * ```json
367
- * {
368
- * "packs": [
369
- * {
370
- * "module": "./ml-classifiers",
371
- * "options": {
372
- * "classifiers": ["toxicity", "jailbreak"],
373
- * "thresholds": { "blockThreshold": 0.95 },
374
- * "streamingMode": true
375
- * }
376
- * }
377
- * ]
378
- * }
379
- * ```
380
73
  */
381
74
  export function createExtensionPack(context: ExtensionPackContext): ExtensionPack {
382
- return createMLClassifierPack(context.options as MLClassifierPackOptions);
75
+ return createMLClassifierGuardrail(context.options as MLClassifierOptions);
383
76
  }
77
+
78
+ /** @deprecated Use createMLClassifierGuardrail instead */
79
+ export const createMLClassifierPack = createMLClassifierGuardrail;
@@ -0,0 +1,130 @@
1
+ /**
2
+ * @file keyword-classifier.ts
3
+ * @description Lightweight keyword and regex-based safety classifier used as the
4
+ * last-resort fallback when neither ONNX models nor an LLM invoker are available.
5
+ *
6
+ * Returns normalised confidence scores per category based on the number of
7
+ * distinct patterns that match. This is intentionally conservative — it will produce false
8
+ * positives in edge cases, but ensures the guardrail is never completely blind.
9
+ *
10
+ * @module ml-classifiers/keyword-classifier
11
+ */
12
+
13
+ import type { ClassifierCategory, CategoryScore } from './types';
14
+ import { ALL_CATEGORIES } from './types';
15
+
16
+ // ---------------------------------------------------------------------------
17
+ // Pattern dictionaries
18
+ // ---------------------------------------------------------------------------
19
+
20
+ /**
21
+ * Toxic language patterns — slurs, hate speech, and abusive terms.
22
+ *
23
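+ * Each regex starts with a word boundary (`\b`) to reduce false positives from
24
+ * substrings appearing in innocent words; the last two have no trailing boundary, so they also match longer words that begin with the listed stems.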
25
+ */
26
+ const TOXIC_PATTERNS: RegExp[] = [
27
+ /\b(fuck|shit|ass(?:hole)?|bitch|bastard|damn|crap)\b/i,
28
+ /\b(kill\s+(?:yourself|urself|you)|kys)\b/i,
29
+ /\b(retard(?:ed)?|idiot|moron|stupid\s+(?:bitch|ass))\b/i,
30
+ /\b(hate\s+(?:you|u)|die\s+(?:in|alone))\b/i,
31
+ /\b(racial|ethnic)\s+slur/i,
32
+ /\b(n[i1]gg|f[a4]g(?:got)?|tr[a4]nn)/i,
33
+ ];
34
+
35
+ /**
36
+ * Prompt injection / jailbreak patterns — attempts to override system
37
+ * instructions, extract system prompts, or bypass safety guardrails.
38
+ */
39
+ const INJECTION_PATTERNS: RegExp[] = [
40
+ /\bignore\s+(?:all\s+)?(?:previous|above|prior)\s+instructions?\b/i,
41
+ /\byou\s+are\s+now\s+(?:DAN|evil|unrestricted|jailbroken)\b/i,
42
+ /\bsystem\s*prompt\s*[:=]/i,
43
+ /\bdo\s+anything\s+now\b/i,
44
+ /\bdisregard\s+(?:your|all)\s+(?:rules|guidelines|instructions)\b/i,
45
+ /\bpretend\s+(?:you(?:'re|\s+are)\s+)?(?:not\s+an?\s+AI|unrestricted|evil)\b/i,
46
+ /\bact\s+as\s+(?:if|though)\s+(?:you\s+have\s+)?no\s+(?:restrictions|rules|limits)\b/i,
47
+ /\boverride\s+(?:safety|content)\s+(?:filters?|policies|guidelines)\b/i,
48
+ /\bjailbreak/i,
49
+ /\bprompt\s+(?:leak|injection|extract)/i,
50
+ ];
51
+
52
+ /**
53
+ * NSFW patterns — sexually explicit content markers.
54
+ */
55
+ const NSFW_PATTERNS: RegExp[] = [
56
+ /\b(porn(?:ography)?|hentai|xxx|nsfw)\b/i,
57
+ /\b(nude|naked|topless)\s+(?:photo|pic|image|video)\b/i,
58
+ /\bsexual(?:ly)?\s+explicit\b/i,
59
+ /\b(erotic|orgasm|masturbat)/i,
60
+ /\bsext(?:ing)?\b/i,
61
+ ];
62
+
63
+ /**
64
+ * Threat patterns — direct threats of violence, self-harm instructions,
65
+ * or dangerous activity incitement.
66
+ */
67
+ const THREAT_PATTERNS: RegExp[] = [
68
+ /\b(?:i(?:'ll|\s+will)\s+)?kill\s+(?:you|him|her|them)\b/i,
69
+ /\b(?:how\s+to\s+)?make\s+a?\s*(?:bomb|explosive|weapon)\b/i,
70
+ /\b(?:i(?:'ll|\s+will)\s+)?hurt\s+(?:you|myself|someone)\b/i,
71
+ /\bsuicid(?:e|al)\s+(?:method|instruction|guide|how)/i,
72
+ /\b(?:swat(?:ting)?|dox(?:x?ing)?)\s+(?:someone|him|her|you)\b/i,
73
+ /\bshoot\s+up\s+(?:a\s+)?(?:school|church|mosque|synagogue|building)\b/i,
74
+ ];
75
+
76
+ /**
77
+ * Map category names to their pattern arrays for uniform iteration.
78
+ */
79
+ const CATEGORY_PATTERNS: Record<ClassifierCategory, RegExp[]> = {
80
+ toxic: TOXIC_PATTERNS,
81
+ injection: INJECTION_PATTERNS,
82
+ nsfw: NSFW_PATTERNS,
83
+ threat: THREAT_PATTERNS,
84
+ };
85
+
86
+ // ---------------------------------------------------------------------------
87
+ // Public API
88
+ // ---------------------------------------------------------------------------
89
+
90
+ /**
91
+ * Classify a text string using keyword and regex pattern matching.
92
+ *
93
+ * Confidence is `0` when no pattern matches; otherwise it is computed as
94
+ * `min(1.0, 0.4 + (matchCount - 1) * 0.15)`, where `matchCount` is the number
95
+ * of distinct patterns that match: a single match yields a base confidence
96
+ * of 0.4, and each additional match adds 0.15, up to a cap of 1.0.
97
+ *
98
+ * @param text - The text to classify.
99
+ * @param categories - Which categories to evaluate. Defaults to all four.
100
+ * @returns Per-category confidence scores.
101
+ */
102
+ export function classifyByKeywords(
103
+ text: string,
104
+ categories: ClassifierCategory[] = ALL_CATEGORIES
105
+ ): CategoryScore[] {
106
+ const scores: CategoryScore[] = [];
107
+
108
+ for (const cat of categories) {
109
+ const patterns = CATEGORY_PATTERNS[cat];
110
+ if (!patterns) {
111
+ scores.push({ name: cat, confidence: 0 });
112
+ continue;
113
+ }
114
+
115
+ // Count how many distinct patterns match.
116
+ let matchCount = 0;
117
+ for (const re of patterns) {
118
+ if (re.test(text)) {
119
+ matchCount++;
120
+ }
121
+ }
122
+
123
+ // Scale: first match = 0.4, each additional += 0.15, capped at 1.0.
124
+ const confidence = matchCount === 0 ? 0 : Math.min(1.0, 0.4 + (matchCount - 1) * 0.15);
125
+
126
+ scores.push({ name: cat, confidence });
127
+ }
128
+
129
+ return scores;
130
+ }