@framers/agentos-ext-ml-classifiers 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/CHANGELOG.md +18 -0
  2. package/dist/MLClassifierGuardrail.d.ts +88 -117
  3. package/dist/MLClassifierGuardrail.d.ts.map +1 -1
  4. package/dist/MLClassifierGuardrail.js +255 -264
  5. package/dist/MLClassifierGuardrail.js.map +1 -1
  6. package/dist/classifiers/InjectionClassifier.d.ts +1 -1
  7. package/dist/classifiers/InjectionClassifier.d.ts.map +1 -1
  8. package/dist/classifiers/JailbreakClassifier.d.ts +1 -1
  9. package/dist/classifiers/JailbreakClassifier.d.ts.map +1 -1
  10. package/dist/classifiers/ToxicityClassifier.d.ts +1 -1
  11. package/dist/classifiers/ToxicityClassifier.d.ts.map +1 -1
  12. package/dist/classifiers/WorkerClassifierProxy.d.ts +1 -1
  13. package/dist/classifiers/WorkerClassifierProxy.d.ts.map +1 -1
  14. package/dist/index.d.ts +16 -90
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +33 -306
  17. package/dist/index.js.map +1 -1
  18. package/dist/keyword-classifier.d.ts +26 -0
  19. package/dist/keyword-classifier.d.ts.map +1 -0
  20. package/dist/keyword-classifier.js +113 -0
  21. package/dist/keyword-classifier.js.map +1 -0
  22. package/dist/llm-classifier.d.ts +27 -0
  23. package/dist/llm-classifier.d.ts.map +1 -0
  24. package/dist/llm-classifier.js +129 -0
  25. package/dist/llm-classifier.js.map +1 -0
  26. package/dist/tools/ClassifyContentTool.d.ts +53 -80
  27. package/dist/tools/ClassifyContentTool.d.ts.map +1 -1
  28. package/dist/tools/ClassifyContentTool.js +52 -103
  29. package/dist/tools/ClassifyContentTool.js.map +1 -1
  30. package/dist/types.d.ts +77 -277
  31. package/dist/types.d.ts.map +1 -1
  32. package/dist/types.js +9 -55
  33. package/dist/types.js.map +1 -1
  34. package/package.json +10 -16
  35. package/src/MLClassifierGuardrail.ts +279 -316
  36. package/src/index.ts +35 -339
  37. package/src/keyword-classifier.ts +130 -0
  38. package/src/llm-classifier.ts +163 -0
  39. package/src/tools/ClassifyContentTool.ts +75 -132
  40. package/src/types.ts +78 -325
  41. package/test/ClassifierOrchestrator.spec.ts +365 -0
  42. package/test/ClassifyContentTool.spec.ts +226 -0
  43. package/test/InjectionClassifier.spec.ts +263 -0
  44. package/test/JailbreakClassifier.spec.ts +295 -0
  45. package/test/MLClassifierGuardrail.spec.ts +486 -0
  46. package/test/SlidingWindowBuffer.spec.ts +391 -0
  47. package/test/ToxicityClassifier.spec.ts +268 -0
  48. package/test/WorkerClassifierProxy.spec.ts +303 -0
  49. package/test/index.spec.ts +431 -0
  50. package/tsconfig.json +20 -0
  51. package/vitest.config.ts +24 -0
package/dist/types.js CHANGED
@@ -1,62 +1,16 @@
1
1
  /**
2
- * @fileoverview Core type definitions for the ML Classifier Guardrail Extension Pack.
2
+ * @file types.ts
3
+ * @description Core type definitions for the ML Classifiers extension pack.
3
4
  *
4
- * This file defines all configuration shapes, runtime result types, and
5
- * service-identifier constants used by the ML classifier pipeline. All
6
- * classifiers in this pack evaluate text content against learned models
7
- * (toxicity, prompt-injection, jailbreak) and emit structured results that
8
- * feed into the AgentOS guardrail decision tree.
5
+ * Defines the shared interfaces used across the ML classification system:
6
+ * classifier categories, confidence results, option shapes, and the LLM
7
+ * invoker callback signature.
9
8
  *
10
- * Import hierarchy
11
- * ----------------
12
- * ```
13
- * IUtilityAI ──── ClassificationResult, ClassificationScore
14
- * IGuardrailService ── GuardrailAction
15
- * │
16
- * ▼
17
- * types.ts (this file)
18
- * │
19
- * ▼
20
- * IContentClassifier.ts / SlidingWindowBuffer.ts / …
21
- * ```
22
- *
23
- * @module agentos/extensions/packs/ml-classifiers/types
24
- */
25
- /**
26
- * Sensible defaults for {@link ClassifierThresholds}.
27
- *
28
- * These values reflect a conservative-but-pragmatic policy:
29
- * - block at 90 % confidence → very high bar, minimises false positives
30
- * - flag at 70 % → surfaced for human review, not blocked
31
- * - warn at 40 % → low-confidence signal, handled with a light touch
9
+ * @module ml-classifiers/types
32
10
  */
33
- export const DEFAULT_THRESHOLDS = {
34
- blockThreshold: 0.9,
35
- flagThreshold: 0.7,
36
- warnThreshold: 0.4,
37
- };
38
- // ---------------------------------------------------------------------------
39
- // Service identifiers
40
- // ---------------------------------------------------------------------------
41
11
  /**
42
- * Well-known service identifier strings for the three built-in ML classifier
43
- * pipelines.
44
- *
45
- * These IDs follow the `agentos:<domain>:<name>` naming convention used
46
- * throughout the AgentOS extension ecosystem. Use them to retrieve specific
47
- * classifier services from the shared service registry.
48
- *
49
- * @example
50
- * ```typescript
51
- * const toxicity = serviceRegistry.get(ML_CLASSIFIER_SERVICE_IDS.TOXICITY_PIPELINE);
52
- * ```
12
+ * All supported classifier categories as a constant array, used for
13
+ * iteration and default configuration.
53
14
  */
54
- export const ML_CLASSIFIER_SERVICE_IDS = {
55
- /** Classifier that detects toxic, hateful, or abusive language. */
56
- TOXICITY_PIPELINE: 'agentos:ml-classifiers:toxicity-pipeline',
57
- /** Classifier that detects prompt-injection attempts. */
58
- INJECTION_PIPELINE: 'agentos:ml-classifiers:injection-pipeline',
59
- /** Classifier that detects jailbreak / system-override attempts. */
60
- JAILBREAK_PIPELINE: 'agentos:ml-classifiers:jailbreak-pipeline',
61
- };
15
+ export const ALL_CATEGORIES = ['toxic', 'injection', 'nsfw', 'threat'];
62
16
  //# sourceMappingURL=types.js.map
package/dist/types.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AA2CH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAyB;IACtD,cAAc,EAAE,GAAG;IACnB,aAAa,EAAE,GAAG;IAClB,aAAa,EAAE,GAAG;CACV,CAAC;AA6NX,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG;IACvC,mEAAmE;IACnE,iBAAiB,EAAE,0CAA0C;IAE7D,yDAAyD;IACzD,kBAAkB,EAAE,2CAA2C;IAE/D,oEAAoE;IACpE,kBAAkB,EAAE,2CAA2C;CACvD,CAAC"}
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAgBH;;;GAGG;AACH,MAAM,CAAC,MAAM,cAAc,GAAyB,CAAC,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@framers/agentos-ext-ml-classifiers",
3
- "version": "0.1.0",
4
- "description": "ML-based content classification guardrail (toxicity, injection, jailbreak) for AgentOS",
3
+ "version": "0.2.1",
4
+ "description": "ML-based content classifiers for AgentOS — toxicity, prompt injection, and NSFW detection via ONNX models or LLM fallback",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
7
7
  "types": "./dist/index.d.ts",
@@ -11,23 +11,12 @@
11
11
  "types": "./dist/index.d.ts"
12
12
  }
13
13
  },
14
- "files": [
15
- "dist",
16
- "src",
17
- "SKILL.md",
18
- "manifest.json"
19
- ],
20
14
  "peerDependencies": {
21
- "@framers/agentos": "^0.1.0"
15
+ "@framers/agentos": ">=0.1.0"
22
16
  },
23
17
  "optionalDependencies": {
24
18
  "@huggingface/transformers": "^3.0.0"
25
19
  },
26
- "devDependencies": {
27
- "typescript": "^5.5.0",
28
- "vitest": "^1.6.0",
29
- "@framers/agentos": "0.1.47"
30
- },
31
20
  "license": "MIT",
32
21
  "author": "Frame.dev",
33
22
  "repository": {
@@ -38,8 +27,13 @@
38
27
  "publishConfig": {
39
28
  "access": "public"
40
29
  },
30
+ "devDependencies": {
31
+ "semantic-release": "^24.0.0",
32
+ "@semantic-release/github": "^11.0.0"
33
+ },
41
34
  "scripts": {
42
- "build": "tsc -p tsconfig.json",
43
- "test": "vitest run"
35
+ "build": "tsc",
36
+ "clean": "rm -rf dist",
37
+ "typecheck": "tsc --noEmit"
44
38
  }
45
39
  }