@framers/agentos-ext-ml-classifiers 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/MLClassifierGuardrail.d.ts +88 -117
- package/dist/MLClassifierGuardrail.d.ts.map +1 -1
- package/dist/MLClassifierGuardrail.js +255 -264
- package/dist/MLClassifierGuardrail.js.map +1 -1
- package/dist/classifiers/InjectionClassifier.d.ts +1 -1
- package/dist/classifiers/InjectionClassifier.d.ts.map +1 -1
- package/dist/classifiers/JailbreakClassifier.d.ts +1 -1
- package/dist/classifiers/JailbreakClassifier.d.ts.map +1 -1
- package/dist/classifiers/ToxicityClassifier.d.ts +1 -1
- package/dist/classifiers/ToxicityClassifier.d.ts.map +1 -1
- package/dist/classifiers/WorkerClassifierProxy.d.ts +1 -1
- package/dist/classifiers/WorkerClassifierProxy.d.ts.map +1 -1
- package/dist/index.d.ts +16 -90
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +33 -306
- package/dist/index.js.map +1 -1
- package/dist/keyword-classifier.d.ts +26 -0
- package/dist/keyword-classifier.d.ts.map +1 -0
- package/dist/keyword-classifier.js +113 -0
- package/dist/keyword-classifier.js.map +1 -0
- package/dist/llm-classifier.d.ts +27 -0
- package/dist/llm-classifier.d.ts.map +1 -0
- package/dist/llm-classifier.js +129 -0
- package/dist/llm-classifier.js.map +1 -0
- package/dist/tools/ClassifyContentTool.d.ts +53 -80
- package/dist/tools/ClassifyContentTool.d.ts.map +1 -1
- package/dist/tools/ClassifyContentTool.js +52 -103
- package/dist/tools/ClassifyContentTool.js.map +1 -1
- package/dist/types.d.ts +77 -277
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +9 -55
- package/dist/types.js.map +1 -1
- package/package.json +10 -16
- package/src/MLClassifierGuardrail.ts +279 -316
- package/src/index.ts +35 -339
- package/src/keyword-classifier.ts +130 -0
- package/src/llm-classifier.ts +163 -0
- package/src/tools/ClassifyContentTool.ts +75 -132
- package/src/types.ts +78 -325
- package/test/ClassifierOrchestrator.spec.ts +365 -0
- package/test/ClassifyContentTool.spec.ts +226 -0
- package/test/InjectionClassifier.spec.ts +263 -0
- package/test/JailbreakClassifier.spec.ts +295 -0
- package/test/MLClassifierGuardrail.spec.ts +486 -0
- package/test/SlidingWindowBuffer.spec.ts +391 -0
- package/test/ToxicityClassifier.spec.ts +268 -0
- package/test/WorkerClassifierProxy.spec.ts +303 -0
- package/test/index.spec.ts +431 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +24 -0
package/dist/types.js
CHANGED
|
@@ -1,62 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @
|
|
2
|
+
* @file types.ts
|
|
3
|
+
* @description Core type definitions for the ML Classifiers extension pack.
|
|
3
4
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* (toxicity, prompt-injection, jailbreak) and emit structured results that
|
|
8
|
-
* feed into the AgentOS guardrail decision tree.
|
|
5
|
+
* Defines the shared interfaces used across the ML classification system:
|
|
6
|
+
* classifier categories, confidence results, option shapes, and the LLM
|
|
7
|
+
* invoker callback signature.
|
|
9
8
|
*
|
|
10
|
-
*
|
|
11
|
-
* ----------------
|
|
12
|
-
* ```
|
|
13
|
-
* IUtilityAI ──── ClassificationResult, ClassificationScore
|
|
14
|
-
* IGuardrailService ── GuardrailAction
|
|
15
|
-
* │
|
|
16
|
-
* ▼
|
|
17
|
-
* types.ts (this file)
|
|
18
|
-
* │
|
|
19
|
-
* ▼
|
|
20
|
-
* IContentClassifier.ts / SlidingWindowBuffer.ts / …
|
|
21
|
-
* ```
|
|
22
|
-
*
|
|
23
|
-
* @module agentos/extensions/packs/ml-classifiers/types
|
|
24
|
-
*/
|
|
25
|
-
/**
|
|
26
|
-
* Sensible defaults for {@link ClassifierThresholds}.
|
|
27
|
-
*
|
|
28
|
-
* These values reflect a conservative-but-pragmatic policy:
|
|
29
|
-
* - block at 90 % confidence → very high bar, minimises false positives
|
|
30
|
-
* - flag at 70 % → surfaced for human review, not blocked
|
|
31
|
-
* - warn at 40 % → low-confidence signal, handled with a light touch
|
|
9
|
+
* @module ml-classifiers/types
|
|
32
10
|
*/
|
|
33
|
-
export const DEFAULT_THRESHOLDS = {
|
|
34
|
-
blockThreshold: 0.9,
|
|
35
|
-
flagThreshold: 0.7,
|
|
36
|
-
warnThreshold: 0.4,
|
|
37
|
-
};
|
|
38
|
-
// ---------------------------------------------------------------------------
|
|
39
|
-
// Service identifiers
|
|
40
|
-
// ---------------------------------------------------------------------------
|
|
41
11
|
/**
|
|
42
|
-
*
|
|
43
|
-
*
|
|
44
|
-
*
|
|
45
|
-
* These IDs follow the `agentos:<domain>:<name>` naming convention used
|
|
46
|
-
* throughout the AgentOS extension ecosystem. Use them to retrieve specific
|
|
47
|
-
* classifier services from the shared service registry.
|
|
48
|
-
*
|
|
49
|
-
* @example
|
|
50
|
-
* ```typescript
|
|
51
|
-
* const toxicity = serviceRegistry.get(ML_CLASSIFIER_SERVICE_IDS.TOXICITY_PIPELINE);
|
|
52
|
-
* ```
|
|
12
|
+
* All supported classifier categories as a constant array, used for
|
|
13
|
+
* iteration and default configuration.
|
|
53
14
|
*/
|
|
54
|
-
export const
|
|
55
|
-
/** Classifier that detects toxic, hateful, or abusive language. */
|
|
56
|
-
TOXICITY_PIPELINE: 'agentos:ml-classifiers:toxicity-pipeline',
|
|
57
|
-
/** Classifier that detects prompt-injection attempts. */
|
|
58
|
-
INJECTION_PIPELINE: 'agentos:ml-classifiers:injection-pipeline',
|
|
59
|
-
/** Classifier that detects jailbreak / system-override attempts. */
|
|
60
|
-
JAILBREAK_PIPELINE: 'agentos:ml-classifiers:jailbreak-pipeline',
|
|
61
|
-
};
|
|
15
|
+
export const ALL_CATEGORIES = ['toxic', 'injection', 'nsfw', 'threat'];
|
|
62
16
|
//# sourceMappingURL=types.js.map
|
package/dist/types.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAgBH;;;GAGG;AACH,MAAM,CAAC,MAAM,cAAc,GAAyB,CAAC,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@framers/agentos-ext-ml-classifiers",
|
|
3
|
-
"version": "0.1
|
|
4
|
-
"description": "ML-based content
|
|
3
|
+
"version": "0.2.1",
|
|
4
|
+
"description": "ML-based content classifiers for AgentOS — toxicity, prompt injection, and NSFW detection via ONNX models or LLM fallback",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
@@ -11,23 +11,12 @@
|
|
|
11
11
|
"types": "./dist/index.d.ts"
|
|
12
12
|
}
|
|
13
13
|
},
|
|
14
|
-
"files": [
|
|
15
|
-
"dist",
|
|
16
|
-
"src",
|
|
17
|
-
"SKILL.md",
|
|
18
|
-
"manifest.json"
|
|
19
|
-
],
|
|
20
14
|
"peerDependencies": {
|
|
21
|
-
"@framers/agentos": "
|
|
15
|
+
"@framers/agentos": ">=0.1.0"
|
|
22
16
|
},
|
|
23
17
|
"optionalDependencies": {
|
|
24
18
|
"@huggingface/transformers": "^3.0.0"
|
|
25
19
|
},
|
|
26
|
-
"devDependencies": {
|
|
27
|
-
"typescript": "^5.5.0",
|
|
28
|
-
"vitest": "^1.6.0",
|
|
29
|
-
"@framers/agentos": "0.1.47"
|
|
30
|
-
},
|
|
31
20
|
"license": "MIT",
|
|
32
21
|
"author": "Frame.dev",
|
|
33
22
|
"repository": {
|
|
@@ -38,8 +27,13 @@
|
|
|
38
27
|
"publishConfig": {
|
|
39
28
|
"access": "public"
|
|
40
29
|
},
|
|
30
|
+
"devDependencies": {
|
|
31
|
+
"semantic-release": "^24.0.0",
|
|
32
|
+
"@semantic-release/github": "^11.0.0"
|
|
33
|
+
},
|
|
41
34
|
"scripts": {
|
|
42
|
-
"build": "tsc
|
|
43
|
-
"
|
|
35
|
+
"build": "tsc",
|
|
36
|
+
"clean": "rm -rf dist",
|
|
37
|
+
"typecheck": "tsc --noEmit"
|
|
44
38
|
}
|
|
45
39
|
}
|