@datafog/fogclaw 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/dist/engines/gliner.d.ts.map +1 -1
- package/dist/engines/gliner.js +113 -1
- package/dist/engines/gliner.js.map +1 -1
- package/docs/plugins/fogclaw.md +4 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/engines/gliner.ts +139 -1
- package/tests/gliner.test.ts +75 -3
- package/tests/scanner.test.ts +61 -2
package/README.md
CHANGED
|
@@ -29,6 +29,22 @@ npm run build
|
|
|
29
29
|
|
|
30
30
|
## Quick Start
|
|
31
31
|
|
|
32
|
+
### GLiNER first-run setup (no extra steps)
|
|
33
|
+
|
|
34
|
+
FogClaw automatically downloads the GLiNER ONNX model on first run if it is not already cached locally, then reuses it for all later starts.
|
|
35
|
+
|
|
36
|
+
What happens on first scan:
|
|
37
|
+
|
|
38
|
+
1. Tokenizers are downloaded (if needed).
|
|
39
|
+
2. The first available ONNX file from Hugging Face is downloaded to the plugin's local model cache:
|
|
40
|
+
- `.../node_modules/@xenova/transformers/.cache/<model-repo>/onnx/<selected-model>.onnx`
|
|
41
|
+
- (This download can take a moment depending on network and selected model size.)
|
|
42
|
+
3. GLiNER starts using local files, so later runs stay fast and offline-friendly.
|
|
43
|
+
|
|
44
|
+
If the download cannot be performed (network/firewall/auth), FogClaw safely falls back to regex-only mode and continues to protect common structured PII.
|
|
45
|
+
|
|
46
|
+
If the model repository requires Hugging Face authentication (for example, a private or gated model), export `HF_TOKEN` or `HF_ACCESS_TOKEN` before starting OpenClaw so the model files can be downloaded.
|
|
47
|
+
|
|
32
48
|
1. Copy the example config:
|
|
33
49
|
|
|
34
50
|
```bash
|
|
@@ -144,7 +160,7 @@ Plus any labels you add via `custom_entities` in the config.
|
|
|
144
160
|
| `enabled` | `boolean` | `true` | Enable/disable the plugin |
|
|
145
161
|
| `guardrail_mode` | `string` | `"redact"` | Default action: `"redact"`, `"block"`, or `"warn"` |
|
|
146
162
|
| `redactStrategy` | `string` | `"token"` | How to redact: `"token"`, `"mask"`, or `"hash"` |
|
|
147
|
-
| `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER |
|
|
163
|
+
| `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER (or a local `.onnx` path for advanced setups). |
|
|
148
164
|
| `confidence_threshold` | `number` | `0.5` | Minimum confidence for GLiNER detections (0-1) |
|
|
149
165
|
| `custom_entities` | `string[]` | `[]` | Custom entity labels for zero-shot detection |
|
|
150
166
|
| `entityActions` | `object` | `{}` | Per-entity-type action overrides |
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAmJ1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAyBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA+BnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
|
package/dist/engines/gliner.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { env } from "@xenova/transformers";
|
|
1
4
|
import { canonicalType } from "../types.js";
|
|
2
5
|
const DEFAULT_NER_LABELS = [
|
|
3
6
|
"person",
|
|
@@ -9,6 +12,114 @@ const DEFAULT_NER_LABELS = [
|
|
|
9
12
|
"account number",
|
|
10
13
|
"passport number",
|
|
11
14
|
];
|
|
15
|
+
// ONNX artifact names probed inside a Hugging Face model repository, in
// priority order: the first one found in the cache, or otherwise downloaded
// successfully, is the file GLiNER loads.
const GLINER_MODEL_FILES = [
    "onnx/model_q4f16.onnx",
    "onnx/model_q4.onnx",
    "onnx/model_bnb4.onnx",
    "onnx/model_int8.onnx",
    "onnx/model_uint8.onnx",
    "onnx/model_quantized.onnx",
    "onnx/model_fp16.onnx",
    "onnx/model.onnx",
];
// Upper bound on a single artifact download: two minutes, in milliseconds.
const MODEL_DOWNLOAD_TIMEOUT_MS = 2 * 60 * 1000;
|
|
26
|
+
/**
 * Heuristically decides whether a model reference points at the local
 * filesystem rather than at a Hugging Face repository id.
 *
 * After trimming, a reference counts as local when it ends with `.onnx`,
 * `.ort` or `.bin` (case-insensitive), starts with `.`, or is an absolute
 * path on the current platform. Blank input is never local.
 */
function isLikelyLocalPath(modelPath) {
    const candidate = modelPath.trim();
    if (candidate.length === 0) {
        return false;
    }
    const normalized = candidate.toLowerCase();
    for (const extension of [".onnx", ".ort", ".bin"]) {
        if (normalized.endsWith(extension)) {
            return true;
        }
    }
    return candidate.startsWith(".") || path.isAbsolute(candidate);
}
|
|
41
|
+
/**
 * Returns `value` unchanged when it is already absolute; otherwise resolves
 * it against the current working directory.
 */
function toAbsolutePath(value) {
    if (path.isAbsolute(value)) {
        return value;
    }
    return path.resolve(process.cwd(), value);
}
|
|
44
|
+
/**
 * Directory under which downloaded model artifacts are stored: the
 * transformers `env.localModelPath` when it is set, otherwise `.cache` in the
 * current working directory.
 *
 * NOTE(review): the README describes the cache as living under a `.cache`
 * directory of `@xenova/transformers`; confirm `localModelPath` is the
 * intended setting.
 */
function getModelCacheDir() {
    const configuredDir = env.localModelPath;
    return configuredDir ?? path.join(process.cwd(), ".cache");
}
|
|
47
|
+
/** Normalizes a configured model reference by stripping surrounding whitespace. */
function sanitizeModelReference(modelPath) {
    const reference = modelPath.trim();
    return reference;
}
|
|
50
|
+
/**
 * Resolves to `true` when `filePath` is accessible to this process and to
 * `false` when the access check fails for any reason. Never rejects.
 */
async function fileExists(filePath) {
    return fs.access(filePath).then(() => true, () => false);
}
|
|
59
|
+
/**
 * Returns the cached location of one model artifact, downloading it from
 * Hugging Face first when it is not on disk yet.
 *
 * The artifact is stored at `<cache dir>/<modelRepo>/<filename>`. A missing
 * file is fetched from
 * `https://huggingface.co/<modelRepo>/resolve/main/<filename>`, sending
 * `HF_TOKEN` (or `HF_ACCESS_TOKEN`) as a bearer token when one is set.
 *
 * Throws when the server answers with a non-OK status, when the download
 * exceeds `MODEL_DOWNLOAD_TIMEOUT_MS`, or when a network or filesystem
 * operation fails.
 */
async function downloadModelIfNeeded(modelRepo, filename) {
    const localPath = path.join(getModelCacheDir(), modelRepo, filename);
    if (await fileExists(localPath)) {
        return localPath;
    }
    const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
    const headers = new Headers();
    // `||` rather than `??`: an empty HF_TOKEN must not mask a usable HF_ACCESS_TOKEN.
    const token = process.env.HF_TOKEN || process.env.HF_ACCESS_TOKEN;
    if (token) {
        headers.set("Authorization", `Bearer ${token}`);
    }
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
    // Stage the bytes in a sibling file and rename afterwards, so an interrupted
    // write can never leave a truncated file at `localPath` that later runs
    // would accept as a valid cache hit.
    const stagingPath = `${localPath}.${process.pid}.${Date.now()}.partial`;
    try {
        const response = await fetch(url, { headers, signal: controller.signal });
        if (!response.ok) {
            throw new Error(`Unable to download model artifact: ${response.status}`);
        }
        const bytes = new Uint8Array(await response.arrayBuffer());
        await fs.mkdir(path.dirname(localPath), { recursive: true });
        await fs.writeFile(stagingPath, bytes);
        await fs.rename(stagingPath, localPath);
        return localPath;
    }
    catch (err) {
        // Best-effort cleanup; a cleanup failure must not replace the original error.
        await fs.rm(stagingPath, { force: true }).catch(() => undefined);
        if (err instanceof Error && err.name === "AbortError") {
            throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
        }
        throw err;
    }
    finally {
        clearTimeout(timeout);
    }
}
|
|
93
|
+
/**
 * Turns a configured model reference into the path of an ONNX file on disk.
 *
 * - A local-looking reference (see `isLikelyLocalPath`) is made absolute and
 *   must point at an existing regular file.
 * - Anything else is treated as a Hugging Face repository id: an already
 *   cached candidate from `GLINER_MODEL_FILES` is preferred, otherwise the
 *   candidates are downloaded in order until one succeeds.
 *
 * Throws when the reference is blank, when the local file is missing, or
 * when no candidate could be found or downloaded.
 */
async function resolveModelPath(modelPath) {
    const sanitized = sanitizeModelReference(modelPath);
    if (!sanitized) {
        throw new Error("Model path is empty");
    }
    if (isLikelyLocalPath(sanitized)) {
        const absolutePath = toAbsolutePath(sanitized);
        // A directory also "exists", but it cannot be loaded as a model file.
        const isRegularFile = await fs.stat(absolutePath).then((stats) => stats.isFile(), () => false);
        if (!isRegularFile) {
            throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
        }
        return absolutePath;
    }
    const candidates = GLINER_MODEL_FILES;
    const cacheDir = getModelCacheDir();
    // Pass 1: any variant that is already cached wins, so no network is needed.
    for (const filename of candidates) {
        const localPath = path.join(cacheDir, sanitized, filename);
        if (await fileExists(localPath)) {
            return localPath;
        }
    }
    // Pass 2: download candidates in priority order, remembering each distinct
    // failure so the final error does not hide earlier causes.
    const failures = [];
    for (const filename of candidates) {
        try {
            return await downloadModelIfNeeded(sanitized, filename);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            if (!failures.includes(message)) {
                failures.push(message);
            }
        }
    }
    throw new Error(`Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${failures.length > 0 ? failures.join("; ") : "unknown"}`);
}
|
|
12
123
|
export class GlinerEngine {
|
|
13
124
|
model = null;
|
|
14
125
|
modelPath;
|
|
@@ -23,12 +134,13 @@ export class GlinerEngine {
|
|
|
23
134
|
if (this.initialized)
|
|
24
135
|
return;
|
|
25
136
|
try {
|
|
137
|
+
const resolvedModelPath = await resolveModelPath(this.modelPath);
|
|
26
138
|
const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
|
|
27
139
|
const { Gliner } = glinerModule;
|
|
28
140
|
this.model = new Gliner({
|
|
29
141
|
tokenizerPath: this.modelPath,
|
|
30
142
|
onnxSettings: {
|
|
31
|
-
modelPath:
|
|
143
|
+
modelPath: resolvedModelPath,
|
|
32
144
|
executionProvider: "cpu",
|
|
33
145
|
},
|
|
34
146
|
maxWidth: 12,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAG3C,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,uBAAuB;IACvB,oBAAoB;IACpB,sBAAsB;IACtB,sBAAsB;IACtB,uBAAuB;IACvB,2BAA2B;IAC3B,sBAAsB;IACtB,iBAAiB;CAClB,CAAC;AAEF,MAAM,yBAAyB,GAAG,OAAO,CAAC;AAE1C,SAAS,iBAAiB,CAAC,SAAiB;IAC1C,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IACjC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,YAAY,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IAClF,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACxD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,CAAC;AAC7E,CAAC;AAED,SAAS,gBAAgB;IACvB,OAAO,GAAG,CAAC,cAAc,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,sBAAsB,CAAC,SAAiB;IAC/C,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;AAC1B,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,QAAgB;IACxC,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC1B,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,SAAiB,EAAE,QAAgB;IACtE,MAAM,QAAQ,GAAG,gBAAgB,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IAE3D,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAChC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,GAAG,GAAG,0BAA0B,SAAS,iBAAiB,QAAQ,EAAE,CAAC;IAC3E,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,OA
AO,CAAC,GAAG,CAAC,eAAe,CAAC;IAClE,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,UAAU,KAAK,EAAE,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,yBAAyB,CAAC,CAAC;IAEhF,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1E,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,sCAAsC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;QAC3D,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7D,MAAM,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAErC,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,KAAK,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,kCAAkC,yBAAyB,IAAI,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,SAAiB;IAC/C,MAAM,SAAS,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,iBAAiB,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,MAAM,YAAY,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAC/C,IAAI,CAAC,CAAC,MAAM,UAAU,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC;IACtC,IAAI,SAA4B,CAAC;IAEjC,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QACrE,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAChC,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC1D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CACb,mCAAmC,SAAS,YAAY,UAAU,CAAC,IAAI,CAA
C,IAAI,CAAC,KAC3E,SAAS,EAAE,OAAO,IAAI,SACxB,EAAE,CACH,CAAC;AACJ,CAAC;AAED,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,iBAAiB,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACjE,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrF,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,CAAC;YAChC,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,iBAAiB;oBAC5B,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE;YAC7D,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAChB,CAAC,CAA6E,EAAE,EAAE,CAAC,CAAC;YAClF,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
|
package/docs/plugins/fogclaw.md
CHANGED
|
@@ -13,6 +13,9 @@ FogClaw is an OpenClaw plugin that protects agent workflows by detecting and han
|
|
|
13
13
|
|
|
14
14
|
It provides both proactive guardrail behavior (via the `before_agent_start` hook) and explicit tools:
|
|
15
15
|
|
|
16
|
+
- GLiNER ONNX artifacts are provisioned automatically on first run (no manual `download` step required).
|
|
17
|
+
- If the model cannot be downloaded (offline or restricted network), FogClaw continues in regex-only mode.
|
|
18
|
+
|
|
16
19
|
- `fogclaw_scan`: scans text for PII and custom entities.
|
|
17
20
|
- `fogclaw_redact`: scans and redacts sensitive matches.
|
|
18
21
|
|
|
@@ -59,6 +62,7 @@ Set plugin config under `plugins.entries.fogclaw.config`:
|
|
|
59
62
|
enabled: true,
|
|
60
63
|
guardrail_mode: "redact",
|
|
61
64
|
redactStrategy: "token",
|
|
65
|
+
model: "onnx-community/gliner_large-v2.1",
|
|
62
66
|
confidence_threshold: 0.5,
|
|
63
67
|
custom_entities: ["project codename", "competitor name"],
|
|
64
68
|
entityActions: {
|
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
package/src/engines/gliner.ts
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { env } from "@xenova/transformers";
|
|
4
|
+
|
|
1
5
|
import type { Entity } from "../types.js";
|
|
2
6
|
import { canonicalType } from "../types.js";
|
|
3
7
|
|
|
@@ -12,6 +16,139 @@ const DEFAULT_NER_LABELS = [
|
|
|
12
16
|
"passport number",
|
|
13
17
|
];
|
|
14
18
|
|
|
19
|
+
/**
 * ONNX artifact names probed inside a Hugging Face model repository, in
 * priority order: the first one found in the cache, or otherwise downloaded
 * successfully, is the file GLiNER loads.
 */
const GLINER_MODEL_FILES: readonly string[] = [
  "onnx/model_q4f16.onnx",
  "onnx/model_q4.onnx",
  "onnx/model_bnb4.onnx",
  "onnx/model_int8.onnx",
  "onnx/model_uint8.onnx",
  "onnx/model_quantized.onnx",
  "onnx/model_fp16.onnx",
  "onnx/model.onnx",
];

/** Upper bound on a single artifact download: two minutes, in milliseconds. */
const MODEL_DOWNLOAD_TIMEOUT_MS = 2 * 60 * 1000;
|
|
31
|
+
|
|
32
|
+
/**
 * Heuristically decides whether a model reference points at the local
 * filesystem rather than at a Hugging Face repository id.
 *
 * After trimming, a reference counts as local when it ends with `.onnx`,
 * `.ort` or `.bin` (case-insensitive), starts with `.`, or is an absolute
 * path on the current platform.
 *
 * @param modelPath - Raw model reference from configuration.
 * @returns `true` for filesystem-looking references; `false` for blank input
 *   and everything else.
 */
function isLikelyLocalPath(modelPath: string): boolean {
  const candidate = modelPath.trim();
  if (candidate.length === 0) {
    return false;
  }

  const normalized = candidate.toLowerCase();
  for (const extension of [".onnx", ".ort", ".bin"]) {
    if (normalized.endsWith(extension)) {
      return true;
    }
  }

  return candidate.startsWith(".") || path.isAbsolute(candidate);
}
|
|
50
|
+
|
|
51
|
+
/**
 * Returns `value` unchanged when it is already absolute; otherwise resolves
 * it against the current working directory.
 *
 * @param value - Filesystem path, absolute or relative.
 * @returns An absolute path.
 */
function toAbsolutePath(value: string): string {
  if (path.isAbsolute(value)) {
    return value;
  }

  return path.resolve(process.cwd(), value);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Directory under which downloaded model artifacts are stored: the
 * transformers `env.localModelPath` when it is set, otherwise `.cache` in the
 * current working directory.
 *
 * NOTE(review): the README describes the cache as living under a `.cache`
 * directory of `@xenova/transformers`; confirm `localModelPath` is the
 * intended setting.
 *
 * @returns The cache root used for model files.
 */
function getModelCacheDir(): string {
  const configuredDir = env.localModelPath;
  return configuredDir ?? path.join(process.cwd(), ".cache");
}
|
|
58
|
+
|
|
59
|
+
/**
 * Normalizes a configured model reference by stripping surrounding whitespace.
 *
 * @param modelPath - Raw model reference from configuration.
 * @returns The trimmed reference (possibly empty).
 */
function sanitizeModelReference(modelPath: string): string {
  const reference = modelPath.trim();
  return reference;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Checks whether `filePath` is accessible to this process.
 *
 * @param filePath - Path to probe.
 * @returns `true` when the access check succeeds, `false` when it fails for
 *   any reason. Never rejects.
 */
async function fileExists(filePath: string): Promise<boolean> {
  return fs.access(filePath).then(
    () => true,
    () => false,
  );
}
|
|
71
|
+
|
|
72
|
+
/**
 * Returns the cached location of one model artifact, downloading it from
 * Hugging Face first when it is not on disk yet.
 *
 * The artifact is stored at `<cache dir>/<modelRepo>/<filename>`. A missing
 * file is fetched from
 * `https://huggingface.co/<modelRepo>/resolve/main/<filename>`, sending
 * `HF_TOKEN` (or `HF_ACCESS_TOKEN`) as a bearer token when one is set.
 *
 * @param modelRepo - Hugging Face repository id, e.g. `onnx-community/gliner_large-v2.1`.
 * @param filename - Artifact path inside the repository, e.g. `onnx/model.onnx`.
 * @returns Path of the artifact in the local cache.
 * @throws Error when the server answers with a non-OK status, when the
 *   download exceeds `MODEL_DOWNLOAD_TIMEOUT_MS`, or when a network or
 *   filesystem operation fails.
 */
async function downloadModelIfNeeded(modelRepo: string, filename: string): Promise<string> {
  const localPath = path.join(getModelCacheDir(), modelRepo, filename);
  if (await fileExists(localPath)) {
    return localPath;
  }

  const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
  const headers = new Headers();
  // `||` rather than `??`: an empty HF_TOKEN must not mask a usable HF_ACCESS_TOKEN.
  const token = process.env.HF_TOKEN || process.env.HF_ACCESS_TOKEN;
  if (token) {
    headers.set("Authorization", `Bearer ${token}`);
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
  // Stage the bytes in a sibling file and rename afterwards, so an interrupted
  // write can never leave a truncated file at `localPath` that later runs
  // would accept as a valid cache hit.
  const stagingPath = `${localPath}.${process.pid}.${Date.now()}.partial`;

  try {
    const response = await fetch(url, { headers, signal: controller.signal });
    if (!response.ok) {
      throw new Error(`Unable to download model artifact: ${response.status}`);
    }

    const bytes = new Uint8Array(await response.arrayBuffer());
    await fs.mkdir(path.dirname(localPath), { recursive: true });
    await fs.writeFile(stagingPath, bytes);
    await fs.rename(stagingPath, localPath);

    return localPath;
  } catch (err) {
    // Best-effort cleanup; a cleanup failure must not replace the original error.
    await fs.rm(stagingPath, { force: true }).catch(() => undefined);

    if (err instanceof Error && err.name === "AbortError") {
      throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
    }

    throw err;
  } finally {
    clearTimeout(timeout);
  }
}
|
|
111
|
+
|
|
112
|
+
/**
 * Turns a configured model reference into the path of an ONNX file on disk.
 *
 * - A local-looking reference (see `isLikelyLocalPath`) is made absolute and
 *   must point at an existing regular file.
 * - Anything else is treated as a Hugging Face repository id: an already
 *   cached candidate from `GLINER_MODEL_FILES` is preferred, otherwise the
 *   candidates are downloaded in order until one succeeds.
 *
 * @param modelPath - Model reference from configuration (repository id or local path).
 * @returns Absolute or cache-relative path of the model file to load.
 * @throws Error when the reference is blank, when the local file is missing,
 *   or when no candidate could be found or downloaded.
 */
async function resolveModelPath(modelPath: string): Promise<string> {
  const sanitized = sanitizeModelReference(modelPath);
  if (!sanitized) {
    throw new Error("Model path is empty");
  }

  if (isLikelyLocalPath(sanitized)) {
    const absolutePath = toAbsolutePath(sanitized);
    // A directory also "exists", but it cannot be loaded as a model file.
    const isRegularFile = await fs.stat(absolutePath).then(
      (stats) => stats.isFile(),
      () => false,
    );
    if (!isRegularFile) {
      throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
    }

    return absolutePath;
  }

  const candidates = GLINER_MODEL_FILES;
  const cacheDir = getModelCacheDir();

  // Pass 1: any variant that is already cached wins, so no network is needed.
  for (const filename of candidates) {
    const localPath = path.join(cacheDir, sanitized, filename);
    if (await fileExists(localPath)) {
      return localPath;
    }
  }

  // Pass 2: download candidates in priority order, remembering each distinct
  // failure so the final error does not hide earlier causes.
  const failures: string[] = [];
  for (const filename of candidates) {
    try {
      return await downloadModelIfNeeded(sanitized, filename);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      if (!failures.includes(message)) {
        failures.push(message);
      }
    }
  }

  throw new Error(
    `Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${
      failures.length > 0 ? failures.join("; ") : "unknown"
    }`,
  );
}
|
|
151
|
+
|
|
15
152
|
export class GlinerEngine {
|
|
16
153
|
private model: any = null;
|
|
17
154
|
private modelPath: string;
|
|
@@ -28,12 +165,13 @@ export class GlinerEngine {
|
|
|
28
165
|
if (this.initialized) return;
|
|
29
166
|
|
|
30
167
|
try {
|
|
168
|
+
const resolvedModelPath = await resolveModelPath(this.modelPath);
|
|
31
169
|
const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
|
|
32
170
|
const { Gliner } = glinerModule;
|
|
33
171
|
this.model = new Gliner({
|
|
34
172
|
tokenizerPath: this.modelPath,
|
|
35
173
|
onnxSettings: {
|
|
36
|
-
modelPath:
|
|
174
|
+
modelPath: resolvedModelPath,
|
|
37
175
|
executionProvider: "cpu",
|
|
38
176
|
},
|
|
39
177
|
maxWidth: 12,
|
package/tests/gliner.test.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
import { describe, it, expect, vi
|
|
1
|
+
import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
2
5
|
|
|
3
6
|
// Mock the gliner npm package so we don't need the actual 1.4GB model
|
|
4
7
|
vi.mock("gliner", () => {
|
|
@@ -63,13 +66,82 @@ vi.mock("gliner", () => {
|
|
|
63
66
|
return { Gliner: MockGliner };
|
|
64
67
|
});
|
|
65
68
|
|
|
69
|
+
// Mock the Node entry point of gliner as well, so the engine never loads a real model.
vi.mock("gliner/node", () => {
  type MockEntity = { text: string; label: string; score: number; start: number; end: number };

  class MockGliner {
    private config: any;

    constructor(config: any) {
      this.config = config;
    }

    async initialize(): Promise<void> {
      // No-op in mock
    }

    // Reports each known phrase that occurs in `text`, but only when its label was requested.
    async inference(
      text: string,
      labels: string[],
      options: { threshold: number },
    ): Promise<MockEntity[]> {
      const knownPhrases = [
        { text: "John Smith", label: "person", score: 0.95 },
        { text: "Acme Corp", label: "organization", score: 0.88 },
        { text: "New York", label: "location", score: 0.91 },
      ];

      return knownPhrases.flatMap((phrase) => {
        const start = text.indexOf(phrase.text);
        if (start === -1 || !labels.includes(phrase.label)) {
          return [];
        }
        return [{ ...phrase, start, end: start + phrase.text.length }];
      });
    }
  }

  return { Gliner: MockGliner };
});
|
|
127
|
+
|
|
66
128
|
import { GlinerEngine } from "../src/engines/gliner.js";
|
|
67
129
|
|
|
130
|
+
// Placeholder model file handed to GlinerEngine as a local `.onnx` path.
// The process id keeps concurrent test runs on one machine from sharing the
// file, so one run's cleanup cannot delete it while another still needs it.
const TEST_ONNX_MODEL_PATH = path.join(
  os.tmpdir(),
  `fogclaw-gliner-model-test-${process.pid}.onnx`,
);

beforeAll(async () => {
  await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
});

afterAll(async () => {
  // Best-effort cleanup: a file that is already gone is not a test failure.
  await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
});
|
|
139
|
+
|
|
68
140
|
describe("GlinerEngine", () => {
|
|
69
141
|
let engine: GlinerEngine;
|
|
70
142
|
|
|
71
143
|
beforeEach(async () => {
|
|
72
|
-
engine = new GlinerEngine(
|
|
144
|
+
engine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
|
|
73
145
|
await engine.initialize();
|
|
74
146
|
});
|
|
75
147
|
|
|
@@ -166,7 +238,7 @@ describe("GlinerEngine", () => {
|
|
|
166
238
|
});
|
|
167
239
|
|
|
168
240
|
it("reports isInitialized correctly", async () => {
|
|
169
|
-
const freshEngine = new GlinerEngine(
|
|
241
|
+
const freshEngine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
|
|
170
242
|
expect(freshEngine.isInitialized).toBe(false);
|
|
171
243
|
|
|
172
244
|
await freshEngine.initialize();
|
package/tests/scanner.test.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
import { describe, it, expect, vi
|
|
1
|
+
import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
2
5
|
|
|
3
6
|
// Mock the gliner npm package so we don't need the actual model
|
|
4
7
|
vi.mock("gliner", () => {
|
|
@@ -43,12 +46,68 @@ vi.mock("gliner", () => {
|
|
|
43
46
|
};
|
|
44
47
|
});
|
|
45
48
|
|
|
49
|
+
// Mock the Node entry point of gliner as well, so the scanner never loads a real model.
vi.mock("gliner/node", () => {
  class MockGliner {
    async initialize() {}

    // Simulates detection of two fixed phrases; only results whose label was requested are returned.
    async inference(text: string, labels: string[], _opts: { threshold: number }) {
      const knownPhrases = [
        { text: "John Smith", label: "person", score: 0.95 },
        { text: "Acme Corp", label: "organization", score: 0.88 },
      ];

      const results: any[] = [];
      for (const phrase of knownPhrases) {
        const idx = text.indexOf(phrase.text);
        if (idx !== -1) {
          results.push({ ...phrase, start: idx, end: idx + phrase.text.length });
        }
      }

      return results.filter((r) => labels.includes(r.label));
    }
  }

  return { Gliner: MockGliner };
});
|
|
90
|
+
|
|
46
91
|
import { Scanner } from "../src/scanner.js";
|
|
47
92
|
import { DEFAULT_CONFIG } from "../src/config.js";
|
|
48
93
|
import type { FogClawConfig } from "../src/types.js";
|
|
49
94
|
|
|
95
|
+
// Placeholder model file used as the configured local `.onnx` model path.
// The process id keeps concurrent test runs on one machine from sharing the
// file, so one run's cleanup cannot delete it while another still needs it.
const TEST_ONNX_MODEL_PATH = path.join(
  os.tmpdir(),
  `fogclaw-scanner-gliner-model-test-${process.pid}.onnx`,
);

beforeAll(async () => {
  await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
});

afterAll(async () => {
  // Best-effort cleanup: a file that is already gone is not a test failure.
  await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
});
|
|
104
|
+
|
|
50
105
|
/**
 * Builds a scanner configuration for tests.
 *
 * Precedence, lowest to highest: `DEFAULT_CONFIG`, the placeholder model
 * path, then the caller's `overrides`.
 */
function makeConfig(overrides: Partial<FogClawConfig> = {}): FogClawConfig {
  const baseline: FogClawConfig = { ...DEFAULT_CONFIG, model: TEST_ONNX_MODEL_PATH };
  return { ...baseline, ...overrides };
}
|
|
53
112
|
|
|
54
113
|
describe("Scanner", () => {
|