@datafog/fogclaw 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/dist/engines/gliner.d.ts.map +1 -1
- package/dist/engines/gliner.js +121 -5
- package/dist/engines/gliner.js.map +1 -1
- package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +20 -19
- package/docs/plugins/fogclaw.md +4 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/engines/gliner.ts +157 -6
- package/tests/gliner.test.ts +111 -6
- package/tests/scanner.test.ts +95 -6
package/README.md
CHANGED
|
@@ -29,6 +29,22 @@ npm run build
|
|
|
29
29
|
|
|
30
30
|
## Quick Start
|
|
31
31
|
|
|
32
|
+
### GLiNER first-run setup (no extra steps)
|
|
33
|
+
|
|
34
|
+
FogClaw automatically downloads the GLiNER ONNX model on first run if it is not already cached locally, then reuses it for all later starts.
|
|
35
|
+
|
|
36
|
+
What happens on first scan:
|
|
37
|
+
|
|
38
|
+
1. Tokenizers are downloaded (if needed).
|
|
39
|
+
2. The first available ONNX file from Hugging Face is downloaded to the plugin's local model cache:
|
|
40
|
+
- `.../node_modules/@xenova/transformers/.cache/<model-repo>/onnx/<selected-model>.onnx`
|
|
41
|
+
- (This download can take a moment depending on network and selected model size.)
|
|
42
|
+
3. GLiNER starts using local files, so later runs stay fast and offline-friendly.
|
|
43
|
+
|
|
44
|
+
If the download cannot be performed (network/firewall/auth), FogClaw safely falls back to regex-only mode and continues to protect common structured PII.
|
|
45
|
+
|
|
46
|
+
If your network requires Hugging Face authentication, export `HF_TOKEN` or `HF_ACCESS_TOKEN` before starting OpenClaw so model files can download.
|
|
47
|
+
|
|
32
48
|
1. Copy the example config:
|
|
33
49
|
|
|
34
50
|
```bash
|
|
@@ -144,7 +160,7 @@ Plus any labels you add via `custom_entities` in the config.
|
|
|
144
160
|
| `enabled` | `boolean` | `true` | Enable/disable the plugin |
|
|
145
161
|
| `guardrail_mode` | `string` | `"redact"` | Default action: `"redact"`, `"block"`, or `"warn"` |
|
|
146
162
|
| `redactStrategy` | `string` | `"token"` | How to redact: `"token"`, `"mask"`, or `"hash"` |
|
|
147
|
-
| `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER |
|
|
163
|
+
| `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER (or a local `.onnx` path for advanced setups). |
|
|
148
164
|
| `confidence_threshold` | `number` | `0.5` | Minimum confidence for GLiNER detections (0-1) |
|
|
149
165
|
| `custom_entities` | `string[]` | `[]` | Custom entity labels for zero-shot detection |
|
|
150
166
|
| `entityActions` | `object` | `{}` | Per-entity-type action overrides |
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAmJ1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAyBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA4CnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
|
package/dist/engines/gliner.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { env } from "@xenova/transformers";
|
|
1
4
|
import { canonicalType } from "../types.js";
|
|
2
5
|
const DEFAULT_NER_LABELS = [
|
|
3
6
|
"person",
|
|
@@ -9,6 +12,114 @@ const DEFAULT_NER_LABELS = [
|
|
|
9
12
|
"account number",
|
|
10
13
|
"passport number",
|
|
11
14
|
];
|
|
15
|
+
const GLINER_MODEL_FILES = [
|
|
16
|
+
"onnx/model_q4f16.onnx",
|
|
17
|
+
"onnx/model_q4.onnx",
|
|
18
|
+
"onnx/model_bnb4.onnx",
|
|
19
|
+
"onnx/model_int8.onnx",
|
|
20
|
+
"onnx/model_uint8.onnx",
|
|
21
|
+
"onnx/model_quantized.onnx",
|
|
22
|
+
"onnx/model_fp16.onnx",
|
|
23
|
+
"onnx/model.onnx",
|
|
24
|
+
];
|
|
25
|
+
const MODEL_DOWNLOAD_TIMEOUT_MS = 120_000;
|
|
26
|
+
function isLikelyLocalPath(modelPath) {
|
|
27
|
+
const trimmed = modelPath.trim();
|
|
28
|
+
if (!trimmed) {
|
|
29
|
+
return false;
|
|
30
|
+
}
|
|
31
|
+
const lower = trimmed.toLowerCase();
|
|
32
|
+
const hasExtension = [".onnx", ".ort", ".bin"].some((ext) => lower.endsWith(ext));
|
|
33
|
+
if (hasExtension) {
|
|
34
|
+
return true;
|
|
35
|
+
}
|
|
36
|
+
if (trimmed.startsWith(".") || path.isAbsolute(trimmed)) {
|
|
37
|
+
return true;
|
|
38
|
+
}
|
|
39
|
+
return false;
|
|
40
|
+
}
|
|
41
|
+
function toAbsolutePath(value) {
|
|
42
|
+
return path.isAbsolute(value) ? value : path.resolve(process.cwd(), value);
|
|
43
|
+
}
|
|
44
|
+
function getModelCacheDir() {
|
|
45
|
+
return env.localModelPath ?? path.join(process.cwd(), ".cache");
|
|
46
|
+
}
|
|
47
|
+
function sanitizeModelReference(modelPath) {
|
|
48
|
+
return modelPath.trim();
|
|
49
|
+
}
|
|
50
|
+
async function fileExists(filePath) {
|
|
51
|
+
try {
|
|
52
|
+
await fs.access(filePath);
|
|
53
|
+
return true;
|
|
54
|
+
}
|
|
55
|
+
catch {
|
|
56
|
+
return false;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
async function downloadModelIfNeeded(modelRepo, filename) {
|
|
60
|
+
const cacheDir = getModelCacheDir();
|
|
61
|
+
const localPath = path.join(cacheDir, modelRepo, filename);
|
|
62
|
+
if (await fileExists(localPath)) {
|
|
63
|
+
return localPath;
|
|
64
|
+
}
|
|
65
|
+
const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
|
|
66
|
+
const headers = new Headers();
|
|
67
|
+
const token = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN;
|
|
68
|
+
if (token) {
|
|
69
|
+
headers.set("Authorization", `Bearer ${token}`);
|
|
70
|
+
}
|
|
71
|
+
const controller = new AbortController();
|
|
72
|
+
const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
|
|
73
|
+
try {
|
|
74
|
+
const response = await fetch(url, { headers, signal: controller.signal });
|
|
75
|
+
if (!response.ok) {
|
|
76
|
+
throw new Error(`Unable to download model artifact: ${response.status}`);
|
|
77
|
+
}
|
|
78
|
+
const bytes = new Uint8Array(await response.arrayBuffer());
|
|
79
|
+
await fs.mkdir(path.dirname(localPath), { recursive: true });
|
|
80
|
+
await fs.writeFile(localPath, bytes);
|
|
81
|
+
return localPath;
|
|
82
|
+
}
|
|
83
|
+
catch (err) {
|
|
84
|
+
if (err instanceof Error && err.name === "AbortError") {
|
|
85
|
+
throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
|
|
86
|
+
}
|
|
87
|
+
throw err;
|
|
88
|
+
}
|
|
89
|
+
finally {
|
|
90
|
+
clearTimeout(timeout);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
async function resolveModelPath(modelPath) {
|
|
94
|
+
const sanitized = sanitizeModelReference(modelPath);
|
|
95
|
+
if (!sanitized) {
|
|
96
|
+
throw new Error("Model path is empty");
|
|
97
|
+
}
|
|
98
|
+
if (isLikelyLocalPath(sanitized)) {
|
|
99
|
+
const absolutePath = toAbsolutePath(sanitized);
|
|
100
|
+
if (!(await fileExists(absolutePath))) {
|
|
101
|
+
throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
|
|
102
|
+
}
|
|
103
|
+
return absolutePath;
|
|
104
|
+
}
|
|
105
|
+
const candidates = GLINER_MODEL_FILES;
|
|
106
|
+
let lastError;
|
|
107
|
+
for (const filename of candidates) {
|
|
108
|
+
const localPath = path.join(getModelCacheDir(), sanitized, filename);
|
|
109
|
+
if (await fileExists(localPath)) {
|
|
110
|
+
return localPath;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
for (const filename of candidates) {
|
|
114
|
+
try {
|
|
115
|
+
return await downloadModelIfNeeded(sanitized, filename);
|
|
116
|
+
}
|
|
117
|
+
catch (err) {
|
|
118
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
throw new Error(`Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${lastError?.message ?? "unknown"}`);
|
|
122
|
+
}
|
|
12
123
|
export class GlinerEngine {
|
|
13
124
|
model = null;
|
|
14
125
|
modelPath;
|
|
@@ -23,16 +134,17 @@ export class GlinerEngine {
|
|
|
23
134
|
if (this.initialized)
|
|
24
135
|
return;
|
|
25
136
|
try {
|
|
137
|
+
const resolvedModelPath = await resolveModelPath(this.modelPath);
|
|
26
138
|
const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
|
|
27
139
|
const { Gliner } = glinerModule;
|
|
28
140
|
this.model = new Gliner({
|
|
29
141
|
tokenizerPath: this.modelPath,
|
|
30
142
|
onnxSettings: {
|
|
31
|
-
modelPath:
|
|
143
|
+
modelPath: resolvedModelPath,
|
|
32
144
|
executionProvider: "cpu",
|
|
33
145
|
},
|
|
34
146
|
maxWidth: 12,
|
|
35
|
-
modelType: "
|
|
147
|
+
modelType: "span-level",
|
|
36
148
|
});
|
|
37
149
|
await this.model.initialize();
|
|
38
150
|
this.initialized = true;
|
|
@@ -57,11 +169,15 @@ export class GlinerEngine {
|
|
|
57
169
|
];
|
|
58
170
|
// Deduplicate labels
|
|
59
171
|
const uniqueLabels = [...new Set(labels)];
|
|
60
|
-
const
|
|
172
|
+
const rawResults = await this.model.inference({
|
|
173
|
+
texts: [text],
|
|
174
|
+
entities: uniqueLabels,
|
|
175
|
+
flatNer: false,
|
|
61
176
|
threshold: this.threshold,
|
|
62
177
|
});
|
|
63
|
-
|
|
64
|
-
|
|
178
|
+
const flatResults = Array.isArray(rawResults) ? rawResults.flat() : [];
|
|
179
|
+
return flatResults.map((r) => ({
|
|
180
|
+
text: r.spanText ?? r.text,
|
|
65
181
|
label: canonicalType(r.label),
|
|
66
182
|
start: r.start,
|
|
67
183
|
end: r.end,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAG3C,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,uBAAuB;IACvB,oBAAoB;IACpB,sBAAsB;IACtB,sBAAsB;IACtB,uBAAuB;IACvB,2BAA2B;IAC3B,sBAAsB;IACtB,iBAAiB;CAClB,CAAC;AAEF,MAAM,yBAAyB,GAAG,OAAO,CAAC;AAE1C,SAAS,iBAAiB,CAAC,SAAiB;IAC1C,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IACjC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,YAAY,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IAClF,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACxD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,CAAC;AAC7E,CAAC;AAED,SAAS,gBAAgB;IACvB,OAAO,GAAG,CAAC,cAAc,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,sBAAsB,CAAC,SAAiB;IAC/C,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;AAC1B,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,QAAgB;IACxC,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC1B,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,SAAiB,EAAE,QAAgB;IACtE,MAAM,QAAQ,GAAG,gBAAgB,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IAE3D,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAChC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,GAAG,GAAG,0BAA0B,SAAS,iBAAiB,QAAQ,EAAE,CAAC;IAC3E,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,OA
AO,CAAC,GAAG,CAAC,eAAe,CAAC;IAClE,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,UAAU,KAAK,EAAE,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,yBAAyB,CAAC,CAAC;IAEhF,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1E,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,sCAAsC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;QAC3D,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7D,MAAM,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAErC,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,KAAK,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,kCAAkC,yBAAyB,IAAI,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,SAAiB;IAC/C,MAAM,SAAS,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,iBAAiB,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,MAAM,YAAY,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAC/C,IAAI,CAAC,CAAC,MAAM,UAAU,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC;IACtC,IAAI,SAA4B,CAAC;IAEjC,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QACrE,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAChC,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC1D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CACb,mCAAmC,SAAS,YAAY,UAAU,CAAC,IAAI,CAA
C,IAAI,CAAC,KAC3E,SAAS,EAAE,OAAO,IAAI,SACxB,EAAE,CACH,CAAC;AACJ,CAAC;AAED,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,iBAAiB,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACjE,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrF,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,CAAC;YAChC,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,iBAAiB;oBAC5B,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,YAAY;aACxB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC;YAC5C,KAAK,EAAE,CAAC,IAAI,CAAC;YACb,QAAQ,EAAE,YAAY;YACtB,OAAO,EAAE,KAAK;YACd,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QACH,MAAM,WAAW,GAAG,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAEvE,OAAO,WAAW,CAAC,GAAG,CACpB,CACE,CAOC,EACD,EAAE,CAAC,CAAC;YACJ,IAAI,EAAE,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,IAAI;YAC1B,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,G
AAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
|
|
@@ -25,11 +25,11 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
|
|
|
25
25
|
- [x] (2026-02-17T18:54:00Z) P3 [M1] Updated `package-lock` metadata and refreshed scope for build/release artifacts.
|
|
26
26
|
- [x] (2026-02-17T18:55:00Z) P4 [M2] Re-ran build/test/smoke + `npm pack --json` + `npm publish --dry-run` validations.
|
|
27
27
|
- [x] (2026-02-17T18:56:00Z) P5 [M2] Verified `openclaw plugins install` against the built `datafog-fogclaw-0.1.0.tgz` in a clean runtime; plugin now loads as `fogclaw` with status `loaded` and tools `fogclaw_scan, fogclaw_redact`.
|
|
28
|
-
- [ ] (2026-02-
|
|
28
|
+
- [ ] (2026-02-17T20:33:00Z) P5 [M2] Verify `openclaw plugins install @datafog/fogclaw` resolves to published `0.1.4` (or later) when stale plugin state is cleared and npm 2FA is provided.
|
|
29
29
|
- [x] (2026-02-17T19:27:00Z) P6 [M2] Fixed GLiNER startup blocker in Node by pinning `onnxruntime-web` to `1.21.0`, preventing `./webgpu` export resolution errors from `gliner` in OpenClaw install paths.
|
|
30
30
|
- [x] (2026-02-17T19:34:00Z) P6 [M2] Added direct `sharp` dependency `0.34.5` with an override to prevent optional sharp native install failure (`sharp-darwin-arm64v8.node` missing) during OpenClaw install-time dependency bootstrap.
|
|
31
|
-
- [ ] (2026-02-
|
|
32
|
-
- [
|
|
31
|
+
- [ ] (2026-02-17T20:29:00Z) P6 [M3] Publish this startup hardening update under `@datafog/fogclaw@0.1.4` after npm publish/auth is completed.
|
|
32
|
+
- [x] (2026-02-17T18:56:00Z) P6 [M3] Prepare and execute V1 publish/release of `@datafog/fogclaw` (attempt blocked by EOTP in this environment).
|
|
33
33
|
- [x] (2026-02-17T18:57:00Z) P7 [M3] Capture release artifacts and update evidence notes; add follow-up for dependency install blocker in OpenClaw install path.
|
|
34
34
|
|
|
35
35
|
|
|
@@ -47,13 +47,13 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
|
|
|
47
47
|
- Observation: The prior `TypeError: Cannot read properties of undefined (reading 'trim')` install failure was caused by OpenClaw's `registerTool` contract when tool objects omit a top-level `name`.
|
|
48
48
|
Evidence: `src/plugins/registry.ts` in OpenClaw (`registerTool` maps `tool.name` without null-guard); fixed in this repository by adding `name` fields to both tool objects.
|
|
49
49
|
|
|
50
|
-
- Observation: `openclaw plugins install @datafog/fogclaw`
|
|
51
|
-
Evidence:
|
|
50
|
+
- Observation: `openclaw plugins install @datafog/fogclaw` cannot be validated from npm in this environment yet (latest candidate `0.1.4` is not published due to OTP/auth), but local install from the tarball succeeds from a clean runtime and confirms plugin load path behavior.
|
|
51
|
+
Evidence: `npm view @datafog/fogclaw@0.1.4` (404), followed by `openclaw plugins install ./datafog-fogclaw-0.1.4.tgz`, `openclaw plugins info fogclaw`.
|
|
52
52
|
|
|
53
53
|
- Observation: GLiNER startup now avoids the `onnxruntime-web/webgpu` exports failure by pinning `onnxruntime-web` to 1.21.0, which has a Node-compatible `./webgpu` export path in this runtime.
|
|
54
54
|
Evidence: local `import('onnxruntime-web/webgpu')` succeeds after dependency pin, and OpenClaw install logs no longer show the subpath exports error.
|
|
55
55
|
- Observation: optional sharp runtime failures are now mitigated in clean install flows by pinning direct `sharp` 0.34.5; this removes the previously recurrent `Cannot find module '../build/Release/sharp-darwin-arm64v8.node'` warning in OpenClaw plugin install logs.
|
|
56
|
-
Evidence: `openclaw plugins install` from `datafog-fogclaw-0.1.
|
|
56
|
+
Evidence: `openclaw plugins install` from `datafog-fogclaw-0.1.4.tgz` clean runtime no longer emits that missing binary warning.
|
|
57
57
|
|
|
58
58
|
## Decision Log
|
|
59
59
|
|
|
@@ -83,8 +83,9 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
|
|
|
83
83
|
- Local validation confirms namespace rename compiles and tests (`npm run build`, `npm run test`, `npm run test:plugin-smoke`) continue to pass.
|
|
84
84
|
- `npm pack --json` and `npm publish --dry-run` now emit scoped package metadata under `@datafog/fogclaw`.
|
|
85
85
|
- `openclaw plugins install` against a clean temporary state and local `datafog-fogclaw-0.1.0.tgz` now succeeds; `openclaw plugins info fogclaw` shows status `loaded` and tools `fogclaw_scan`, `fogclaw_redact`.
|
|
86
|
-
- `openclaw plugins install @datafog/fogclaw` via npm registry is still blocked
|
|
87
|
-
- GLiNER
|
|
86
|
+
- `openclaw plugins install @datafog/fogclaw` via npm registry is still blocked because `0.1.4` has not yet been published (OTP/auth required). Local tarball install on clean runtime succeeds as an equivalent smoke test.
|
|
87
|
+
- GLiNER now returns detections with `source: gliner` after the `modelType` input-shape fix, rather than falling back to regex-only in this environment.
|
|
88
|
+
- GLiNER startup now avoids the webgpu export resolution error after pinning `onnxruntime-web` to `1.21.0`. After additionally forcing `modelType: "span-level"`, the local runtime now performs ONNX inference and returns entities instead of failing with `span_idx`/`texts is not iterable` in this environment, while still safely falling back to regex if GLiNER initialization fails.
|
|
88
89
|
|
|
89
90
|
|
|
90
91
|
## Context and Orientation
|
|
@@ -254,7 +255,7 @@ Expect:
|
|
|
254
255
|
- Scope migration evidence:
|
|
255
256
|
|
|
256
257
|
package: @datafog/fogclaw
|
|
257
|
-
version: 0.1.
|
|
258
|
+
version: 0.1.4
|
|
258
259
|
`npm pkg get name` output: `"@datafog/fogclaw"`
|
|
259
260
|
`npm pkg get openclaw` output:
|
|
260
261
|
`{"extensions":["./dist/index.js"]}`
|
|
@@ -268,17 +269,17 @@ Expect:
|
|
|
268
269
|
=> `function fogclaw FogClaw`
|
|
269
270
|
|
|
270
271
|
- Reproducibility evidence:
|
|
271
|
-
- `npm pack --json` output includes `datafog-fogclaw-0.1.
|
|
272
|
+
- `npm pack --json` output includes `datafog-fogclaw-0.1.4.tgz` and `openclaw.plugin.json`/`dist/index.js` in file list.
|
|
272
273
|
- `npm publish --dry-run` succeeded and produced scoped package manifest notice.
|
|
273
274
|
|
|
274
275
|
- Installability evidence:
|
|
275
|
-
- `openclaw plugins install
|
|
276
|
-
- `openclaw plugins install
|
|
277
|
-
- GLiNER startup
|
|
276
|
+
- `openclaw plugins install` of a local `datafog-fogclaw-0.1.4.tgz` now succeeds and returns `gliner`-type detections in this environment.
|
|
277
|
+
- `openclaw plugins install /path/to/datafog-fogclaw-0.1.4.tgz` and `openclaw plugins info fogclaw` in a clean runtime now succeed and report plugin status `loaded` with tools `fogclaw_scan`, `fogclaw_redact`; GLiNER returns PERSON/ORGANIZATION entities for sample text.
|
|
278
|
+
- GLiNER startup still logs optional runtime inference compatibility warnings in some environments, but plugin registration and install now succeed reliably.
|
|
278
279
|
|
|
279
280
|
- `git rev-parse HEAD` (of implementation snapshot): capture before final merge.
|
|
280
281
|
|
|
281
|
-
- Scoped package discoverability: not yet in npm registry
|
|
282
|
+
- Scoped package discoverability: not yet live in npm registry as `@datafog/fogclaw@0.1.4` (publish blocked by OTP in this environment).
|
|
282
283
|
|
|
283
284
|
|
|
284
285
|
## Interfaces and Dependencies
|
|
@@ -303,12 +304,12 @@ Expect:
|
|
|
303
304
|
|
|
304
305
|
## Verify/Release Decision
|
|
305
306
|
|
|
306
|
-
- decision:
|
|
307
|
-
- date: 2026-02-
|
|
307
|
+
- decision: pending
|
|
308
|
+
- date: 2026-02-17T20:33:00Z
|
|
308
309
|
- open findings by priority (if any): pending
|
|
309
310
|
- evidence:
|
|
310
|
-
- installability in clean runtime succeeds for local `datafog-fogclaw-0.1.
|
|
311
|
-
- scoped
|
|
311
|
+
- installability in clean runtime succeeds for local `datafog-fogclaw-0.1.4.tgz` after tool-name registration and dependency-hardening fixes
|
|
312
|
+
- scoped npm install is not yet validated because `0.1.4` has not been published (publish blocked by EOTP).
|
|
312
313
|
- rollback: revert to previous working scoped package state (or keep changes in branch) if publish credentials/visibility unavailable
|
|
313
314
|
- post-release checks:
|
|
314
315
|
- `openclaw plugins install @datafog/fogclaw`
|
|
@@ -321,4 +322,4 @@ Expect:
|
|
|
321
322
|
- 2026-02-17T10:57:00Z: Initialized plan for V1 scoped-release path in `@datafog/fogclaw` and documented zero-logic-change constraints for immediate installability milestone.
|
|
322
323
|
- 2026-02-17T18:57:00Z: Completed namespace migration in package metadata and install/docs (`package.json`, `package-lock.json`, `README.md`, `docs/plugins/fogclaw.md`). Ran full local validation (`npm run build`, `npm run test`, `npm run test:plugin-smoke`, `npm pack --json`, `npm publish --dry-run`) and documented install blocker (package not yet published to npm).
|
|
323
324
|
- 2026-02-17T19:08:00Z: Fixed OpenClaw compatibility in `src/index.ts` by adding explicit `name` fields to `fogclaw_scan` and `fogclaw_redact` tool registrations to avoid undefined `.trim()` during registration; verified clean-runtime install/load succeeds with local tarball (`openclaw plugins install <tgz>`, `plugins info`, `plugins list`).
|
|
324
|
-
- 2026-02-
|
|
325
|
+
- 2026-02-17T20:31:00Z: Added explicit `modelType: "span-level"` for GLiNER runtime configuration so ONNX feeds match downloaded model inputs; local install now returns real `gliner`-backed detections for PERSON/ORGANIZATION and no longer errors with `input 'span_idx' is missing in 'feeds'` in this environment.
|
package/docs/plugins/fogclaw.md
CHANGED
|
@@ -13,6 +13,9 @@ FogClaw is an OpenClaw plugin that protects agent workflows by detecting and han
|
|
|
13
13
|
|
|
14
14
|
It provides both proactive guardrail behavior (via the `before_agent_start` hook) and explicit tools:
|
|
15
15
|
|
|
16
|
+
- GLiNER ONNX artifacts are provisioned automatically on first run (no manual `download` step required).
|
|
17
|
+
- If the model cannot be downloaded (offline or restricted network), FogClaw continues in regex-only mode.
|
|
18
|
+
|
|
16
19
|
- `fogclaw_scan`: scans text for PII and custom entities.
|
|
17
20
|
- `fogclaw_redact`: scans and redacts sensitive matches.
|
|
18
21
|
|
|
@@ -59,6 +62,7 @@ Set plugin config under `plugins.entries.fogclaw.config`:
|
|
|
59
62
|
enabled: true,
|
|
60
63
|
guardrail_mode: "redact",
|
|
61
64
|
redactStrategy: "token",
|
|
65
|
+
model: "onnx-community/gliner_large-v2.1",
|
|
62
66
|
confidence_threshold: 0.5,
|
|
63
67
|
custom_entities: ["project codename", "competitor name"],
|
|
64
68
|
entityActions: {
|
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
package/src/engines/gliner.ts
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { env } from "@xenova/transformers";
|
|
4
|
+
|
|
1
5
|
import type { Entity } from "../types.js";
|
|
2
6
|
import { canonicalType } from "../types.js";
|
|
3
7
|
|
|
@@ -12,6 +16,139 @@ const DEFAULT_NER_LABELS = [
|
|
|
12
16
|
"passport number",
|
|
13
17
|
];
|
|
14
18
|
|
|
19
|
+
const GLINER_MODEL_FILES = [
|
|
20
|
+
"onnx/model_q4f16.onnx",
|
|
21
|
+
"onnx/model_q4.onnx",
|
|
22
|
+
"onnx/model_bnb4.onnx",
|
|
23
|
+
"onnx/model_int8.onnx",
|
|
24
|
+
"onnx/model_uint8.onnx",
|
|
25
|
+
"onnx/model_quantized.onnx",
|
|
26
|
+
"onnx/model_fp16.onnx",
|
|
27
|
+
"onnx/model.onnx",
|
|
28
|
+
];
|
|
29
|
+
|
|
30
|
+
const MODEL_DOWNLOAD_TIMEOUT_MS = 120_000;
|
|
31
|
+
|
|
32
|
+
function isLikelyLocalPath(modelPath: string): boolean {
|
|
33
|
+
const trimmed = modelPath.trim();
|
|
34
|
+
if (!trimmed) {
|
|
35
|
+
return false;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const lower = trimmed.toLowerCase();
|
|
39
|
+
const hasExtension = [".onnx", ".ort", ".bin"].some((ext) => lower.endsWith(ext));
|
|
40
|
+
if (hasExtension) {
|
|
41
|
+
return true;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
if (trimmed.startsWith(".") || path.isAbsolute(trimmed)) {
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
return false;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function toAbsolutePath(value: string): string {
|
|
52
|
+
return path.isAbsolute(value) ? value : path.resolve(process.cwd(), value);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function getModelCacheDir(): string {
|
|
56
|
+
return env.localModelPath ?? path.join(process.cwd(), ".cache");
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function sanitizeModelReference(modelPath: string): string {
|
|
60
|
+
return modelPath.trim();
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
async function fileExists(filePath: string): Promise<boolean> {
|
|
64
|
+
try {
|
|
65
|
+
await fs.access(filePath);
|
|
66
|
+
return true;
|
|
67
|
+
} catch {
|
|
68
|
+
return false;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
 * Ensures one ONNX artifact of a Hugging Face model repo exists in the local model cache.
 *
 * Returns the cached path immediately when the file is already present. Otherwise fetches
 * `https://huggingface.co/<modelRepo>/resolve/main/<filename>`, sending a bearer token taken
 * from `HF_TOKEN` or `HF_ACCESS_TOKEN` when one is set, and stores the payload in the cache.
 *
 * The payload is first written to a temporary sibling file and only renamed to the final
 * path once the write has completed. An interrupted write therefore never leaves a truncated
 * file at the final path, where it would pass the existence check on every later call.
 *
 * @param modelRepo - Hugging Face repository id, e.g. `onnx-community/gliner_large-v2.1`.
 * @param filename - Repo-relative artifact path, e.g. `onnx/model.onnx`.
 * @returns Path of the model file inside the cache directory.
 * @throws Error when the HTTP status is not OK, when the request is aborted after
 *   `MODEL_DOWNLOAD_TIMEOUT_MS`, or when the file cannot be written.
 */
async function downloadModelIfNeeded(modelRepo: string, filename: string): Promise<string> {
  const cacheDir = getModelCacheDir();
  const localPath = path.join(cacheDir, modelRepo, filename);

  if (await fileExists(localPath)) {
    return localPath;
  }

  const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
  const headers = new Headers();
  const token = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN;
  if (token) {
    headers.set("Authorization", `Bearer ${token}`);
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);

  // Unique per process and call so concurrent downloads never share a temp file.
  const partialPath = `${localPath}.${process.pid}.${Date.now()}.partial`;

  try {
    const response = await fetch(url, { headers, signal: controller.signal });
    if (!response.ok) {
      throw new Error(`Unable to download model artifact: ${response.status}`);
    }

    // Reading the body stays inside the try so the abort timer also covers the transfer.
    const bytes = new Uint8Array(await response.arrayBuffer());
    await fs.mkdir(path.dirname(localPath), { recursive: true });
    await fs.writeFile(partialPath, bytes);
    // Publish the file only after it has been fully written.
    await fs.rename(partialPath, localPath);

    return localPath;
  } catch (err) {
    // Best-effort cleanup; `force` ignores the case where the temp file was never created.
    await fs.rm(partialPath, { force: true }).catch(() => undefined);

    if (err instanceof Error && err.name === "AbortError") {
      throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
    }

    throw err;
  } finally {
    clearTimeout(timeout);
  }
}
|
|
111
|
+
|
|
112
|
+
/**
 * Turns the configured model reference into the path of an ONNX file on disk.
 *
 * Resolution order:
 * 1. A reference that looks like a local path is made absolute and must point at an
 *    existing regular file.
 * 2. Otherwise the reference is treated as a Hugging Face repo id: the first candidate from
 *    `GLINER_MODEL_FILES` already present in the cache is returned.
 * 3. If none is cached, the candidates are downloaded in list order and the first success wins.
 *
 * @param modelPath - Local model file path or Hugging Face repository id.
 * @returns Path of the ONNX model file to load.
 * @throws Error when the reference is empty, when a local path is not an existing file, or
 *   when no candidate could be found or downloaded (the message carries the last failure).
 */
async function resolveModelPath(modelPath: string): Promise<string> {
  const sanitized = sanitizeModelReference(modelPath);
  if (!sanitized) {
    throw new Error("Model path is empty");
  }

  if (isLikelyLocalPath(sanitized)) {
    const absolutePath = toAbsolutePath(sanitized);
    // A bare access check also succeeds for directories (e.g. "./models"); require a
    // regular file so the failure surfaces here rather than inside the ONNX runtime.
    const stats = await fs.stat(absolutePath).catch(() => undefined);
    if (!stats?.isFile()) {
      throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
    }

    return absolutePath;
  }

  const candidates = GLINER_MODEL_FILES;
  let lastError: Error | undefined;

  // Prefer any variant that is already cached before touching the network.
  for (const filename of candidates) {
    const localPath = path.join(getModelCacheDir(), sanitized, filename);
    if (await fileExists(localPath)) {
      return localPath;
    }
  }

  // Nothing cached: try each variant in list order and keep the last failure for reporting.
  for (const filename of candidates) {
    try {
      return await downloadModelIfNeeded(sanitized, filename);
    } catch (err) {
      lastError = err instanceof Error ? err : new Error(String(err));
    }
  }

  throw new Error(
    `Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${
      lastError?.message ?? "unknown"
    }`,
  );
}
|
|
151
|
+
|
|
15
152
|
export class GlinerEngine {
|
|
16
153
|
private model: any = null;
|
|
17
154
|
private modelPath: string;
|
|
@@ -28,16 +165,17 @@ export class GlinerEngine {
|
|
|
28
165
|
if (this.initialized) return;
|
|
29
166
|
|
|
30
167
|
try {
|
|
168
|
+
const resolvedModelPath = await resolveModelPath(this.modelPath);
|
|
31
169
|
const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
|
|
32
170
|
const { Gliner } = glinerModule;
|
|
33
171
|
this.model = new Gliner({
|
|
34
172
|
tokenizerPath: this.modelPath,
|
|
35
173
|
onnxSettings: {
|
|
36
|
-
modelPath:
|
|
174
|
+
modelPath: resolvedModelPath,
|
|
37
175
|
executionProvider: "cpu",
|
|
38
176
|
},
|
|
39
177
|
maxWidth: 12,
|
|
40
|
-
modelType: "
|
|
178
|
+
modelType: "span-level",
|
|
41
179
|
});
|
|
42
180
|
await this.model.initialize();
|
|
43
181
|
this.initialized = true;
|
|
@@ -67,13 +205,26 @@ export class GlinerEngine {
|
|
|
67
205
|
// Deduplicate labels
|
|
68
206
|
const uniqueLabels = [...new Set(labels)];
|
|
69
207
|
|
|
70
|
-
const
|
|
208
|
+
const rawResults = await this.model.inference({
|
|
209
|
+
texts: [text],
|
|
210
|
+
entities: uniqueLabels,
|
|
211
|
+
flatNer: false,
|
|
71
212
|
threshold: this.threshold,
|
|
72
213
|
});
|
|
214
|
+
const flatResults = Array.isArray(rawResults) ? rawResults.flat() : [];
|
|
73
215
|
|
|
74
|
-
return
|
|
75
|
-
(
|
|
76
|
-
|
|
216
|
+
return flatResults.map(
|
|
217
|
+
(
|
|
218
|
+
r: {
|
|
219
|
+
spanText?: string;
|
|
220
|
+
text: string;
|
|
221
|
+
label: string;
|
|
222
|
+
score: number;
|
|
223
|
+
start: number;
|
|
224
|
+
end: number;
|
|
225
|
+
},
|
|
226
|
+
) => ({
|
|
227
|
+
text: r.spanText ?? r.text,
|
|
77
228
|
label: canonicalType(r.label),
|
|
78
229
|
start: r.start,
|
|
79
230
|
end: r.end,
|
package/tests/gliner.test.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
import { describe, it, expect, vi
|
|
1
|
+
import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
2
5
|
|
|
3
6
|
// Mock the gliner npm package so we don't need the actual 1.4GB model
|
|
4
7
|
vi.mock("gliner", () => {
|
|
@@ -14,10 +17,102 @@ vi.mock("gliner", () => {
|
|
|
14
17
|
}
|
|
15
18
|
|
|
16
19
|
async inference(
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
+
request: { texts: string[]; entities: string[] } | string | string[],
|
|
21
|
+
maybeEntities?: string[],
|
|
22
|
+
_flatNer = false,
|
|
23
|
+
_threshold = 0.5,
|
|
20
24
|
): Promise<Array<{ text: string; label: string; score: number; start: number; end: number }>> {
|
|
25
|
+
const text =
|
|
26
|
+
typeof request === "string"
|
|
27
|
+
? request
|
|
28
|
+
: Array.isArray(request)
|
|
29
|
+
? request[0] ?? ""
|
|
30
|
+
: request.texts[0] ?? "";
|
|
31
|
+
const requestEntities =
|
|
32
|
+
typeof request === "object" && request !== null && "entities" in request
|
|
33
|
+
? request.entities
|
|
34
|
+
: undefined;
|
|
35
|
+
const labels =
|
|
36
|
+
Array.isArray(maybeEntities)
|
|
37
|
+
? maybeEntities
|
|
38
|
+
: requestEntities ?? [];
|
|
39
|
+
const results: Array<{ text: string; label: string; score: number; start: number; end: number }> = [];
|
|
40
|
+
|
|
41
|
+
// Simulate entity detection for "John Smith"
|
|
42
|
+
const johnIndex = text.indexOf("John Smith");
|
|
43
|
+
if (johnIndex !== -1 && labels.includes("person")) {
|
|
44
|
+
results.push({
|
|
45
|
+
text: "John Smith",
|
|
46
|
+
label: "person",
|
|
47
|
+
score: 0.95,
|
|
48
|
+
start: johnIndex,
|
|
49
|
+
end: johnIndex + "John Smith".length,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Simulate entity detection for "Acme Corp"
|
|
54
|
+
const acmeIndex = text.indexOf("Acme Corp");
|
|
55
|
+
if (acmeIndex !== -1 && labels.includes("organization")) {
|
|
56
|
+
results.push({
|
|
57
|
+
text: "Acme Corp",
|
|
58
|
+
label: "organization",
|
|
59
|
+
score: 0.88,
|
|
60
|
+
start: acmeIndex,
|
|
61
|
+
end: acmeIndex + "Acme Corp".length,
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// Simulate entity detection for "New York"
|
|
66
|
+
const nyIndex = text.indexOf("New York");
|
|
67
|
+
if (nyIndex !== -1 && labels.includes("location")) {
|
|
68
|
+
results.push({
|
|
69
|
+
text: "New York",
|
|
70
|
+
label: "location",
|
|
71
|
+
score: 0.91,
|
|
72
|
+
start: nyIndex,
|
|
73
|
+
end: nyIndex + "New York".length,
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return results;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return { Gliner: MockGliner };
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
vi.mock("gliner/node", () => {
|
|
85
|
+
class MockGliner {
|
|
86
|
+
private config: any;
|
|
87
|
+
|
|
88
|
+
constructor(config: any) {
|
|
89
|
+
this.config = config;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
async initialize(): Promise<void> {
|
|
93
|
+
// No-op in mock
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
async inference(
|
|
97
|
+
request: { texts: string[]; entities: string[] } | string | string[],
|
|
98
|
+
maybeEntities?: string[],
|
|
99
|
+
_flatNer = false,
|
|
100
|
+
_threshold = 0.5,
|
|
101
|
+
): Promise<Array<{ text: string; label: string; score: number; start: number; end: number }>> {
|
|
102
|
+
const text =
|
|
103
|
+
typeof request === "string"
|
|
104
|
+
? request
|
|
105
|
+
: Array.isArray(request)
|
|
106
|
+
? request[0] ?? ""
|
|
107
|
+
: request.texts[0] ?? "";
|
|
108
|
+
const requestEntities =
|
|
109
|
+
typeof request === "object" && request !== null && "entities" in request
|
|
110
|
+
? request.entities
|
|
111
|
+
: undefined;
|
|
112
|
+
const labels =
|
|
113
|
+
Array.isArray(maybeEntities)
|
|
114
|
+
? maybeEntities
|
|
115
|
+
: requestEntities ?? [];
|
|
21
116
|
const results: Array<{ text: string; label: string; score: number; start: number; end: number }> = [];
|
|
22
117
|
|
|
23
118
|
// Simulate entity detection for "John Smith"
|
|
@@ -65,11 +160,21 @@ vi.mock("gliner", () => {
|
|
|
65
160
|
|
|
66
161
|
import { GlinerEngine } from "../src/engines/gliner.js";
|
|
67
162
|
|
|
163
|
+
const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-gliner-model-test.onnx");
|
|
164
|
+
|
|
165
|
+
beforeAll(async () => {
|
|
166
|
+
await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
afterAll(async () => {
|
|
170
|
+
await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
|
|
171
|
+
});
|
|
172
|
+
|
|
68
173
|
describe("GlinerEngine", () => {
|
|
69
174
|
let engine: GlinerEngine;
|
|
70
175
|
|
|
71
176
|
beforeEach(async () => {
|
|
72
|
-
engine = new GlinerEngine(
|
|
177
|
+
engine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
|
|
73
178
|
await engine.initialize();
|
|
74
179
|
});
|
|
75
180
|
|
|
@@ -166,7 +271,7 @@ describe("GlinerEngine", () => {
|
|
|
166
271
|
});
|
|
167
272
|
|
|
168
273
|
it("reports isInitialized correctly", async () => {
|
|
169
|
-
const freshEngine = new GlinerEngine(
|
|
274
|
+
const freshEngine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
|
|
170
275
|
expect(freshEngine.isInitialized).toBe(false);
|
|
171
276
|
|
|
172
277
|
await freshEngine.initialize();
|
package/tests/scanner.test.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
-
import { describe, it, expect, vi
|
|
1
|
+
import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
2
5
|
|
|
3
6
|
// Mock the gliner npm package so we don't need the actual model
|
|
4
7
|
vi.mock("gliner", () => {
|
|
@@ -6,10 +9,25 @@ vi.mock("gliner", () => {
|
|
|
6
9
|
Gliner: class MockGliner {
|
|
7
10
|
async initialize() {}
|
|
8
11
|
async inference(
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
+
request: { texts: string[]; entities: string[] } | string | string[],
|
|
13
|
+
maybeEntities?: string[],
|
|
14
|
+
_flatNer = false,
|
|
15
|
+
_threshold = 0.5,
|
|
12
16
|
) {
|
|
17
|
+
const text =
|
|
18
|
+
typeof request === "string"
|
|
19
|
+
? request
|
|
20
|
+
: Array.isArray(request)
|
|
21
|
+
? request[0] ?? ""
|
|
22
|
+
: request.texts[0] ?? "";
|
|
23
|
+
const requestEntities =
|
|
24
|
+
typeof request === "object" && request !== null && "entities" in request
|
|
25
|
+
? request.entities
|
|
26
|
+
: undefined;
|
|
27
|
+
const labels =
|
|
28
|
+
Array.isArray(maybeEntities)
|
|
29
|
+
? maybeEntities
|
|
30
|
+
: requestEntities ?? [];
|
|
13
31
|
const results: any[] = [];
|
|
14
32
|
|
|
15
33
|
// Simulate person detection for "John Smith"
|
|
@@ -39,7 +57,64 @@ vi.mock("gliner", () => {
|
|
|
39
57
|
// Only return results whose labels are requested
|
|
40
58
|
return results.filter((r) => labels.includes(r.label));
|
|
41
59
|
}
|
|
42
|
-
}
|
|
60
|
+
}
|
|
61
|
+
};
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
vi.mock("gliner/node", () => {
|
|
65
|
+
return {
|
|
66
|
+
Gliner: class MockGliner {
|
|
67
|
+
async initialize() {}
|
|
68
|
+
async inference(
|
|
69
|
+
request: { texts: string[]; entities: string[] } | string | string[],
|
|
70
|
+
maybeEntities?: string[],
|
|
71
|
+
_flatNer = false,
|
|
72
|
+
_threshold = 0.5,
|
|
73
|
+
) {
|
|
74
|
+
const text =
|
|
75
|
+
typeof request === "string"
|
|
76
|
+
? request
|
|
77
|
+
: Array.isArray(request)
|
|
78
|
+
? request[0] ?? ""
|
|
79
|
+
: request.texts[0] ?? "";
|
|
80
|
+
const requestEntities =
|
|
81
|
+
typeof request === "object" && request !== null && "entities" in request
|
|
82
|
+
? request.entities
|
|
83
|
+
: undefined;
|
|
84
|
+
const labels =
|
|
85
|
+
Array.isArray(maybeEntities)
|
|
86
|
+
? maybeEntities
|
|
87
|
+
: requestEntities ?? [];
|
|
88
|
+
const results: any[] = [];
|
|
89
|
+
|
|
90
|
+
// Simulate person detection for "John Smith"
|
|
91
|
+
if (text.includes("John Smith")) {
|
|
92
|
+
const idx = text.indexOf("John Smith");
|
|
93
|
+
results.push({
|
|
94
|
+
text: "John Smith",
|
|
95
|
+
label: "person",
|
|
96
|
+
score: 0.95,
|
|
97
|
+
start: idx,
|
|
98
|
+
end: idx + 10,
|
|
99
|
+
});
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Simulate organization detection for "Acme Corp"
|
|
103
|
+
if (text.includes("Acme Corp")) {
|
|
104
|
+
const idx = text.indexOf("Acme Corp");
|
|
105
|
+
results.push({
|
|
106
|
+
text: "Acme Corp",
|
|
107
|
+
label: "organization",
|
|
108
|
+
score: 0.88,
|
|
109
|
+
start: idx,
|
|
110
|
+
end: idx + 9,
|
|
111
|
+
});
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Only return results whose labels are requested
|
|
115
|
+
return results.filter((r) => labels.includes(r.label));
|
|
116
|
+
}
|
|
117
|
+
}
|
|
43
118
|
};
|
|
44
119
|
});
|
|
45
120
|
|
|
@@ -47,8 +122,22 @@ import { Scanner } from "../src/scanner.js";
|
|
|
47
122
|
import { DEFAULT_CONFIG } from "../src/config.js";
|
|
48
123
|
import type { FogClawConfig } from "../src/types.js";
|
|
49
124
|
|
|
125
|
+
const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-scanner-gliner-model-test.onnx");
|
|
126
|
+
|
|
127
|
+
beforeAll(async () => {
|
|
128
|
+
await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
afterAll(async () => {
|
|
132
|
+
await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
|
|
133
|
+
});
|
|
134
|
+
|
|
50
135
|
function makeConfig(overrides: Partial<FogClawConfig> = {}): FogClawConfig {
|
|
51
|
-
return {
|
|
136
|
+
return {
|
|
137
|
+
...DEFAULT_CONFIG,
|
|
138
|
+
model: TEST_ONNX_MODEL_PATH,
|
|
139
|
+
...overrides,
|
|
140
|
+
};
|
|
52
141
|
}
|
|
53
142
|
|
|
54
143
|
describe("Scanner", () => {
|