@datafog/fogclaw 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,6 +29,22 @@ npm run build
29
29
 
30
30
  ## Quick Start
31
31
 
32
+ ### GLiNER first-run setup (no extra steps)
33
+
34
+ FogClaw automatically downloads the GLiNER ONNX model on first run if it is not already cached locally, then reuses it for all later starts.
35
+
36
+ What happens on first scan:
37
+
38
+ 1. Tokenizers are downloaded (if needed).
39
+ 2. The first ONNX variant available in the model's Hugging Face repository (quantized variants are tried before the full-precision `model.onnx`) is downloaded to the plugin's local model cache:
40
+ - `.../node_modules/@xenova/transformers/.cache/<model-repo>/onnx/<selected-model>.onnx`
41
+ - (This download can take a moment depending on network and selected model size.)
42
+ 3. GLiNER starts using local files, so later runs stay fast and offline-friendly.
43
+
44
+ If the download cannot be performed (network/firewall/auth), FogClaw safely falls back to regex-only mode and continues to protect common structured PII.
45
+
46
+ If downloading the model requires Hugging Face authentication, export `HF_TOKEN` or `HF_ACCESS_TOKEN` before starting OpenClaw so the model files can download. If both are set, `HF_TOKEN` takes precedence.
47
+
32
48
  1. Copy the example config:
33
49
 
34
50
  ```bash
@@ -144,7 +160,7 @@ Plus any labels you add via `custom_entities` in the config.
144
160
  | `enabled` | `boolean` | `true` | Enable/disable the plugin |
145
161
  | `guardrail_mode` | `string` | `"redact"` | Default action: `"redact"`, `"block"`, or `"warn"` |
146
162
  | `redactStrategy` | `string` | `"token"` | How to redact: `"token"`, `"mask"`, or `"hash"` |
147
- | `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER |
163
+ | `model` | `string` | `"onnx-community/gliner_large-v2.1"` | Hugging Face model repository for GLiNER (or a path to a local `.onnx` file for advanced setups). |
148
164
  | `confidence_threshold` | `number` | `0.5` | Minimum confidence for GLiNER detections (0-1) |
149
165
  | `custom_entities` | `string[]` | `[]` | Custom entity labels for zero-shot detection |
150
166
  | `entityActions` | `object` | `{}` | Per-entity-type action overrides |
@@ -1 +1 @@
1
- {"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAc1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAwBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA+BnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
1
+ {"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAmJ1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAyBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA+BnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
@@ -1,3 +1,6 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { env } from "@xenova/transformers";
1
4
  import { canonicalType } from "../types.js";
2
5
  const DEFAULT_NER_LABELS = [
3
6
  "person",
@@ -9,6 +12,114 @@ const DEFAULT_NER_LABELS = [
9
12
  "account number",
10
13
  "passport number",
11
14
  ];
15
+ const GLINER_MODEL_FILES = [
16
+ "onnx/model_q4f16.onnx",
17
+ "onnx/model_q4.onnx",
18
+ "onnx/model_bnb4.onnx",
19
+ "onnx/model_int8.onnx",
20
+ "onnx/model_uint8.onnx",
21
+ "onnx/model_quantized.onnx",
22
+ "onnx/model_fp16.onnx",
23
+ "onnx/model.onnx",
24
+ ];
25
+ const MODEL_DOWNLOAD_TIMEOUT_MS = 120_000;
26
+ function isLikelyLocalPath(modelPath) {
27
+ const trimmed = modelPath.trim();
28
+ if (!trimmed) {
29
+ return false;
30
+ }
31
+ const lower = trimmed.toLowerCase();
32
+ const hasExtension = [".onnx", ".ort", ".bin"].some((ext) => lower.endsWith(ext));
33
+ if (hasExtension) {
34
+ return true;
35
+ }
36
+ if (trimmed.startsWith(".") || path.isAbsolute(trimmed)) {
37
+ return true;
38
+ }
39
+ return false;
40
+ }
41
+ function toAbsolutePath(value) {
42
+ return path.isAbsolute(value) ? value : path.resolve(process.cwd(), value);
43
+ }
44
+ function getModelCacheDir() {
45
+ return env.localModelPath ?? path.join(process.cwd(), ".cache");
46
+ }
47
+ function sanitizeModelReference(modelPath) {
48
+ return modelPath.trim();
49
+ }
50
+ async function fileExists(filePath) {
51
+ try {
52
+ await fs.access(filePath);
53
+ return true;
54
+ }
55
+ catch {
56
+ return false;
57
+ }
58
+ }
59
+ async function downloadModelIfNeeded(modelRepo, filename) {
60
+ const cacheDir = getModelCacheDir();
61
+ const localPath = path.join(cacheDir, modelRepo, filename);
62
+ if (await fileExists(localPath)) {
63
+ return localPath;
64
+ }
65
+ const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
66
+ const headers = new Headers();
67
+ const token = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN;
68
+ if (token) {
69
+ headers.set("Authorization", `Bearer ${token}`);
70
+ }
71
+ const controller = new AbortController();
72
+ const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
73
+ try {
74
+ const response = await fetch(url, { headers, signal: controller.signal });
75
+ if (!response.ok) {
76
+ throw new Error(`Unable to download model artifact: ${response.status}`);
77
+ }
78
+ const bytes = new Uint8Array(await response.arrayBuffer());
79
+ await fs.mkdir(path.dirname(localPath), { recursive: true });
80
+ await fs.writeFile(localPath, bytes);
81
+ return localPath;
82
+ }
83
+ catch (err) {
84
+ if (err instanceof Error && err.name === "AbortError") {
85
+ throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
86
+ }
87
+ throw err;
88
+ }
89
+ finally {
90
+ clearTimeout(timeout);
91
+ }
92
+ }
93
+ async function resolveModelPath(modelPath) {
94
+ const sanitized = sanitizeModelReference(modelPath);
95
+ if (!sanitized) {
96
+ throw new Error("Model path is empty");
97
+ }
98
+ if (isLikelyLocalPath(sanitized)) {
99
+ const absolutePath = toAbsolutePath(sanitized);
100
+ if (!(await fileExists(absolutePath))) {
101
+ throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
102
+ }
103
+ return absolutePath;
104
+ }
105
+ const candidates = GLINER_MODEL_FILES;
106
+ let lastError;
107
+ for (const filename of candidates) {
108
+ const localPath = path.join(getModelCacheDir(), sanitized, filename);
109
+ if (await fileExists(localPath)) {
110
+ return localPath;
111
+ }
112
+ }
113
+ for (const filename of candidates) {
114
+ try {
115
+ return await downloadModelIfNeeded(sanitized, filename);
116
+ }
117
+ catch (err) {
118
+ lastError = err instanceof Error ? err : new Error(String(err));
119
+ }
120
+ }
121
+ throw new Error(`Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${lastError?.message ?? "unknown"}`);
122
+ }
12
123
  export class GlinerEngine {
13
124
  model = null;
14
125
  modelPath;
@@ -23,12 +134,13 @@ export class GlinerEngine {
23
134
  if (this.initialized)
24
135
  return;
25
136
  try {
137
+ const resolvedModelPath = await resolveModelPath(this.modelPath);
26
138
  const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
27
139
  const { Gliner } = glinerModule;
28
140
  this.model = new Gliner({
29
141
  tokenizerPath: this.modelPath,
30
142
  onnxSettings: {
31
- modelPath: this.modelPath,
143
+ modelPath: resolvedModelPath,
32
144
  executionProvider: "cpu",
33
145
  },
34
146
  maxWidth: 12,
@@ -1 +1 @@
1
- {"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrF,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,CAAC;YAChC,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE;YAC7D,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAChB,CAAC,CAA6E,EAAE,EAAE,CAAC,CAAC;YAClF,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,
UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
1
+ {"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAG3C,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,uBAAuB;IACvB,oBAAoB;IACpB,sBAAsB;IACtB,sBAAsB;IACtB,uBAAuB;IACvB,2BAA2B;IAC3B,sBAAsB;IACtB,iBAAiB;CAClB,CAAC;AAEF,MAAM,yBAAyB,GAAG,OAAO,CAAC;AAE1C,SAAS,iBAAiB,CAAC,SAAiB;IAC1C,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IACjC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,YAAY,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IAClF,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACxD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,CAAC;AAC7E,CAAC;AAED,SAAS,gBAAgB;IACvB,OAAO,GAAG,CAAC,cAAc,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,sBAAsB,CAAC,SAAiB;IAC/C,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;AAC1B,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,QAAgB;IACxC,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC1B,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,SAAiB,EAAE,QAAgB;IACtE,MAAM,QAAQ,GAAG,gBAAgB,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IAE3D,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAChC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,GAAG,GAAG,0BAA0B,SAAS,iBAAiB,QAAQ,EAAE,CAAC;IAC3E,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,
OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;IAClE,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,UAAU,KAAK,EAAE,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,yBAAyB,CAAC,CAAC;IAEhF,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1E,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,sCAAsC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;QAC3D,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7D,MAAM,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAErC,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,KAAK,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,kCAAkC,yBAAyB,IAAI,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,SAAiB;IAC/C,MAAM,SAAS,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,iBAAiB,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,MAAM,YAAY,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAC/C,IAAI,CAAC,CAAC,MAAM,UAAU,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC;IACtC,IAAI,SAA4B,CAAC;IAEjC,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QACrE,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAChC,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC1D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CACb,mCAAmC,SAAS,YAAY,UAAU,CAAC,IAAI,C
AAC,IAAI,CAAC,KAC3E,SAAS,EAAE,OAAO,IAAI,SACxB,EAAE,CACH,CAAC;AACJ,CAAC;AAED,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,iBAAiB,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACjE,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrF,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,CAAC;YAChC,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,iBAAiB;oBAC5B,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE;YAC7D,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAChB,CAAC,CAA6E,EAAE,EAAE,CAAC,CAAC;YAClF,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
@@ -13,6 +13,9 @@ FogClaw is an OpenClaw plugin that protects agent workflows by detecting and han
13
13
 
14
14
  It provides both proactive guardrail behavior (via the `before_agent_start` hook) and explicit tools:
15
15
 
16
+ - GLiNER ONNX artifacts are provisioned automatically on first run (no manual `download` step required).
17
+ - If the model cannot be downloaded (offline or restricted network), FogClaw continues in regex-only mode.
18
+
16
19
  - `fogclaw_scan`: scans text for PII and custom entities.
17
20
  - `fogclaw_redact`: scans and redacts sensitive matches.
18
21
 
@@ -59,6 +62,7 @@ Set plugin config under `plugins.entries.fogclaw.config`:
59
62
  enabled: true,
60
63
  guardrail_mode: "redact",
61
64
  redactStrategy: "token",
65
+ model: "onnx-community/gliner_large-v2.1",
62
66
  confidence_threshold: 0.5,
63
67
  custom_entities: ["project codename", "competitor name"],
64
68
  entityActions: {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "fogclaw",
3
3
  "name": "FogClaw",
4
- "version": "0.1.0",
4
+ "version": "0.1.3",
5
5
  "description": "PII detection & custom entity redaction powered by DataFog",
6
6
  "configSchema": {
7
7
  "type": "object",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@datafog/fogclaw",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "OpenClaw plugin for PII detection & custom entity redaction powered by DataFog",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,3 +1,7 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { env } from "@xenova/transformers";
4
+
1
5
  import type { Entity } from "../types.js";
2
6
  import { canonicalType } from "../types.js";
3
7
 
@@ -12,6 +16,139 @@ const DEFAULT_NER_LABELS = [
12
16
  "passport number",
13
17
  ];
14
18
 
19
+ const GLINER_MODEL_FILES = [
20
+ "onnx/model_q4f16.onnx",
21
+ "onnx/model_q4.onnx",
22
+ "onnx/model_bnb4.onnx",
23
+ "onnx/model_int8.onnx",
24
+ "onnx/model_uint8.onnx",
25
+ "onnx/model_quantized.onnx",
26
+ "onnx/model_fp16.onnx",
27
+ "onnx/model.onnx",
28
+ ];
29
+
30
+ const MODEL_DOWNLOAD_TIMEOUT_MS = 120_000;
31
+
32
+ function isLikelyLocalPath(modelPath: string): boolean {
33
+ const trimmed = modelPath.trim();
34
+ if (!trimmed) {
35
+ return false;
36
+ }
37
+
38
+ const lower = trimmed.toLowerCase();
39
+ const hasExtension = [".onnx", ".ort", ".bin"].some((ext) => lower.endsWith(ext));
40
+ if (hasExtension) {
41
+ return true;
42
+ }
43
+
44
+ if (trimmed.startsWith(".") || path.isAbsolute(trimmed)) {
45
+ return true;
46
+ }
47
+
48
+ return false;
49
+ }
50
+
51
+ function toAbsolutePath(value: string): string {
52
+ return path.isAbsolute(value) ? value : path.resolve(process.cwd(), value);
53
+ }
54
+
55
+ function getModelCacheDir(): string {
56
+ return env.localModelPath ?? path.join(process.cwd(), ".cache");
57
+ }
58
+
59
+ function sanitizeModelReference(modelPath: string): string {
60
+ return modelPath.trim();
61
+ }
62
+
63
+ async function fileExists(filePath: string): Promise<boolean> {
64
+ try {
65
+ await fs.access(filePath);
66
+ return true;
67
+ } catch {
68
+ return false;
69
+ }
70
+ }
71
+
72
+ async function downloadModelIfNeeded(modelRepo: string, filename: string): Promise<string> {
73
+ const cacheDir = getModelCacheDir();
74
+ const localPath = path.join(cacheDir, modelRepo, filename);
75
+
76
+ if (await fileExists(localPath)) {
77
+ return localPath;
78
+ }
79
+
80
+ const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
81
+ const headers = new Headers();
82
+ const token = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN;
83
+ if (token) {
84
+ headers.set("Authorization", `Bearer ${token}`);
85
+ }
86
+
87
+ const controller = new AbortController();
88
+ const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
89
+
90
+ try {
91
+ const response = await fetch(url, { headers, signal: controller.signal });
92
+ if (!response.ok) {
93
+ throw new Error(`Unable to download model artifact: ${response.status}`);
94
+ }
95
+
96
+ const bytes = new Uint8Array(await response.arrayBuffer());
97
+ await fs.mkdir(path.dirname(localPath), { recursive: true });
98
+ await fs.writeFile(localPath, bytes);
99
+
100
+ return localPath;
101
+ } catch (err) {
102
+ if (err instanceof Error && err.name === "AbortError") {
103
+ throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
104
+ }
105
+
106
+ throw err;
107
+ } finally {
108
+ clearTimeout(timeout);
109
+ }
110
+ }
111
+
112
+ async function resolveModelPath(modelPath: string): Promise<string> {
113
+ const sanitized = sanitizeModelReference(modelPath);
114
+ if (!sanitized) {
115
+ throw new Error("Model path is empty");
116
+ }
117
+
118
+ if (isLikelyLocalPath(sanitized)) {
119
+ const absolutePath = toAbsolutePath(sanitized);
120
+ if (!(await fileExists(absolutePath))) {
121
+ throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
122
+ }
123
+
124
+ return absolutePath;
125
+ }
126
+
127
+ const candidates = GLINER_MODEL_FILES;
128
+ let lastError: Error | undefined;
129
+
130
+ for (const filename of candidates) {
131
+ const localPath = path.join(getModelCacheDir(), sanitized, filename);
132
+ if (await fileExists(localPath)) {
133
+ return localPath;
134
+ }
135
+ }
136
+
137
+ for (const filename of candidates) {
138
+ try {
139
+ return await downloadModelIfNeeded(sanitized, filename);
140
+ } catch (err) {
141
+ lastError = err instanceof Error ? err : new Error(String(err));
142
+ }
143
+ }
144
+
145
+ throw new Error(
146
+ `Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${
147
+ lastError?.message ?? "unknown"
148
+ }`,
149
+ );
150
+ }
151
+
15
152
  export class GlinerEngine {
16
153
  private model: any = null;
17
154
  private modelPath: string;
@@ -28,12 +165,13 @@ export class GlinerEngine {
28
165
  if (this.initialized) return;
29
166
 
30
167
  try {
168
+ const resolvedModelPath = await resolveModelPath(this.modelPath);
31
169
  const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
32
170
  const { Gliner } = glinerModule;
33
171
  this.model = new Gliner({
34
172
  tokenizerPath: this.modelPath,
35
173
  onnxSettings: {
36
- modelPath: this.modelPath,
174
+ modelPath: resolvedModelPath,
37
175
  executionProvider: "cpu",
38
176
  },
39
177
  maxWidth: 12,
@@ -1,4 +1,7 @@
1
- import { describe, it, expect, vi, beforeEach } from "vitest";
1
+ import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
2
+ import fs from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
2
5
 
3
6
  // Mock the gliner npm package so we don't need the actual 1.4GB model
4
7
  vi.mock("gliner", () => {
@@ -63,13 +66,82 @@ vi.mock("gliner", () => {
63
66
  return { Gliner: MockGliner };
64
67
  });
65
68
 
69
+ vi.mock("gliner/node", () => {
70
+ class MockGliner {
71
+ private config: any;
72
+
73
+ constructor(config: any) {
74
+ this.config = config;
75
+ }
76
+
77
+ async initialize(): Promise<void> {
78
+ // No-op in mock
79
+ }
80
+
81
+ async inference(
82
+ text: string,
83
+ labels: string[],
84
+ options: { threshold: number },
85
+ ): Promise<Array<{ text: string; label: string; score: number; start: number; end: number }>> {
86
+ const results: Array<{ text: string; label: string; score: number; start: number; end: number }> = [];
87
+
88
+ const johnIndex = text.indexOf("John Smith");
89
+ if (johnIndex !== -1 && labels.includes("person")) {
90
+ results.push({
91
+ text: "John Smith",
92
+ label: "person",
93
+ score: 0.95,
94
+ start: johnIndex,
95
+ end: johnIndex + "John Smith".length,
96
+ });
97
+ }
98
+
99
+ const acmeIndex = text.indexOf("Acme Corp");
100
+ if (acmeIndex !== -1 && labels.includes("organization")) {
101
+ results.push({
102
+ text: "Acme Corp",
103
+ label: "organization",
104
+ score: 0.88,
105
+ start: acmeIndex,
106
+ end: acmeIndex + "Acme Corp".length,
107
+ });
108
+ }
109
+
110
+ const nyIndex = text.indexOf("New York");
111
+ if (nyIndex !== -1 && labels.includes("location")) {
112
+ results.push({
113
+ text: "New York",
114
+ label: "location",
115
+ score: 0.91,
116
+ start: nyIndex,
117
+ end: nyIndex + "New York".length,
118
+ });
119
+ }
120
+
121
+ return results;
122
+ }
123
+ }
124
+
125
+ return { Gliner: MockGliner };
126
+ });
127
+
66
128
  import { GlinerEngine } from "../src/engines/gliner.js";
67
129
 
130
+ const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-gliner-model-test.onnx");
131
+
132
+ beforeAll(async () => {
133
+ await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
134
+ });
135
+
136
+ afterAll(async () => {
137
+ await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
138
+ });
139
+
68
140
  describe("GlinerEngine", () => {
69
141
  let engine: GlinerEngine;
70
142
 
71
143
  beforeEach(async () => {
72
- engine = new GlinerEngine("onnx-community/gliner_small-v2.5", 0.5);
144
+ engine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
73
145
  await engine.initialize();
74
146
  });
75
147
 
@@ -166,7 +238,7 @@ describe("GlinerEngine", () => {
166
238
  });
167
239
 
168
240
  it("reports isInitialized correctly", async () => {
169
- const freshEngine = new GlinerEngine("some-model", 0.5);
241
+ const freshEngine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
170
242
  expect(freshEngine.isInitialized).toBe(false);
171
243
 
172
244
  await freshEngine.initialize();
@@ -1,4 +1,7 @@
1
- import { describe, it, expect, vi, beforeEach } from "vitest";
1
+ import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
2
+ import fs from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
2
5
 
3
6
  // Mock the gliner npm package so we don't need the actual model
4
7
  vi.mock("gliner", () => {
@@ -43,12 +46,68 @@ vi.mock("gliner", () => {
43
46
  };
44
47
  });
45
48
 
49
+ vi.mock("gliner/node", () => {
50
+ return {
51
+ Gliner: class MockGliner {
52
+ async initialize() {}
53
+ async inference(
54
+ text: string,
55
+ labels: string[],
56
+ _opts: { threshold: number },
57
+ ) {
58
+ const results: any[] = [];
59
+
60
+ // Simulate person detection for "John Smith"
61
+ if (text.includes("John Smith")) {
62
+ const idx = text.indexOf("John Smith");
63
+ results.push({
64
+ text: "John Smith",
65
+ label: "person",
66
+ score: 0.95,
67
+ start: idx,
68
+ end: idx + 10,
69
+ });
70
+ }
71
+
72
+ // Simulate organization detection for "Acme Corp"
73
+ if (text.includes("Acme Corp")) {
74
+ const idx = text.indexOf("Acme Corp");
75
+ results.push({
76
+ text: "Acme Corp",
77
+ label: "organization",
78
+ score: 0.88,
79
+ start: idx,
80
+ end: idx + 9,
81
+ });
82
+ }
83
+
84
+ // Only return results whose labels are requested
85
+ return results.filter((r) => labels.includes(r.label));
86
+ }
87
+ },
88
+ };
89
+ });
90
+
46
91
  import { Scanner } from "../src/scanner.js";
47
92
  import { DEFAULT_CONFIG } from "../src/config.js";
48
93
  import type { FogClawConfig } from "../src/types.js";
49
94
 
95
+ const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-scanner-gliner-model-test.onnx");
96
+
97
+ beforeAll(async () => {
98
+ await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
99
+ });
100
+
101
+ afterAll(async () => {
102
+ await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
103
+ });
104
+
50
105
  function makeConfig(overrides: Partial<FogClawConfig> = {}): FogClawConfig {
51
- return { ...DEFAULT_CONFIG, ...overrides };
106
+ return {
107
+ ...DEFAULT_CONFIG,
108
+ model: TEST_ONNX_MODEL_PATH,
109
+ ...overrides,
110
+ };
52
111
  }
53
112
 
54
113
  describe("Scanner", () => {