@datafog/fogclaw 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,6 +29,22 @@ npm run build
29
29
 
30
30
  ## Quick Start
31
31
 
32
+ ### GLiNER first-run setup (no extra steps)
33
+
34
+ FogClaw automatically downloads the GLiNER ONNX model on first run if it is not already cached locally, then reuses it for all later starts.
35
+
36
+ What happens on first scan:
37
+
38
+ 1. Tokenizers are downloaded (if needed).
39
+ 2. The first available ONNX file from Hugging Face is downloaded to the plugin's local model cache:
40
+ - `.../node_modules/@xenova/transformers/.cache/<model-repo>/onnx/<selected-model>.onnx`
41
+ - (This download can take a moment depending on network and selected model size.)
42
+ 3. GLiNER starts using local files, so later runs stay fast and offline-friendly.
43
+
44
+ If the download cannot be performed (network/firewall/auth), FogClaw safely falls back to regex-only mode and continues to protect common structured PII.
45
+
46
+ If downloading the model requires Hugging Face authentication, export `HF_TOKEN` or `HF_ACCESS_TOKEN` before starting OpenClaw so the model files can be downloaded.
47
+
32
48
  1. Copy the example config:
33
49
 
34
50
  ```bash
@@ -144,7 +160,7 @@ Plus any labels you add via `custom_entities` in the config.
144
160
  | `enabled` | `boolean` | `true` | Enable/disable the plugin |
145
161
  | `guardrail_mode` | `string` | `"redact"` | Default action: `"redact"`, `"block"`, or `"warn"` |
146
162
  | `redactStrategy` | `string` | `"token"` | How to redact: `"token"`, `"mask"`, or `"hash"` |
147
- | `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER |
163
+ | `model` | `string` | `"onnx-community/gliner_large-v2.1"` | Hugging Face model path for GLiNER (or a local `.onnx` path for advanced setups). |
148
164
  | `confidence_threshold` | `number` | `0.5` | Minimum confidence for GLiNER detections (0-1) |
149
165
  | `custom_entities` | `string[]` | `[]` | Custom entity labels for zero-shot detection |
150
166
  | `entityActions` | `object` | `{}` | Per-entity-type action overrides |
@@ -1 +1 @@
1
- {"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAc1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAuBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA+BnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
1
+ {"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAmJ1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAyBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA+BnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
@@ -1,3 +1,6 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { env } from "@xenova/transformers";
1
4
  import { canonicalType } from "../types.js";
2
5
  const DEFAULT_NER_LABELS = [
3
6
  "person",
@@ -9,6 +12,114 @@ const DEFAULT_NER_LABELS = [
9
12
  "account number",
10
13
  "passport number",
11
14
  ];
15
+ const GLINER_MODEL_FILES = [
16
+ "onnx/model_q4f16.onnx",
17
+ "onnx/model_q4.onnx",
18
+ "onnx/model_bnb4.onnx",
19
+ "onnx/model_int8.onnx",
20
+ "onnx/model_uint8.onnx",
21
+ "onnx/model_quantized.onnx",
22
+ "onnx/model_fp16.onnx",
23
+ "onnx/model.onnx",
24
+ ];
25
+ const MODEL_DOWNLOAD_TIMEOUT_MS = 120_000;
26
+ function isLikelyLocalPath(modelPath) {
27
+ const trimmed = modelPath.trim();
28
+ if (!trimmed) {
29
+ return false;
30
+ }
31
+ const lower = trimmed.toLowerCase();
32
+ const hasExtension = [".onnx", ".ort", ".bin"].some((ext) => lower.endsWith(ext));
33
+ if (hasExtension) {
34
+ return true;
35
+ }
36
+ if (trimmed.startsWith(".") || path.isAbsolute(trimmed)) {
37
+ return true;
38
+ }
39
+ return false;
40
+ }
41
+ function toAbsolutePath(value) {
42
+ return path.isAbsolute(value) ? value : path.resolve(process.cwd(), value);
43
+ }
44
+ function getModelCacheDir() {
45
+ return env.localModelPath ?? path.join(process.cwd(), ".cache");
46
+ }
47
+ function sanitizeModelReference(modelPath) {
48
+ return modelPath.trim();
49
+ }
50
+ async function fileExists(filePath) {
51
+ try {
52
+ await fs.access(filePath);
53
+ return true;
54
+ }
55
+ catch {
56
+ return false;
57
+ }
58
+ }
59
+ async function downloadModelIfNeeded(modelRepo, filename) {
60
+ const cacheDir = getModelCacheDir();
61
+ const localPath = path.join(cacheDir, modelRepo, filename);
62
+ if (await fileExists(localPath)) {
63
+ return localPath;
64
+ }
65
+ const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
66
+ const headers = new Headers();
67
+ const token = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN;
68
+ if (token) {
69
+ headers.set("Authorization", `Bearer ${token}`);
70
+ }
71
+ const controller = new AbortController();
72
+ const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
73
+ try {
74
+ const response = await fetch(url, { headers, signal: controller.signal });
75
+ if (!response.ok) {
76
+ throw new Error(`Unable to download model artifact: ${response.status}`);
77
+ }
78
+ const bytes = new Uint8Array(await response.arrayBuffer());
79
+ await fs.mkdir(path.dirname(localPath), { recursive: true });
80
+ await fs.writeFile(localPath, bytes);
81
+ return localPath;
82
+ }
83
+ catch (err) {
84
+ if (err instanceof Error && err.name === "AbortError") {
85
+ throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
86
+ }
87
+ throw err;
88
+ }
89
+ finally {
90
+ clearTimeout(timeout);
91
+ }
92
+ }
93
+ async function resolveModelPath(modelPath) {
94
+ const sanitized = sanitizeModelReference(modelPath);
95
+ if (!sanitized) {
96
+ throw new Error("Model path is empty");
97
+ }
98
+ if (isLikelyLocalPath(sanitized)) {
99
+ const absolutePath = toAbsolutePath(sanitized);
100
+ if (!(await fileExists(absolutePath))) {
101
+ throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
102
+ }
103
+ return absolutePath;
104
+ }
105
+ const candidates = GLINER_MODEL_FILES;
106
+ let lastError;
107
+ for (const filename of candidates) {
108
+ const localPath = path.join(getModelCacheDir(), sanitized, filename);
109
+ if (await fileExists(localPath)) {
110
+ return localPath;
111
+ }
112
+ }
113
+ for (const filename of candidates) {
114
+ try {
115
+ return await downloadModelIfNeeded(sanitized, filename);
116
+ }
117
+ catch (err) {
118
+ lastError = err instanceof Error ? err : new Error(String(err));
119
+ }
120
+ }
121
+ throw new Error(`Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${lastError?.message ?? "unknown"}`);
122
+ }
12
123
  export class GlinerEngine {
13
124
  model = null;
14
125
  modelPath;
@@ -23,11 +134,13 @@ export class GlinerEngine {
23
134
  if (this.initialized)
24
135
  return;
25
136
  try {
26
- const { Gliner } = await import("gliner");
137
+ const resolvedModelPath = await resolveModelPath(this.modelPath);
138
+ const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
139
+ const { Gliner } = glinerModule;
27
140
  this.model = new Gliner({
28
141
  tokenizerPath: this.modelPath,
29
142
  onnxSettings: {
30
- modelPath: this.modelPath,
143
+ modelPath: resolvedModelPath,
31
144
  executionProvider: "cpu",
32
145
  },
33
146
  maxWidth: 12,
@@ -1 +1 @@
1
- {"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC1C,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE;YAC7D,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAChB,CAAC,CAA6E,EAAE,EAAE,CAAC,CAAC;YAClF,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAA
a;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
1
+ {"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAG3C,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,uBAAuB;IACvB,oBAAoB;IACpB,sBAAsB;IACtB,sBAAsB;IACtB,uBAAuB;IACvB,2BAA2B;IAC3B,sBAAsB;IACtB,iBAAiB;CAClB,CAAC;AAEF,MAAM,yBAAyB,GAAG,OAAO,CAAC;AAE1C,SAAS,iBAAiB,CAAC,SAAiB;IAC1C,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IACjC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,YAAY,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IAClF,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACxD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,CAAC;AAC7E,CAAC;AAED,SAAS,gBAAgB;IACvB,OAAO,GAAG,CAAC,cAAc,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,sBAAsB,CAAC,SAAiB;IAC/C,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;AAC1B,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,QAAgB;IACxC,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC1B,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,SAAiB,EAAE,QAAgB;IACtE,MAAM,QAAQ,GAAG,gBAAgB,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IAE3D,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAChC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,GAAG,GAAG,0BAA0B,SAAS,iBAAiB,QAAQ,EAAE,CAAC;IAC3E,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,
OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;IAClE,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,UAAU,KAAK,EAAE,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,yBAAyB,CAAC,CAAC;IAEhF,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1E,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,sCAAsC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;QAC3D,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7D,MAAM,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAErC,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,KAAK,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,kCAAkC,yBAAyB,IAAI,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,SAAiB;IAC/C,MAAM,SAAS,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,iBAAiB,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,MAAM,YAAY,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAC/C,IAAI,CAAC,CAAC,MAAM,UAAU,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC;IACtC,IAAI,SAA4B,CAAC;IAEjC,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QACrE,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAChC,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC1D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CACb,mCAAmC,SAAS,YAAY,UAAU,CAAC,IAAI,C
AAC,IAAI,CAAC,KAC3E,SAAS,EAAE,OAAO,IAAI,SACxB,EAAE,CACH,CAAC;AACJ,CAAC;AAED,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,iBAAiB,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACjE,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrF,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,CAAC;YAChC,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,iBAAiB;oBAC5B,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE;YAC7D,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAChB,CAAC,CAA6E,EAAE,EAAE,CAAC,CAAC;YAClF,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
@@ -25,7 +25,10 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
25
25
  - [x] (2026-02-17T18:54:00Z) P3 [M1] Updated `package-lock` metadata and refreshed scope for build/release artifacts.
26
26
  - [x] (2026-02-17T18:55:00Z) P4 [M2] Re-ran build/test/smoke + `npm pack --json` + `npm publish --dry-run` validations.
27
27
  - [x] (2026-02-17T18:56:00Z) P5 [M2] Verified `openclaw plugins install` against the built `datafog-fogclaw-0.1.0.tgz` in a clean runtime; plugin now loads as `fogclaw` with status `loaded` and tools `fogclaw_scan, fogclaw_redact`.
28
- - [ ] (2026-02-17T18:56:00Z) P5 [M2] Verify `openclaw plugins install @datafog/fogclaw` with live npm resolution (blocked until package is published).
28
+ - [ ] (2026-02-17T19:34:00Z) P5 [M2] Verify `openclaw plugins install @datafog/fogclaw` with live npm resolution (blocked until package is published).
29
+ - [x] (2026-02-17T19:27:00Z) P6 [M2] Fixed GLiNER startup blocker in Node by pinning `onnxruntime-web` to `1.21.0`, preventing `./webgpu` export resolution errors from `gliner` in OpenClaw install paths.
30
+ - [x] (2026-02-17T19:34:00Z) P6 [M2] Added direct `sharp` dependency `0.34.5` with an override to prevent optional sharp native install failure (`sharp-darwin-arm64v8.node` missing) during OpenClaw install-time dependency bootstrap.
31
+ - [ ] (2026-02-17T19:34:00Z) P6 [M3] Publish/ship this startup hardening update under `@datafog/fogclaw@0.1.2` after npm publish/auth is completed.
29
32
  - [ ] (2026-02-17T18:56:00Z) P6 [M3] Prepare and execute V1 publish/release of `@datafog/fogclaw` (publishing blocked by org access/2FA status in current environment).
30
33
  - [x] (2026-02-17T18:57:00Z) P7 [M3] Capture release artifacts and update evidence notes; add follow-up for dependency install blocker in OpenClaw install path.
31
34
 
@@ -47,8 +50,10 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
47
50
  - Observation: `openclaw plugins install @datafog/fogclaw` still fails in this environment with `404 Not Found - GET https://registry.npmjs.org/@datafog%2ffogclaw` because the scoped package is not yet published to npm.
48
51
  Evidence: CLI output from `openclaw plugins install @datafog/fogclaw` in current machine state.
49
52
 
50
- - Observation: plugin loads with runtime warning about optional `sharp` optional dependency for GLiNER image ops; warning does not prevent plugin load.
51
- Evidence: repeat `Cannot find module '../build/Release/sharp-darwin-arm64v8.node'` messages from `openclaw plugins install` run on clean state.
53
+ - Observation: GLiNER startup now avoids the `onnxruntime-web/webgpu` exports failure by pinning `onnxruntime-web` to 1.21.0, which has a Node-compatible `./webgpu` export path in this runtime.
54
+ Evidence: local `import('onnxruntime-web/webgpu')` succeeds after dependency pin, and OpenClaw install logs no longer show the subpath exports error.
55
+ - Observation: optional `sharp` runtime failures are now mitigated in clean install flows by pinning a direct `sharp` dependency at 0.34.5; this removes the previously recurring `Cannot find module '../build/Release/sharp-darwin-arm64v8.node'` warning in OpenClaw plugin install logs.
56
+ Evidence: `openclaw plugins install` from `datafog-fogclaw-0.1.2.tgz` clean runtime no longer emits that missing binary warning.
52
57
 
53
58
  ## Decision Log
54
59
 
@@ -79,7 +84,7 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
79
84
  - `npm pack --json` and `npm publish --dry-run` now emit scoped package metadata under `@datafog/fogclaw`.
80
85
  - `openclaw plugins install` against a clean temporary state and local `datafog-fogclaw-0.1.0.tgz` now succeeds; `openclaw plugins info fogclaw` shows status `loaded` and tools `fogclaw_scan`, `fogclaw_redact`.
81
86
  - `openclaw plugins install @datafog/fogclaw` via npm registry is still blocked in this environment by publish visibility (`404 Not Found`) until release is live.
82
- - GLiNER/`sharp` warnings are still non-fatal during plugin registration in this environment and do not prevent plugin load.
87
+ - GLiNER startup now avoids the webgpu export resolution error after pinning `onnxruntime-web` to `1.21.0`. In this environment, startup currently falls back when model backends are unavailable, and no further webgpu/export or sharp-missing warnings appear after dependency pinning.
83
88
 
84
89
 
85
90
  ## Context and Orientation
@@ -249,7 +254,7 @@ Expect:
249
254
  - Scope migration evidence:
250
255
 
251
256
  package: @datafog/fogclaw
252
- version: 0.1.0
257
+ version: 0.1.2
253
258
  `npm pkg get name` output: `"@datafog/fogclaw"`
254
259
  `npm pkg get openclaw` output:
255
260
  `{"extensions":["./dist/index.js"]}`
@@ -263,7 +268,7 @@ Expect:
263
268
  => `function fogclaw FogClaw`
264
269
 
265
270
  - Reproducibility evidence:
266
- - `npm pack --json` output includes `datafog-fogclaw-0.1.0.tgz` and `openclaw.plugin.json`/`dist/index.js` in file list.
271
+ - `npm pack --json` output includes `datafog-fogclaw-0.1.2.tgz` and `openclaw.plugin.json`/`dist/index.js` in file list.
267
272
  - `npm publish --dry-run` succeeded and produced scoped package manifest notice.
268
273
 
269
274
  - Installability evidence:
@@ -302,7 +307,7 @@ Expect:
302
307
  - date: 2026-02-17T19:08:00Z
303
308
  - open findings by priority (if any): pending
304
309
  - evidence:
305
- - installability in clean runtime succeeds for local `datafog-fogclaw-0.1.0.tgz` after tool-name registration fix
310
+ - installability in clean runtime succeeds for local `datafog-fogclaw-0.1.2.tgz` after tool-name registration and dependency-hardening fixes
306
311
  - scoped package install still returns registry 404 (not yet published)
307
312
  - rollback: revert to previous working scoped package state (or keep changes in branch) if publish credentials/visibility unavailable
308
313
  - post-release checks:
@@ -316,3 +321,4 @@ Expect:
316
321
  - 2026-02-17T10:57:00Z: Initialized plan for V1 scoped-release path in `@datafog/fogclaw` and documented zero-logic-change constraints for immediate installability milestone.
317
322
  - 2026-02-17T18:57:00Z: Completed namespace migration in package metadata and install/docs (`package.json`, `package-lock.json`, `README.md`, `docs/plugins/fogclaw.md`). Ran full local validation (`npm run build`, `npm run test`, `npm run test:plugin-smoke`, `npm pack --json`, `npm publish --dry-run`) and documented install blocker (package not yet published to npm).
318
323
  - 2026-02-17T19:08:00Z: Fixed OpenClaw compatibility in `src/index.ts` by adding explicit `name` fields to `fogclaw_scan` and `fogclaw_redact` tool registrations to avoid undefined `.trim()` during registration; verified clean-runtime install/load succeeds with local tarball (`openclaw plugins install <tgz>`, `plugins info`, `plugins list`).
324
+ - 2026-02-17T19:34:00Z: Added explicit `sharp@0.34.5` dependency with override to avoid missing `sharp-darwin-arm64v8.node` crashes during OpenClaw dependency bootstrap; verified that a clean plugin install no longer emits that optional native-module warning. Remaining GLiNER startup warnings in this environment are now related to backend availability, not to sharp/webgpu export errors.
@@ -13,6 +13,9 @@ FogClaw is an OpenClaw plugin that protects agent workflows by detecting and han
13
13
 
14
14
  It provides both proactive guardrail behavior (via the `before_agent_start` hook) and explicit tools:
15
15
 
16
+ - GLiNER ONNX artifacts are provisioned automatically on first run (no manual `download` step required).
17
+ - If the model cannot be downloaded (offline or restricted network), FogClaw continues in regex-only mode.
18
+
16
19
  - `fogclaw_scan`: scans text for PII and custom entities.
17
20
  - `fogclaw_redact`: scans and redacts sensitive matches.
18
21
 
@@ -59,6 +62,7 @@ Set plugin config under `plugins.entries.fogclaw.config`:
59
62
  enabled: true,
60
63
  guardrail_mode: "redact",
61
64
  redactStrategy: "token",
65
+ model: "onnx-community/gliner_large-v2.1",
62
66
  confidence_threshold: 0.5,
63
67
  custom_entities: ["project codename", "competitor name"],
64
68
  entityActions: {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "fogclaw",
3
3
  "name": "FogClaw",
4
- "version": "0.1.0",
4
+ "version": "0.1.3",
5
5
  "description": "PII detection & custom entity redaction powered by DataFog",
6
6
  "configSchema": {
7
7
  "type": "object",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@datafog/fogclaw",
3
- "version": "0.1.0",
3
+ "version": "0.1.3",
4
4
  "description": "OpenClaw plugin for PII detection & custom entity redaction powered by DataFog",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -14,7 +14,9 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "gliner": "^0.0.19",
17
- "onnxruntime-node": "1.19.2"
17
+ "onnxruntime-node": "1.19.2",
18
+ "onnxruntime-web": "1.21.0",
19
+ "sharp": "0.34.5"
18
20
  },
19
21
  "devDependencies": {
20
22
  "@types/node": "^22.0.0",
@@ -32,6 +34,10 @@
32
34
  "license": "MIT",
33
35
  "repository": {
34
36
  "type": "git",
35
- "url": "https://github.com/datafog/fogclaw"
37
+ "url": "git+https://github.com/datafog/fogclaw.git"
38
+ },
39
+ "overrides": {
40
+ "onnxruntime-web": "1.21.0",
41
+ "sharp": "0.34.5"
36
42
  }
37
43
  }
@@ -1,3 +1,7 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { env } from "@xenova/transformers";
4
+
1
5
  import type { Entity } from "../types.js";
2
6
  import { canonicalType } from "../types.js";
3
7
 
@@ -12,6 +16,139 @@ const DEFAULT_NER_LABELS = [
12
16
  "passport number",
13
17
  ];
14
18
 
19
+ const GLINER_MODEL_FILES = [
20
+ "onnx/model_q4f16.onnx",
21
+ "onnx/model_q4.onnx",
22
+ "onnx/model_bnb4.onnx",
23
+ "onnx/model_int8.onnx",
24
+ "onnx/model_uint8.onnx",
25
+ "onnx/model_quantized.onnx",
26
+ "onnx/model_fp16.onnx",
27
+ "onnx/model.onnx",
28
+ ];
29
+
30
+ const MODEL_DOWNLOAD_TIMEOUT_MS = 120_000;
31
+
32
+ function isLikelyLocalPath(modelPath: string): boolean {
33
+ const trimmed = modelPath.trim();
34
+ if (!trimmed) {
35
+ return false;
36
+ }
37
+
38
+ const lower = trimmed.toLowerCase();
39
+ const hasExtension = [".onnx", ".ort", ".bin"].some((ext) => lower.endsWith(ext));
40
+ if (hasExtension) {
41
+ return true;
42
+ }
43
+
44
+ if (trimmed.startsWith(".") || path.isAbsolute(trimmed)) {
45
+ return true;
46
+ }
47
+
48
+ return false;
49
+ }
50
+
51
+ function toAbsolutePath(value: string): string {
52
+ return path.isAbsolute(value) ? value : path.resolve(process.cwd(), value);
53
+ }
54
+
55
+ function getModelCacheDir(): string {
56
+ return env.localModelPath ?? path.join(process.cwd(), ".cache");
57
+ }
58
+
59
+ function sanitizeModelReference(modelPath: string): string {
60
+ return modelPath.trim();
61
+ }
62
+
63
+ async function fileExists(filePath: string): Promise<boolean> {
64
+ try {
65
+ await fs.access(filePath);
66
+ return true;
67
+ } catch {
68
+ return false;
69
+ }
70
+ }
71
+
72
+ async function downloadModelIfNeeded(modelRepo: string, filename: string): Promise<string> {
73
+ const cacheDir = getModelCacheDir();
74
+ const localPath = path.join(cacheDir, modelRepo, filename);
75
+
76
+ if (await fileExists(localPath)) {
77
+ return localPath;
78
+ }
79
+
80
+ const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
81
+ const headers = new Headers();
82
+ const token = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN;
83
+ if (token) {
84
+ headers.set("Authorization", `Bearer ${token}`);
85
+ }
86
+
87
+ const controller = new AbortController();
88
+ const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
89
+
90
+ try {
91
+ const response = await fetch(url, { headers, signal: controller.signal });
92
+ if (!response.ok) {
93
+ throw new Error(`Unable to download model artifact: ${response.status}`);
94
+ }
95
+
96
+ const bytes = new Uint8Array(await response.arrayBuffer());
97
+ await fs.mkdir(path.dirname(localPath), { recursive: true });
98
+ await fs.writeFile(localPath, bytes);
99
+
100
+ return localPath;
101
+ } catch (err) {
102
+ if (err instanceof Error && err.name === "AbortError") {
103
+ throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
104
+ }
105
+
106
+ throw err;
107
+ } finally {
108
+ clearTimeout(timeout);
109
+ }
110
+ }
111
+
112
+ async function resolveModelPath(modelPath: string): Promise<string> {
113
+ const sanitized = sanitizeModelReference(modelPath);
114
+ if (!sanitized) {
115
+ throw new Error("Model path is empty");
116
+ }
117
+
118
+ if (isLikelyLocalPath(sanitized)) {
119
+ const absolutePath = toAbsolutePath(sanitized);
120
+ if (!(await fileExists(absolutePath))) {
121
+ throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
122
+ }
123
+
124
+ return absolutePath;
125
+ }
126
+
127
+ const candidates = GLINER_MODEL_FILES;
128
+ let lastError: Error | undefined;
129
+
130
+ for (const filename of candidates) {
131
+ const localPath = path.join(getModelCacheDir(), sanitized, filename);
132
+ if (await fileExists(localPath)) {
133
+ return localPath;
134
+ }
135
+ }
136
+
137
+ for (const filename of candidates) {
138
+ try {
139
+ return await downloadModelIfNeeded(sanitized, filename);
140
+ } catch (err) {
141
+ lastError = err instanceof Error ? err : new Error(String(err));
142
+ }
143
+ }
144
+
145
+ throw new Error(
146
+ `Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${
147
+ lastError?.message ?? "unknown"
148
+ }`,
149
+ );
150
+ }
151
+
15
152
  export class GlinerEngine {
16
153
  private model: any = null;
17
154
  private modelPath: string;
@@ -28,11 +165,13 @@ export class GlinerEngine {
28
165
  if (this.initialized) return;
29
166
 
30
167
  try {
31
- const { Gliner } = await import("gliner");
168
+ const resolvedModelPath = await resolveModelPath(this.modelPath);
169
+ const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
170
+ const { Gliner } = glinerModule;
32
171
  this.model = new Gliner({
33
172
  tokenizerPath: this.modelPath,
34
173
  onnxSettings: {
35
- modelPath: this.modelPath,
174
+ modelPath: resolvedModelPath,
36
175
  executionProvider: "cpu",
37
176
  },
38
177
  maxWidth: 12,
@@ -1,4 +1,7 @@
1
- import { describe, it, expect, vi, beforeEach } from "vitest";
1
+ import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
2
+ import fs from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
2
5
 
3
6
  // Mock the gliner npm package so we don't need the actual 1.4GB model
4
7
  vi.mock("gliner", () => {
@@ -63,13 +66,82 @@ vi.mock("gliner", () => {
63
66
  return { Gliner: MockGliner };
64
67
  });
65
68
 
69
+ vi.mock("gliner/node", () => {
70
+ class MockGliner {
71
+ private config: any;
72
+
73
+ constructor(config: any) {
74
+ this.config = config;
75
+ }
76
+
77
+ async initialize(): Promise<void> {
78
+ // No-op in mock
79
+ }
80
+
81
+ async inference(
82
+ text: string,
83
+ labels: string[],
84
+ options: { threshold: number },
85
+ ): Promise<Array<{ text: string; label: string; score: number; start: number; end: number }>> {
86
+ const results: Array<{ text: string; label: string; score: number; start: number; end: number }> = [];
87
+
88
+ const johnIndex = text.indexOf("John Smith");
89
+ if (johnIndex !== -1 && labels.includes("person")) {
90
+ results.push({
91
+ text: "John Smith",
92
+ label: "person",
93
+ score: 0.95,
94
+ start: johnIndex,
95
+ end: johnIndex + "John Smith".length,
96
+ });
97
+ }
98
+
99
+ const acmeIndex = text.indexOf("Acme Corp");
100
+ if (acmeIndex !== -1 && labels.includes("organization")) {
101
+ results.push({
102
+ text: "Acme Corp",
103
+ label: "organization",
104
+ score: 0.88,
105
+ start: acmeIndex,
106
+ end: acmeIndex + "Acme Corp".length,
107
+ });
108
+ }
109
+
110
+ const nyIndex = text.indexOf("New York");
111
+ if (nyIndex !== -1 && labels.includes("location")) {
112
+ results.push({
113
+ text: "New York",
114
+ label: "location",
115
+ score: 0.91,
116
+ start: nyIndex,
117
+ end: nyIndex + "New York".length,
118
+ });
119
+ }
120
+
121
+ return results;
122
+ }
123
+ }
124
+
125
+ return { Gliner: MockGliner };
126
+ });
127
+
66
128
  import { GlinerEngine } from "../src/engines/gliner.js";
67
129
 
130
+ const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-gliner-model-test.onnx");
131
+
132
+ beforeAll(async () => {
133
+ await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
134
+ });
135
+
136
+ afterAll(async () => {
137
+ await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
138
+ });
139
+
68
140
  describe("GlinerEngine", () => {
69
141
  let engine: GlinerEngine;
70
142
 
71
143
  beforeEach(async () => {
72
- engine = new GlinerEngine("onnx-community/gliner_small-v2.5", 0.5);
144
+ engine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
73
145
  await engine.initialize();
74
146
  });
75
147
 
@@ -166,7 +238,7 @@ describe("GlinerEngine", () => {
166
238
  });
167
239
 
168
240
  it("reports isInitialized correctly", async () => {
169
- const freshEngine = new GlinerEngine("some-model", 0.5);
241
+ const freshEngine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
170
242
  expect(freshEngine.isInitialized).toBe(false);
171
243
 
172
244
  await freshEngine.initialize();
@@ -1,4 +1,7 @@
1
- import { describe, it, expect, vi, beforeEach } from "vitest";
1
+ import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
2
+ import fs from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
2
5
 
3
6
  // Mock the gliner npm package so we don't need the actual model
4
7
  vi.mock("gliner", () => {
@@ -43,12 +46,68 @@ vi.mock("gliner", () => {
43
46
  };
44
47
  });
45
48
 
49
+ vi.mock("gliner/node", () => {
50
+ return {
51
+ Gliner: class MockGliner {
52
+ async initialize() {}
53
+ async inference(
54
+ text: string,
55
+ labels: string[],
56
+ _opts: { threshold: number },
57
+ ) {
58
+ const results: any[] = [];
59
+
60
+ // Simulate person detection for "John Smith"
61
+ if (text.includes("John Smith")) {
62
+ const idx = text.indexOf("John Smith");
63
+ results.push({
64
+ text: "John Smith",
65
+ label: "person",
66
+ score: 0.95,
67
+ start: idx,
68
+ end: idx + 10,
69
+ });
70
+ }
71
+
72
+ // Simulate organization detection for "Acme Corp"
73
+ if (text.includes("Acme Corp")) {
74
+ const idx = text.indexOf("Acme Corp");
75
+ results.push({
76
+ text: "Acme Corp",
77
+ label: "organization",
78
+ score: 0.88,
79
+ start: idx,
80
+ end: idx + 9,
81
+ });
82
+ }
83
+
84
+ // Only return results whose labels are requested
85
+ return results.filter((r) => labels.includes(r.label));
86
+ }
87
+ },
88
+ };
89
+ });
90
+
46
91
  import { Scanner } from "../src/scanner.js";
47
92
  import { DEFAULT_CONFIG } from "../src/config.js";
48
93
  import type { FogClawConfig } from "../src/types.js";
49
94
 
95
+ const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-scanner-gliner-model-test.onnx");
96
+
97
+ beforeAll(async () => {
98
+ await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
99
+ });
100
+
101
+ afterAll(async () => {
102
+ await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
103
+ });
104
+
50
105
  function makeConfig(overrides: Partial<FogClawConfig> = {}): FogClawConfig {
51
- return { ...DEFAULT_CONFIG, ...overrides };
106
+ return {
107
+ ...DEFAULT_CONFIG,
108
+ model: TEST_ONNX_MODEL_PATH,
109
+ ...overrides,
110
+ };
52
111
  }
53
112
 
54
113
  describe("Scanner", () => {