@datafog/fogclaw 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,6 +29,22 @@ npm run build
29
29
 
30
30
  ## Quick Start
31
31
 
32
+ ### GLiNER first-run setup (no extra steps)
33
+
34
+ FogClaw automatically downloads the GLiNER ONNX model on first run if it is not already cached locally, then reuses it for all later starts.
35
+
36
+ What happens on first scan:
37
+
38
+ 1. Tokenizers are downloaded (if needed).
39
+ 2. The first available ONNX file from Hugging Face is downloaded to the plugin's local model cache:
40
+ - `.../node_modules/@xenova/transformers/.cache/<model-repo>/onnx/<selected-model>.onnx`
41
+ - (This download can take a moment depending on network and selected model size.)
42
+ 3. GLiNER starts using local files, so later runs stay fast and offline-friendly.
43
+
44
+ If the download cannot be performed (network/firewall/auth), FogClaw safely falls back to regex-only mode and continues to protect common structured PII.
45
+
46
+ If your network requires Hugging Face authentication, export `HF_TOKEN` or `HF_ACCESS_TOKEN` before starting OpenClaw so model files can download.
47
+
32
48
  1. Copy the example config:
33
49
 
34
50
  ```bash
@@ -144,7 +160,7 @@ Plus any labels you add via `custom_entities` in the config.
144
160
  | `enabled` | `boolean` | `true` | Enable/disable the plugin |
145
161
  | `guardrail_mode` | `string` | `"redact"` | Default action: `"redact"`, `"block"`, or `"warn"` |
146
162
  | `redactStrategy` | `string` | `"token"` | How to redact: `"token"`, `"mask"`, or `"hash"` |
147
- | `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER |
163
+ | `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER (or a local `.onnx` path for advanced setups). |
148
164
  | `confidence_threshold` | `number` | `0.5` | Minimum confidence for GLiNER detections (0-1) |
149
165
  | `custom_entities` | `string[]` | `[]` | Custom entity labels for zero-shot detection |
150
166
  | `entityActions` | `object` | `{}` | Per-entity-type action overrides |
@@ -1 +1 @@
1
- {"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAc1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAwBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA+BnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
1
+ {"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAmJ1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAyBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA4CnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
@@ -1,3 +1,6 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { env } from "@xenova/transformers";
1
4
  import { canonicalType } from "../types.js";
2
5
  const DEFAULT_NER_LABELS = [
3
6
  "person",
@@ -9,6 +12,114 @@ const DEFAULT_NER_LABELS = [
9
12
  "account number",
10
13
  "passport number",
11
14
  ];
15
+ const GLINER_MODEL_FILES = [
16
+ "onnx/model_q4f16.onnx",
17
+ "onnx/model_q4.onnx",
18
+ "onnx/model_bnb4.onnx",
19
+ "onnx/model_int8.onnx",
20
+ "onnx/model_uint8.onnx",
21
+ "onnx/model_quantized.onnx",
22
+ "onnx/model_fp16.onnx",
23
+ "onnx/model.onnx",
24
+ ];
25
+ const MODEL_DOWNLOAD_TIMEOUT_MS = 120_000;
26
+ function isLikelyLocalPath(modelPath) {
27
+ const trimmed = modelPath.trim();
28
+ if (!trimmed) {
29
+ return false;
30
+ }
31
+ const lower = trimmed.toLowerCase();
32
+ const hasExtension = [".onnx", ".ort", ".bin"].some((ext) => lower.endsWith(ext));
33
+ if (hasExtension) {
34
+ return true;
35
+ }
36
+ if (trimmed.startsWith(".") || path.isAbsolute(trimmed)) {
37
+ return true;
38
+ }
39
+ return false;
40
+ }
41
+ function toAbsolutePath(value) {
42
+ return path.isAbsolute(value) ? value : path.resolve(process.cwd(), value);
43
+ }
44
+ function getModelCacheDir() {
45
+ return env.localModelPath ?? path.join(process.cwd(), ".cache");
46
+ }
47
+ function sanitizeModelReference(modelPath) {
48
+ return modelPath.trim();
49
+ }
50
+ async function fileExists(filePath) {
51
+ try {
52
+ await fs.access(filePath);
53
+ return true;
54
+ }
55
+ catch {
56
+ return false;
57
+ }
58
+ }
59
+ async function downloadModelIfNeeded(modelRepo, filename) {
60
+ const cacheDir = getModelCacheDir();
61
+ const localPath = path.join(cacheDir, modelRepo, filename);
62
+ if (await fileExists(localPath)) {
63
+ return localPath;
64
+ }
65
+ const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
66
+ const headers = new Headers();
67
+ const token = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN;
68
+ if (token) {
69
+ headers.set("Authorization", `Bearer ${token}`);
70
+ }
71
+ const controller = new AbortController();
72
+ const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
73
+ try {
74
+ const response = await fetch(url, { headers, signal: controller.signal });
75
+ if (!response.ok) {
76
+ throw new Error(`Unable to download model artifact: ${response.status}`);
77
+ }
78
+ const bytes = new Uint8Array(await response.arrayBuffer());
79
+ await fs.mkdir(path.dirname(localPath), { recursive: true });
80
+ await fs.writeFile(localPath, bytes);
81
+ return localPath;
82
+ }
83
+ catch (err) {
84
+ if (err instanceof Error && err.name === "AbortError") {
85
+ throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
86
+ }
87
+ throw err;
88
+ }
89
+ finally {
90
+ clearTimeout(timeout);
91
+ }
92
+ }
93
+ async function resolveModelPath(modelPath) {
94
+ const sanitized = sanitizeModelReference(modelPath);
95
+ if (!sanitized) {
96
+ throw new Error("Model path is empty");
97
+ }
98
+ if (isLikelyLocalPath(sanitized)) {
99
+ const absolutePath = toAbsolutePath(sanitized);
100
+ if (!(await fileExists(absolutePath))) {
101
+ throw new Error(`Local GLiNER model file not found at: ${absolutePath}`);
102
+ }
103
+ return absolutePath;
104
+ }
105
+ const candidates = GLINER_MODEL_FILES;
106
+ let lastError;
107
+ for (const filename of candidates) {
108
+ const localPath = path.join(getModelCacheDir(), sanitized, filename);
109
+ if (await fileExists(localPath)) {
110
+ return localPath;
111
+ }
112
+ }
113
+ for (const filename of candidates) {
114
+ try {
115
+ return await downloadModelIfNeeded(sanitized, filename);
116
+ }
117
+ catch (err) {
118
+ lastError = err instanceof Error ? err : new Error(String(err));
119
+ }
120
+ }
121
+ throw new Error(`Failed to resolve GLiNER model "${sanitized}". Tried ${candidates.join(", ")}: ${lastError?.message ?? "unknown"}`);
122
+ }
12
123
  export class GlinerEngine {
13
124
  model = null;
14
125
  modelPath;
@@ -23,16 +134,17 @@ export class GlinerEngine {
23
134
  if (this.initialized)
24
135
  return;
25
136
  try {
137
+ const resolvedModelPath = await resolveModelPath(this.modelPath);
26
138
  const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
27
139
  const { Gliner } = glinerModule;
28
140
  this.model = new Gliner({
29
141
  tokenizerPath: this.modelPath,
30
142
  onnxSettings: {
31
- modelPath: this.modelPath,
143
+ modelPath: resolvedModelPath,
32
144
  executionProvider: "cpu",
33
145
  },
34
146
  maxWidth: 12,
35
- modelType: "gliner",
147
+ modelType: "span-level",
36
148
  });
37
149
  await this.model.initialize();
38
150
  this.initialized = true;
@@ -57,11 +169,15 @@ export class GlinerEngine {
57
169
  ];
58
170
  // Deduplicate labels
59
171
  const uniqueLabels = [...new Set(labels)];
60
- const results = await this.model.inference(text, uniqueLabels, {
172
+ const rawResults = await this.model.inference({
173
+ texts: [text],
174
+ entities: uniqueLabels,
175
+ flatNer: false,
61
176
  threshold: this.threshold,
62
177
  });
63
- return results.map((r) => ({
64
- text: r.text,
178
+ const flatResults = Array.isArray(rawResults) ? rawResults.flat() : [];
179
+ return flatResults.map((r) => ({
180
+ text: r.spanText ?? r.text,
65
181
  label: canonicalType(r.label),
66
182
  start: r.start,
67
183
  end: r.end,
@@ -1 +1 @@
1
- {"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrF,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,CAAC;YAChC,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE;YAC7D,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAChB,CAAC,CAA6E,EAAE,EAAE,CAAC,CAAC;YAClF,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,
UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
1
+ {"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AAG3C,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,kBAAkB,GAAG;IACzB,uBAAuB;IACvB,oBAAoB;IACpB,sBAAsB;IACtB,sBAAsB;IACtB,uBAAuB;IACvB,2BAA2B;IAC3B,sBAAsB;IACtB,iBAAiB;CAClB,CAAC;AAEF,MAAM,yBAAyB,GAAG,OAAO,CAAC;AAE1C,SAAS,iBAAiB,CAAC,SAAiB;IAC1C,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IACjC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,YAAY,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IAClF,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QACxD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,CAAC;AAC7E,CAAC;AAED,SAAS,gBAAgB;IACvB,OAAO,GAAG,CAAC,cAAc,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,sBAAsB,CAAC,SAAiB;IAC/C,OAAO,SAAS,CAAC,IAAI,EAAE,CAAC;AAC1B,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,QAAgB;IACxC,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC1B,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,SAAiB,EAAE,QAAgB;IACtE,MAAM,QAAQ,GAAG,gBAAgB,EAAE,CAAC;IACpC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;IAE3D,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAChC,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,GAAG,GAAG,0BAA0B,SAAS,iBAAiB,QAAQ,EAAE,CAAC;IAC3E,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,
OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;IAClE,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,UAAU,KAAK,EAAE,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;IACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,yBAAyB,CAAC,CAAC;IAEhF,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1E,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,sCAAsC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;QAC3D,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7D,MAAM,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAErC,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,KAAK,IAAI,GAAG,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,kCAAkC,yBAAyB,IAAI,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,GAAG,CAAC;IACZ,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,OAAO,CAAC,CAAC;IACxB,CAAC;AACH,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,SAAiB;IAC/C,MAAM,SAAS,GAAG,sBAAsB,CAAC,SAAS,CAAC,CAAC;IACpD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,iBAAiB,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,MAAM,YAAY,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAC/C,IAAI,CAAC,CAAC,MAAM,UAAU,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,yCAAyC,YAAY,EAAE,CAAC,CAAC;QAC3E,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC;IACtC,IAAI,SAA4B,CAAC;IAEjC,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;QACrE,IAAI,MAAM,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAChC,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,MAAM,qBAAqB,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC1D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,SAAS,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CACb,mCAAmC,SAAS,YAAY,UAAU,CAAC,IAAI,C
AAC,IAAI,CAAC,KAC3E,SAAS,EAAE,OAAO,IAAI,SACxB,EAAE,CACH,CAAC;AACJ,CAAC;AAED,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,iBAAiB,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACjE,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrF,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,CAAC;YAChC,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,iBAAiB;oBAC5B,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,YAAY;aACxB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC;YAC5C,KAAK,EAAE,CAAC,IAAI,CAAC;YACb,QAAQ,EAAE,YAAY;YACtB,OAAO,EAAE,KAAK;YACd,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QACH,MAAM,WAAW,GAAG,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAEvE,OAAO,WAAW,CAAC,GAAG,CACpB,CACE,CAOC,EACD,EAAE,CAAC,CAAC;YACJ,IAAI,EAAE,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,IAAI;YAC1B,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd
,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
@@ -25,11 +25,11 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
25
25
  - [x] (2026-02-17T18:54:00Z) P3 [M1] Updated `package-lock` metadata and refreshed scope for build/release artifacts.
26
26
  - [x] (2026-02-17T18:55:00Z) P4 [M2] Re-ran build/test/smoke + `npm pack --json` + `npm publish --dry-run` validations.
27
27
  - [x] (2026-02-17T18:56:00Z) P5 [M2] Verified `openclaw plugins install` against the built `datafog-fogclaw-0.1.0.tgz` in a clean runtime; plugin now loads as `fogclaw` with status `loaded` and tools `fogclaw_scan, fogclaw_redact`.
28
- - [ ] (2026-02-17T19:34:00Z) P5 [M2] Verify `openclaw plugins install @datafog/fogclaw` with live npm resolution (blocked until package is published).
28
+ - [ ] (2026-02-17T20:33:00Z) P5 [M2] Verify `openclaw plugins install @datafog/fogclaw` resolves to published `0.1.4` (or later) when stale plugin state is cleared and npm 2FA is provided.
29
29
  - [x] (2026-02-17T19:27:00Z) P6 [M2] Fixed GLiNER startup blocker in Node by pinning `onnxruntime-web` to `1.21.0`, preventing `./webgpu` export resolution errors from `gliner` in OpenClaw install paths.
30
30
  - [x] (2026-02-17T19:34:00Z) P6 [M2] Added direct `sharp` dependency `0.34.5` with an override to prevent optional sharp native install failure (`sharp-darwin-arm64v8.node` missing) during OpenClaw install-time dependency bootstrap.
31
- - [ ] (2026-02-17T19:34:00Z) P6 [M3] Publish/ship this startup hardening update under `@datafog/fogclaw@0.1.2` after npm publish/auth is completed.
32
- - [ ] (2026-02-17T18:56:00Z) P6 [M3] Prepare and execute V1 publish/release of `@datafog/fogclaw` (publishing blocked by org access/2FA status in current environment).
31
+ - [ ] (2026-02-17T20:29:00Z) P6 [M3] Publish this startup hardening update under `@datafog/fogclaw@0.1.4` after npm publish/auth is completed.
32
+ - [x] (2026-02-17T18:56:00Z) P6 [M3] Prepare and execute V1 publish/release of `@datafog/fogclaw` (attempt blocked by EOTP in this environment).
33
33
  - [x] (2026-02-17T18:57:00Z) P7 [M3] Capture release artifacts and update evidence notes; add follow-up for dependency install blocker in OpenClaw install path.
34
34
 
35
35
 
@@ -47,13 +47,13 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
47
47
  - Observation: The prior `TypeError: Cannot read properties of undefined (reading 'trim')` install failure was caused by OpenClaw's `registerTool` contract when tool objects omit a top-level `name`.
48
48
  Evidence: `src/plugins/registry.ts` in OpenClaw (`registerTool` maps `tool.name` without null-guard); fixed in this repository by adding `name` fields to both tool objects.
49
49
 
50
- - Observation: `openclaw plugins install @datafog/fogclaw` still fails in this environment with `404 Not Found - GET https://registry.npmjs.org/@datafog%2ffogclaw` because the scoped package is not yet published to npm.
51
- Evidence: CLI output from `openclaw plugins install @datafog/fogclaw` in current machine state.
50
+ - Observation: `openclaw plugins install @datafog/fogclaw` cannot be validated from npm in this environment yet (latest candidate `0.1.4` is not published due to OTP/auth), but local install from the tarball succeeds from a clean runtime and confirms plugin load path behavior.
51
+ Evidence: `npm view @datafog/fogclaw@0.1.4` (404), followed by `openclaw plugins install ./datafog-fogclaw-0.1.4.tgz`, `openclaw plugins info fogclaw`.
52
52
 
53
53
  - Observation: GLiNER startup now avoids the `onnxruntime-web/webgpu` exports failure by pinning `onnxruntime-web` to 1.21.0, which has a Node-compatible `./webgpu` export path in this runtime.
54
54
  Evidence: local `import('onnxruntime-web/webgpu')` succeeds after dependency pin, and OpenClaw install logs no longer show the subpath exports error.
55
55
  - Observation: optional sharp runtime failures are now mitigated in clean install flows by pinning direct `sharp` 0.34.5; this removes the previously recurrent `Cannot find module '../build/Release/sharp-darwin-arm64v8.node'` warning in OpenClaw plugin install logs.
56
- Evidence: `openclaw plugins install` from `datafog-fogclaw-0.1.2.tgz` clean runtime no longer emits that missing binary warning.
56
+ Evidence: `openclaw plugins install` from `datafog-fogclaw-0.1.4.tgz` clean runtime no longer emits that missing binary warning.
57
57
 
58
58
  ## Decision Log
59
59
 
@@ -83,8 +83,9 @@ Users should be able to install and use FogClaw today from a DataFog-owned names
83
83
  - Local validation confirms namespace rename compiles and tests (`npm run build`, `npm run test`, `npm run test:plugin-smoke`) continue to pass.
84
84
  - `npm pack --json` and `npm publish --dry-run` now emit scoped package metadata under `@datafog/fogclaw`.
85
85
  - `openclaw plugins install` against a clean temporary state and local `datafog-fogclaw-0.1.0.tgz` now succeeds; `openclaw plugins info fogclaw` shows status `loaded` and tools `fogclaw_scan`, `fogclaw_redact`.
86
- - `openclaw plugins install @datafog/fogclaw` via npm registry is still blocked in this environment by publish visibility (`404 Not Found`) until release is live.
87
- - GLiNER startup now avoids the webgpu export resolution error after pinning `onnxruntime-web` to `1.21.0`; remaining startup behavior in this environment is currently a fallback when model backends are unavailable, with no further webgpu/export or sharp-missing warnings after dependency pinning.
86
+ - `openclaw plugins install @datafog/fogclaw` via the npm registry is still blocked because `0.1.4` has not yet been published (OTP/auth required). A local tarball install on a clean runtime succeeds as an equivalent smoke test.
87
+ - GLiNER now returns detections with `source: gliner` after the `modelType` input-shape fix, rather than falling back to regex-only mode in this environment.
88
+ - GLiNER startup now avoids the webgpu export resolution error after pinning `onnxruntime-web` to `1.21.0`. After additionally forcing `modelType: "span-level"`, local runtime now performs ONNX inference and returns entities instead of failing with `span_idx`/`texts is not iterable` in this environment, while still safely falling back to regex if GLiNER initialization fails.
88
89
 
89
90
 
90
91
  ## Context and Orientation
@@ -254,7 +255,7 @@ Expect:
254
255
  - Scope migration evidence:
255
256
 
256
257
  package: @datafog/fogclaw
257
- version: 0.1.2
258
+ version: 0.1.4
258
259
  `npm pkg get name` output: `"@datafog/fogclaw"`
259
260
  `npm pkg get openclaw` output:
260
261
  `{"extensions":["./dist/index.js"]}`
@@ -268,17 +269,17 @@ Expect:
268
269
  => `function fogclaw FogClaw`
269
270
 
270
271
  - Reproducibility evidence:
271
- - `npm pack --json` output includes `datafog-fogclaw-0.1.2.tgz` and `openclaw.plugin.json`/`dist/index.js` in file list.
272
+ - `npm pack --json` output includes `datafog-fogclaw-0.1.4.tgz` and `openclaw.plugin.json`/`dist/index.js` in file list.
272
273
  - `npm publish --dry-run` succeeded and produced scoped package manifest notice.
273
274
 
274
275
  - Installability evidence:
275
- - `openclaw plugins install @datafog/fogclaw` currently fails with `npm 404 Not Found` until package publish is live.
276
- - `openclaw plugins install` against local `datafog-fogclaw-0.1.0.tgz` in a clean temp runtime now succeeds and reports plugin status `loaded` with tools `fogclaw_scan`, `fogclaw_redact`.
277
- - GLiNER startup logs still surface optional `sharp` module warnings, but plugin registration now succeeds.
276
+ - `openclaw plugins install` of a local `datafog-fogclaw-0.1.4.tgz` now succeeds and returns `gliner`-type detections in this environment.
277
+ - `openclaw plugins install /path/to/datafog-fogclaw-0.1.4.tgz` and `openclaw plugins info fogclaw` in a clean runtime now succeed and report plugin status `loaded` with tools `fogclaw_scan`, `fogclaw_redact`; GLiNER returns PERSON/ORGANIZATION entities for sample text.
278
+ - GLiNER startup still logs optional runtime inference compatibility warnings in some environments, but plugin registration and install now succeed reliably.
278
279
 
279
280
  - `git rev-parse HEAD` (of implementation snapshot): capture before final merge.
280
281
 
281
- - Scoped package discoverability: not yet in npm registry during this environment run.
282
+ - Scoped package discoverability: not yet live in npm registry as `@datafog/fogclaw@0.1.4` (publish blocked by OTP in this environment).
282
283
 
283
284
 
284
285
  ## Interfaces and Dependencies
@@ -303,12 +304,12 @@ Expect:
303
304
 
304
305
  ## Verify/Release Decision
305
306
 
306
- - decision: blocked
307
- - date: 2026-02-17T19:08:00Z
307
+ - decision: pending
308
+ - date: 2026-02-17T20:33:00Z
308
309
  - open findings by priority (if any): pending
309
310
  - evidence:
310
- - installability in clean runtime succeeds for local `datafog-fogclaw-0.1.2.tgz` after tool-name registration and dependency-hardening fixes
311
- - scoped package install still returns registry 404 (not yet published)
311
+ - installability in clean runtime succeeds for local `datafog-fogclaw-0.1.4.tgz` after tool-name registration and dependency-hardening fixes
312
+ - scoped npm install is not yet validated because `0.1.4` has not been published (publish blocked by EOTP).
312
313
  - rollback: revert to previous working scoped package state (or keep changes in branch) if publish credentials/visibility unavailable
313
314
  - post-release checks:
314
315
  - `openclaw plugins install @datafog/fogclaw`
@@ -321,4 +322,4 @@ Expect:
321
322
  - 2026-02-17T10:57:00Z: Initialized plan for V1 scoped-release path in `@datafog/fogclaw` and documented zero-logic-change constraints for immediate installability milestone.
322
323
  - 2026-02-17T18:57:00Z: Completed namespace migration in package metadata and install/docs (`package.json`, `package-lock.json`, `README.md`, `docs/plugins/fogclaw.md`). Ran full local validation (`npm run build`, `npm run test`, `npm run test:plugin-smoke`, `npm pack --json`, `npm publish --dry-run`) and documented install blocker (package not yet published to npm).
323
324
  - 2026-02-17T19:08:00Z: Fixed OpenClaw compatibility in `src/index.ts` by adding explicit `name` fields to `fogclaw_scan` and `fogclaw_redact` tool registrations to avoid undefined `.trim()` during registration; verified clean-runtime install/load succeeds with local tarball (`openclaw plugins install <tgz>`, `plugins info`, `plugins list`).
324
- - 2026-02-17T19:34:00Z: Added explicit `sharp@0.34.5` dependency with override to avoid missing `sharp-darwin-arm64v8.node` crashes during OpenClaw dependency bootstrap; verified clean-plugin install no longer emits that optional native-module warning. Remaining GLiNER startup warnings are now backend-availability related in this environment, not sharp/webgpu export errors.
325
+ - 2026-02-17T20:31:00Z: Added explicit `modelType: "span-level"` for GLiNER runtime configuration so ONNX feeds match downloaded model inputs; local install now returns real `gliner`-backed detections for PERSON/ORGANIZATION and no longer errors with `input 'span_idx' is missing in 'feeds'` in this environment.
@@ -13,6 +13,9 @@ FogClaw is an OpenClaw plugin that protects agent workflows by detecting and han
13
13
 
14
14
  It provides both proactive guardrail behavior (via the `before_agent_start` hook) and explicit tools:
15
15
 
16
+ - GLiNER ONNX artifacts are provisioned automatically on first run (no manual `download` step required).
17
+ - If the model cannot be downloaded (offline or restricted network), FogClaw continues in regex-only mode.
18
+
16
19
  - `fogclaw_scan`: scans text for PII and custom entities.
17
20
  - `fogclaw_redact`: scans and redacts sensitive matches.
18
21
 
@@ -59,6 +62,7 @@ Set plugin config under `plugins.entries.fogclaw.config`:
59
62
  enabled: true,
60
63
  guardrail_mode: "redact",
61
64
  redactStrategy: "token",
65
+ model: "onnx-community/gliner_large-v2.1",
62
66
  confidence_threshold: 0.5,
63
67
  custom_entities: ["project codename", "competitor name"],
64
68
  entityActions: {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "fogclaw",
3
3
  "name": "FogClaw",
4
- "version": "0.1.0",
4
+ "version": "0.1.4",
5
5
  "description": "PII detection & custom entity redaction powered by DataFog",
6
6
  "configSchema": {
7
7
  "type": "object",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@datafog/fogclaw",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "OpenClaw plugin for PII detection & custom entity redaction powered by DataFog",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,3 +1,7 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { env } from "@xenova/transformers";
4
+
1
5
  import type { Entity } from "../types.js";
2
6
  import { canonicalType } from "../types.js";
3
7
 
@@ -12,6 +16,139 @@ const DEFAULT_NER_LABELS = [
12
16
  "passport number",
13
17
  ];
14
18
 
19
/**
 * Repository-relative ONNX artifact names probed when resolving a GLiNER
 * model. The list is walked front to back and the first artifact that is
 * found in the cache (or downloads successfully) is used, so the order is a
 * preference order.
 *
 * NOTE(review): the order looks like "most aggressively quantized first,
 * full-precision `model.onnx` last" — confirm that is the intent.
 */
const GLINER_MODEL_FILES = [
  ...["q4f16", "q4", "bnb4", "int8", "uint8", "quantized", "fp16"].map(
    (variant) => `onnx/model_${variant}.onnx`,
  ),
  "onnx/model.onnx",
];

/** Milliseconds a single artifact download may run before it is aborted. */
const MODEL_DOWNLOAD_TIMEOUT_MS = 2 * 60 * 1000;
31
+
32
/**
 * Heuristically decides whether a model reference names a file on disk rather
 * than a Hugging Face repository id.
 *
 * @param modelPath - Raw model reference; surrounding whitespace is ignored.
 * @returns `true` when the trimmed value ends in `.onnx`, `.ort` or `.bin`
 *   (case-insensitive), starts with `.`, or is an absolute path. Returns
 *   `false` otherwise, including for blank input.
 */
function isLikelyLocalPath(modelPath: string): boolean {
  const candidate = modelPath.trim();
  if (candidate.length === 0) {
    return false;
  }

  // A model-file extension is treated as a file reference regardless of prefix.
  const lowered = candidate.toLowerCase();
  for (const suffix of [".onnx", ".ort", ".bin"]) {
    if (lowered.endsWith(suffix)) {
      return true;
    }
  }

  // Relative ("./", "../") and absolute references are also local.
  return candidate.startsWith(".") || path.isAbsolute(candidate);
}
50
+
51
/**
 * Anchors a path to the current working directory.
 *
 * @param value - Absolute or relative filesystem path.
 * @returns `value` untouched when it is already absolute; otherwise the path
 *   resolved against `process.cwd()`.
 */
function toAbsolutePath(value: string): string {
  if (path.isAbsolute(value)) {
    return value;
  }

  return path.resolve(process.cwd(), value);
}
54
+
55
/**
 * Directory under which model artifacts are looked up and stored.
 *
 * @returns `env.localModelPath` from `@xenova/transformers` when it is neither
 *   `null` nor `undefined`; otherwise `<cwd>/.cache`.
 *
 * NOTE(review): confirm `localModelPath` (rather than the library's cache
 * directory setting) is the location the documentation promises to users.
 */
function getModelCacheDir(): string {
  const configured = env.localModelPath;
  if (configured !== null && configured !== undefined) {
    return configured;
  }

  return path.join(process.cwd(), ".cache");
}
58
+
59
/**
 * Normalizes a configured model reference before it is interpreted.
 *
 * @param modelPath - Raw model reference.
 * @returns The reference with leading and trailing whitespace removed.
 */
function sanitizeModelReference(modelPath: string): string {
  const normalized = modelPath.trim();
  return normalized;
}
62
+
63
/**
 * Reports whether a filesystem entry can be accessed.
 *
 * @param filePath - Path to probe.
 * @returns `true` when `fs.access` succeeds, `false` when it rejects for any
 *   reason (missing entry, permissions, ...). Never rejects.
 */
async function fileExists(filePath: string): Promise<boolean> {
  return fs.access(filePath).then(
    () => true,
    () => false,
  );
}
71
+
72
/**
 * Ensures one model artifact from a Hugging Face repository is present in the
 * local model cache, downloading it when it is missing.
 *
 * The artifact is written to a temporary sibling file and renamed into place
 * only after the write has completed, so an interrupted download can never
 * leave a truncated file that later runs would mistake for a cached model.
 *
 * @param modelRepo - Hugging Face repository id, e.g. `org/name`.
 * @param filename - Repository-relative artifact path, e.g. `onnx/model.onnx`.
 * @returns Path of the cached artifact.
 * @throws Error when the server answers with a non-2xx status, when the
 *   download exceeds `MODEL_DOWNLOAD_TIMEOUT_MS`, or when the network or
 *   filesystem operation fails.
 */
async function downloadModelIfNeeded(modelRepo: string, filename: string): Promise<string> {
  const cacheDir = getModelCacheDir();
  const localPath = path.join(cacheDir, modelRepo, filename);

  if (await fileExists(localPath)) {
    return localPath;
  }

  const url = `https://huggingface.co/${modelRepo}/resolve/main/${filename}`;
  const headers = new Headers();
  // `||` rather than `??`: an empty HF_TOKEN must not mask HF_ACCESS_TOKEN.
  const token = process.env.HF_TOKEN || process.env.HF_ACCESS_TOKEN;
  if (token) {
    headers.set("Authorization", `Bearer ${token}`);
  }

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), MODEL_DOWNLOAD_TIMEOUT_MS);
  // Unique per process and attempt so concurrent downloads do not collide.
  const partialPath = `${localPath}.${process.pid}.${Date.now()}.partial`;

  try {
    const response = await fetch(url, { headers, signal: controller.signal });
    if (!response.ok) {
      throw new Error(`Unable to download model artifact: ${response.status}`);
    }

    const bytes = new Uint8Array(await response.arrayBuffer());
    await fs.mkdir(path.dirname(localPath), { recursive: true });
    await fs.writeFile(partialPath, bytes);
    // Publish the artifact only once it is complete on disk.
    await fs.rename(partialPath, localPath);

    return localPath;
  } catch (err) {
    // Best-effort cleanup; the original failure is what callers need to see.
    await fs.rm(partialPath, { force: true }).catch(() => undefined);

    if (err instanceof Error && err.name === "AbortError") {
      throw new Error(`Model download timed out after ${MODEL_DOWNLOAD_TIMEOUT_MS}ms`);
    }

    throw err;
  } finally {
    clearTimeout(timeout);
  }
}
111
+
112
/**
 * Turns a configured model reference into the path of an ONNX file on disk.
 *
 * A reference that looks like a local path must already exist. Any other
 * reference is treated as a Hugging Face repository id: every known artifact
 * name is first looked up in the model cache, and only if none is cached are
 * the artifacts downloaded one after another until one succeeds.
 *
 * @param modelPath - Local file path or Hugging Face repository id.
 * @returns Path of the resolved ONNX model file.
 * @throws Error when the reference is blank, when a local file is missing, or
 *   when no artifact could be found or downloaded (the message carries the
 *   last download failure).
 */
async function resolveModelPath(modelPath: string): Promise<string> {
  const reference = sanitizeModelReference(modelPath);
  if (reference === "") {
    throw new Error("Model path is empty");
  }

  if (isLikelyLocalPath(reference)) {
    const onDisk = toAbsolutePath(reference);
    if (await fileExists(onDisk)) {
      return onDisk;
    }

    throw new Error(`Local GLiNER model file not found at: ${onDisk}`);
  }

  // Pass 1: prefer anything already cached, so no network access is needed.
  for (const artifact of GLINER_MODEL_FILES) {
    const cached = path.join(getModelCacheDir(), reference, artifact);
    if (await fileExists(cached)) {
      return cached;
    }
  }

  // Pass 2: download candidates in preference order, remembering the most
  // recent failure for the final error message.
  let failure: Error | undefined;
  for (const artifact of GLINER_MODEL_FILES) {
    try {
      return await downloadModelIfNeeded(reference, artifact);
    } catch (cause) {
      failure = cause instanceof Error ? cause : new Error(String(cause));
    }
  }

  const reason = failure?.message ?? "unknown";
  throw new Error(
    `Failed to resolve GLiNER model "${reference}". Tried ${GLINER_MODEL_FILES.join(", ")}: ${reason}`,
  );
}
151
+
15
152
  export class GlinerEngine {
16
153
  private model: any = null;
17
154
  private modelPath: string;
@@ -28,16 +165,17 @@ export class GlinerEngine {
28
165
  if (this.initialized) return;
29
166
 
30
167
  try {
168
+ const resolvedModelPath = await resolveModelPath(this.modelPath);
31
169
  const glinerModule = await import("gliner/node").catch(async () => import("gliner"));
32
170
  const { Gliner } = glinerModule;
33
171
  this.model = new Gliner({
34
172
  tokenizerPath: this.modelPath,
35
173
  onnxSettings: {
36
- modelPath: this.modelPath,
174
+ modelPath: resolvedModelPath,
37
175
  executionProvider: "cpu",
38
176
  },
39
177
  maxWidth: 12,
40
- modelType: "gliner",
178
+ modelType: "span-level",
41
179
  });
42
180
  await this.model.initialize();
43
181
  this.initialized = true;
@@ -67,13 +205,26 @@ export class GlinerEngine {
67
205
  // Deduplicate labels
68
206
  const uniqueLabels = [...new Set(labels)];
69
207
 
70
- const results = await this.model.inference(text, uniqueLabels, {
208
+ const rawResults = await this.model.inference({
209
+ texts: [text],
210
+ entities: uniqueLabels,
211
+ flatNer: false,
71
212
  threshold: this.threshold,
72
213
  });
214
+ const flatResults = Array.isArray(rawResults) ? rawResults.flat() : [];
73
215
 
74
- return results.map(
75
- (r: { text: string; label: string; score: number; start: number; end: number }) => ({
76
- text: r.text,
216
+ return flatResults.map(
217
+ (
218
+ r: {
219
+ spanText?: string;
220
+ text: string;
221
+ label: string;
222
+ score: number;
223
+ start: number;
224
+ end: number;
225
+ },
226
+ ) => ({
227
+ text: r.spanText ?? r.text,
77
228
  label: canonicalType(r.label),
78
229
  start: r.start,
79
230
  end: r.end,
@@ -1,4 +1,7 @@
1
- import { describe, it, expect, vi, beforeEach } from "vitest";
1
+ import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
2
+ import fs from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
2
5
 
3
6
  // Mock the gliner npm package so we don't need the actual 1.4GB model
4
7
  vi.mock("gliner", () => {
@@ -14,10 +17,102 @@ vi.mock("gliner", () => {
14
17
  }
15
18
 
16
19
  async inference(
17
- text: string,
18
- labels: string[],
19
- options: { threshold: number },
20
+ request: { texts: string[]; entities: string[] } | string | string[],
21
+ maybeEntities?: string[],
22
+ _flatNer = false,
23
+ _threshold = 0.5,
20
24
  ): Promise<Array<{ text: string; label: string; score: number; start: number; end: number }>> {
25
+ const text =
26
+ typeof request === "string"
27
+ ? request
28
+ : Array.isArray(request)
29
+ ? request[0] ?? ""
30
+ : request.texts[0] ?? "";
31
+ const requestEntities =
32
+ typeof request === "object" && request !== null && "entities" in request
33
+ ? request.entities
34
+ : undefined;
35
+ const labels =
36
+ Array.isArray(maybeEntities)
37
+ ? maybeEntities
38
+ : requestEntities ?? [];
39
+ const results: Array<{ text: string; label: string; score: number; start: number; end: number }> = [];
40
+
41
+ // Simulate entity detection for "John Smith"
42
+ const johnIndex = text.indexOf("John Smith");
43
+ if (johnIndex !== -1 && labels.includes("person")) {
44
+ results.push({
45
+ text: "John Smith",
46
+ label: "person",
47
+ score: 0.95,
48
+ start: johnIndex,
49
+ end: johnIndex + "John Smith".length,
50
+ });
51
+ }
52
+
53
+ // Simulate entity detection for "Acme Corp"
54
+ const acmeIndex = text.indexOf("Acme Corp");
55
+ if (acmeIndex !== -1 && labels.includes("organization")) {
56
+ results.push({
57
+ text: "Acme Corp",
58
+ label: "organization",
59
+ score: 0.88,
60
+ start: acmeIndex,
61
+ end: acmeIndex + "Acme Corp".length,
62
+ });
63
+ }
64
+
65
+ // Simulate entity detection for "New York"
66
+ const nyIndex = text.indexOf("New York");
67
+ if (nyIndex !== -1 && labels.includes("location")) {
68
+ results.push({
69
+ text: "New York",
70
+ label: "location",
71
+ score: 0.91,
72
+ start: nyIndex,
73
+ end: nyIndex + "New York".length,
74
+ });
75
+ }
76
+
77
+ return results;
78
+ }
79
+ }
80
+
81
+ return { Gliner: MockGliner };
82
+ });
83
+
84
+ vi.mock("gliner/node", () => {
85
+ class MockGliner {
86
+ private config: any;
87
+
88
+ constructor(config: any) {
89
+ this.config = config;
90
+ }
91
+
92
+ async initialize(): Promise<void> {
93
+ // No-op in mock
94
+ }
95
+
96
+ async inference(
97
+ request: { texts: string[]; entities: string[] } | string | string[],
98
+ maybeEntities?: string[],
99
+ _flatNer = false,
100
+ _threshold = 0.5,
101
+ ): Promise<Array<{ text: string; label: string; score: number; start: number; end: number }>> {
102
+ const text =
103
+ typeof request === "string"
104
+ ? request
105
+ : Array.isArray(request)
106
+ ? request[0] ?? ""
107
+ : request.texts[0] ?? "";
108
+ const requestEntities =
109
+ typeof request === "object" && request !== null && "entities" in request
110
+ ? request.entities
111
+ : undefined;
112
+ const labels =
113
+ Array.isArray(maybeEntities)
114
+ ? maybeEntities
115
+ : requestEntities ?? [];
21
116
  const results: Array<{ text: string; label: string; score: number; start: number; end: number }> = [];
22
117
 
23
118
  // Simulate entity detection for "John Smith"
@@ -65,11 +160,21 @@ vi.mock("gliner", () => {
65
160
 
66
161
  import { GlinerEngine } from "../src/engines/gliner.js";
67
162
 
163
+ const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-gliner-model-test.onnx");
164
+
165
+ beforeAll(async () => {
166
+ await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
167
+ });
168
+
169
+ afterAll(async () => {
170
+ await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
171
+ });
172
+
68
173
  describe("GlinerEngine", () => {
69
174
  let engine: GlinerEngine;
70
175
 
71
176
  beforeEach(async () => {
72
- engine = new GlinerEngine("onnx-community/gliner_small-v2.5", 0.5);
177
+ engine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
73
178
  await engine.initialize();
74
179
  });
75
180
 
@@ -166,7 +271,7 @@ describe("GlinerEngine", () => {
166
271
  });
167
272
 
168
273
  it("reports isInitialized correctly", async () => {
169
- const freshEngine = new GlinerEngine("some-model", 0.5);
274
+ const freshEngine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
170
275
  expect(freshEngine.isInitialized).toBe(false);
171
276
 
172
277
  await freshEngine.initialize();
@@ -1,4 +1,7 @@
1
- import { describe, it, expect, vi, beforeEach } from "vitest";
1
+ import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
2
+ import fs from "node:fs/promises";
3
+ import os from "node:os";
4
+ import path from "node:path";
2
5
 
3
6
  // Mock the gliner npm package so we don't need the actual model
4
7
  vi.mock("gliner", () => {
@@ -6,10 +9,25 @@ vi.mock("gliner", () => {
6
9
  Gliner: class MockGliner {
7
10
  async initialize() {}
8
11
  async inference(
9
- text: string,
10
- labels: string[],
11
- _opts: { threshold: number },
12
+ request: { texts: string[]; entities: string[] } | string | string[],
13
+ maybeEntities?: string[],
14
+ _flatNer = false,
15
+ _threshold = 0.5,
12
16
  ) {
17
+ const text =
18
+ typeof request === "string"
19
+ ? request
20
+ : Array.isArray(request)
21
+ ? request[0] ?? ""
22
+ : request.texts[0] ?? "";
23
+ const requestEntities =
24
+ typeof request === "object" && request !== null && "entities" in request
25
+ ? request.entities
26
+ : undefined;
27
+ const labels =
28
+ Array.isArray(maybeEntities)
29
+ ? maybeEntities
30
+ : requestEntities ?? [];
13
31
  const results: any[] = [];
14
32
 
15
33
  // Simulate person detection for "John Smith"
@@ -39,7 +57,64 @@ vi.mock("gliner", () => {
39
57
  // Only return results whose labels are requested
40
58
  return results.filter((r) => labels.includes(r.label));
41
59
  }
42
- },
60
+ }
61
+ };
62
+ });
63
+
64
+ vi.mock("gliner/node", () => {
65
+ return {
66
+ Gliner: class MockGliner {
67
+ async initialize() {}
68
+ async inference(
69
+ request: { texts: string[]; entities: string[] } | string | string[],
70
+ maybeEntities?: string[],
71
+ _flatNer = false,
72
+ _threshold = 0.5,
73
+ ) {
74
+ const text =
75
+ typeof request === "string"
76
+ ? request
77
+ : Array.isArray(request)
78
+ ? request[0] ?? ""
79
+ : request.texts[0] ?? "";
80
+ const requestEntities =
81
+ typeof request === "object" && request !== null && "entities" in request
82
+ ? request.entities
83
+ : undefined;
84
+ const labels =
85
+ Array.isArray(maybeEntities)
86
+ ? maybeEntities
87
+ : requestEntities ?? [];
88
+ const results: any[] = [];
89
+
90
+ // Simulate person detection for "John Smith"
91
+ if (text.includes("John Smith")) {
92
+ const idx = text.indexOf("John Smith");
93
+ results.push({
94
+ text: "John Smith",
95
+ label: "person",
96
+ score: 0.95,
97
+ start: idx,
98
+ end: idx + 10,
99
+ });
100
+ }
101
+
102
+ // Simulate organization detection for "Acme Corp"
103
+ if (text.includes("Acme Corp")) {
104
+ const idx = text.indexOf("Acme Corp");
105
+ results.push({
106
+ text: "Acme Corp",
107
+ label: "organization",
108
+ score: 0.88,
109
+ start: idx,
110
+ end: idx + 9,
111
+ });
112
+ }
113
+
114
+ // Only return results whose labels are requested
115
+ return results.filter((r) => labels.includes(r.label));
116
+ }
117
+ }
43
118
  };
44
119
  });
45
120
 
@@ -47,8 +122,22 @@ import { Scanner } from "../src/scanner.js";
47
122
  import { DEFAULT_CONFIG } from "../src/config.js";
48
123
  import type { FogClawConfig } from "../src/types.js";
49
124
 
125
+ const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-scanner-gliner-model-test.onnx");
126
+
127
+ beforeAll(async () => {
128
+ await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
129
+ });
130
+
131
+ afterAll(async () => {
132
+ await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
133
+ });
134
+
50
135
  function makeConfig(overrides: Partial<FogClawConfig> = {}): FogClawConfig {
51
- return { ...DEFAULT_CONFIG, ...overrides };
136
+ return {
137
+ ...DEFAULT_CONFIG,
138
+ model: TEST_ONNX_MODEL_PATH,
139
+ ...overrides,
140
+ };
52
141
  }
53
142
 
54
143
  describe("Scanner", () => {