@datafog/fogclaw 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/CHANGELOG.md +42 -0
  2. package/README.md +39 -0
  3. package/dist/backlog-tools.d.ts +57 -0
  4. package/dist/backlog-tools.d.ts.map +1 -0
  5. package/dist/backlog-tools.js +173 -0
  6. package/dist/backlog-tools.js.map +1 -0
  7. package/dist/backlog.d.ts +82 -0
  8. package/dist/backlog.d.ts.map +1 -0
  9. package/dist/backlog.js +169 -0
  10. package/dist/backlog.js.map +1 -0
  11. package/dist/config.d.ts.map +1 -1
  12. package/dist/config.js +6 -0
  13. package/dist/config.js.map +1 -1
  14. package/dist/extract.d.ts +28 -0
  15. package/dist/extract.d.ts.map +1 -0
  16. package/dist/extract.js +91 -0
  17. package/dist/extract.js.map +1 -0
  18. package/dist/index.d.ts +2 -1
  19. package/dist/index.d.ts.map +1 -1
  20. package/dist/index.js +96 -3
  21. package/dist/index.js.map +1 -1
  22. package/dist/message-sending-handler.d.ts +41 -0
  23. package/dist/message-sending-handler.d.ts.map +1 -0
  24. package/dist/message-sending-handler.js +54 -0
  25. package/dist/message-sending-handler.js.map +1 -0
  26. package/dist/tool-result-handler.d.ts +37 -0
  27. package/dist/tool-result-handler.d.ts.map +1 -0
  28. package/dist/tool-result-handler.js +95 -0
  29. package/dist/tool-result-handler.js.map +1 -0
  30. package/dist/types.d.ts +16 -0
  31. package/dist/types.d.ts.map +1 -1
  32. package/dist/types.js +3 -0
  33. package/dist/types.js.map +1 -1
  34. package/openclaw.plugin.json +11 -1
  35. package/package.json +7 -1
  36. package/.github/workflows/harness-docs.yml +0 -30
  37. package/AGENTS.md +0 -28
  38. package/docs/DATA.md +0 -28
  39. package/docs/DESIGN.md +0 -17
  40. package/docs/DOMAIN_DOCS.md +0 -30
  41. package/docs/FRONTEND.md +0 -24
  42. package/docs/OBSERVABILITY.md +0 -25
  43. package/docs/PLANS.md +0 -171
  44. package/docs/PRODUCT_SENSE.md +0 -20
  45. package/docs/RELIABILITY.md +0 -60
  46. package/docs/SECURITY.md +0 -50
  47. package/docs/design-docs/core-beliefs.md +0 -17
  48. package/docs/design-docs/index.md +0 -8
  49. package/docs/generated/README.md +0 -36
  50. package/docs/generated/memory.md +0 -1
  51. package/docs/plans/2026-02-16-fogclaw-design.md +0 -172
  52. package/docs/plans/2026-02-16-fogclaw-implementation.md +0 -1606
  53. package/docs/plans/README.md +0 -15
  54. package/docs/plans/active/2026-02-16-feat-openclaw-official-submission-plan.md +0 -386
  55. package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +0 -328
  56. package/docs/plans/active/2026-02-17-feat-submit-fogclaw-to-openclaw-plan.md +0 -244
  57. package/docs/plans/tech-debt-tracker.md +0 -42
  58. package/docs/plugins/fogclaw.md +0 -101
  59. package/docs/runbooks/address-review-findings.md +0 -30
  60. package/docs/runbooks/ci-failures.md +0 -46
  61. package/docs/runbooks/code-review.md +0 -34
  62. package/docs/runbooks/merge-change.md +0 -28
  63. package/docs/runbooks/pull-request.md +0 -45
  64. package/docs/runbooks/record-evidence.md +0 -43
  65. package/docs/runbooks/reproduce-bug.md +0 -42
  66. package/docs/runbooks/respond-to-feedback.md +0 -42
  67. package/docs/runbooks/review-findings.md +0 -31
  68. package/docs/runbooks/submit-openclaw-plugin.md +0 -68
  69. package/docs/runbooks/update-agents-md.md +0 -59
  70. package/docs/runbooks/update-domain-docs.md +0 -42
  71. package/docs/runbooks/validate-current-state.md +0 -41
  72. package/docs/runbooks/verify-release.md +0 -69
  73. package/docs/specs/2026-02-16-feat-openclaw-official-submission-spec.md +0 -115
  74. package/docs/specs/2026-02-17-feat-submit-fogclaw-to-openclaw.md +0 -125
  75. package/docs/specs/README.md +0 -5
  76. package/docs/specs/index.md +0 -8
  77. package/docs/spikes/README.md +0 -8
  78. package/fogclaw.config.example.json +0 -33
  79. package/scripts/ci/he-docs-config.json +0 -123
  80. package/scripts/ci/he-docs-drift.sh +0 -112
  81. package/scripts/ci/he-docs-lint.sh +0 -234
  82. package/scripts/ci/he-plans-lint.sh +0 -354
  83. package/scripts/ci/he-runbooks-lint.sh +0 -445
  84. package/scripts/ci/he-specs-lint.sh +0 -258
  85. package/scripts/ci/he-spikes-lint.sh +0 -249
  86. package/scripts/runbooks/select-runbooks.sh +0 -154
  87. package/src/config.ts +0 -183
  88. package/src/engines/gliner.ts +0 -240
  89. package/src/engines/regex.ts +0 -71
  90. package/src/index.ts +0 -372
  91. package/src/redactor.ts +0 -51
  92. package/src/scanner.ts +0 -196
  93. package/src/types.ts +0 -71
  94. package/tests/config.test.ts +0 -78
  95. package/tests/gliner.test.ts +0 -289
  96. package/tests/plugin-smoke.test.ts +0 -143
  97. package/tests/redactor.test.ts +0 -320
  98. package/tests/regex.test.ts +0 -345
  99. package/tests/scanner.test.ts +0 -348
  100. package/tsconfig.json +0 -20
@@ -1,345 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
- import { RegexEngine } from "../src/engines/regex.js";
3
-
4
- const engine = new RegexEngine();
5
-
6
- /** Helper: assert every returned entity has correct span offsets */
7
- function assertSpans(text: string) {
8
- const entities = engine.scan(text);
9
- for (const e of entities) {
10
- expect(text.slice(e.start, e.end)).toBe(e.text);
11
- }
12
- return entities;
13
- }
14
-
15
- // ---------------------------------------------------------------------------
16
- // EMAIL
17
- // ---------------------------------------------------------------------------
18
- describe("EMAIL", () => {
19
- it("detects a simple email", () => {
20
- const entities = assertSpans("Contact alice@example.com for info.");
21
- const emails = entities.filter((e) => e.label === "EMAIL");
22
- expect(emails).toHaveLength(1);
23
- expect(emails[0].text).toBe("alice@example.com");
24
- expect(emails[0].confidence).toBe(1.0);
25
- expect(emails[0].source).toBe("regex");
26
- });
27
-
28
- it("detects email with subdomains", () => {
29
- const entities = assertSpans("Send to bob@mail.example.co.uk now");
30
- const emails = entities.filter((e) => e.label === "EMAIL");
31
- expect(emails).toHaveLength(1);
32
- expect(emails[0].text).toBe("bob@mail.example.co.uk");
33
- });
34
-
35
- it("detects email with special chars in local part", () => {
36
- const entities = assertSpans("user+tag@example.org");
37
- const emails = entities.filter((e) => e.label === "EMAIL");
38
- expect(emails).toHaveLength(1);
39
- expect(emails[0].text).toBe("user+tag@example.org");
40
- });
41
-
42
- it("does not match bare @-signs or partial addresses", () => {
43
- const entities = engine.scan("@ or foo@ or @bar");
44
- const emails = entities.filter((e) => e.label === "EMAIL");
45
- expect(emails).toHaveLength(0);
46
- });
47
- });
48
-
49
- // ---------------------------------------------------------------------------
50
- // PHONE
51
- // ---------------------------------------------------------------------------
52
- describe("PHONE", () => {
53
- it("detects US phone with dashes", () => {
54
- const entities = assertSpans("Call 555-123-4567 today.");
55
- const phones = entities.filter((e) => e.label === "PHONE");
56
- expect(phones).toHaveLength(1);
57
- expect(phones[0].text).toBe("555-123-4567");
58
- });
59
-
60
- it("detects US phone with parentheses", () => {
61
- const entities = assertSpans("Phone: (555) 123-4567");
62
- const phones = entities.filter((e) => e.label === "PHONE");
63
- expect(phones).toHaveLength(1);
64
- expect(phones[0].text).toBe("(555) 123-4567");
65
- });
66
-
67
- it("detects +1 prefix", () => {
68
- const entities = assertSpans("Reach me at +1-800-555-1234.");
69
- const phones = entities.filter((e) => e.label === "PHONE");
70
- expect(phones).toHaveLength(1);
71
- expect(phones[0].text).toBe("+1-800-555-1234");
72
- });
73
-
74
- it("detects international format", () => {
75
- const entities = assertSpans("Number: +44 20 7946 0958");
76
- const phones = entities.filter((e) => e.label === "PHONE");
77
- expect(phones).toHaveLength(1);
78
- expect(phones[0].text).toBe("+44 20 7946 0958");
79
- });
80
-
81
- it("does not match short digit sequences", () => {
82
- const entities = engine.scan("Code 12345 here");
83
- const phones = entities.filter((e) => e.label === "PHONE");
84
- expect(phones).toHaveLength(0);
85
- });
86
- });
87
-
88
- // ---------------------------------------------------------------------------
89
- // SSN
90
- // ---------------------------------------------------------------------------
91
- describe("SSN", () => {
92
- it("detects a valid SSN with dashes", () => {
93
- const entities = assertSpans("SSN: 123-45-6789");
94
- const ssns = entities.filter((e) => e.label === "SSN");
95
- expect(ssns).toHaveLength(1);
96
- expect(ssns[0].text).toBe("123-45-6789");
97
- });
98
-
99
- it("detects a valid SSN without dashes", () => {
100
- const entities = assertSpans("SSN 123456789 filed.");
101
- const ssns = entities.filter((e) => e.label === "SSN");
102
- expect(ssns).toHaveLength(1);
103
- expect(ssns[0].text).toBe("123456789");
104
- });
105
-
106
- it("rejects SSN starting with 000", () => {
107
- const entities = engine.scan("Invalid SSN 000-12-3456");
108
- const ssns = entities.filter((e) => e.label === "SSN");
109
- expect(ssns).toHaveLength(0);
110
- });
111
-
112
- it("rejects SSN starting with 666", () => {
113
- const entities = engine.scan("Invalid SSN 666-12-3456");
114
- const ssns = entities.filter((e) => e.label === "SSN");
115
- expect(ssns).toHaveLength(0);
116
- });
117
-
118
- it("rejects SSN with 00 in middle group", () => {
119
- const entities = engine.scan("Invalid SSN 123-00-6789");
120
- const ssns = entities.filter((e) => e.label === "SSN");
121
- expect(ssns).toHaveLength(0);
122
- });
123
-
124
- it("rejects SSN with 0000 in last group", () => {
125
- const entities = engine.scan("Invalid SSN 123-45-0000");
126
- const ssns = entities.filter((e) => e.label === "SSN");
127
- expect(ssns).toHaveLength(0);
128
- });
129
- });
130
-
131
- // ---------------------------------------------------------------------------
132
- // CREDIT_CARD
133
- // ---------------------------------------------------------------------------
134
- describe("CREDIT_CARD", () => {
135
- it("detects a Visa card (16 digits)", () => {
136
- const entities = assertSpans("Card: 4111111111111111");
137
- const cards = entities.filter((e) => e.label === "CREDIT_CARD");
138
- expect(cards).toHaveLength(1);
139
- expect(cards[0].text).toBe("4111111111111111");
140
- });
141
-
142
- it("detects a Mastercard", () => {
143
- const entities = assertSpans("MC 5500000000000004");
144
- const cards = entities.filter((e) => e.label === "CREDIT_CARD");
145
- expect(cards).toHaveLength(1);
146
- expect(cards[0].text).toBe("5500000000000004");
147
- });
148
-
149
- it("detects an Amex card", () => {
150
- const entities = assertSpans("Amex 378282246310005");
151
- const cards = entities.filter((e) => e.label === "CREDIT_CARD");
152
- expect(cards).toHaveLength(1);
153
- expect(cards[0].text).toBe("378282246310005");
154
- });
155
-
156
- it("detects card number with dashes", () => {
157
- const entities = assertSpans("Card 4111-1111-1111-1111 charged");
158
- const cards = entities.filter((e) => e.label === "CREDIT_CARD");
159
- expect(cards).toHaveLength(1);
160
- expect(cards[0].text).toBe("4111-1111-1111-1111");
161
- });
162
-
163
- it("detects card number with spaces", () => {
164
- const entities = assertSpans("Card 5500 0000 0000 0004 charged");
165
- const cards = entities.filter((e) => e.label === "CREDIT_CARD");
166
- expect(cards).toHaveLength(1);
167
- expect(cards[0].text).toBe("5500 0000 0000 0004");
168
- });
169
- });
170
-
171
- // ---------------------------------------------------------------------------
172
- // IP_ADDRESS
173
- // ---------------------------------------------------------------------------
174
- describe("IP_ADDRESS", () => {
175
- it("detects a valid IPv4 address", () => {
176
- const entities = assertSpans("Server at 192.168.1.1 is up.");
177
- const ips = entities.filter((e) => e.label === "IP_ADDRESS");
178
- expect(ips).toHaveLength(1);
179
- expect(ips[0].text).toBe("192.168.1.1");
180
- });
181
-
182
- it("detects 0.0.0.0", () => {
183
- const entities = assertSpans("Bind to 0.0.0.0 for all interfaces.");
184
- const ips = entities.filter((e) => e.label === "IP_ADDRESS");
185
- expect(ips).toHaveLength(1);
186
- expect(ips[0].text).toBe("0.0.0.0");
187
- });
188
-
189
- it("detects 255.255.255.255", () => {
190
- const entities = assertSpans("Broadcast: 255.255.255.255");
191
- const ips = entities.filter((e) => e.label === "IP_ADDRESS");
192
- expect(ips).toHaveLength(1);
193
- expect(ips[0].text).toBe("255.255.255.255");
194
- });
195
-
196
- it("rejects IP with octet > 255", () => {
197
- const entities = engine.scan("Invalid 256.1.2.3 address");
198
- const ips = entities.filter((e) => e.label === "IP_ADDRESS");
199
- // Should not match 256.1.2.3 as a complete valid IP
200
- for (const ip of ips) {
201
- expect(ip.text).not.toBe("256.1.2.3");
202
- }
203
- });
204
-
205
- it("rejects IP with octet 999", () => {
206
- const entities = engine.scan("Bad IP 999.999.999.999");
207
- const ips = entities.filter((e) => e.label === "IP_ADDRESS");
208
- for (const ip of ips) {
209
- expect(ip.text).not.toBe("999.999.999.999");
210
- }
211
- });
212
- });
213
-
214
- // ---------------------------------------------------------------------------
215
- // DATE
216
- // ---------------------------------------------------------------------------
217
- describe("DATE", () => {
218
- it("detects MM/DD/YYYY format", () => {
219
- const entities = assertSpans("Born on 01/15/1990 in NY.");
220
- const dates = entities.filter((e) => e.label === "DATE");
221
- expect(dates).toHaveLength(1);
222
- expect(dates[0].text).toBe("01/15/1990");
223
- });
224
-
225
- it("detects YYYY-MM-DD format", () => {
226
- const entities = assertSpans("Date: 2024-03-15 confirmed.");
227
- const dates = entities.filter((e) => e.label === "DATE");
228
- expect(dates).toHaveLength(1);
229
- expect(dates[0].text).toBe("2024-03-15");
230
- });
231
-
232
- it("detects Month DD, YYYY format", () => {
233
- const entities = assertSpans("On January 5, 2023 we met.");
234
- const dates = entities.filter((e) => e.label === "DATE");
235
- expect(dates).toHaveLength(1);
236
- expect(dates[0].text).toBe("January 5, 2023");
237
- });
238
-
239
- it("detects abbreviated month", () => {
240
- const entities = assertSpans("Meeting: Dec 25, 2022 at noon.");
241
- const dates = entities.filter((e) => e.label === "DATE");
242
- expect(dates).toHaveLength(1);
243
- expect(dates[0].text).toBe("Dec 25, 2022");
244
- });
245
-
246
- it("detects MM-DD-YY format", () => {
247
- const entities = assertSpans("Filed 03-15-90 in records.");
248
- const dates = entities.filter((e) => e.label === "DATE");
249
- expect(dates).toHaveLength(1);
250
- expect(dates[0].text).toBe("03-15-90");
251
- });
252
- });
253
-
254
- // ---------------------------------------------------------------------------
255
- // ZIP_CODE
256
- // ---------------------------------------------------------------------------
257
- describe("ZIP_CODE", () => {
258
- it("detects a 5-digit ZIP", () => {
259
- const entities = assertSpans("ZIP 90210 area.");
260
- const zips = entities.filter((e) => e.label === "ZIP_CODE");
261
- expect(zips).toHaveLength(1);
262
- expect(zips[0].text).toBe("90210");
263
- });
264
-
265
- it("detects a ZIP+4", () => {
266
- const entities = assertSpans("Mailing: 90210-1234 confirmed.");
267
- const zips = entities.filter((e) => e.label === "ZIP_CODE");
268
- expect(zips).toHaveLength(1);
269
- expect(zips[0].text).toBe("90210-1234");
270
- });
271
- });
272
-
273
- // ---------------------------------------------------------------------------
274
- // COMBINED / EDGE CASES
275
- // ---------------------------------------------------------------------------
276
- describe("Multiple entities in one text", () => {
277
- it("finds email, phone, and SSN in same text", () => {
278
- const text =
279
- "Contact alice@example.com or 555-123-4567. SSN: 123-45-6789.";
280
- const entities = assertSpans(text);
281
-
282
- const labels = entities.map((e) => e.label);
283
- expect(labels).toContain("EMAIL");
284
- expect(labels).toContain("PHONE");
285
- expect(labels).toContain("SSN");
286
- });
287
-
288
- it("finds multiple emails", () => {
289
- const text = "Send to a@b.com and c@d.org please.";
290
- const entities = assertSpans(text);
291
- const emails = entities.filter((e) => e.label === "EMAIL");
292
- expect(emails).toHaveLength(2);
293
- expect(emails[0].text).toBe("a@b.com");
294
- expect(emails[1].text).toBe("c@d.org");
295
- });
296
- });
297
-
298
- describe("Empty and no-match inputs", () => {
299
- it("returns empty array for empty string", () => {
300
- const entities = engine.scan("");
301
- expect(entities).toEqual([]);
302
- });
303
-
304
- it("returns empty array for text with no PII", () => {
305
- const entities = engine.scan("The quick brown fox jumps over the lazy dog.");
306
- // Filter out anything that might false-positive
307
- const meaningful = entities.filter(
308
- (e) => !["ZIP_CODE"].includes(e.label) || e.text.length >= 5
309
- );
310
- // This sentence has no PII
311
- expect(entities).toEqual([]);
312
- });
313
- });
314
-
315
- describe("Entity shape", () => {
316
- it("every entity has correct confidence and source", () => {
317
- const text = "Email: test@test.com Phone: 555-123-4567";
318
- const entities = engine.scan(text);
319
- for (const e of entities) {
320
- expect(e.confidence).toBe(1.0);
321
- expect(e.source).toBe("regex");
322
- expect(typeof e.start).toBe("number");
323
- expect(typeof e.end).toBe("number");
324
- expect(e.end).toBeGreaterThan(e.start);
325
- expect(typeof e.text).toBe("string");
326
- expect(typeof e.label).toBe("string");
327
- }
328
- });
329
-
330
- it("span offsets are correct for all entity types", () => {
331
- const text =
332
- "Email: user@site.com, Phone: (800) 555-0199, SSN: 321-54-9876, " +
333
- "Card: 4111111111111111, IP: 10.0.0.1, Date: 2024-06-15, ZIP: 60601";
334
- assertSpans(text);
335
- });
336
- });
337
-
338
- describe("Repeated scan calls (lastIndex reset)", () => {
339
- it("produces the same results on consecutive calls", () => {
340
- const text = "Email alice@example.com and call 555-123-4567.";
341
- const first = engine.scan(text);
342
- const second = engine.scan(text);
343
- expect(first).toEqual(second);
344
- });
345
- });
@@ -1,348 +0,0 @@
1
- import {
2
- beforeAll,
3
- beforeEach,
4
- afterAll,
5
- describe,
6
- it,
7
- expect,
8
- vi,
9
- } from "vitest";
10
- import fs from "node:fs/promises";
11
- import os from "node:os";
12
- import path from "node:path";
13
-
14
- // Mock the gliner npm package so we don't need the actual model
15
- vi.mock("gliner", () => {
16
- return {
17
- Gliner: class MockGliner {
18
- async initialize() {}
19
- async inference(
20
- request: { texts: string[]; entities: string[] } | string | string[],
21
- maybeEntities?: string[],
22
- _flatNer = false,
23
- _threshold = 0.5,
24
- ) {
25
- const text =
26
- typeof request === "string"
27
- ? request
28
- : Array.isArray(request)
29
- ? request[0] ?? ""
30
- : request.texts[0] ?? "";
31
- const requestEntities =
32
- typeof request === "object" && request !== null && "entities" in request
33
- ? request.entities
34
- : undefined;
35
- const labels =
36
- Array.isArray(maybeEntities)
37
- ? maybeEntities
38
- : requestEntities ?? [];
39
- const results: any[] = [];
40
-
41
- // Simulate person detection for "John Smith"
42
- if (text.includes("John Smith")) {
43
- const idx = text.indexOf("John Smith");
44
- results.push({
45
- text: "John Smith",
46
- label: "person",
47
- score: 0.95,
48
- start: idx,
49
- end: idx + 10,
50
- });
51
- }
52
-
53
- // Simulate organization detection for "Acme Corp"
54
- if (text.includes("Acme Corp")) {
55
- const idx = text.indexOf("Acme Corp");
56
- results.push({
57
- text: "Acme Corp",
58
- label: "organization",
59
- score: 0.88,
60
- start: idx,
61
- end: idx + 9,
62
- });
63
- }
64
-
65
- // Only return results whose labels are requested
66
- return results.filter((r) => labels.includes(r.label));
67
- }
68
- }
69
- };
70
- });
71
-
72
- vi.mock("gliner/node", () => {
73
- return {
74
- Gliner: class MockGliner {
75
- async initialize() {}
76
- async inference(
77
- request: { texts: string[]; entities: string[] } | string | string[],
78
- maybeEntities?: string[],
79
- _flatNer = false,
80
- _threshold = 0.5,
81
- ) {
82
- const text =
83
- typeof request === "string"
84
- ? request
85
- : Array.isArray(request)
86
- ? request[0] ?? ""
87
- : request.texts[0] ?? "";
88
- const requestEntities =
89
- typeof request === "object" && request !== null && "entities" in request
90
- ? request.entities
91
- : undefined;
92
- const labels =
93
- Array.isArray(maybeEntities)
94
- ? maybeEntities
95
- : requestEntities ?? [];
96
- const results: any[] = [];
97
-
98
- // Simulate person detection for "John Smith"
99
- if (text.includes("John Smith")) {
100
- const idx = text.indexOf("John Smith");
101
- results.push({
102
- text: "John Smith",
103
- label: "person",
104
- score: 0.95,
105
- start: idx,
106
- end: idx + 10,
107
- });
108
- }
109
-
110
- // Simulate organization detection for "Acme Corp"
111
- if (text.includes("Acme Corp")) {
112
- const idx = text.indexOf("Acme Corp");
113
- results.push({
114
- text: "Acme Corp",
115
- label: "organization",
116
- score: 0.88,
117
- start: idx,
118
- end: idx + 9,
119
- });
120
- }
121
-
122
- // Only return results whose labels are requested
123
- return results.filter((r) => labels.includes(r.label));
124
- }
125
- }
126
- };
127
- });
128
-
129
- import { Scanner } from "../src/scanner.js";
130
- import { DEFAULT_CONFIG } from "../src/config.js";
131
- import type { FogClawConfig } from "../src/types.js";
132
-
133
- const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-scanner-gliner-model-test.onnx");
134
-
135
- beforeAll(async () => {
136
- await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
137
- });
138
-
139
- afterAll(async () => {
140
- await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
141
- });
142
-
143
- function makeConfig(overrides: Partial<FogClawConfig> = {}): FogClawConfig {
144
- return {
145
- ...DEFAULT_CONFIG,
146
- model: TEST_ONNX_MODEL_PATH,
147
- ...overrides,
148
- };
149
- }
150
-
151
- describe("Scanner", () => {
152
- let scanner: Scanner;
153
-
154
- beforeEach(async () => {
155
- scanner = new Scanner(makeConfig());
156
- await scanner.initialize();
157
- });
158
-
159
- it("detects regex entities (email) without needing GLiNER", async () => {
160
- // Even without initialize, regex should work
161
- const regexOnly = new Scanner(makeConfig());
162
- // Deliberately NOT calling initialize — GLiNER unavailable
163
-
164
- const result = await regexOnly.scan("Contact us at test@example.com please.");
165
-
166
- expect(result.entities.length).toBeGreaterThanOrEqual(1);
167
- const email = result.entities.find((e) => e.label === "EMAIL");
168
- expect(email).toBeDefined();
169
- expect(email!.text).toBe("test@example.com");
170
- expect(email!.source).toBe("regex");
171
- });
172
-
173
- it("detects GLiNER entities (person names)", async () => {
174
- const result = await scanner.scan("My name is John Smith.");
175
-
176
- const person = result.entities.find((e) => e.label === "PERSON");
177
- expect(person).toBeDefined();
178
- expect(person!.text).toBe("John Smith");
179
- expect(person!.source).toBe("gliner");
180
- expect(person!.confidence).toBe(0.95);
181
- });
182
-
183
- it("merges results from both engines (email + person in same text)", async () => {
184
- const result = await scanner.scan(
185
- "John Smith can be reached at john@example.com for details.",
186
- );
187
-
188
- const person = result.entities.find((e) => e.label === "PERSON");
189
- const email = result.entities.find((e) => e.label === "EMAIL");
190
-
191
- expect(person).toBeDefined();
192
- expect(email).toBeDefined();
193
- expect(person!.source).toBe("gliner");
194
- expect(email!.source).toBe("regex");
195
- });
196
-
197
- it("applies per-entity confidence threshold overrides", async () => {
198
- const strictScanner = new Scanner(
199
- makeConfig({
200
- entityConfidenceThresholds: {
201
- PERSON: 0.98,
202
- },
203
- }),
204
- );
205
- await strictScanner.initialize();
206
-
207
- const result = await strictScanner.scan("My name is John Smith.");
208
- expect(result.entities.find((e) => e.label === "PERSON")).toBeUndefined();
209
- });
210
-
211
- it("supports allowlist exact matches across global and per-entity rules", async () => {
212
- const allowlistScanner = new Scanner(
213
- makeConfig({
214
- allowlist: {
215
- values: ["john@example.com"],
216
- patterns: ["^internal-"],
217
- entities: {
218
- PERSON: ["john smith"],
219
- },
220
- },
221
- }),
222
- );
223
- await allowlistScanner.initialize();
224
-
225
- const result = await allowlistScanner.scan(
226
- "John Smith can be reached at john@example.com.",
227
- );
228
-
229
- expect(result.entities.find((e) => e.label === "EMAIL")).toBeUndefined();
230
- expect(result.entities.find((e) => e.label === "PERSON")).toBeUndefined();
231
- });
232
-
233
- it("applies allowlist regex patterns", async () => {
234
- const allowlistScanner = new Scanner(
235
- makeConfig({
236
- allowlist: {
237
- values: [],
238
- patterns: ["test@example\\.com"],
239
- entities: {},
240
- },
241
- }),
242
- );
243
- await allowlistScanner.initialize();
244
-
245
- const result = await allowlistScanner.scan("This is test@example.com for redaction.");
246
- expect(result.entities.find((e) => e.label === "EMAIL")).toBeUndefined();
247
- });
248
-
249
- it("deduplicates overlapping spans keeping higher confidence", async () => {
250
- // Scan text that might produce overlapping entities
251
- // The dedup logic should keep higher confidence when spans overlap
252
- const result = await scanner.scan("Contact John Smith today.");
253
-
254
- // We shouldn't have duplicate entities for the same span
255
- const starts = result.entities.map((e) => e.start);
256
- const uniqueStarts = [...new Set(starts)];
257
- // If there were overlapping entities, dedup should have resolved them
258
- expect(starts.length).toBe(uniqueStarts.length);
259
- });
260
-
261
- it("returns original text in result", async () => {
262
- const text = "Hello John Smith, your email is test@example.com.";
263
- const result = await scanner.scan(text);
264
-
265
- expect(result.text).toBe(text);
266
- });
267
-
268
- it("accepts extra labels at scan time", async () => {
269
- // The mock only returns results for labels that are in the labels array
270
- // Extra labels get passed through to GLiNER
271
- const result = await scanner.scan(
272
- "John Smith works at Acme Corp.",
273
- ["organization"],
274
- );
275
-
276
- // Person is always in default labels, organization should be detected too
277
- const person = result.entities.find((e) => e.label === "PERSON");
278
- const org = result.entities.find((e) => e.label === "ORGANIZATION");
279
-
280
- expect(person).toBeDefined();
281
- expect(org).toBeDefined();
282
- });
283
-
284
- it("falls back to regex-only when GLiNER is not initialized", async () => {
285
- const fallbackScanner = new Scanner(makeConfig());
286
- // Do NOT call initialize — GLiNER stays unavailable
287
-
288
- const result = await fallbackScanner.scan(
289
- "John Smith at john@example.com",
290
- );
291
-
292
- // Should still find the email via regex
293
- const email = result.entities.find((e) => e.label === "EMAIL");
294
- expect(email).toBeDefined();
295
- expect(email!.source).toBe("regex");
296
-
297
- // Should NOT find person because GLiNER is not available
298
- const person = result.entities.find((e) => e.label === "PERSON");
299
- expect(person).toBeUndefined();
300
- });
301
-
302
- it("empty text returns empty entities", async () => {
303
- const result = await scanner.scan("");
304
-
305
- expect(result.entities).toEqual([]);
306
- expect(result.text).toBe("");
307
- });
308
-
309
- it("entities are sorted by start position after merge", async () => {
310
- const result = await scanner.scan(
311
- "John Smith can be reached at john@example.com for details.",
312
- );
313
-
314
- for (let i = 1; i < result.entities.length; i++) {
315
- expect(result.entities[i].start).toBeGreaterThanOrEqual(
316
- result.entities[i - 1].start,
317
- );
318
- }
319
- });
320
-
321
- it("passes custom_entities from config to GLiNER engine", async () => {
322
- const customScanner = new Scanner(
323
- makeConfig({ custom_entities: ["product", "event"] }),
324
- );
325
- await customScanner.initialize();
326
-
327
- // Should not throw, custom labels are set on the engine
328
- const result = await customScanner.scan("John Smith attended the event.");
329
- expect(result.entities.length).toBeGreaterThanOrEqual(1);
330
- });
331
-
332
- it("handles text with only regex-detectable entities", async () => {
333
- const result = await scanner.scan(
334
- "Send to test@example.com and call 555-123-4567.",
335
- );
336
-
337
- expect(result.entities.length).toBeGreaterThanOrEqual(1);
338
- const email = result.entities.find((e) => e.label === "EMAIL");
339
- expect(email).toBeDefined();
340
- });
341
-
342
- it("handles text with no detectable entities", async () => {
343
- const result = await scanner.scan("Hello world, this is a simple test.");
344
-
345
- expect(result.entities).toEqual([]);
346
- expect(result.text).toBe("Hello world, this is a simple test.");
347
- });
348
- });