@datafog/fogclaw 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/dist/backlog-tools.d.ts +57 -0
- package/dist/backlog-tools.d.ts.map +1 -0
- package/dist/backlog-tools.js +173 -0
- package/dist/backlog-tools.js.map +1 -0
- package/dist/backlog.d.ts +82 -0
- package/dist/backlog.d.ts.map +1 -0
- package/dist/backlog.js +169 -0
- package/dist/backlog.js.map +1 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +6 -0
- package/dist/config.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +87 -2
- package/dist/index.js.map +1 -1
- package/dist/message-sending-handler.d.ts +2 -1
- package/dist/message-sending-handler.d.ts.map +1 -1
- package/dist/message-sending-handler.js +5 -1
- package/dist/message-sending-handler.js.map +1 -1
- package/dist/tool-result-handler.d.ts +2 -1
- package/dist/tool-result-handler.d.ts.map +1 -1
- package/dist/tool-result-handler.js +5 -1
- package/dist/tool-result-handler.js.map +1 -1
- package/dist/types.d.ts +15 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/openclaw.plugin.json +11 -1
- package/package.json +7 -1
- package/.github/workflows/harness-docs.yml +0 -30
- package/AGENTS.md +0 -28
- package/docs/DATA.md +0 -28
- package/docs/DESIGN.md +0 -17
- package/docs/DOMAIN_DOCS.md +0 -30
- package/docs/FRONTEND.md +0 -24
- package/docs/OBSERVABILITY.md +0 -32
- package/docs/PLANS.md +0 -171
- package/docs/PRODUCT_SENSE.md +0 -20
- package/docs/RELIABILITY.md +0 -60
- package/docs/SECURITY.md +0 -52
- package/docs/design-docs/core-beliefs.md +0 -17
- package/docs/design-docs/index.md +0 -8
- package/docs/generated/README.md +0 -36
- package/docs/generated/memory.md +0 -1
- package/docs/plans/2026-02-16-fogclaw-design.md +0 -172
- package/docs/plans/2026-02-16-fogclaw-implementation.md +0 -1606
- package/docs/plans/README.md +0 -15
- package/docs/plans/active/2026-02-16-feat-openclaw-official-submission-plan.md +0 -386
- package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +0 -328
- package/docs/plans/active/2026-02-17-feat-submit-fogclaw-to-openclaw-plan.md +0 -244
- package/docs/plans/active/2026-02-17-feat-tool-result-pii-scanning-plan.md +0 -293
- package/docs/plans/tech-debt-tracker.md +0 -42
- package/docs/plugins/fogclaw.md +0 -101
- package/docs/runbooks/address-review-findings.md +0 -30
- package/docs/runbooks/ci-failures.md +0 -46
- package/docs/runbooks/code-review.md +0 -34
- package/docs/runbooks/merge-change.md +0 -28
- package/docs/runbooks/pull-request.md +0 -45
- package/docs/runbooks/record-evidence.md +0 -43
- package/docs/runbooks/reproduce-bug.md +0 -42
- package/docs/runbooks/respond-to-feedback.md +0 -42
- package/docs/runbooks/review-findings.md +0 -31
- package/docs/runbooks/submit-openclaw-plugin.md +0 -68
- package/docs/runbooks/update-agents-md.md +0 -59
- package/docs/runbooks/update-domain-docs.md +0 -42
- package/docs/runbooks/validate-current-state.md +0 -41
- package/docs/runbooks/verify-release.md +0 -69
- package/docs/specs/2026-02-16-feat-openclaw-official-submission-spec.md +0 -115
- package/docs/specs/2026-02-17-feat-outbound-message-pii-scanning-spec.md +0 -93
- package/docs/specs/2026-02-17-feat-submit-fogclaw-to-openclaw.md +0 -125
- package/docs/specs/2026-02-17-feat-tool-result-pii-scanning-spec.md +0 -122
- package/docs/specs/README.md +0 -5
- package/docs/specs/index.md +0 -8
- package/docs/spikes/README.md +0 -8
- package/fogclaw.config.example.json +0 -33
- package/scripts/ci/he-docs-config.json +0 -123
- package/scripts/ci/he-docs-drift.sh +0 -112
- package/scripts/ci/he-docs-lint.sh +0 -234
- package/scripts/ci/he-plans-lint.sh +0 -354
- package/scripts/ci/he-runbooks-lint.sh +0 -445
- package/scripts/ci/he-specs-lint.sh +0 -258
- package/scripts/ci/he-spikes-lint.sh +0 -249
- package/scripts/runbooks/select-runbooks.sh +0 -154
- package/src/config.ts +0 -183
- package/src/engines/gliner.ts +0 -240
- package/src/engines/regex.ts +0 -71
- package/src/extract.ts +0 -98
- package/src/index.ts +0 -381
- package/src/message-sending-handler.ts +0 -87
- package/src/redactor.ts +0 -51
- package/src/scanner.ts +0 -196
- package/src/tool-result-handler.ts +0 -133
- package/src/types.ts +0 -75
- package/tests/config.test.ts +0 -78
- package/tests/extract.test.ts +0 -185
- package/tests/gliner.test.ts +0 -289
- package/tests/message-sending-handler.test.ts +0 -244
- package/tests/plugin-smoke.test.ts +0 -250
- package/tests/redactor.test.ts +0 -320
- package/tests/regex.test.ts +0 -345
- package/tests/scanner.test.ts +0 -348
- package/tests/tool-result-handler.test.ts +0 -329
- package/tsconfig.json +0 -20
package/tests/regex.test.ts
DELETED
|
@@ -1,345 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import { RegexEngine } from "../src/engines/regex.js";
|
|
3
|
-
|
|
4
|
-
const engine = new RegexEngine();
|
|
5
|
-
|
|
6
|
-
/**
 * Scans `text` with the shared engine and checks each returned entity's
 * offsets: slicing `text` from `start` to `end` must reproduce the entity's
 * own `text`. Returns the scanned entities for further assertions.
 */
function assertSpans(text: string) {
  const found = engine.scan(text);
  found.forEach((entity) => {
    expect(text.slice(entity.start, entity.end)).toBe(entity.text);
  });
  return found;
}
|
|
14
|
-
|
|
15
|
-
// ---------------------------------------------------------------------------
|
|
16
|
-
// EMAIL
|
|
17
|
-
// ---------------------------------------------------------------------------
|
|
18
|
-
describe("EMAIL", () => {
  /** Span-checks a scan of `text` and keeps only the EMAIL entities. */
  const emailsIn = (text: string) =>
    assertSpans(text).filter((entity) => entity.label === "EMAIL");

  it("detects a simple email", () => {
    const found = emailsIn("Contact alice@example.com for info.");
    expect(found).toHaveLength(1);
    const [email] = found;
    expect(email.text).toBe("alice@example.com");
    expect(email.confidence).toBe(1.0);
    expect(email.source).toBe("regex");
  });

  it("detects email with subdomains", () => {
    const found = emailsIn("Send to bob@mail.example.co.uk now");
    expect(found).toHaveLength(1);
    expect(found[0].text).toBe("bob@mail.example.co.uk");
  });

  it("detects email with special chars in local part", () => {
    const found = emailsIn("user+tag@example.org");
    expect(found).toHaveLength(1);
    expect(found[0].text).toBe("user+tag@example.org");
  });

  it("does not match bare @-signs or partial addresses", () => {
    // Scanned directly (no span check): nothing is expected to match.
    const found = engine
      .scan("@ or foo@ or @bar")
      .filter((entity) => entity.label === "EMAIL");
    expect(found).toHaveLength(0);
  });
});
|
|
48
|
-
|
|
49
|
-
// ---------------------------------------------------------------------------
|
|
50
|
-
// PHONE
|
|
51
|
-
// ---------------------------------------------------------------------------
|
|
52
|
-
describe("PHONE", () => {
  // [test title, input text, expected matched phone text]
  const positives: ReadonlyArray<readonly [string, string, string]> = [
    ["detects US phone with dashes", "Call 555-123-4567 today.", "555-123-4567"],
    ["detects US phone with parentheses", "Phone: (555) 123-4567", "(555) 123-4567"],
    ["detects +1 prefix", "Reach me at +1-800-555-1234.", "+1-800-555-1234"],
    ["detects international format", "Number: +44 20 7946 0958", "+44 20 7946 0958"],
  ];

  for (const [title, input, expected] of positives) {
    it(title, () => {
      const phones = assertSpans(input).filter((entity) => entity.label === "PHONE");
      expect(phones).toHaveLength(1);
      expect(phones[0].text).toBe(expected);
    });
  }

  it("does not match short digit sequences", () => {
    const phones = engine
      .scan("Code 12345 here")
      .filter((entity) => entity.label === "PHONE");
    expect(phones).toHaveLength(0);
  });
});
|
|
87
|
-
|
|
88
|
-
// ---------------------------------------------------------------------------
|
|
89
|
-
// SSN
|
|
90
|
-
// ---------------------------------------------------------------------------
|
|
91
|
-
describe("SSN", () => {
  // [test title, input text, expected matched SSN text]
  const accepted: ReadonlyArray<readonly [string, string, string]> = [
    ["detects a valid SSN with dashes", "SSN: 123-45-6789", "123-45-6789"],
    ["detects a valid SSN without dashes", "SSN 123456789 filed.", "123456789"],
  ];

  // [test title, input text] — inputs that must yield no SSN entity
  const rejected: ReadonlyArray<readonly [string, string]> = [
    ["rejects SSN starting with 000", "Invalid SSN 000-12-3456"],
    ["rejects SSN starting with 666", "Invalid SSN 666-12-3456"],
    ["rejects SSN with 00 in middle group", "Invalid SSN 123-00-6789"],
    ["rejects SSN with 0000 in last group", "Invalid SSN 123-45-0000"],
  ];

  for (const [title, input, expected] of accepted) {
    it(title, () => {
      const ssns = assertSpans(input).filter((entity) => entity.label === "SSN");
      expect(ssns).toHaveLength(1);
      expect(ssns[0].text).toBe(expected);
    });
  }

  for (const [title, input] of rejected) {
    it(title, () => {
      const ssns = engine.scan(input).filter((entity) => entity.label === "SSN");
      expect(ssns).toHaveLength(0);
    });
  }
});
|
|
130
|
-
|
|
131
|
-
// ---------------------------------------------------------------------------
|
|
132
|
-
// CREDIT_CARD
|
|
133
|
-
// ---------------------------------------------------------------------------
|
|
134
|
-
describe("CREDIT_CARD", () => {
  // [test title, input text, expected matched card text]
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ["detects a Visa card (16 digits)", "Card: 4111111111111111", "4111111111111111"],
    ["detects a Mastercard", "MC 5500000000000004", "5500000000000004"],
    ["detects an Amex card", "Amex 378282246310005", "378282246310005"],
    ["detects card number with dashes", "Card 4111-1111-1111-1111 charged", "4111-1111-1111-1111"],
    ["detects card number with spaces", "Card 5500 0000 0000 0004 charged", "5500 0000 0000 0004"],
  ];

  for (const [title, input, expected] of cases) {
    it(title, () => {
      const cards = assertSpans(input).filter((entity) => entity.label === "CREDIT_CARD");
      expect(cards).toHaveLength(1);
      expect(cards[0].text).toBe(expected);
    });
  }
});
|
|
170
|
-
|
|
171
|
-
// ---------------------------------------------------------------------------
|
|
172
|
-
// IP_ADDRESS
|
|
173
|
-
// ---------------------------------------------------------------------------
|
|
174
|
-
describe("IP_ADDRESS", () => {
  // [test title, input text, expected matched address]
  const valid: ReadonlyArray<readonly [string, string, string]> = [
    ["detects a valid IPv4 address", "Server at 192.168.1.1 is up.", "192.168.1.1"],
    ["detects 0.0.0.0", "Bind to 0.0.0.0 for all interfaces.", "0.0.0.0"],
    ["detects 255.255.255.255", "Broadcast: 255.255.255.255", "255.255.255.255"],
  ];

  // [test title, input text, out-of-range address that must not be reported whole]
  const invalid: ReadonlyArray<readonly [string, string, string]> = [
    ["rejects IP with octet > 255", "Invalid 256.1.2.3 address", "256.1.2.3"],
    ["rejects IP with octet 999", "Bad IP 999.999.999.999", "999.999.999.999"],
  ];

  for (const [title, input, expected] of valid) {
    it(title, () => {
      const ips = assertSpans(input).filter((entity) => entity.label === "IP_ADDRESS");
      expect(ips).toHaveLength(1);
      expect(ips[0].text).toBe(expected);
    });
  }

  for (const [title, input, forbidden] of invalid) {
    it(title, () => {
      const ips = engine.scan(input).filter((entity) => entity.label === "IP_ADDRESS");
      // Only the complete invalid address is forbidden; any other IP_ADDRESS
      // entity the engine returns is tolerated by this check.
      ips.forEach((ip) => {
        expect(ip.text).not.toBe(forbidden);
      });
    });
  }
});
|
|
213
|
-
|
|
214
|
-
// ---------------------------------------------------------------------------
|
|
215
|
-
// DATE
|
|
216
|
-
// ---------------------------------------------------------------------------
|
|
217
|
-
describe("DATE", () => {
  // [test title, input text, expected matched date text]
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ["detects MM/DD/YYYY format", "Born on 01/15/1990 in NY.", "01/15/1990"],
    ["detects YYYY-MM-DD format", "Date: 2024-03-15 confirmed.", "2024-03-15"],
    ["detects Month DD, YYYY format", "On January 5, 2023 we met.", "January 5, 2023"],
    ["detects abbreviated month", "Meeting: Dec 25, 2022 at noon.", "Dec 25, 2022"],
    ["detects MM-DD-YY format", "Filed 03-15-90 in records.", "03-15-90"],
  ];

  for (const [title, input, expected] of cases) {
    it(title, () => {
      const dates = assertSpans(input).filter((entity) => entity.label === "DATE");
      expect(dates).toHaveLength(1);
      expect(dates[0].text).toBe(expected);
    });
  }
});
|
|
253
|
-
|
|
254
|
-
// ---------------------------------------------------------------------------
|
|
255
|
-
// ZIP_CODE
|
|
256
|
-
// ---------------------------------------------------------------------------
|
|
257
|
-
describe("ZIP_CODE", () => {
  /** Span-checks a scan of `text` and keeps only the ZIP_CODE entities. */
  const zipsIn = (text: string) =>
    assertSpans(text).filter((entity) => entity.label === "ZIP_CODE");

  it("detects a 5-digit ZIP", () => {
    const found = zipsIn("ZIP 90210 area.");
    expect(found).toHaveLength(1);
    expect(found[0].text).toBe("90210");
  });

  it("detects a ZIP+4", () => {
    const found = zipsIn("Mailing: 90210-1234 confirmed.");
    expect(found).toHaveLength(1);
    expect(found[0].text).toBe("90210-1234");
  });
});
|
|
272
|
-
|
|
273
|
-
// ---------------------------------------------------------------------------
|
|
274
|
-
// COMBINED / EDGE CASES
|
|
275
|
-
// ---------------------------------------------------------------------------
|
|
276
|
-
describe("Multiple entities in one text", () => {
  it("finds email, phone, and SSN in same text", () => {
    const found = assertSpans(
      "Contact alice@example.com or 555-123-4567. SSN: 123-45-6789.",
    );
    const labels = found.map(({ label }) => label);

    // Each of the three entity kinds present in the sentence must be reported.
    for (const wanted of ["EMAIL", "PHONE", "SSN"]) {
      expect(labels).toContain(wanted);
    }
  });

  it("finds multiple emails", () => {
    const emails = assertSpans("Send to a@b.com and c@d.org please.").filter(
      (entity) => entity.label === "EMAIL",
    );
    expect(emails).toHaveLength(2);
    // Results are expected in order of appearance in the text.
    const [first, second] = emails;
    expect(first.text).toBe("a@b.com");
    expect(second.text).toBe("c@d.org");
  });
});
|
|
297
|
-
|
|
298
|
-
describe("Empty and no-match inputs", () => {
  it("returns empty array for empty string", () => {
    const entities = engine.scan("");
    expect(entities).toEqual([]);
  });

  it("returns empty array for text with no PII", () => {
    // The sentence has no digits, @-signs, or date-like tokens, so any entity
    // at all would be a false positive. Assert on the unfiltered scan result
    // (a previously computed ZIP_CODE filter was never used and is removed).
    const entities = engine.scan("The quick brown fox jumps over the lazy dog.");
    expect(entities).toEqual([]);
  });
});
|
|
314
|
-
|
|
315
|
-
describe("Entity shape", () => {
  it("every entity has correct confidence and source", () => {
    const found = engine.scan("Email: test@test.com Phone: 555-123-4567");

    found.forEach((entity) => {
      // Regex hits carry a fixed confidence and identify their engine.
      expect(entity.confidence).toBe(1.0);
      expect(entity.source).toBe("regex");
      // Offsets are numeric and describe a non-empty span.
      expect(typeof entity.start).toBe("number");
      expect(typeof entity.end).toBe("number");
      expect(entity.end).toBeGreaterThan(entity.start);
      expect(typeof entity.text).toBe("string");
      expect(typeof entity.label).toBe("string");
    });
  });

  it("span offsets are correct for all entity types", () => {
    const parts = [
      "Email: user@site.com, Phone: (800) 555-0199, SSN: 321-54-9876, ",
      "Card: 4111111111111111, IP: 10.0.0.1, Date: 2024-06-15, ZIP: 60601",
    ];
    assertSpans(parts.join(""));
  });
});
|
|
337
|
-
|
|
338
|
-
describe("Repeated scan calls (lastIndex reset)", () => {
  it("produces the same results on consecutive calls", () => {
    const sample = "Email alice@example.com and call 555-123-4567.";
    // Two back-to-back scans of the same input must agree.
    const [first, second] = [engine.scan(sample), engine.scan(sample)];
    expect(first).toEqual(second);
  });
});
|
package/tests/scanner.test.ts
DELETED
|
@@ -1,348 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
beforeAll,
|
|
3
|
-
beforeEach,
|
|
4
|
-
afterAll,
|
|
5
|
-
describe,
|
|
6
|
-
it,
|
|
7
|
-
expect,
|
|
8
|
-
vi,
|
|
9
|
-
} from "vitest";
|
|
10
|
-
import fs from "node:fs/promises";
|
|
11
|
-
import os from "node:os";
|
|
12
|
-
import path from "node:path";
|
|
13
|
-
|
|
14
|
-
// Mock the gliner npm package so we don't need the actual model
vi.mock("gliner", () => {
  /** One fake detection produced by the mock. */
  interface MockDetection {
    text: string;
    label: string;
    score: number;
    start: number;
    end: number;
  }

  // Phrases the mock "recognizes", with the label and score it reports.
  const CANNED: ReadonlyArray<{ phrase: string; label: string; score: number }> = [
    { phrase: "John Smith", label: "person", score: 0.95 },
    { phrase: "Acme Corp", label: "organization", score: 0.88 },
  ];

  class MockGliner {
    async initialize() {}

    /**
     * Fake inference. Accepts a request object (`{ texts, entities }`), a
     * bare string, or a string array; only the first text is inspected.
     * Labels come from `maybeEntities` when it is an array, otherwise from
     * `request.entities`, otherwise none. Returns the canned detections
     * found in the text whose label was requested.
     */
    async inference(
      request: { texts: string[]; entities: string[] } | string | string[],
      maybeEntities?: string[],
      _flatNer = false,
      _threshold = 0.5,
    ): Promise<MockDetection[]> {
      const text =
        typeof request === "string"
          ? request
          : Array.isArray(request)
            ? (request[0] ?? "")
            : (request.texts[0] ?? "");
      const requestEntities =
        typeof request === "object" && request !== null && "entities" in request
          ? request.entities
          : undefined;
      const labels = Array.isArray(maybeEntities)
        ? maybeEntities
        : (requestEntities ?? []);

      const results: MockDetection[] = [];
      for (const { phrase, label, score } of CANNED) {
        const idx = text.indexOf(phrase);
        if (idx === -1) continue;
        // Span end is derived from the phrase so it cannot drift out of sync.
        results.push({ text: phrase, label, score, start: idx, end: idx + phrase.length });
      }

      // Only return results whose labels are requested
      return results.filter((r) => labels.includes(r.label));
    }
  }

  return { Gliner: MockGliner };
});
|
|
71
|
-
|
|
72
|
-
// Same fake as the "gliner" mock, registered for the "gliner/node" entry
// point. The factory is kept self-contained rather than sharing a helper.
vi.mock("gliner/node", () => {
  /** One fake detection produced by the mock. */
  interface MockDetection {
    text: string;
    label: string;
    score: number;
    start: number;
    end: number;
  }

  // Phrases the mock "recognizes", with the label and score it reports.
  const CANNED: ReadonlyArray<{ phrase: string; label: string; score: number }> = [
    { phrase: "John Smith", label: "person", score: 0.95 },
    { phrase: "Acme Corp", label: "organization", score: 0.88 },
  ];

  class MockGliner {
    async initialize() {}

    /**
     * Fake inference. Accepts a request object (`{ texts, entities }`), a
     * bare string, or a string array; only the first text is inspected.
     * Labels come from `maybeEntities` when it is an array, otherwise from
     * `request.entities`, otherwise none. Returns the canned detections
     * found in the text whose label was requested.
     */
    async inference(
      request: { texts: string[]; entities: string[] } | string | string[],
      maybeEntities?: string[],
      _flatNer = false,
      _threshold = 0.5,
    ): Promise<MockDetection[]> {
      const text =
        typeof request === "string"
          ? request
          : Array.isArray(request)
            ? (request[0] ?? "")
            : (request.texts[0] ?? "");
      const requestEntities =
        typeof request === "object" && request !== null && "entities" in request
          ? request.entities
          : undefined;
      const labels = Array.isArray(maybeEntities)
        ? maybeEntities
        : (requestEntities ?? []);

      const results: MockDetection[] = [];
      for (const { phrase, label, score } of CANNED) {
        const idx = text.indexOf(phrase);
        if (idx === -1) continue;
        // Span end is derived from the phrase so it cannot drift out of sync.
        results.push({ text: phrase, label, score, start: idx, end: idx + phrase.length });
      }

      // Only return results whose labels are requested
      return results.filter((r) => labels.includes(r.label));
    }
  }

  return { Gliner: MockGliner };
});
|
|
128
|
-
|
|
129
|
-
import { Scanner } from "../src/scanner.js";
|
|
130
|
-
import { DEFAULT_CONFIG } from "../src/config.js";
|
|
131
|
-
import type { FogClawConfig } from "../src/types.js";
|
|
132
|
-
|
|
133
|
-
// Placeholder model file in the OS temp directory, shared by every test here.
const TEST_ONNX_MODEL_PATH = path.join(
  os.tmpdir(),
  "fogclaw-scanner-gliner-model-test.onnx",
);

// Write the placeholder before any test runs.
// NOTE(review): presumably the scanner needs the model path to exist on disk — confirm.
beforeAll(async () => {
  await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
});

// Best-effort cleanup: a failed unlink is ignored.
afterAll(async () => {
  try {
    await fs.unlink(TEST_ONNX_MODEL_PATH);
  } catch {
    // Ignored on purpose.
  }
});
|
|
142
|
-
|
|
143
|
-
/**
 * Builds a scanner config for tests: the project defaults with `model`
 * pointed at the placeholder ONNX file, then `overrides` applied last so
 * they win over both.
 */
function makeConfig(overrides: Partial<FogClawConfig> = {}): FogClawConfig {
  const base: FogClawConfig = { ...DEFAULT_CONFIG, model: TEST_ONNX_MODEL_PATH };
  return { ...base, ...overrides };
}
|
|
150
|
-
|
|
151
|
-
describe("Scanner", () => {
  let scanner: Scanner;

  /** Builds a Scanner from `makeConfig(overrides)` and initializes it. */
  const initializedScanner = async (overrides: Partial<FogClawConfig> = {}) => {
    const instance = new Scanner(makeConfig(overrides));
    await instance.initialize();
    return instance;
  };

  /** First entity carrying `label`, or undefined when there is none. */
  const byLabel = <T extends { label: string }>(
    entities: readonly T[],
    label: string,
  ): T | undefined => entities.find((entity) => entity.label === label);

  beforeEach(async () => {
    scanner = await initializedScanner();
  });

  it("detects regex entities (email) without needing GLiNER", async () => {
    // Deliberately NOT calling initialize — GLiNER unavailable, regex should
    // still work.
    const regexOnly = new Scanner(makeConfig());

    const { entities } = await regexOnly.scan("Contact us at test@example.com please.");

    expect(entities.length).toBeGreaterThanOrEqual(1);
    const email = byLabel(entities, "EMAIL");
    expect(email).toBeDefined();
    expect(email?.text).toBe("test@example.com");
    expect(email?.source).toBe("regex");
  });

  it("detects GLiNER entities (person names)", async () => {
    const { entities } = await scanner.scan("My name is John Smith.");

    const person = byLabel(entities, "PERSON");
    expect(person).toBeDefined();
    expect(person?.text).toBe("John Smith");
    expect(person?.source).toBe("gliner");
    expect(person?.confidence).toBe(0.95);
  });

  it("merges results from both engines (email + person in same text)", async () => {
    const { entities } = await scanner.scan(
      "John Smith can be reached at john@example.com for details.",
    );

    const person = byLabel(entities, "PERSON");
    const email = byLabel(entities, "EMAIL");

    expect(person).toBeDefined();
    expect(email).toBeDefined();
    expect(person?.source).toBe("gliner");
    expect(email?.source).toBe("regex");
  });

  it("applies per-entity confidence threshold overrides", async () => {
    // The mocked PERSON score is 0.95, below this 0.98 threshold.
    const strictScanner = await initializedScanner({
      entityConfidenceThresholds: {
        PERSON: 0.98,
      },
    });

    const { entities } = await strictScanner.scan("My name is John Smith.");
    expect(byLabel(entities, "PERSON")).toBeUndefined();
  });

  it("supports allowlist exact matches across global and per-entity rules", async () => {
    const allowlistScanner = await initializedScanner({
      allowlist: {
        values: ["john@example.com"],
        patterns: ["^internal-"],
        entities: {
          PERSON: ["john smith"],
        },
      },
    });

    const { entities } = await allowlistScanner.scan(
      "John Smith can be reached at john@example.com.",
    );

    expect(byLabel(entities, "EMAIL")).toBeUndefined();
    expect(byLabel(entities, "PERSON")).toBeUndefined();
  });

  it("applies allowlist regex patterns", async () => {
    const allowlistScanner = await initializedScanner({
      allowlist: {
        values: [],
        patterns: ["test@example\\.com"],
        entities: {},
      },
    });

    const { entities } = await allowlistScanner.scan(
      "This is test@example.com for redaction.",
    );
    expect(byLabel(entities, "EMAIL")).toBeUndefined();
  });

  it("deduplicates overlapping spans keeping higher confidence", async () => {
    const { entities } = await scanner.scan("Contact John Smith today.");

    // No two surviving entities may share a start offset; overlapping
    // candidates should already have been resolved by dedup.
    const starts = entities.map((entity) => entity.start);
    expect(starts.length).toBe(new Set(starts).size);
  });

  it("returns original text in result", async () => {
    const input = "Hello John Smith, your email is test@example.com.";
    const result = await scanner.scan(input);

    expect(result.text).toBe(input);
  });

  it("accepts extra labels at scan time", async () => {
    // The mock only reports detections whose label was requested, so an
    // ORGANIZATION hit shows the extra label reached GLiNER.
    const { entities } = await scanner.scan("John Smith works at Acme Corp.", [
      "organization",
    ]);

    // Person is always in default labels, organization should be detected too
    expect(byLabel(entities, "PERSON")).toBeDefined();
    expect(byLabel(entities, "ORGANIZATION")).toBeDefined();
  });

  it("falls back to regex-only when GLiNER is not initialized", async () => {
    // Do NOT call initialize — GLiNER stays unavailable
    const fallbackScanner = new Scanner(makeConfig());

    const { entities } = await fallbackScanner.scan("John Smith at john@example.com");

    // Should still find the email via regex
    const email = byLabel(entities, "EMAIL");
    expect(email).toBeDefined();
    expect(email?.source).toBe("regex");

    // Should NOT find person because GLiNER is not available
    expect(byLabel(entities, "PERSON")).toBeUndefined();
  });

  it("empty text returns empty entities", async () => {
    const result = await scanner.scan("");

    expect(result.entities).toEqual([]);
    expect(result.text).toBe("");
  });

  it("entities are sorted by start position after merge", async () => {
    const { entities } = await scanner.scan(
      "John Smith can be reached at john@example.com for details.",
    );

    // Compare each entity (from the second onward) with its predecessor.
    entities.slice(1).forEach((entity, i) => {
      expect(entity.start).toBeGreaterThanOrEqual(entities[i].start);
    });
  });

  it("passes custom_entities from config to GLiNER engine", async () => {
    const customScanner = await initializedScanner({
      custom_entities: ["product", "event"],
    });

    // Should not throw, custom labels are set on the engine
    const { entities } = await customScanner.scan("John Smith attended the event.");
    expect(entities.length).toBeGreaterThanOrEqual(1);
  });

  it("handles text with only regex-detectable entities", async () => {
    const { entities } = await scanner.scan(
      "Send to test@example.com and call 555-123-4567.",
    );

    expect(entities.length).toBeGreaterThanOrEqual(1);
    expect(byLabel(entities, "EMAIL")).toBeDefined();
  });

  it("handles text with no detectable entities", async () => {
    const result = await scanner.scan("Hello world, this is a simple test.");

    expect(result.entities).toEqual([]);
    expect(result.text).toBe("Hello world, this is a simple test.");
  });
});
|