@datafog/fogclaw 0.1.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +39 -0
- package/dist/backlog-tools.d.ts +57 -0
- package/dist/backlog-tools.d.ts.map +1 -0
- package/dist/backlog-tools.js +173 -0
- package/dist/backlog-tools.js.map +1 -0
- package/dist/backlog.d.ts +82 -0
- package/dist/backlog.d.ts.map +1 -0
- package/dist/backlog.js +169 -0
- package/dist/backlog.js.map +1 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +6 -0
- package/dist/config.js.map +1 -1
- package/dist/extract.d.ts +28 -0
- package/dist/extract.d.ts.map +1 -0
- package/dist/extract.js +91 -0
- package/dist/extract.js.map +1 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +96 -3
- package/dist/index.js.map +1 -1
- package/dist/message-sending-handler.d.ts +41 -0
- package/dist/message-sending-handler.d.ts.map +1 -0
- package/dist/message-sending-handler.js +54 -0
- package/dist/message-sending-handler.js.map +1 -0
- package/dist/tool-result-handler.d.ts +37 -0
- package/dist/tool-result-handler.d.ts.map +1 -0
- package/dist/tool-result-handler.js +95 -0
- package/dist/tool-result-handler.js.map +1 -0
- package/dist/types.d.ts +16 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -1
- package/openclaw.plugin.json +11 -1
- package/package.json +7 -1
- package/.github/workflows/harness-docs.yml +0 -30
- package/AGENTS.md +0 -28
- package/docs/DATA.md +0 -28
- package/docs/DESIGN.md +0 -17
- package/docs/DOMAIN_DOCS.md +0 -30
- package/docs/FRONTEND.md +0 -24
- package/docs/OBSERVABILITY.md +0 -25
- package/docs/PLANS.md +0 -171
- package/docs/PRODUCT_SENSE.md +0 -20
- package/docs/RELIABILITY.md +0 -60
- package/docs/SECURITY.md +0 -50
- package/docs/design-docs/core-beliefs.md +0 -17
- package/docs/design-docs/index.md +0 -8
- package/docs/generated/README.md +0 -36
- package/docs/generated/memory.md +0 -1
- package/docs/plans/2026-02-16-fogclaw-design.md +0 -172
- package/docs/plans/2026-02-16-fogclaw-implementation.md +0 -1606
- package/docs/plans/README.md +0 -15
- package/docs/plans/active/2026-02-16-feat-openclaw-official-submission-plan.md +0 -386
- package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +0 -328
- package/docs/plans/active/2026-02-17-feat-submit-fogclaw-to-openclaw-plan.md +0 -244
- package/docs/plans/tech-debt-tracker.md +0 -42
- package/docs/plugins/fogclaw.md +0 -101
- package/docs/runbooks/address-review-findings.md +0 -30
- package/docs/runbooks/ci-failures.md +0 -46
- package/docs/runbooks/code-review.md +0 -34
- package/docs/runbooks/merge-change.md +0 -28
- package/docs/runbooks/pull-request.md +0 -45
- package/docs/runbooks/record-evidence.md +0 -43
- package/docs/runbooks/reproduce-bug.md +0 -42
- package/docs/runbooks/respond-to-feedback.md +0 -42
- package/docs/runbooks/review-findings.md +0 -31
- package/docs/runbooks/submit-openclaw-plugin.md +0 -68
- package/docs/runbooks/update-agents-md.md +0 -59
- package/docs/runbooks/update-domain-docs.md +0 -42
- package/docs/runbooks/validate-current-state.md +0 -41
- package/docs/runbooks/verify-release.md +0 -69
- package/docs/specs/2026-02-16-feat-openclaw-official-submission-spec.md +0 -115
- package/docs/specs/2026-02-17-feat-submit-fogclaw-to-openclaw.md +0 -125
- package/docs/specs/README.md +0 -5
- package/docs/specs/index.md +0 -8
- package/docs/spikes/README.md +0 -8
- package/fogclaw.config.example.json +0 -33
- package/scripts/ci/he-docs-config.json +0 -123
- package/scripts/ci/he-docs-drift.sh +0 -112
- package/scripts/ci/he-docs-lint.sh +0 -234
- package/scripts/ci/he-plans-lint.sh +0 -354
- package/scripts/ci/he-runbooks-lint.sh +0 -445
- package/scripts/ci/he-specs-lint.sh +0 -258
- package/scripts/ci/he-spikes-lint.sh +0 -249
- package/scripts/runbooks/select-runbooks.sh +0 -154
- package/src/config.ts +0 -183
- package/src/engines/gliner.ts +0 -240
- package/src/engines/regex.ts +0 -71
- package/src/index.ts +0 -372
- package/src/redactor.ts +0 -51
- package/src/scanner.ts +0 -196
- package/src/types.ts +0 -71
- package/tests/config.test.ts +0 -78
- package/tests/gliner.test.ts +0 -289
- package/tests/plugin-smoke.test.ts +0 -143
- package/tests/redactor.test.ts +0 -320
- package/tests/regex.test.ts +0 -345
- package/tests/scanner.test.ts +0 -348
- package/tsconfig.json +0 -20
package/src/redactor.ts
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import { createHash } from "node:crypto";
|
|
2
|
-
import type { Entity, RedactResult, RedactStrategy } from "./types.js";
|
|
3
|
-
|
|
4
|
-
/**
 * Replace every detected entity span in `text` with a placeholder.
 *
 * Entities are processed from the highest `start` offset down to the lowest so
 * that each splice leaves the offsets of the not-yet-processed (earlier) spans
 * untouched. Placeholders are generated in that same right-to-left order, so
 * with the "token" strategy the last occurrence of a label in the text is the
 * one numbered `_1`.
 *
 * @param text - The original text the entity offsets refer to.
 * @param entities - Spans to replace; `start`/`end` are used as slice offsets
 *   (`end` exclusive). The array itself is not mutated.
 * @param strategy - Placeholder style; defaults to "token".
 * @returns The rewritten text, a placeholder-to-original-text mapping, and the
 *   entities as passed in (a fresh empty array when none were given). When two
 *   entities produce the same placeholder, the mapping keeps only the entry
 *   written last.
 */
export function redact(
  text: string,
  entities: Entity[],
  strategy: RedactStrategy = "token",
): RedactResult {
  if (entities.length === 0) {
    return { redacted_text: text, mapping: {}, entities: [] };
  }

  const labelCounters: Record<string, number> = {};
  const mapping: Record<string, string> = {};

  // Work on a copy, ordered right-to-left by start offset.
  const rightToLeft = entities.slice().sort((left, right) => right.start - left.start);

  let redactedText = text;
  rightToLeft.forEach((entity) => {
    const placeholder = makeReplacement(entity, strategy, labelCounters);
    mapping[placeholder] = entity.text;

    const head = redactedText.slice(0, entity.start);
    const tail = redactedText.slice(entity.end);
    redactedText = `${head}${placeholder}${tail}`;
  });

  return { redacted_text: redactedText, mapping, entities };
}
|
|
29
|
-
|
|
30
|
-
/**
 * Build the placeholder that stands in for one detected entity.
 *
 * - "token": `[LABEL_n]`, where `n` is a per-label running counter stored in
 *   `counters` (mutated in place).
 * - "mask": one `*` per UTF-16 code unit of the entity text, at least one.
 * - "hash": `[LABEL_<first 12 hex chars of SHA-256(entity.text)>]`.
 *
 * @param entity - The entity being replaced; only `label` and `text` are read.
 * @param strategy - Which placeholder style to produce. A value outside the
 *   declared union (possible from untyped callers or unvalidated config) falls
 *   back to "token" so the span is still replaced by a well-formed placeholder
 *   instead of the literal string "undefined".
 * @param counters - Per-label counters shared across the calls of one redaction
 *   pass.
 * @returns The placeholder string.
 */
function makeReplacement(
  entity: Entity,
  strategy: RedactStrategy,
  counters: Record<string, number>,
): string {
  switch (strategy) {
    case "mask": {
      return "*".repeat(Math.max(entity.text.length, 1));
    }
    case "hash": {
      const digest = createHash("sha256")
        .update(entity.text)
        .digest("hex")
        .slice(0, 12);
      return `[${entity.label}_${digest}]`;
    }
    case "token":
    default: {
      // Only trust counters this pass has written itself: a plain object also
      // exposes inherited members (e.g. "constructor", "toString"), which would
      // otherwise be picked up as the "previous count" for such a label.
      const previous = Object.prototype.hasOwnProperty.call(counters, entity.label)
        ? counters[entity.label]
        : 0;
      const next = previous + 1;
      counters[entity.label] = next;
      return `[${entity.label}_${next}]`;
    }
  }
}
|
package/src/scanner.ts
DELETED
|
@@ -1,196 +0,0 @@
|
|
|
1
|
-
import type { Entity, FogClawConfig } from "./types.js";
|
|
2
|
-
import { canonicalType } from "./types.js";
|
|
3
|
-
import { RegexEngine } from "./engines/regex.js";
|
|
4
|
-
import { GlinerEngine } from "./engines/gliner.js";
|
|
5
|
-
|
|
6
|
-
/**
 * Pre-processed form of the configured allowlist, built once so that each
 * entity check is a set lookup or a regex test.
 */
type AllowlistPatternCache = {
  // Normalized (trimmed, lower-cased) exact values allowed for any entity type.
  values: Set<string>;
  // Compiled allowlist patterns, tested against the raw entity text.
  patterns: RegExp[];
  // Normalized exact values allowed per entity type, keyed by canonical type.
  entityValues: Map<string, Set<string>>;
};
|
|
11
|
-
|
|
12
|
-
/**
 * Canonical comparison form for allowlist values and entity text:
 * surrounding whitespace removed, then lower-cased.
 */
function normalizeAllowlistValue(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
|
15
|
-
|
|
16
|
-
/**
 * Compile allowlist pattern sources into case-insensitive regular expressions.
 *
 * @param value - Pattern sources; a missing or empty list yields an empty array.
 * @returns One `RegExp` (flag `i`) per source, in input order.
 * @throws SyntaxError (from the `RegExp` constructor) if a source is not a
 *   valid pattern.
 */
function buildPatternMaps(value: string[] | undefined): RegExp[] {
  const compiled: RegExp[] = [];
  for (const source of value ?? []) {
    compiled.push(new RegExp(source, "i"));
  }
  return compiled;
}
|
|
23
|
-
|
|
24
|
-
/**
 * Detects entities in text by combining a synchronous regex engine with an
 * optional GLiNER engine, then applying the configured allowlist and
 * per-entity confidence thresholds.
 */
export class Scanner {
  private regexEngine: RegexEngine;
  private glinerEngine: GlinerEngine;
  // Stays false until initialize() succeeds; scan() is regex-only while false.
  private glinerAvailable = false;
  private config: FogClawConfig;
  private allowlist: AllowlistPatternCache;

  /**
   * Builds both engines and the allowlist cache from `config`.
   * The GLiNER engine is constructed here but not loaded; call `initialize()`.
   */
  constructor(config: FogClawConfig) {
    this.config = config;
    this.regexEngine = new RegexEngine();

    const glinerThreshold = this.computeGlinerThreshold(config);
    this.glinerEngine = new GlinerEngine(config.model, glinerThreshold);
    if (config.custom_entities.length > 0) {
      this.glinerEngine.setCustomLabels(config.custom_entities);
    }

    this.allowlist = this.buildAllowlistCache(config.allowlist);
  }

  /**
   * Tries to initialize the GLiNER engine. Failure is logged as a warning and
   * leaves the scanner in regex-only mode; this method never rejects because
   * of an engine error.
   */
  async initialize(): Promise<void> {
    try {
      await this.glinerEngine.initialize();
      this.glinerAvailable = true;
    } catch (err) {
      console.warn(
        `[fogclaw] GLiNER failed to initialize, falling back to regex-only mode: ${err instanceof Error ? err.message : String(err)}`,
      );
      this.glinerAvailable = false;
    }
  }

  /**
   * Scans `text` and returns the merged, de-duplicated entities together with
   * the unmodified input text.
   *
   * @param text - Text to scan; an empty string short-circuits to no entities.
   * @param extraLabels - Additional labels forwarded to the GLiNER engine for
   *   this call only.
   */
  async scan(text: string, extraLabels?: string[]): Promise<{ entities: Entity[]; text: string }> {
    if (!text) return { entities: [], text };

    // Step 1: Regex pass (always runs, synchronous)
    const regexEntities = this.filterByPolicy(this.regexEngine.scan(text));

    // Step 2: GLiNER pass (if available)
    let glinerEntities: Entity[] = [];
    if (this.glinerAvailable) {
      try {
        glinerEntities = await this.glinerEngine.scan(text, extraLabels);
        glinerEntities = this.filterByConfidence(glinerEntities);
        glinerEntities = this.filterByPolicy(glinerEntities);
      } catch (err) {
        // A failing model pass must not lose the regex results.
        console.warn(
          `[fogclaw] GLiNER scan failed, using regex results only: ${
            err instanceof Error ? err.message : String(err)
          }`,
        );
      }
    }

    // Step 3: Merge and deduplicate
    const merged = deduplicateEntities([...regexEntities, ...glinerEntities]);

    return { entities: merged, text };
  }

  /** Keeps entities whose confidence meets the threshold for their label. */
  private filterByConfidence(entities: Entity[]): Entity[] {
    return entities.filter((entity) => {
      const threshold = this.getThresholdForLabel(entity.label);
      return entity.confidence >= threshold;
    });
  }

  /** Drops allowlisted entities; returns the input array as-is when no allowlist is configured. */
  private filterByPolicy(entities: Entity[]): Entity[] {
    if (
      this.allowlist.values.size === 0 &&
      this.allowlist.patterns.length === 0 &&
      this.allowlist.entityValues.size === 0
    ) {
      return entities;
    }

    return entities.filter((entity) => !this.shouldAllowlistEntity(entity));
  }

  /**
   * True when the entity matches a global allowlist value, a global allowlist
   * pattern, or a value allowlisted for its entity type.
   */
  private shouldAllowlistEntity(entity: Entity): boolean {
    const normalizedText = normalizeAllowlistValue(entity.text);

    if (this.allowlist.values.has(normalizedText)) {
      return true;
    }

    if (this.allowlist.patterns.some((pattern) => pattern.test(entity.text))) {
      return true;
    }

    // The cache is keyed by canonical type, so canonicalize the label before
    // the lookup (as getThresholdForLabel does); otherwise an alias or
    // differently-cased label would silently miss its allowlist.
    const entityValues = this.allowlist.entityValues.get(canonicalType(entity.label));
    if (entityValues && entityValues.has(normalizedText)) {
      return true;
    }

    return false;
  }

  /** Per-label threshold if configured for the canonical label, else the global threshold. */
  private getThresholdForLabel(label: string): number {
    const canonicalLabel = canonicalType(label);
    return this.config.entityConfidenceThresholds[canonicalLabel] ?? this.config.confidence_threshold;
  }

  /**
   * Threshold handed to the GLiNER engine: the lowest of the global and all
   * per-entity thresholds. Presumably this keeps the engine from discarding
   * candidates that a lower per-entity threshold would accept; the exact
   * filtering is then done by filterByConfidence.
   */
  private computeGlinerThreshold(config: FogClawConfig): number {
    const thresholds = Object.values(config.entityConfidenceThresholds);
    if (thresholds.length === 0) {
      return config.confidence_threshold;
    }

    return Math.min(config.confidence_threshold, ...thresholds);
  }

  /** Normalizes and indexes the configured allowlist for fast lookups. */
  private buildAllowlistCache(allowlist: FogClawConfig["allowlist"]): AllowlistPatternCache {
    const globalValues = new Set(
      allowlist.values.map((value) => normalizeAllowlistValue(value)),
    );

    const globalPatterns = buildPatternMaps(allowlist.patterns);

    const entityValues = new Map<string, Set<string>>();
    for (const [entityType, values] of Object.entries(allowlist.entities)) {
      const canonical = canonicalType(entityType);
      // Several config keys can canonicalize to the same type (e.g. "PER" and
      // "PERSON"); merge into one set instead of letting the last key win.
      const bucket = entityValues.get(canonical) ?? new Set<string>();
      for (const value of values) {
        const normalized = normalizeAllowlistValue(value);
        if (normalized.length > 0) {
          bucket.add(normalized);
        }
      }
      entityValues.set(canonical, bucket);
    }

    return {
      values: globalValues,
      patterns: globalPatterns,
      entityValues,
    };
  }

  /** Whether the GLiNER engine initialized successfully. */
  get isGlinerAvailable(): boolean {
    return this.glinerAvailable;
  }
}
|
|
163
|
-
|
|
164
|
-
/**
 * Remove overlapping entity spans. When two entities overlap,
 * keep the one with higher confidence. If equal, prefer regex
 * (then the span that starts earlier).
 *
 * Candidates are considered from strongest to weakest and a candidate is kept
 * only if it overlaps none of the spans already kept. Deciding in this order
 * means an entity is only ever discarded because of a span that is actually
 * retained, so a chain such as A[0,10] / B[5,20] / C[12,15] with rising
 * confidence keeps both A and C rather than C alone.
 *
 * The overlap check is quadratic in the number of entities, which is expected
 * to be small for a single scanned text.
 *
 * @param entities - Detected entities, in any order; not mutated.
 * @returns Non-overlapping entities ordered by `start`. Inputs of length 0 or 1
 *   are returned as-is.
 */
function deduplicateEntities(entities: Entity[]): Entity[] {
  if (entities.length <= 1) return entities;

  // Strongest first: confidence, then regex over gliner, then earlier start.
  const ranked = [...entities].sort((a, b) => {
    if (a.confidence !== b.confidence) return b.confidence - a.confidence;
    if (a.source !== b.source) return a.source === "regex" ? -1 : 1;
    return a.start - b.start;
  });

  const kept: Entity[] = [];
  for (const candidate of ranked) {
    // Same adjacency rule as a start/end sweep: touching spans do not overlap.
    const overlapsKept = kept.some(
      (winner) => candidate.start < winner.end && winner.start < candidate.end,
    );
    if (!overlapsKept) {
      kept.push(candidate);
    }
  }

  // Callers receive spans in document order.
  return kept.sort((a, b) => a.start - b.start);
}
|
package/src/types.ts
DELETED
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
/** One detected span of text together with its classification. */
export interface Entity {
  /** The matched text. */
  text: string;
  /** Entity type label (e.g. "PERSON"). */
  label: string;
  /** Offset of the first character of the span in the scanned text. */
  start: number;
  /** Offset just past the last character of the span (exclusive). */
  end: number;
  /** Detection score; compared against the configured thresholds. */
  confidence: number;
  /** Which engine produced the entity. */
  source: "regex" | "gliner";
}

/** How a detected span is replaced when redacting. */
export type RedactStrategy = "token" | "mask" | "hash";

/** Action that can be applied to a detected entity. */
export type GuardrailAction = "redact" | "block" | "warn";

/** Confidence threshold per entity type. */
export interface EntityConfidenceThresholds {
  [entityType: string]: number;
}

/** Values that should not be reported as entities. */
export interface EntityAllowlist {
  /** Exact values allowed regardless of entity type. */
  values: string[];
  /** Regular-expression sources; a match allows the entity. */
  patterns: string[];
  /** Exact values allowed per entity type. */
  entities: Record<string, string[]>;
}

/** Plugin configuration. */
export interface FogClawConfig {
  enabled: boolean;
  /** Default action — presumably used when `entityActions` has no entry; confirm in the guardrail code. */
  guardrail_mode: GuardrailAction;
  redactStrategy: RedactStrategy;
  /** Model identifier or path handed to the GLiNER engine. */
  model: string;
  /** Global confidence threshold, used when no per-entity threshold applies. */
  confidence_threshold: number;
  /** Extra labels registered with the GLiNER engine. */
  custom_entities: string[];
  /** Action per entity type. */
  entityActions: Record<string, GuardrailAction>;
  /** Per-entity overrides of `confidence_threshold`. */
  entityConfidenceThresholds: EntityConfidenceThresholds;
  allowlist: EntityAllowlist;
  auditEnabled: boolean;
}

/** Result of scanning a text. */
export interface ScanResult {
  entities: Entity[];
  /** The scanned text. */
  text: string;
}

/** Result of redacting a text. */
export interface RedactResult {
  /** The text with entity spans replaced by placeholders. */
  redacted_text: string;
  /** Placeholder → original entity text. */
  mapping: Record<string, string>;
  entities: Entity[];
}

/** Entities grouped by the action chosen for them. */
export interface GuardrailPlan {
  blocked: Entity[];
  warned: Entity[];
  redacted: Entity[];
}
|
|
53
|
-
|
|
54
|
-
/**
 * Alias → canonical entity type. Keys are upper-case; types that are not
 * listed are already considered canonical.
 */
export const CANONICAL_TYPE_MAP: Record<string, string> = {
  DOB: "DATE",
  ZIP: "ZIP_CODE",
  PER: "PERSON",
  ORG: "ORGANIZATION",
  GPE: "LOCATION",
  LOC: "LOCATION",
  FAC: "ADDRESS",
  PHONE_NUMBER: "PHONE",
  SOCIAL_SECURITY_NUMBER: "SSN",
  CREDIT_CARD_NUMBER: "CREDIT_CARD",
  DATE_OF_BIRTH: "DATE",
};

/**
 * Resolve an entity type name to its canonical form.
 *
 * The name is upper-cased and trimmed; if the result is a known alias the
 * mapped type is returned, otherwise the upper-cased, trimmed name itself.
 *
 * @param entityType - Entity type in any casing, possibly padded with whitespace.
 * @returns The canonical, upper-case entity type.
 */
export function canonicalType(entityType: string): string {
  const key = entityType.toUpperCase().trim();
  const alias = CANONICAL_TYPE_MAP[key];
  return alias ?? key;
}
|
package/tests/config.test.ts
DELETED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
|
|
3
|
-
import { loadConfig } from "../src/config.js";
|
|
4
|
-
|
|
5
|
-
// Covers loadConfig: defaults for the policy fields, canonicalization of
// entity-type keys, and rejection of invalid values.
describe("FogClaw config", () => {
  // An empty input still yields the policy fields, with empty defaults.
  it("loads defaults for new policy fields", () => {
    const config = loadConfig({});

    expect(config.entityConfidenceThresholds).toEqual({});
    expect(config.allowlist).toMatchObject({
      values: [],
      patterns: [],
      entities: {},
    });
  });

  // Lower-case keys come back in canonical upper-case form.
  it("canonicalizes per-entity confidence threshold keys", () => {
    const config = loadConfig({
      entityConfidenceThresholds: {
        person: 0.7,
      },
    });

    expect(config.entityConfidenceThresholds).toEqual({
      PERSON: 0.7,
    });
  });

  // Thresholds outside [0, 1] are rejected with a message naming the key.
  it("rejects invalid per-entity confidence thresholds", () => {
    expect(() =>
      loadConfig({
        entityConfidenceThresholds: {
          PERSON: 1.2,
        },
      }),
    ).toThrow('entityConfidenceThresholds["PERSON"] must be between 0 and 1, got 1.2');
  });

  // "[" is not a valid regular expression, so loading must fail.
  it("validates allowlist regex patterns", () => {
    expect(() =>
      loadConfig({
        allowlist: {
          values: ["ok@example.com"],
          patterns: ["["],
          entities: {
            PERSON: ["John"],
          },
        },
      }),
    ).toThrow(/invalid regex pattern/);
  });

  // Allowlist entity keys are canonicalized; the values keep their casing.
  it("canonicalizes allowlist entity keys", () => {
    const config = loadConfig({
      allowlist: {
        entities: {
          person: ["John"],
        },
      },
    });

    expect(config.allowlist.entities).toEqual({
      PERSON: ["John"],
    });
  });

  // Entity action keys are canonicalized the same way.
  it("canonicalizes entity action labels", () => {
    const config = loadConfig({
      entityActions: {
        person: "block",
      },
    });

    expect(config.entityActions).toEqual({
      PERSON: "block",
    });
  });
});
|
package/tests/gliner.test.ts
DELETED
|
@@ -1,289 +0,0 @@
|
|
|
1
|
-
import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
|
|
2
|
-
import fs from "node:fs/promises";
|
|
3
|
-
import os from "node:os";
|
|
4
|
-
import path from "node:path";
|
|
5
|
-
|
|
6
|
-
// Mock the gliner npm package so we don't need the actual 1.4GB model
vi.mock("gliner", () => {
  // Stand-in for the real Gliner class: "detects" three fixed phrases by
  // substring search, each only when its lower-case label was requested.
  class MockGliner {
    private config: any;

    constructor(config: any) {
      this.config = config;
    }

    async initialize(): Promise<void> {
      // No-op in mock
    }

    // Accepts three request shapes: a plain string, an array of strings
    // (first element is used), or an object with `texts` and `entities`.
    async inference(
      request: { texts: string[]; entities: string[] } | string | string[],
      maybeEntities?: string[],
      _flatNer = false,
      _threshold = 0.5,
    ): Promise<Array<{ text: string; label: string; score: number; start: number; end: number }>> {
      const text =
        typeof request === "string"
          ? request
          : Array.isArray(request)
            ? request[0] ?? ""
            : request.texts[0] ?? "";
      const requestEntities =
        typeof request === "object" && request !== null && "entities" in request
          ? request.entities
          : undefined;
      // Labels given as the second argument win over those inside the request object.
      const labels =
        Array.isArray(maybeEntities)
          ? maybeEntities
          : requestEntities ?? [];
      const results: Array<{ text: string; label: string; score: number; start: number; end: number }> = [];

      // Simulate entity detection for "John Smith"
      const johnIndex = text.indexOf("John Smith");
      if (johnIndex !== -1 && labels.includes("person")) {
        results.push({
          text: "John Smith",
          label: "person",
          score: 0.95,
          start: johnIndex,
          end: johnIndex + "John Smith".length,
        });
      }

      // Simulate entity detection for "Acme Corp"
      const acmeIndex = text.indexOf("Acme Corp");
      if (acmeIndex !== -1 && labels.includes("organization")) {
        results.push({
          text: "Acme Corp",
          label: "organization",
          score: 0.88,
          start: acmeIndex,
          end: acmeIndex + "Acme Corp".length,
        });
      }

      // Simulate entity detection for "New York"
      const nyIndex = text.indexOf("New York");
      if (nyIndex !== -1 && labels.includes("location")) {
        results.push({
          text: "New York",
          label: "location",
          score: 0.91,
          start: nyIndex,
          end: nyIndex + "New York".length,
        });
      }

      return results;
    }
  }

  return { Gliner: MockGliner };
});
|
|
83
|
-
|
|
84
|
-
// Same mock registered for the "gliner/node" entry point; the class body
// mirrors the "gliner" mock registered earlier in this file.
vi.mock("gliner/node", () => {
  // Stand-in for the real Gliner class: "detects" three fixed phrases by
  // substring search, each only when its lower-case label was requested.
  class MockGliner {
    private config: any;

    constructor(config: any) {
      this.config = config;
    }

    async initialize(): Promise<void> {
      // No-op in mock
    }

    // Accepts three request shapes: a plain string, an array of strings
    // (first element is used), or an object with `texts` and `entities`.
    async inference(
      request: { texts: string[]; entities: string[] } | string | string[],
      maybeEntities?: string[],
      _flatNer = false,
      _threshold = 0.5,
    ): Promise<Array<{ text: string; label: string; score: number; start: number; end: number }>> {
      const text =
        typeof request === "string"
          ? request
          : Array.isArray(request)
            ? request[0] ?? ""
            : request.texts[0] ?? "";
      const requestEntities =
        typeof request === "object" && request !== null && "entities" in request
          ? request.entities
          : undefined;
      // Labels given as the second argument win over those inside the request object.
      const labels =
        Array.isArray(maybeEntities)
          ? maybeEntities
          : requestEntities ?? [];
      const results: Array<{ text: string; label: string; score: number; start: number; end: number }> = [];

      // Simulate entity detection for "John Smith"
      const johnIndex = text.indexOf("John Smith");
      if (johnIndex !== -1 && labels.includes("person")) {
        results.push({
          text: "John Smith",
          label: "person",
          score: 0.95,
          start: johnIndex,
          end: johnIndex + "John Smith".length,
        });
      }

      // Simulate entity detection for "Acme Corp"
      const acmeIndex = text.indexOf("Acme Corp");
      if (acmeIndex !== -1 && labels.includes("organization")) {
        results.push({
          text: "Acme Corp",
          label: "organization",
          score: 0.88,
          start: acmeIndex,
          end: acmeIndex + "Acme Corp".length,
        });
      }

      // Simulate entity detection for "New York"
      const nyIndex = text.indexOf("New York");
      if (nyIndex !== -1 && labels.includes("location")) {
        results.push({
          text: "New York",
          label: "location",
          score: 0.91,
          start: nyIndex,
          end: nyIndex + "New York".length,
        });
      }

      return results;
    }
  }

  return { Gliner: MockGliner };
});
|
|
160
|
-
|
|
161
|
-
import { GlinerEngine } from "../src/engines/gliner.js";
|
|
162
|
-
|
|
163
|
-
// Placeholder model file in the OS temp directory. Its content is a dummy
// string; presumably the engine only needs the path to exist — confirm in
// GlinerEngine.
const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-gliner-model-test.onnx");

// Create the placeholder file once before any test runs.
beforeAll(async () => {
  await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
});

// Best-effort cleanup: a failure to delete the file is ignored.
afterAll(async () => {
  await fs.unlink(TEST_ONNX_MODEL_PATH).catch(() => undefined);
});
|
|
172
|
-
|
|
173
|
-
// Exercises GlinerEngine against the mocked gliner package: label
// canonicalization, offsets, confidence pass-through, and lifecycle checks.
describe("GlinerEngine", () => {
  let engine: GlinerEngine;

  // Every test gets a freshly constructed and initialized engine.
  beforeEach(async () => {
    engine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
    await engine.initialize();
  });

  it("detects person entities with canonical PERSON label", async () => {
    const entities = await engine.scan("My name is John Smith and I live here.");

    expect(entities).toHaveLength(1);
    expect(entities[0].text).toBe("John Smith");
    expect(entities[0].label).toBe("PERSON");
  });

  it("detects organization entities with canonical ORGANIZATION label", async () => {
    const entities = await engine.scan("I work at Acme Corp downtown.");

    expect(entities).toHaveLength(1);
    expect(entities[0].text).toBe("Acme Corp");
    expect(entities[0].label).toBe("ORGANIZATION");
  });

  it("detects multiple entity types in the same text", async () => {
    const entities = await engine.scan(
      "John Smith works at Acme Corp in New York.",
    );

    expect(entities).toHaveLength(3);

    const labels = entities.map((e) => e.label);
    expect(labels).toContain("PERSON");
    expect(labels).toContain("ORGANIZATION");
    expect(labels).toContain("LOCATION");
  });

  it("returns empty array for text with no entities", async () => {
    const entities = await engine.scan("Hello world, this is a test.");

    expect(entities).toEqual([]);
  });

  it("returns empty array for empty string input", async () => {
    const entities = await engine.scan("");

    expect(entities).toEqual([]);
  });

  it("allows setting custom labels without crashing", async () => {
    expect(() => engine.setCustomLabels(["product", "event"])).not.toThrow();

    // Scan still works after setting custom labels
    const entities = await engine.scan("John Smith attended the event.");
    expect(entities).toHaveLength(1);
    expect(entities[0].label).toBe("PERSON");
  });

  it("applies canonical type mapping (lowercase person -> PERSON)", async () => {
    // The mock returns lowercase "person" as label; canonicalType should map it to "PERSON"
    const entities = await engine.scan("John Smith is here.");

    expect(entities[0].label).toBe("PERSON");
    // Verify it's not lowercase
    expect(entities[0].label).not.toBe("person");
  });

  it("sets source to gliner for all detected entities", async () => {
    const entities = await engine.scan(
      "John Smith works at Acme Corp in New York.",
    );

    for (const entity of entities) {
      expect(entity.source).toBe("gliner");
    }
  });

  it("confidence comes from model score", async () => {
    const entities = await engine.scan(
      "John Smith works at Acme Corp in New York.",
    );

    const person = entities.find((e) => e.label === "PERSON");
    const org = entities.find((e) => e.label === "ORGANIZATION");
    const loc = entities.find((e) => e.label === "LOCATION");

    // These match the scores set in our mock
    expect(person?.confidence).toBe(0.95);
    expect(org?.confidence).toBe(0.88);
    expect(loc?.confidence).toBe(0.91);
  });

  // An engine that was constructed but never initialized must refuse to scan.
  it("throws if scan is called before initialize", async () => {
    const uninitializedEngine = new GlinerEngine("some-model", 0.5);

    await expect(uninitializedEngine.scan("test")).rejects.toThrow(
      "GLiNER engine not initialized. Call initialize() first.",
    );
  });

  it("reports isInitialized correctly", async () => {
    const freshEngine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
    expect(freshEngine.isInitialized).toBe(false);

    await freshEngine.initialize();
    expect(freshEngine.isInitialized).toBe(true);
  });

  it("includes correct start and end offsets", async () => {
    const text = "Contact John Smith for details.";
    const entities = await engine.scan(text);

    expect(entities).toHaveLength(1);
    expect(entities[0].start).toBe(8); // "Contact " is 8 chars
    expect(entities[0].end).toBe(18); // 8 + "John Smith".length = 18
  });
});
|