@datafog/fogclaw 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/CHANGELOG.md +42 -0
  2. package/README.md +39 -0
  3. package/dist/backlog-tools.d.ts +57 -0
  4. package/dist/backlog-tools.d.ts.map +1 -0
  5. package/dist/backlog-tools.js +173 -0
  6. package/dist/backlog-tools.js.map +1 -0
  7. package/dist/backlog.d.ts +82 -0
  8. package/dist/backlog.d.ts.map +1 -0
  9. package/dist/backlog.js +169 -0
  10. package/dist/backlog.js.map +1 -0
  11. package/dist/config.d.ts.map +1 -1
  12. package/dist/config.js +6 -0
  13. package/dist/config.js.map +1 -1
  14. package/dist/extract.d.ts +28 -0
  15. package/dist/extract.d.ts.map +1 -0
  16. package/dist/extract.js +91 -0
  17. package/dist/extract.js.map +1 -0
  18. package/dist/index.d.ts +2 -1
  19. package/dist/index.d.ts.map +1 -1
  20. package/dist/index.js +96 -3
  21. package/dist/index.js.map +1 -1
  22. package/dist/message-sending-handler.d.ts +41 -0
  23. package/dist/message-sending-handler.d.ts.map +1 -0
  24. package/dist/message-sending-handler.js +54 -0
  25. package/dist/message-sending-handler.js.map +1 -0
  26. package/dist/tool-result-handler.d.ts +37 -0
  27. package/dist/tool-result-handler.d.ts.map +1 -0
  28. package/dist/tool-result-handler.js +95 -0
  29. package/dist/tool-result-handler.js.map +1 -0
  30. package/dist/types.d.ts +16 -0
  31. package/dist/types.d.ts.map +1 -1
  32. package/dist/types.js +3 -0
  33. package/dist/types.js.map +1 -1
  34. package/openclaw.plugin.json +11 -1
  35. package/package.json +7 -1
  36. package/.github/workflows/harness-docs.yml +0 -30
  37. package/AGENTS.md +0 -28
  38. package/docs/DATA.md +0 -28
  39. package/docs/DESIGN.md +0 -17
  40. package/docs/DOMAIN_DOCS.md +0 -30
  41. package/docs/FRONTEND.md +0 -24
  42. package/docs/OBSERVABILITY.md +0 -25
  43. package/docs/PLANS.md +0 -171
  44. package/docs/PRODUCT_SENSE.md +0 -20
  45. package/docs/RELIABILITY.md +0 -60
  46. package/docs/SECURITY.md +0 -50
  47. package/docs/design-docs/core-beliefs.md +0 -17
  48. package/docs/design-docs/index.md +0 -8
  49. package/docs/generated/README.md +0 -36
  50. package/docs/generated/memory.md +0 -1
  51. package/docs/plans/2026-02-16-fogclaw-design.md +0 -172
  52. package/docs/plans/2026-02-16-fogclaw-implementation.md +0 -1606
  53. package/docs/plans/README.md +0 -15
  54. package/docs/plans/active/2026-02-16-feat-openclaw-official-submission-plan.md +0 -386
  55. package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +0 -328
  56. package/docs/plans/active/2026-02-17-feat-submit-fogclaw-to-openclaw-plan.md +0 -244
  57. package/docs/plans/tech-debt-tracker.md +0 -42
  58. package/docs/plugins/fogclaw.md +0 -101
  59. package/docs/runbooks/address-review-findings.md +0 -30
  60. package/docs/runbooks/ci-failures.md +0 -46
  61. package/docs/runbooks/code-review.md +0 -34
  62. package/docs/runbooks/merge-change.md +0 -28
  63. package/docs/runbooks/pull-request.md +0 -45
  64. package/docs/runbooks/record-evidence.md +0 -43
  65. package/docs/runbooks/reproduce-bug.md +0 -42
  66. package/docs/runbooks/respond-to-feedback.md +0 -42
  67. package/docs/runbooks/review-findings.md +0 -31
  68. package/docs/runbooks/submit-openclaw-plugin.md +0 -68
  69. package/docs/runbooks/update-agents-md.md +0 -59
  70. package/docs/runbooks/update-domain-docs.md +0 -42
  71. package/docs/runbooks/validate-current-state.md +0 -41
  72. package/docs/runbooks/verify-release.md +0 -69
  73. package/docs/specs/2026-02-16-feat-openclaw-official-submission-spec.md +0 -115
  74. package/docs/specs/2026-02-17-feat-submit-fogclaw-to-openclaw.md +0 -125
  75. package/docs/specs/README.md +0 -5
  76. package/docs/specs/index.md +0 -8
  77. package/docs/spikes/README.md +0 -8
  78. package/fogclaw.config.example.json +0 -33
  79. package/scripts/ci/he-docs-config.json +0 -123
  80. package/scripts/ci/he-docs-drift.sh +0 -112
  81. package/scripts/ci/he-docs-lint.sh +0 -234
  82. package/scripts/ci/he-plans-lint.sh +0 -354
  83. package/scripts/ci/he-runbooks-lint.sh +0 -445
  84. package/scripts/ci/he-specs-lint.sh +0 -258
  85. package/scripts/ci/he-spikes-lint.sh +0 -249
  86. package/scripts/runbooks/select-runbooks.sh +0 -154
  87. package/src/config.ts +0 -183
  88. package/src/engines/gliner.ts +0 -240
  89. package/src/engines/regex.ts +0 -71
  90. package/src/index.ts +0 -372
  91. package/src/redactor.ts +0 -51
  92. package/src/scanner.ts +0 -196
  93. package/src/types.ts +0 -71
  94. package/tests/config.test.ts +0 -78
  95. package/tests/gliner.test.ts +0 -289
  96. package/tests/plugin-smoke.test.ts +0 -143
  97. package/tests/redactor.test.ts +0 -320
  98. package/tests/regex.test.ts +0 -345
  99. package/tests/scanner.test.ts +0 -348
  100. package/tsconfig.json +0 -20
package/src/redactor.ts DELETED
@@ -1,51 +0,0 @@
1
- import { createHash } from "node:crypto";
2
- import type { Entity, RedactResult, RedactStrategy } from "./types.js";
3
-
4
/**
 * Replaces every entity span in `text` with a placeholder produced by
 * `makeReplacement` for the chosen strategy.
 *
 * Entities are processed from the highest `start` offset to the lowest so that
 * splicing a replacement never shifts the offsets of spans still to be handled.
 * As a consequence, "token" counters are assigned right-to-left.
 *
 * @param text - The original text the entity offsets refer to.
 * @param entities - Detected spans (`start` inclusive, `end` exclusive).
 * @param strategy - How each placeholder is built; defaults to "token".
 * @returns The redacted text, a placeholder -> original-text mapping (a later
 *   entity with the same placeholder overwrites the earlier entry), and the
 *   caller's `entities` array as passed in.
 */
export function redact(
  text: string,
  entities: Entity[],
  strategy: RedactStrategy = "token",
): RedactResult {
  // Nothing to redact: hand the text back untouched.
  if (entities.length === 0) {
    return { redacted_text: text, mapping: {}, entities: [] };
  }

  const counters: Record<string, number> = {};
  const mapping: Record<string, string> = {};

  // Work on a copy so the caller's array order is preserved.
  const rightToLeft = entities.slice().sort((x, y) => y.start - x.start);

  const redactedText = rightToLeft.reduce((working, entity) => {
    const placeholder = makeReplacement(entity, strategy, counters);
    mapping[placeholder] = entity.text;
    const head = working.slice(0, entity.start);
    const tail = working.slice(entity.end);
    return head + placeholder + tail;
  }, text);

  return { redacted_text: redactedText, mapping, entities };
}
29
-
30
/**
 * Builds the placeholder string that stands in for one entity.
 *
 * - "token": `[LABEL_n]`, where `n` is a per-label counter that this call
 *   increments in `counters` (mutated in place).
 * - "mask": one `*` per character of the entity text, at least one.
 * - "hash": `[LABEL_<first 12 hex chars of SHA-256(entity text)>]`.
 *
 * @param entity - The entity being replaced.
 * @param strategy - Which placeholder format to produce.
 * @param counters - Running per-label counts shared across one redact pass.
 */
function makeReplacement(
  entity: Entity,
  strategy: RedactStrategy,
  counters: Record<string, number>,
): string {
  const { label, text: original } = entity;

  switch (strategy) {
    case "token": {
      const ordinal = (counters[label] ?? 0) + 1;
      counters[label] = ordinal;
      return "[" + label + "_" + ordinal + "]";
    }
    case "mask": {
      // An empty match still yields a visible single-character mask.
      const width = original.length || 1;
      return "*".repeat(width);
    }
    case "hash": {
      const hex = createHash("sha256").update(original).digest("hex");
      return "[" + label + "_" + hex.substring(0, 12) + "]";
    }
  }
}
package/src/scanner.ts DELETED
@@ -1,196 +0,0 @@
1
- import type { Entity, FogClawConfig } from "./types.js";
2
- import { canonicalType } from "./types.js";
3
- import { RegexEngine } from "./engines/regex.js";
4
- import { GlinerEngine } from "./engines/gliner.js";
5
-
6
/**
 * Pre-computed allowlist lookups built once from the config so each scan does
 * not have to re-normalize values or re-compile patterns.
 */
type AllowlistPatternCache = {
  /** Global allowlisted values, already trimmed and lower-cased. */
  values: Set<string>;
  /** Global allowlist patterns compiled to case-insensitive regexes. */
  patterns: RegExp[];
  /** Per-entity-type allowlisted values, keyed by canonical entity type. */
  entityValues: Map<string, Set<string>>;
};
11
-
12
/**
 * Normalizes a string for allowlist comparison: surrounding whitespace is
 * stripped and the result is lower-cased, so matching is case-insensitive.
 */
function normalizeAllowlistValue(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
15
-
16
/**
 * Compiles allowlist pattern sources into case-insensitive regular expressions.
 *
 * @param value - Regex source strings; a missing or empty list yields `[]`.
 * @returns One `RegExp` (flag `i`) per source, in input order.
 * @throws SyntaxError if any source is not a valid regular expression.
 */
function buildPatternMaps(value: string[] | undefined): RegExp[] {
  const sources = value || [];
  const compiled: RegExp[] = [];

  for (const source of sources) {
    compiled.push(new RegExp(source, "i"));
  }

  return compiled;
}
23
-
24
/**
 * Detects entities in text by combining a regex engine (always run) with a
 * GLiNER engine (run only once it has initialized successfully), then applies
 * confidence thresholds, the configured allowlist, and overlap de-duplication.
 */
export class Scanner {
  private regexEngine: RegexEngine;
  private glinerEngine: GlinerEngine;
  private glinerAvailable = false;
  private config: FogClawConfig;
  private allowlist: AllowlistPatternCache;

  /**
   * Builds both engines and the allowlist cache. The GLiNER engine is only
   * constructed here; call {@link initialize} before expecting GLiNER results.
   *
   * @throws SyntaxError if an allowlist pattern is not a valid regex.
   */
  constructor(config: FogClawConfig) {
    this.config = config;
    this.regexEngine = new RegexEngine();

    // The engine receives the lowest threshold in play; per-label thresholds
    // are enforced afterwards in filterByConfidence.
    const glinerThreshold = this.computeGlinerThreshold(config);
    this.glinerEngine = new GlinerEngine(config.model, glinerThreshold);
    if (config.custom_entities.length > 0) {
      this.glinerEngine.setCustomLabels(config.custom_entities);
    }

    this.allowlist = this.buildAllowlistCache(config.allowlist);
  }

  /**
   * Initializes the GLiNER engine. Never rejects: on failure a warning is
   * logged and the scanner stays in regex-only mode.
   */
  async initialize(): Promise<void> {
    try {
      await this.glinerEngine.initialize();
      this.glinerAvailable = true;
    } catch (err) {
      console.warn(
        `[fogclaw] GLiNER failed to initialize, falling back to regex-only mode: ${err instanceof Error ? err.message : String(err)}`,
      );
      this.glinerAvailable = false;
    }
  }

  /**
   * Scans `text` for entities.
   *
   * @param text - Text to scan; an empty string short-circuits to no entities.
   * @param extraLabels - Additional labels forwarded to the GLiNER engine.
   * @returns The merged, de-duplicated entities together with the input text.
   */
  async scan(text: string, extraLabels?: string[]): Promise<{ entities: Entity[]; text: string }> {
    if (!text) return { entities: [], text };

    // Step 1: Regex pass (always runs, synchronous)
    const regexEntities = this.filterByPolicy(this.regexEngine.scan(text));

    // Step 2: GLiNER pass (if available); a failure here degrades to regex-only
    let glinerEntities: Entity[] = [];
    if (this.glinerAvailable) {
      try {
        glinerEntities = await this.glinerEngine.scan(text, extraLabels);
        glinerEntities = this.filterByConfidence(glinerEntities);
        glinerEntities = this.filterByPolicy(glinerEntities);
      } catch (err) {
        console.warn(
          `[fogclaw] GLiNER scan failed, using regex results only: ${
            err instanceof Error ? err.message : String(err)
          }`,
        );
      }
    }

    // Step 3: Merge and deduplicate
    const merged = deduplicateEntities([...regexEntities, ...glinerEntities]);

    return { entities: merged, text };
  }

  /** Keeps entities whose confidence meets the threshold for their label. */
  private filterByConfidence(entities: Entity[]): Entity[] {
    return entities.filter((entity) => {
      const threshold = this.getThresholdForLabel(entity.label);
      return entity.confidence >= threshold;
    });
  }

  /** Drops allowlisted entities; a no-op when the allowlist is entirely empty. */
  private filterByPolicy(entities: Entity[]): Entity[] {
    if (
      this.allowlist.values.size === 0 &&
      this.allowlist.patterns.length === 0 &&
      this.allowlist.entityValues.size === 0
    ) {
      return entities;
    }

    return entities.filter((entity) => !this.shouldAllowlistEntity(entity));
  }

  /**
   * True when the entity matches a global value, a global pattern, or a value
   * allowlisted for its entity type.
   */
  private shouldAllowlistEntity(entity: Entity): boolean {
    const normalizedText = normalizeAllowlistValue(entity.text);

    if (this.allowlist.values.has(normalizedText)) {
      return true;
    }

    if (this.allowlist.patterns.some((pattern) => pattern.test(entity.text))) {
      return true;
    }

    // Cache keys are canonical types, so canonicalize the label before the
    // lookup (mirrors getThresholdForLabel); otherwise an alias label such as
    // "PER" would silently miss entries stored under "PERSON".
    const entityValues = this.allowlist.entityValues.get(canonicalType(entity.label));
    return entityValues?.has(normalizedText) ?? false;
  }

  /** Per-label threshold if configured, otherwise the global default. */
  private getThresholdForLabel(label: string): number {
    const canonicalLabel = canonicalType(label);
    return this.config.entityConfidenceThresholds[canonicalLabel] ?? this.config.confidence_threshold;
  }

  /** Lowest of the global threshold and every per-entity threshold. */
  private computeGlinerThreshold(config: FogClawConfig): number {
    const thresholds = Object.values(config.entityConfidenceThresholds);
    if (thresholds.length === 0) {
      return config.confidence_threshold;
    }

    return Math.min(config.confidence_threshold, ...thresholds);
  }

  /** Normalizes values, compiles patterns and canonicalizes entity-type keys. */
  private buildAllowlistCache(allowlist: FogClawConfig["allowlist"]): AllowlistPatternCache {
    const globalValues = new Set(
      allowlist.values.map((value) => normalizeAllowlistValue(value)),
    );

    const globalPatterns = buildPatternMaps(allowlist.patterns);

    const entityValues = new Map<string, Set<string>>();
    for (const [entityType, values] of Object.entries(allowlist.entities)) {
      const canonical = canonicalType(entityType);

      // Several config keys can share one canonical type (e.g. "PER" and
      // "PERSON"); merge into the existing set instead of replacing it.
      let bucket = entityValues.get(canonical);
      if (bucket === undefined) {
        bucket = new Set<string>();
        entityValues.set(canonical, bucket);
      }

      for (const value of values) {
        const normalized = normalizeAllowlistValue(value);
        if (normalized.length > 0) {
          bucket.add(normalized);
        }
      }
    }

    return {
      values: globalValues,
      patterns: globalPatterns,
      entityValues,
    };
  }

  /** Whether the GLiNER engine initialized successfully. */
  get isGlinerAvailable(): boolean {
    return this.glinerAvailable;
  }
}
163
-
164
/**
 * Remove overlapping entity spans. When two entities overlap,
 * keep the one with higher confidence. If equal, prefer regex;
 * if the source is also the same, keep the span that starts first.
 *
 * The pass is greedy: entities are visited in start order and each one is
 * compared only with the most recently kept span.
 *
 * @param entities - Candidate spans (`start` inclusive, `end` exclusive).
 * @returns Non-overlapping spans in ascending start order. Inputs of length
 *   0 or 1 are returned as-is (same array instance).
 */
function deduplicateEntities(entities: Entity[]): Entity[] {
  if (entities.length <= 1) return entities;

  // 1 for regex hits, 0 otherwise, so regex can win confidence ties.
  const regexRank = (entity: Entity): number => (entity.source === "regex" ? 1 : 0);

  // Sort by start position, then confidence descending, then regex first.
  const sorted = [...entities].sort((a, b) => {
    if (a.start !== b.start) return a.start - b.start;
    if (a.confidence !== b.confidence) return b.confidence - a.confidence;
    return regexRank(b) - regexRank(a);
  });

  const result: Entity[] = [];

  for (const current of sorted) {
    const last = result[result.length - 1];

    // No overlap with the last kept span (touching spans do not overlap).
    if (last === undefined || current.start >= last.end) {
      result.push(current);
      continue;
    }

    const strongerConfidence = current.confidence > last.confidence;
    const winsTieAsRegex =
      current.confidence === last.confidence && regexRank(current) > regexRank(last);

    if (strongerConfidence || winsTieAsRegex) {
      result[result.length - 1] = current;
    }
    // Otherwise keep what's already in result.
  }

  return result;
}
package/src/types.ts DELETED
@@ -1,71 +0,0 @@
1
/** One detected span of sensitive text. */
export interface Entity {
  /** The matched substring. */
  text: string;
  /** Entity type label (e.g. "PERSON"). */
  label: string;
  /** Offset of the first character of the span in the scanned text. */
  start: number;
  /** Offset just past the last character of the span (exclusive). */
  end: number;
  /** Detection confidence, compared against configured thresholds. */
  confidence: number;
  /** Which engine produced the detection. */
  source: "regex" | "gliner";
}
9
-
10
/** Names of the supported ways to build a redaction placeholder. */
export type RedactStrategy = "token" | "mask" | "hash";
11
-
12
/**
 * Action applied when an entity is detected.
 * NOTE(review): the exact semantics of each action are implemented outside
 * this file — confirm against the guardrail handler.
 */
export type GuardrailAction = "redact" | "block" | "warn";
13
-
14
/** Per-entity-type confidence thresholds, keyed by entity type. */
export interface EntityConfidenceThresholds {
  [entityType: string]: number;
}
17
-
18
/** Values and patterns that should not be reported as entities. */
export interface EntityAllowlist {
  /** Literal values allowlisted for every entity type. */
  values: string[];
  /** Regular-expression sources allowlisted for every entity type. */
  patterns: string[];
  /** Literal values allowlisted only for the given entity type. */
  entities: Record<string, string[]>;
}
23
-
24
/** Resolved FogClaw plugin configuration. */
export interface FogClawConfig {
  /** Master on/off switch — presumably disables the plugin; verify against caller. */
  enabled: boolean;
  /** Default guardrail action — presumably used when no per-entity action applies; TODO confirm. */
  guardrail_mode: GuardrailAction;
  /** Placeholder strategy to use when redacting. */
  redactStrategy: RedactStrategy;
  /** Model identifier or path handed to the GLiNER engine. */
  model: string;
  /** Default confidence threshold for labels without a per-entity override. */
  confidence_threshold: number;
  /** Extra labels registered with the GLiNER engine when non-empty. */
  custom_entities: string[];
  /** Per-entity-type guardrail action overrides. */
  entityActions: Record<string, GuardrailAction>;
  /** Per-entity-type confidence threshold overrides. */
  entityConfidenceThresholds: EntityConfidenceThresholds;
  /** Values and patterns excluded from detection results. */
  allowlist: EntityAllowlist;
  /** Audit toggle — NOTE(review): consumer is not visible here; confirm what it enables. */
  auditEnabled: boolean;
}
36
-
37
/** Outcome of scanning a piece of text. */
export interface ScanResult {
  /** Entities found in the text. */
  entities: Entity[];
  /** The text that was scanned. */
  text: string;
}
41
-
42
/** Outcome of redacting a piece of text. */
export interface RedactResult {
  /** The text with entity spans replaced by placeholders. */
  redacted_text: string;
  /** Maps each placeholder to the original text it replaced. */
  mapping: Record<string, string>;
  /** The entities that were redacted. */
  entities: Entity[];
}
47
-
48
/**
 * Entities grouped into three buckets.
 * NOTE(review): presumably one bucket per GuardrailAction, filled by the
 * guardrail planner — the producer is not visible here; confirm.
 */
export interface GuardrailPlan {
  blocked: Entity[];
  warned: Entity[];
  redacted: Entity[];
}
53
-
54
/**
 * Alias -> canonical entity type. Keys are upper-case; any label that is not
 * listed here is already considered canonical.
 */
export const CANONICAL_TYPE_MAP: Record<string, string> = {
  DOB: "DATE",
  ZIP: "ZIP_CODE",
  PER: "PERSON",
  ORG: "ORGANIZATION",
  GPE: "LOCATION",
  LOC: "LOCATION",
  FAC: "ADDRESS",
  PHONE_NUMBER: "PHONE",
  SOCIAL_SECURITY_NUMBER: "SSN",
  CREDIT_CARD_NUMBER: "CREDIT_CARD",
  DATE_OF_BIRTH: "DATE",
};

/**
 * Resolves an entity type label to its canonical form.
 *
 * The label is upper-cased and trimmed, then looked up in
 * {@link CANONICAL_TYPE_MAP}; labels without an alias entry are returned in
 * their upper-cased, trimmed form.
 */
export function canonicalType(entityType: string): string {
  const key = entityType.toUpperCase().trim();
  const alias = CANONICAL_TYPE_MAP[key];
  return alias ?? key;
}
@@ -1,78 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
-
3
- import { loadConfig } from "../src/config.js";
4
-
5
// Covers defaulting, key canonicalization and validation performed by loadConfig.
describe("FogClaw config", () => {
  it("loads defaults for new policy fields", () => {
    const { entityConfidenceThresholds, allowlist } = loadConfig({});

    expect(entityConfidenceThresholds).toEqual({});
    expect(allowlist).toMatchObject({ values: [], patterns: [], entities: {} });
  });

  it("canonicalizes per-entity confidence threshold keys", () => {
    const rawThresholds = { person: 0.7 };

    const config = loadConfig({ entityConfidenceThresholds: rawThresholds });

    expect(config.entityConfidenceThresholds).toEqual({ PERSON: 0.7 });
  });

  it("rejects invalid per-entity confidence thresholds", () => {
    // 1.2 is outside the accepted [0, 1] range.
    const load = () => loadConfig({ entityConfidenceThresholds: { PERSON: 1.2 } });

    expect(load).toThrow('entityConfidenceThresholds["PERSON"] must be between 0 and 1, got 1.2');
  });

  it("validates allowlist regex patterns", () => {
    // "[" is an unterminated character class and cannot compile.
    const load = () =>
      loadConfig({
        allowlist: {
          values: ["ok@example.com"],
          patterns: ["["],
          entities: { PERSON: ["John"] },
        },
      });

    expect(load).toThrow(/invalid regex pattern/);
  });

  it("canonicalizes allowlist entity keys", () => {
    const { allowlist } = loadConfig({
      allowlist: { entities: { person: ["John"] } },
    });

    expect(allowlist.entities).toEqual({ PERSON: ["John"] });
  });

  it("canonicalizes entity action labels", () => {
    const { entityActions } = loadConfig({ entityActions: { person: "block" } });

    expect(entityActions).toEqual({ PERSON: "block" });
  });
});
@@ -1,289 +0,0 @@
1
- import { beforeAll, beforeEach, afterAll, describe, it, expect, vi } from "vitest";
2
- import fs from "node:fs/promises";
3
- import os from "node:os";
4
- import path from "node:path";
5
-
6
// Swap the gliner npm package for an in-memory fake so tests never need the
// actual 1.4GB model. Everything the fake uses is declared inside the factory
// because vi.mock factories are hoisted above the module's other statements.
vi.mock("gliner", () => {
  type MockHit = { text: string; label: string; score: number; start: number; end: number };
  type MockRequest = { texts: string[]; entities: string[] } | string | string[];

  // Phrases the fake "recognizes", listed in the order hits are emitted.
  const KNOWN_PHRASES: ReadonlyArray<{ phrase: string; label: string; score: number }> = [
    { phrase: "John Smith", label: "person", score: 0.95 },
    { phrase: "Acme Corp", label: "organization", score: 0.88 },
    { phrase: "New York", label: "location", score: 0.91 },
  ];

  class MockGliner {
    private config: unknown;

    constructor(config: unknown) {
      this.config = config;
    }

    async initialize(): Promise<void> {
      // Nothing to load in the fake.
    }

    /**
     * Accepts either the object form ({ texts, entities }) or the positional
     * form (text or texts, entities) and reports a hit for every known phrase
     * that occurs in the first text and whose label was requested.
     */
    async inference(
      request: MockRequest,
      maybeEntities?: string[],
      _flatNer = false,
      _threshold = 0.5,
    ): Promise<MockHit[]> {
      let text: string;
      if (typeof request === "string") {
        text = request;
      } else if (Array.isArray(request)) {
        text = request[0] ?? "";
      } else {
        text = request.texts[0] ?? "";
      }

      // Positional labels win over labels embedded in the request object.
      let labels: string[];
      if (Array.isArray(maybeEntities)) {
        labels = maybeEntities;
      } else if (typeof request === "object" && request !== null && "entities" in request) {
        labels = request.entities ?? [];
      } else {
        labels = [];
      }

      const hits: MockHit[] = [];
      for (const { phrase, label, score } of KNOWN_PHRASES) {
        const at = text.indexOf(phrase);
        if (at === -1 || !labels.includes(label)) continue;
        hits.push({ text: phrase, label, score, start: at, end: at + phrase.length });
      }

      return hits;
    }
  }

  return { Gliner: MockGliner };
});
83
-
84
// Same in-memory fake for the "gliner/node" entry point. It is declared in
// full here because a hoisted vi.mock factory cannot reference helpers defined
// elsewhere in the module.
vi.mock("gliner/node", () => {
  type MockHit = { text: string; label: string; score: number; start: number; end: number };
  type MockRequest = { texts: string[]; entities: string[] } | string | string[];

  // Phrases the fake "recognizes", listed in the order hits are emitted.
  const KNOWN_PHRASES: ReadonlyArray<{ phrase: string; label: string; score: number }> = [
    { phrase: "John Smith", label: "person", score: 0.95 },
    { phrase: "Acme Corp", label: "organization", score: 0.88 },
    { phrase: "New York", label: "location", score: 0.91 },
  ];

  class MockGliner {
    private config: unknown;

    constructor(config: unknown) {
      this.config = config;
    }

    async initialize(): Promise<void> {
      // Nothing to load in the fake.
    }

    /**
     * Accepts either the object form ({ texts, entities }) or the positional
     * form (text or texts, entities) and reports a hit for every known phrase
     * that occurs in the first text and whose label was requested.
     */
    async inference(
      request: MockRequest,
      maybeEntities?: string[],
      _flatNer = false,
      _threshold = 0.5,
    ): Promise<MockHit[]> {
      let text: string;
      if (typeof request === "string") {
        text = request;
      } else if (Array.isArray(request)) {
        text = request[0] ?? "";
      } else {
        text = request.texts[0] ?? "";
      }

      // Positional labels win over labels embedded in the request object.
      let labels: string[];
      if (Array.isArray(maybeEntities)) {
        labels = maybeEntities;
      } else if (typeof request === "object" && request !== null && "entities" in request) {
        labels = request.entities ?? [];
      } else {
        labels = [];
      }

      const hits: MockHit[] = [];
      for (const { phrase, label, score } of KNOWN_PHRASES) {
        const at = text.indexOf(phrase);
        if (at === -1 || !labels.includes(label)) continue;
        hits.push({ text: phrase, label, score, start: at, end: at + phrase.length });
      }

      return hits;
    }
  }

  return { Gliner: MockGliner };
});
160
-
161
- import { GlinerEngine } from "../src/engines/gliner.js";
162
-
163
// Placeholder model file in the OS temp directory; the tests pass its path to
// GlinerEngine in place of a real ONNX model.
const TEST_ONNX_MODEL_PATH = path.join(os.tmpdir(), "fogclaw-gliner-model-test.onnx");

// Create the placeholder once before any test runs.
beforeAll(async () => {
  await fs.writeFile(TEST_ONNX_MODEL_PATH, "mock-onnx-model", "utf8");
});

// Best-effort cleanup: a failed delete must not fail the suite.
afterAll(async () => {
  try {
    await fs.unlink(TEST_ONNX_MODEL_PATH);
  } catch {
    // Ignored on purpose.
  }
});
172
-
173
// Exercises GlinerEngine against the mocked gliner package declared above.
describe("GlinerEngine", () => {
  // Sentence containing all three phrases the mock recognizes.
  const FULL_SENTENCE = "John Smith works at Acme Corp in New York.";

  let engine: GlinerEngine;

  beforeEach(async () => {
    engine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
    await engine.initialize();
  });

  it("detects person entities with canonical PERSON label", async () => {
    const found = await engine.scan("My name is John Smith and I live here.");

    expect(found).toHaveLength(1);
    expect(found[0].text).toBe("John Smith");
    expect(found[0].label).toBe("PERSON");
  });

  it("detects organization entities with canonical ORGANIZATION label", async () => {
    const found = await engine.scan("I work at Acme Corp downtown.");

    expect(found).toHaveLength(1);
    expect(found[0].text).toBe("Acme Corp");
    expect(found[0].label).toBe("ORGANIZATION");
  });

  it("detects multiple entity types in the same text", async () => {
    const found = await engine.scan(FULL_SENTENCE);

    expect(found).toHaveLength(3);

    const foundLabels = found.map((hit) => hit.label);
    expect(foundLabels).toContain("PERSON");
    expect(foundLabels).toContain("ORGANIZATION");
    expect(foundLabels).toContain("LOCATION");
  });

  it("returns empty array for text with no entities", async () => {
    const found = await engine.scan("Hello world, this is a test.");

    expect(found).toEqual([]);
  });

  it("returns empty array for empty string input", async () => {
    const found = await engine.scan("");

    expect(found).toEqual([]);
  });

  it("allows setting custom labels without crashing", async () => {
    const setLabels = () => engine.setCustomLabels(["product", "event"]);
    expect(setLabels).not.toThrow();

    // Scanning must keep working once custom labels are registered.
    const found = await engine.scan("John Smith attended the event.");
    expect(found).toHaveLength(1);
    expect(found[0].label).toBe("PERSON");
  });

  it("applies canonical type mapping (lowercase person -> PERSON)", async () => {
    // The mock emits the lowercase label "person"; the engine is expected to
    // report it as the canonical "PERSON".
    const found = await engine.scan("John Smith is here.");
    const firstLabel = found[0].label;

    expect(firstLabel).toBe("PERSON");
    expect(firstLabel).not.toBe("person");
  });

  it("sets source to gliner for all detected entities", async () => {
    const found = await engine.scan(FULL_SENTENCE);

    found.forEach((hit) => {
      expect(hit.source).toBe("gliner");
    });
  });

  it("confidence comes from model score", async () => {
    const found = await engine.scan(FULL_SENTENCE);
    const confidenceOf = (label: string) =>
      found.find((hit) => hit.label === label)?.confidence;

    // Expected values are the scores hard-coded in the mock.
    expect(confidenceOf("PERSON")).toBe(0.95);
    expect(confidenceOf("ORGANIZATION")).toBe(0.88);
    expect(confidenceOf("LOCATION")).toBe(0.91);
  });

  it("throws if scan is called before initialize", async () => {
    const uninitializedEngine = new GlinerEngine("some-model", 0.5);
    const pendingScan = uninitializedEngine.scan("test");

    await expect(pendingScan).rejects.toThrow(
      "GLiNER engine not initialized. Call initialize() first.",
    );
  });

  it("reports isInitialized correctly", async () => {
    const freshEngine = new GlinerEngine(TEST_ONNX_MODEL_PATH, 0.5);
    expect(freshEngine.isInitialized).toBe(false);

    await freshEngine.initialize();
    expect(freshEngine.isInitialized).toBe(true);
  });

  it("includes correct start and end offsets", async () => {
    const found = await engine.scan("Contact John Smith for details.");

    expect(found).toHaveLength(1);
    expect(found[0].start).toBe(8); // "Contact " is 8 chars
    expect(found[0].end).toBe(18); // 8 + "John Smith".length = 18
  });
});