@datafog/fogclaw 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/scanner.js CHANGED
@@ -1,17 +1,30 @@
1
+ import { canonicalType } from "./types.js";
1
2
  import { RegexEngine } from "./engines/regex.js";
2
3
  import { GlinerEngine } from "./engines/gliner.js";
4
/**
 * Produce the comparison form of an allowlist value or entity text:
 * surrounding whitespace is removed and the result is lower-cased.
 *
 * @param value Raw string to normalize.
 * @returns The trimmed, lower-cased string.
 */
function normalizeAllowlistValue(value) {
    const trimmed = value.trim();
    return trimmed.toLowerCase();
}
7
/**
 * Compile allowlist pattern sources into case-insensitive regular expressions.
 *
 * @param value Array of regex source strings; may be missing or empty.
 * @returns One `RegExp` (flag `i`) per source, or an empty array when there
 *          is nothing to compile.
 */
function buildPatternMaps(value) {
    const compile = (source) => new RegExp(source, "i");
    return value?.length ? value.map(compile) : [];
}
3
13
  export class Scanner {
4
14
  regexEngine;
5
15
  glinerEngine;
6
16
  glinerAvailable = false;
7
17
  config;
18
+ allowlist;
8
19
  constructor(config) {
9
20
  this.config = config;
10
21
  this.regexEngine = new RegexEngine();
11
- this.glinerEngine = new GlinerEngine(config.model, config.confidence_threshold);
22
+ const glinerThreshold = this.computeGlinerThreshold(config);
23
+ this.glinerEngine = new GlinerEngine(config.model, glinerThreshold);
12
24
  if (config.custom_entities.length > 0) {
13
25
  this.glinerEngine.setCustomLabels(config.custom_entities);
14
26
  }
27
+ this.allowlist = this.buildAllowlistCache(config.allowlist);
15
28
  }
16
29
  async initialize() {
17
30
  try {
@@ -27,12 +40,14 @@ export class Scanner {
27
40
  if (!text)
28
41
  return { entities: [], text };
29
42
  // Step 1: Regex pass (always runs, synchronous)
30
- const regexEntities = this.regexEngine.scan(text);
43
+ const regexEntities = this.filterByPolicy(this.regexEngine.scan(text));
31
44
  // Step 2: GLiNER pass (if available)
32
45
  let glinerEntities = [];
33
46
  if (this.glinerAvailable) {
34
47
  try {
35
48
  glinerEntities = await this.glinerEngine.scan(text, extraLabels);
49
+ glinerEntities = this.filterByConfidence(glinerEntities);
50
+ glinerEntities = this.filterByPolicy(glinerEntities);
36
51
  }
37
52
  catch (err) {
38
53
  console.warn(`[fogclaw] GLiNER scan failed, using regex results only: ${err instanceof Error ? err.message : String(err)}`);
@@ -42,6 +57,65 @@ export class Scanner {
42
57
  const merged = deduplicateEntities([...regexEntities, ...glinerEntities]);
43
58
  return { entities: merged, text };
44
59
  }
60
+ filterByConfidence(entities) {
61
+ return entities.filter((entity) => {
62
+ const threshold = this.getThresholdForLabel(entity.label);
63
+ return entity.confidence >= threshold;
64
+ });
65
+ }
66
+ filterByPolicy(entities) {
67
+ if (this.allowlist.values.size === 0 &&
68
+ this.allowlist.patterns.length === 0 &&
69
+ this.allowlist.entityValues.size === 0) {
70
+ return entities;
71
+ }
72
+ return entities.filter((entity) => !this.shouldAllowlistEntity(entity));
73
+ }
74
+ shouldAllowlistEntity(entity) {
75
+ const normalizedText = normalizeAllowlistValue(entity.text);
76
+ if (this.allowlist.values.has(normalizedText)) {
77
+ return true;
78
+ }
79
+ if (this.allowlist.patterns.some((pattern) => pattern.test(entity.text))) {
80
+ return true;
81
+ }
82
+ const entityValues = this.allowlist.entityValues.get(entity.label);
83
+ if (entityValues && entityValues.has(normalizedText)) {
84
+ return true;
85
+ }
86
+ return false;
87
+ }
88
+ getThresholdForLabel(label) {
89
+ const canonicalLabel = canonicalType(label);
90
+ return this.config.entityConfidenceThresholds[canonicalLabel] ?? this.config.confidence_threshold;
91
+ }
92
+ computeGlinerThreshold(config) {
93
+ const thresholds = Object.values(config.entityConfidenceThresholds);
94
+ if (thresholds.length === 0) {
95
+ return config.confidence_threshold;
96
+ }
97
+ return Math.min(config.confidence_threshold, ...thresholds);
98
+ }
99
+ buildAllowlistCache(allowlist) {
100
+ const globalValues = new Set(allowlist.values.map((value) => normalizeAllowlistValue(value)));
101
+ const globalPatterns = buildPatternMaps(allowlist.patterns);
102
+ const entityValues = new Map();
103
+ for (const [entityType, values] of Object.entries(allowlist.entities)) {
104
+ const canonical = canonicalType(entityType);
105
+ const uniqueValues = values
106
+ .map((value) => normalizeAllowlistValue(value))
107
+ .filter((value) => value.length > 0);
108
+ entityValues.set(canonical, new Set(uniqueValues));
109
+ }
110
+ return {
111
+ values: globalValues,
112
+ patterns: globalPatterns,
113
+ entityValues,
114
+ };
115
+ }
116
+ get isGlinerAvailable() {
117
+ return this.glinerAvailable;
118
+ }
45
119
  }
46
120
  /**
47
121
  * Remove overlapping entity spans. When two entities overlap,
@@ -1 +1 @@
1
- {"version":3,"file":"scanner.js","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAEnD,MAAM,OAAO,OAAO;IACV,WAAW,CAAc;IACzB,YAAY,CAAe;IAC3B,eAAe,GAAG,KAAK,CAAC;IACxB,MAAM,CAAgB;IAE9B,YAAY,MAAqB;QAC/B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAClC,MAAM,CAAC,KAAK,EACZ,MAAM,CAAC,oBAAoB,CAC5B,CAAC;QACF,IAAI,MAAM,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtC,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC9B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,IAAI,CACV,2EAA2E,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC9H,CAAC;YACF,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;QAEzC,gDAAgD;QAChD,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAElD,qCAAqC;QACrC,IAAI,cAAc,GAAa,EAAE,CAAC;QAClC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,cAAc,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;YACnE,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CAAC,2DAA2D,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC9H,CAAC;QACH,CAAC;QAED,gCAAgC;QAChC,MAAM,MAAM,GAAG,mBAAmB,CAAC,CAAC,GAAG,aAAa,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC;QAE1E,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC;CACF;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAAC,QAAkB;IAC7C,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE1C,wDAAwD;IACxD,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACzC,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,KAAK;YAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;QAClD,OAAO,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;IACrC
,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEvC,oBAAoB;QACpB,IAAI,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,mEAAmE;YACnE,IAAI,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;gBACzC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC;YACtC,CAAC;YACD,0CAA0C;QAC5C,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"scanner.js","sourceRoot":"","sources":["../src/scanner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAQnD,SAAS,uBAAuB,CAAC,KAAa;IAC5C,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;AACpC,CAAC;AAED,SAAS,gBAAgB,CAAC,KAA2B;IACnD,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;AAC1D,CAAC;AAED,MAAM,OAAO,OAAO;IACV,WAAW,CAAc;IACzB,YAAY,CAAe;IAC3B,eAAe,GAAG,KAAK,CAAC;IACxB,MAAM,CAAgB;IACtB,SAAS,CAAwB;IAEzC,YAAY,MAAqB;QAC/B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,EAAE,CAAC;QAErC,MAAM,eAAe,GAAG,IAAI,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5D,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QACpE,IAAI,MAAM,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtC,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QAC5D,CAAC;QAED,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAC9D,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC9B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,IAAI,CACV,2EAA2E,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC9H,CAAC;YACF,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;QAEzC,gDAAgD;QAChD,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;QAEvE,qCAAqC;QACrC,IAAI,cAAc,GAAa,EAAE,CAAC;QAClC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,cAAc,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;gBACjE,cAAc,GAAG,IAAI,CAAC,kBAAkB,CAAC,cAAc,CAAC,CAAC;gBACzD,cAAc,GAAG,IAAI,CAAC,cAAc,CAAC,cAAc,CAAC,CAAC;YACvD,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,IAAI,CACV,
2DACE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CACjD,EAAE,CACH,CAAC;YACJ,CAAC;QACH,CAAC;QAED,gCAAgC;QAChC,MAAM,MAAM,GAAG,mBAAmB,CAAC,CAAC,GAAG,aAAa,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC;QAE1E,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC;IAEO,kBAAkB,CAAC,QAAkB;QAC3C,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE;YAChC,MAAM,SAAS,GAAG,IAAI,CAAC,oBAAoB,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC1D,OAAO,MAAM,CAAC,UAAU,IAAI,SAAS,CAAC;QACxC,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,cAAc,CAAC,QAAkB;QACvC,IACE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC;YAChC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC;YACpC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,IAAI,KAAK,CAAC,EACtC,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,MAAM,CAAC,CAAC,CAAC;IAC1E,CAAC;IAEO,qBAAqB,CAAC,MAAc;QAC1C,MAAM,cAAc,GAAG,uBAAuB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAE5D,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;YAC9C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;YACzE,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QACnE,IAAI,YAAY,IAAI,YAAY,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;YACrD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,oBAAoB,CAAC,KAAa;QACxC,MAAM,cAAc,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;QAC5C,OAAO,IAAI,CAAC,MAAM,CAAC,0BAA0B,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC;IACpG,CAAC;IAEO,sBAAsB,CAAC,MAAqB;QAClD,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,0BAA0B,CAAC,CAAC;QACpE,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,MAAM,CAAC,oBAAoB,CAAC;QACrC,CAAC;QAED,OAAO,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,oBAAoB,EAAE,GAAG,UAAU,CAAC,CAAC;IAC9D,CAAC;IAEO,mBAAmB,CAAC,SAAqC;QAC/D,MAAM,YAAY,GAAG,IAAI,GAAG,CAC1B,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,uBAAuB,CAAC,KAAK,CAAC,CAAC,CAChE,CAAC;QAEF,MAAM,cAAc,GAAG,gBAAgB,CAAC,SAAS,CAAC,
QAAQ,CAAC,CAAC;QAE5D,MAAM,YAAY,GAAG,IAAI,GAAG,EAAuB,CAAC;QACpD,KAAK,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC;YACtE,MAAM,SAAS,GAAG,aAAa,CAAC,UAAU,CAAC,CAAC;YAC5C,MAAM,YAAY,GAAG,MAAM;iBACxB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,uBAAuB,CAAC,KAAK,CAAC,CAAC;iBAC9C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACvC,YAAY,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC;QACrD,CAAC;QAED,OAAO;YACL,MAAM,EAAE,YAAY;YACpB,QAAQ,EAAE,cAAc;YACxB,YAAY;SACb,CAAC;IACJ,CAAC;IAED,IAAI,iBAAiB;QACnB,OAAO,IAAI,CAAC,eAAe,CAAC;IAC9B,CAAC;CACF;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAAC,QAAkB;IAC7C,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE1C,wDAAwD;IACxD,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACzC,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,KAAK;YAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;QAClD,OAAO,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEvC,oBAAoB;QACpB,IAAI,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,mEAAmE;YACnE,IAAI,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;gBACzC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC;YACtC,CAAC;YACD,0CAA0C;QAC5C,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
package/dist/types.d.ts CHANGED
@@ -8,6 +8,14 @@ export interface Entity {
8
8
  }
9
9
  export type RedactStrategy = "token" | "mask" | "hash";
10
10
  export type GuardrailAction = "redact" | "block" | "warn";
11
/**
 * Per-entity confidence overrides: maps an entity type name to a numeric
 * threshold.
 */
export interface EntityConfidenceThresholds {
    [entityType: string]: number;
}
14
/**
 * Allowlist configuration.
 */
export interface EntityAllowlist {
    /** Exact values, applied regardless of entity type. */
    values: Array<string>;
    /** Regular-expression sources, applied regardless of entity type. */
    patterns: Array<string>;
    /** Exact values keyed by entity type. */
    entities: { [entityType: string]: Array<string> };
}
11
19
  export interface FogClawConfig {
12
20
  enabled: boolean;
13
21
  guardrail_mode: GuardrailAction;
@@ -16,6 +24,9 @@ export interface FogClawConfig {
16
24
  confidence_threshold: number;
17
25
  custom_entities: string[];
18
26
  entityActions: Record<string, GuardrailAction>;
27
+ entityConfidenceThresholds: EntityConfidenceThresholds;
28
+ allowlist: EntityAllowlist;
29
+ auditEnabled: boolean;
19
30
  }
20
31
  export interface ScanResult {
21
32
  entities: Entity[];
@@ -26,6 +37,11 @@ export interface RedactResult {
26
37
  mapping: Record<string, string>;
27
38
  entities: Entity[];
28
39
  }
40
/**
 * Entities grouped into three lists named after the guardrail actions
 * (`block`, `warn`, `redact`).
 */
export interface GuardrailPlan {
    /** Entities in the "blocked" group. */
    blocked: Array<Entity>;
    /** Entities in the "warned" group. */
    warned: Array<Entity>;
    /** Entities in the "redacted" group. */
    redacted: Array<Entity>;
}
29
45
  export declare const CANONICAL_TYPE_MAP: Record<string, string>;
30
46
  export declare function canonicalType(entityType: string): string;
31
47
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,MAAM;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;CAC5B;AAED,MAAM,MAAM,cAAc,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;AAEvD,MAAM,MAAM,eAAe,GAAG,QAAQ,GAAG,OAAO,GAAG,MAAM,CAAC;AAE1D,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,cAAc,EAAE,eAAe,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,oBAAoB,EAAE,MAAM,CAAC;IAC7B,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;CAChD;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,eAAO,MAAM,kBAAkB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAYrD,CAAC;AAEF,wBAAgB,aAAa,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAGxD"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,MAAM;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;CAC5B;AAED,MAAM,MAAM,cAAc,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;AAEvD,MAAM,MAAM,eAAe,GAAG,QAAQ,GAAG,OAAO,GAAG,MAAM,CAAC;AAE1D,MAAM,WAAW,0BAA0B;IACzC,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,cAAc,EAAE,eAAe,CAAC;IAChC,cAAc,EAAE,cAAc,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,oBAAoB,EAAE,MAAM,CAAC;IAC7B,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAC/C,0BAA0B,EAAE,0BAA0B,CAAC;IACvD,SAAS,EAAE,eAAe,CAAC;IAC3B,YAAY,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,eAAO,MAAM,kBAAkB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAYrD,CAAC;AAEF,wBAAgB,aAAa,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAGxD"}
package/dist/types.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAkCA,MAAM,CAAC,MAAM,kBAAkB,GAA2B;IACxD,GAAG,EAAE,MAAM;IACX,GAAG,EAAE,UAAU;IACf,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,cAAc;IACnB,GAAG,EAAE,UAAU;IACf,GAAG,EAAE,UAAU;IACf,GAAG,EAAE,SAAS;IACd,YAAY,EAAE,OAAO;IACrB,sBAAsB,EAAE,KAAK;IAC7B,kBAAkB,EAAE,aAAa;IACjC,aAAa,EAAE,MAAM;CACtB,CAAC;AAEF,MAAM,UAAU,aAAa,CAAC,UAAkB;IAC9C,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IACnD,OAAO,kBAAkB,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC;AACtD,CAAC"}
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAqDA,MAAM,CAAC,MAAM,kBAAkB,GAA2B;IACxD,GAAG,EAAE,MAAM;IACX,GAAG,EAAE,UAAU;IACf,GAAG,EAAE,QAAQ;IACb,GAAG,EAAE,cAAc;IACnB,GAAG,EAAE,UAAU;IACf,GAAG,EAAE,UAAU;IACf,GAAG,EAAE,SAAS;IACd,YAAY,EAAE,OAAO;IACrB,sBAAsB,EAAE,KAAK;IAC7B,kBAAkB,EAAE,aAAa;IACjC,aAAa,EAAE,MAAM;CACtB,CAAC;AAEF,MAAM,UAAU,aAAa,CAAC,UAAkB;IAC9C,MAAM,UAAU,GAAG,UAAU,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IACnD,OAAO,kBAAkB,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC;AACtD,CAAC"}
@@ -4,6 +4,10 @@
4
4
  "redactStrategy": "token",
5
5
  "model": "onnx-community/gliner_large-v2.1",
6
6
  "confidence_threshold": 0.5,
7
+ "entityConfidenceThresholds": {
8
+ "PERSON": 0.6,
9
+ "ORGANIZATION": 0.7
10
+ },
7
11
  "custom_entities": ["project codename", "internal tool name"],
8
12
  "entityActions": {
9
13
  "SSN": "block",
@@ -11,5 +15,19 @@
11
15
  "EMAIL": "redact",
12
16
  "PHONE": "redact",
13
17
  "PERSON": "warn"
14
- }
18
+ },
19
+ "allowlist": {
20
+ "values": [
21
+ "noreply@example.com"
22
+ ],
23
+ "patterns": [
24
+ "^internal-"
25
+ ],
26
+ "entities": {
27
+ "PERSON": [
28
+ "john doe"
29
+ ]
30
+ }
31
+ },
32
+ "auditEnabled": true
15
33
  }
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "id": "fogclaw",
3
3
  "name": "FogClaw",
4
- "version": "0.1.4",
5
- "description": "PII detection & custom entity redaction powered by DataFog",
4
+ "version": "0.1.6",
5
+ "description": "PII detection & custom entity redaction plugin powered by DataFog",
6
6
  "configSchema": {
7
7
  "type": "object",
8
8
  "properties": {
@@ -30,6 +30,15 @@
30
30
  "minimum": 0,
31
31
  "maximum": 1
32
32
  },
33
+ "entityConfidenceThresholds": {
34
+ "type": "object",
35
+ "additionalProperties": {
36
+ "type": "number",
37
+ "minimum": 0,
38
+ "maximum": 1
39
+ },
40
+ "default": {}
41
+ },
33
42
  "custom_entities": {
34
43
  "type": "array",
35
44
  "items": {
@@ -44,6 +53,43 @@
44
53
  "enum": ["redact", "block", "warn"]
45
54
  },
46
55
  "default": {}
56
+ },
57
+ "allowlist": {
58
+ "type": "object",
59
+ "properties": {
60
+ "values": {
61
+ "type": "array",
62
+ "items": {
63
+ "type": "string"
64
+ },
65
+ "default": []
66
+ },
67
+ "patterns": {
68
+ "type": "array",
69
+ "items": {
70
+ "type": "string"
71
+ },
72
+ "default": []
73
+ },
74
+ "entities": {
75
+ "type": "object",
76
+ "additionalProperties": {
77
+ "type": "array",
78
+ "items": {
79
+ "type": "string"
80
+ }
81
+ }
82
+ }
83
+ },
84
+ "default": {
85
+ "values": [],
86
+ "patterns": [],
87
+ "entities": {}
88
+ }
89
+ },
90
+ "auditEnabled": {
91
+ "type": "boolean",
92
+ "default": true
47
93
  }
48
94
  }
49
95
  },
@@ -70,6 +116,11 @@
70
116
  "help": "Minimum GLiNER score (0-1) required before an entity is treated as a detection.",
71
117
  "advanced": true
72
118
  },
119
+ "entityConfidenceThresholds": {
120
+ "label": "Per-Entity Confidence Thresholds",
121
+ "help": "Override confidence thresholds by entity label (for example: {\"PERSON\": 0.95, \"ORGANIZATION\": 0.7}).",
122
+ "advanced": true
123
+ },
73
124
  "custom_entities": {
74
125
  "label": "Custom Entity Labels",
75
126
  "help": "Extra labels to detect as sensitive entities (for example: `project code`, `competitor name`)."
@@ -78,6 +129,16 @@
78
129
  "label": "Entity Actions",
79
130
  "help": "Map specific entity labels to per-entity behavior (for example: {\"EMAIL\": \"block\", \"PHONE\": \"redact\"}).",
80
131
  "advanced": true
132
+ },
133
+ "allowlist": {
134
+ "label": "Allowlist / Exemptions",
135
+ "help": "Global and per-entity allowlist entries to exclude from enforcement. Supports exact values and regex patterns.",
136
+ "advanced": true
137
+ },
138
+ "auditEnabled": {
139
+ "label": "Audit Logging",
140
+ "help": "Emit structured audit summaries for guardrail decisions.",
141
+ "advanced": true
81
142
  }
82
143
  }
83
144
  }
package/package.json CHANGED
@@ -1,10 +1,17 @@
1
1
  {
2
2
  "name": "@datafog/fogclaw",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "description": "OpenClaw plugin for PII detection & custom entity redaction powered by DataFog",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",
8
+ "scripts": {
9
+ "build": "tsc",
10
+ "test": "vitest run",
11
+ "test:watch": "vitest",
12
+ "test:plugin-smoke": "vitest run tests/plugin-smoke.test.ts",
13
+ "lint": "tsc --noEmit"
14
+ },
8
15
  "dependencies": {
9
16
  "gliner": "^0.0.19",
10
17
  "onnxruntime-node": "1.19.2",
@@ -32,12 +39,5 @@
32
39
  "overrides": {
33
40
  "onnxruntime-web": "1.21.0",
34
41
  "sharp": "0.34.5"
35
- },
36
- "scripts": {
37
- "build": "tsc",
38
- "test": "vitest run",
39
- "test:watch": "vitest",
40
- "test:plugin-smoke": "vitest run tests/plugin-smoke.test.ts",
41
- "lint": "tsc --noEmit"
42
42
  }
43
- }
43
+ }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
package/src/config.ts CHANGED
@@ -1,8 +1,109 @@
1
- import type { FogClawConfig, GuardrailAction, RedactStrategy } from "./types.js";
1
+ import {
2
+ canonicalType,
3
+ type EntityAllowlist,
4
+ type FogClawConfig,
5
+ type GuardrailAction,
6
+ type RedactStrategy,
7
+ } from "./types.js";
2
8
 
3
9
  const VALID_GUARDRAIL_MODES: GuardrailAction[] = ["redact", "block", "warn"];
4
10
  const VALID_REDACT_STRATEGIES: RedactStrategy[] = ["token", "mask", "hash"];
5
11
 
12
/**
 * Validate that `value` is an array of strings and return its cleaned form.
 *
 * @param value Candidate value taken from user configuration.
 * @param path  Config path used as the prefix of error messages.
 * @returns The entries trimmed, with empty (or whitespace-only) ones removed.
 * @throws Error when `value` is not an array or contains a non-string entry.
 */
function ensureStringList(value: unknown, path: string): string[] {
  if (!Array.isArray(value)) {
    throw new Error(`${path} must be an array of strings`);
  }

  const cleaned: string[] = [];
  value.forEach((entry: unknown) => {
    if (typeof entry !== "string") {
      throw new Error(`${path} must contain only strings`);
    }

    const trimmed = entry.trim();
    if (trimmed.length > 0) {
      cleaned.push(trimmed);
    }
  });

  return cleaned;
}
27
+
28
/**
 * Validate and normalize the `allowlist` configuration value.
 *
 * - `null`/`undefined` yields an empty allowlist.
 * - `values` and `patterns` must be string arrays; entries are trimmed,
 *   empties dropped and duplicates removed.
 * - Every pattern must compile as a regular expression.
 * - `entities` must be an object mapping entity labels to string arrays; the
 *   labels are canonicalized with `canonicalType`.
 *
 * @param value Raw allowlist value from user configuration.
 * @returns A normalized `EntityAllowlist`.
 * @throws Error when the shape is wrong or a pattern is not a valid regex.
 */
function ensureEntityAllowlist(value: unknown): EntityAllowlist {
  if (value == null) {
    return { values: [], patterns: [], entities: {} };
  }

  if (typeof value !== "object" || Array.isArray(value)) {
    throw new Error("allowlist must be an object");
  }

  const raw = value as Record<string, unknown>;
  const values = ensureStringList(raw.values ?? [], "allowlist.values");
  const patterns = ensureStringList(raw.patterns ?? [], "allowlist.patterns");

  for (const pattern of patterns) {
    try {
      // Compile only to validate; the result is discarded.
      new RegExp(pattern);
    } catch {
      throw new Error(`allowlist.patterns contains invalid regex pattern: "${pattern}"`);
    }
  }

  const entitiesValue = raw.entities ?? {};
  if (
    typeof entitiesValue !== "object" ||
    Array.isArray(entitiesValue) ||
    entitiesValue === null
  ) {
    throw new Error("allowlist.entities must be an object mapping entity labels to string arrays");
  }

  // Different spellings of a label can canonicalize to the same type. Merge
  // their lists so an earlier key is not silently overwritten by a later one.
  const merged = new Map<string, Set<string>>();
  for (const [entityType, entryValue] of Object.entries(entitiesValue)) {
    const normalizedType = canonicalType(entityType);
    const entries = ensureStringList(entryValue, `allowlist.entities.${entityType}`);
    const bucket = merged.get(normalizedType) ?? new Set<string>();
    for (const entry of entries) {
      bucket.add(entry);
    }
    merged.set(normalizedType, bucket);
  }

  const entities: Record<string, string[]> = {};
  for (const [normalizedType, bucket] of merged) {
    entities[normalizedType] = [...bucket];
  }

  return {
    values: [...new Set(values)],
    patterns: [...new Set(patterns)],
    entities,
  };
}
70
+
71
/**
 * Validate per-entity confidence thresholds and key them by canonical type.
 *
 * @param value Raw `entityConfidenceThresholds` value; any falsy value is
 *              treated as "no overrides".
 * @returns A map from canonical entity type to a threshold within [0, 1].
 * @throws Error when `value` is not a plain object, or when a threshold is
 *         not a number, is NaN, or lies outside [0, 1].
 */
function ensureEntityConfidenceThresholds(
  value: unknown,
): Record<string, number> {
  if (!value) {
    return {};
  }

  // A falsy value (including null) has already returned above.
  const isPlainObject = typeof value === "object" && !Array.isArray(value);
  if (!isPlainObject) {
    throw new Error("entityConfidenceThresholds must be an object");
  }

  const normalized: Record<string, number> = {};

  for (const [entityType, rawThreshold] of Object.entries(value as Record<string, unknown>)) {
    if (typeof rawThreshold !== "number" || Number.isNaN(rawThreshold)) {
      throw new Error(
        `entityConfidenceThresholds["${entityType}"] must be a number between 0 and 1, got ${String(
          rawThreshold,
        )}`,
      );
    }

    // NaN is excluded above, so this is the exact negation of "within [0, 1]".
    const withinRange = rawThreshold >= 0 && rawThreshold <= 1;
    if (!withinRange) {
      throw new Error(
        `entityConfidenceThresholds["${entityType}"] must be between 0 and 1, got ${rawThreshold}`,
      );
    }

    normalized[canonicalType(entityType)] = rawThreshold;
  }

  return normalized;
}
106
+
6
107
  export const DEFAULT_CONFIG: FogClawConfig = {
7
108
  enabled: true,
8
109
  guardrail_mode: "redact",
@@ -11,10 +112,37 @@ export const DEFAULT_CONFIG: FogClawConfig = {
11
112
  confidence_threshold: 0.5,
12
113
  custom_entities: [],
13
114
  entityActions: {},
115
+ entityConfidenceThresholds: {},
116
+ allowlist: {
117
+ values: [],
118
+ patterns: [],
119
+ entities: {},
120
+ },
121
+ auditEnabled: true,
14
122
  };
15
123
 
16
124
  export function loadConfig(overrides: Partial<FogClawConfig>): FogClawConfig {
17
- const config: FogClawConfig = { ...DEFAULT_CONFIG, ...overrides };
125
+ const config: FogClawConfig = {
126
+ ...DEFAULT_CONFIG,
127
+ ...overrides,
128
+ entityActions: {
129
+ ...DEFAULT_CONFIG.entityActions,
130
+ ...(overrides.entityActions ?? {}),
131
+ },
132
+ entityConfidenceThresholds: {
133
+ ...DEFAULT_CONFIG.entityConfidenceThresholds,
134
+ ...(overrides.entityConfidenceThresholds ?? {}),
135
+ },
136
+ };
137
+
138
+ config.allowlist = ensureEntityAllowlist(overrides.allowlist ?? DEFAULT_CONFIG.allowlist);
139
+ config.entityConfidenceThresholds = ensureEntityConfidenceThresholds(
140
+ config.entityConfidenceThresholds,
141
+ );
142
+
143
+ if (typeof config.enabled !== "boolean") {
144
+ throw new Error(`enabled must be true or false`);
145
+ }
18
146
 
19
147
  if (!VALID_GUARDRAIL_MODES.includes(config.guardrail_mode)) {
20
148
  throw new Error(
@@ -34,13 +162,22 @@ export function loadConfig(overrides: Partial<FogClawConfig>): FogClawConfig {
34
162
  );
35
163
  }
36
164
 
165
+ if (typeof config.auditEnabled !== "boolean") {
166
+ throw new Error(`auditEnabled must be true or false`);
167
+ }
168
+
169
+ const normalizedActions: Record<string, GuardrailAction> = {};
37
170
  for (const [entityType, action] of Object.entries(config.entityActions)) {
38
171
  if (!VALID_GUARDRAIL_MODES.includes(action)) {
39
172
  throw new Error(
40
173
  `Invalid action "${action}" for entity type "${entityType}". Must be one of: ${VALID_GUARDRAIL_MODES.join(", ")}`,
41
174
  );
42
175
  }
176
+
177
+ const normalizedType = canonicalType(entityType);
178
+ normalizedActions[normalizedType] = action;
43
179
  }
180
+ config.entityActions = normalizedActions;
44
181
 
45
182
  return config;
46
183
  }