@datafog/fogclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/.github/workflows/harness-docs.yml +30 -0
  2. package/AGENTS.md +28 -0
  3. package/LICENSE +21 -0
  4. package/README.md +208 -0
  5. package/dist/config.d.ts +4 -0
  6. package/dist/config.d.ts.map +1 -0
  7. package/dist/config.js +30 -0
  8. package/dist/config.js.map +1 -0
  9. package/dist/engines/gliner.d.ts +14 -0
  10. package/dist/engines/gliner.d.ts.map +1 -0
  11. package/dist/engines/gliner.js +75 -0
  12. package/dist/engines/gliner.js.map +1 -0
  13. package/dist/engines/regex.d.ts +5 -0
  14. package/dist/engines/regex.d.ts.map +1 -0
  15. package/dist/engines/regex.js +54 -0
  16. package/dist/engines/regex.js.map +1 -0
  17. package/dist/index.d.ts +19 -0
  18. package/dist/index.d.ts.map +1 -0
  19. package/dist/index.js +157 -0
  20. package/dist/index.js.map +1 -0
  21. package/dist/redactor.d.ts +3 -0
  22. package/dist/redactor.d.ts.map +1 -0
  23. package/dist/redactor.js +37 -0
  24. package/dist/redactor.js.map +1 -0
  25. package/dist/scanner.d.ts +11 -0
  26. package/dist/scanner.d.ts.map +1 -0
  27. package/dist/scanner.js +77 -0
  28. package/dist/scanner.js.map +1 -0
  29. package/dist/types.d.ts +31 -0
  30. package/dist/types.d.ts.map +1 -0
  31. package/dist/types.js +18 -0
  32. package/dist/types.js.map +1 -0
  33. package/docs/DATA.md +28 -0
  34. package/docs/DESIGN.md +17 -0
  35. package/docs/DOMAIN_DOCS.md +30 -0
  36. package/docs/FRONTEND.md +24 -0
  37. package/docs/OBSERVABILITY.md +25 -0
  38. package/docs/PLANS.md +171 -0
  39. package/docs/PRODUCT_SENSE.md +20 -0
  40. package/docs/RELIABILITY.md +60 -0
  41. package/docs/SECURITY.md +50 -0
  42. package/docs/design-docs/core-beliefs.md +17 -0
  43. package/docs/design-docs/index.md +8 -0
  44. package/docs/generated/README.md +36 -0
  45. package/docs/generated/memory.md +1 -0
  46. package/docs/plans/2026-02-16-fogclaw-design.md +172 -0
  47. package/docs/plans/2026-02-16-fogclaw-implementation.md +1606 -0
  48. package/docs/plans/README.md +15 -0
  49. package/docs/plans/active/2026-02-16-feat-openclaw-official-submission-plan.md +386 -0
  50. package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +318 -0
  51. package/docs/plans/active/2026-02-17-feat-submit-fogclaw-to-openclaw-plan.md +244 -0
  52. package/docs/plans/tech-debt-tracker.md +42 -0
  53. package/docs/plugins/fogclaw.md +95 -0
  54. package/docs/runbooks/address-review-findings.md +30 -0
  55. package/docs/runbooks/ci-failures.md +46 -0
  56. package/docs/runbooks/code-review.md +34 -0
  57. package/docs/runbooks/merge-change.md +28 -0
  58. package/docs/runbooks/pull-request.md +45 -0
  59. package/docs/runbooks/record-evidence.md +43 -0
  60. package/docs/runbooks/reproduce-bug.md +42 -0
  61. package/docs/runbooks/respond-to-feedback.md +42 -0
  62. package/docs/runbooks/review-findings.md +31 -0
  63. package/docs/runbooks/submit-openclaw-plugin.md +68 -0
  64. package/docs/runbooks/update-agents-md.md +59 -0
  65. package/docs/runbooks/update-domain-docs.md +42 -0
  66. package/docs/runbooks/validate-current-state.md +41 -0
  67. package/docs/runbooks/verify-release.md +69 -0
  68. package/docs/specs/2026-02-16-feat-openclaw-official-submission-spec.md +115 -0
  69. package/docs/specs/2026-02-17-feat-submit-fogclaw-to-openclaw.md +125 -0
  70. package/docs/specs/README.md +5 -0
  71. package/docs/specs/index.md +8 -0
  72. package/docs/spikes/README.md +8 -0
  73. package/fogclaw.config.example.json +15 -0
  74. package/openclaw.plugin.json +45 -0
  75. package/package.json +37 -0
  76. package/scripts/ci/he-docs-config.json +123 -0
  77. package/scripts/ci/he-docs-drift.sh +112 -0
  78. package/scripts/ci/he-docs-lint.sh +234 -0
  79. package/scripts/ci/he-plans-lint.sh +354 -0
  80. package/scripts/ci/he-runbooks-lint.sh +445 -0
  81. package/scripts/ci/he-specs-lint.sh +258 -0
  82. package/scripts/ci/he-spikes-lint.sh +249 -0
  83. package/scripts/runbooks/select-runbooks.sh +154 -0
  84. package/src/config.ts +46 -0
  85. package/src/engines/gliner.ts +88 -0
  86. package/src/engines/regex.ts +71 -0
  87. package/src/index.ts +223 -0
  88. package/src/redactor.ts +51 -0
  89. package/src/scanner.ts +90 -0
  90. package/src/types.ts +52 -0
  91. package/tests/config.test.ts +104 -0
  92. package/tests/gliner.test.ts +184 -0
  93. package/tests/plugin-smoke.test.ts +114 -0
  94. package/tests/redactor.test.ts +320 -0
  95. package/tests/regex.test.ts +345 -0
  96. package/tests/scanner.test.ts +199 -0
  97. package/tsconfig.json +20 -0
@@ -0,0 +1,30 @@
1
+ name: Harness Docs
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize, reopened]
6
+
7
+ jobs:
8
+ docs_lint:
9
+ name: Docs Lint
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ with:
14
+ fetch-depth: 0
15
+ - name: Lint harness docs
16
+ run: |
17
+ bash scripts/ci/he-docs-lint.sh
18
+ bash scripts/ci/he-specs-lint.sh
19
+ bash scripts/ci/he-plans-lint.sh
20
+ bash scripts/ci/he-spikes-lint.sh
21
+
22
+ docs_drift:
23
+ name: Docs Drift Gate
24
+ runs-on: ubuntu-latest
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+ with:
28
+ fetch-depth: 0
29
+ - name: Enforce doc updates on relevant changes
30
+ run: bash scripts/ci/he-docs-drift.sh
package/AGENTS.md ADDED
@@ -0,0 +1,28 @@
1
+ # AGENTS.md
2
+
3
+ ## Start Here
4
+
5
+ This file is a map, not an encyclopedia.
6
+
7
+ The system of record is `docs/`. Keep durable knowledge (specs, plans, logs, decisions, checklists) there and link to it from here.
8
+
9
+ ## Golden Principles
10
+
11
+ - Prove it works: never claim completion without running the most relevant validation (tests, build, or a small end-to-end check) or explicitly recording why it could not be run.
12
+ - Keep AGENTS.md minimal and stable; detailed procedure belongs in `docs/runbooks/`.
13
+
14
+ ## Source Of Truth (Table Of Contents)
15
+
16
+ - Workflow contract + artifact rules: `docs/PLANS.md`
17
+ - Specs (intent): `docs/specs/`
18
+ - Spikes (investigation findings): `docs/spikes/`
19
+ - Plans (execution + evidence): `docs/plans/`
20
+ - Runbooks (process checklists): `docs/runbooks/`
21
+ - Generated context (scratchpad/reference): `docs/generated/`
22
+ - Architecture (if present): `ARCHITECTURE.md`
23
+
24
+ ## Workflow (Phases)
25
+
26
+ intake -> spike (optional) -> plan -> implement -> review -> verify-release -> learn
27
+
28
+ If this file grows beyond a compact index, move detailed guidance into `docs/` and keep links here.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 DataFog
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,208 @@
1
+ # FogClaw
2
+
3
+ An [OpenClaw](https://github.com/openclaw/openclaw) plugin for PII detection and custom entity redaction, powered by [DataFog](https://github.com/datafog/datafog-python).
4
+
5
+ FogClaw uses a dual-engine approach: battle-tested regex patterns for structured PII (emails, SSNs, credit cards, etc.) and [GLiNER](https://github.com/urchade/GLiNER) via ONNX for zero-shot named entity recognition — letting you redact not just PII but any custom terms, expressions, or entity types you define.
6
+
7
+ ## Features
8
+
9
+ - **Automatic guardrail** — intercepts messages before they reach the LLM via OpenClaw's `before_agent_start` hook
10
+ - **On-demand tools** — `fogclaw_scan` and `fogclaw_redact` tools the agent can invoke explicitly
11
+ - **Dual detection engine** — regex for structured PII (<1ms), GLiNER for zero-shot NER (~50-200ms)
12
+ - **Custom entity types** — define any entity label (e.g., "project codename", "competitor name") and GLiNER detects them with zero training
13
+ - **Configurable actions** — per-entity-type behavior: `redact`, `block`, or `warn`
14
+ - **Multiple redaction strategies** — `token`, `mask`, or `hash`
15
+ - **Graceful degradation** — falls back to regex-only mode if GLiNER fails to load
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ # From the OpenClaw CLI
21
+ openclaw plugins install @datafog/fogclaw
22
+
23
+ # Or manually
24
+ git clone https://github.com/DataFog/fogclaw.git ~/.openclaw/extensions/fogclaw
25
+ cd ~/.openclaw/extensions/fogclaw
26
+ npm install
27
+ npm run build
28
+ ```
29
+
30
+ ## Quick Start
31
+
32
+ 1. Copy the example config:
33
+
34
+ ```bash
35
+ cp fogclaw.config.example.json fogclaw.config.json
36
+ ```
37
+
38
+ 2. Edit `fogclaw.config.json` to your needs:
39
+
40
+ ```json
41
+ {
42
+ "enabled": true,
43
+ "guardrail_mode": "redact",
44
+ "redactStrategy": "token",
45
+ "model": "onnx-community/gliner_large-v2.1",
46
+ "confidence_threshold": 0.5,
47
+ "custom_entities": ["project codename", "competitor name"],
48
+ "entityActions": {
49
+ "SSN": "block",
50
+ "CREDIT_CARD": "block",
51
+ "EMAIL": "redact",
52
+ "PHONE": "redact",
53
+ "PERSON": "warn"
54
+ }
55
+ }
56
+ ```
57
+
58
+ 3. Enable the plugin in your OpenClaw config and restart.
59
+
60
+ ## Submission Readiness Evidence (Recommended)
61
+
62
+ These commands are the minimum evidence set for PR review:
63
+
64
+ ```bash
65
+ npm test
66
+ npm run build
67
+ npm run test:plugin-smoke
68
+ npm pkg get openclaw
69
+ npm run build
70
+ node --input-type=module - <<'NODE'
71
+ import plugin from './dist/index.js';
72
+ const result = plugin.register ? 'ok' : 'missing-register';
73
+ console.log(result, plugin.id, plugin.name);
74
+ NODE
75
+ ```
76
+
77
+ Expected output:
78
+
79
+ - All tests pass.
80
+ - `npm run build` exits with `0` and writes `dist/index.js`.
81
+ - `npm run test:plugin-smoke` passes and confirms hook/tool contracts.
82
+ - `npm pkg get openclaw` shows `{"extensions":["./dist/index.js"]}`.
83
+ - The inline node check prints `ok fogclaw FogClaw`.
84
+
85
+ ## How It Works
86
+
87
+ ```
88
+ Incoming message
89
+ |
90
+ v
91
+ +-----------+
92
+ | Regex Pass | emails, SSNs, phones, credit cards, IPs, dates, zips
93
+ | (<1ms) | confidence: 1.0
94
+ +-----+-----+
95
+ |
96
+ v
97
+ +-----------+
98
+ | GLiNER | persons, orgs, locations + your custom entities
99
+ | (ONNX) | confidence: 0.0-1.0
100
+ +-----+-----+
101
+ |
102
+ v
103
+ +-----------+
104
+ | Merge & | deduplicate overlapping spans, prefer higher confidence
105
+ | Normalize |
106
+ +-----+-----+
107
+ |
108
+ v
109
+ Apply action per entity type (redact / block / warn)
110
+ ```
111
+
112
+ ## Detected Entity Types
113
+
114
+ ### Regex Engine (structured PII)
115
+
116
+ | Type | Examples |
117
+ |------|----------|
118
+ | `EMAIL` | `john@example.com`, `user+tag@example.co.uk` |
119
+ | `PHONE` | `555-123-4567`, `(555) 123-4567`, `+44 20 7946 0958` |
120
+ | `SSN` | `123-45-6789` |
121
+ | `CREDIT_CARD` | Visa, Mastercard, Amex (with/without separators) |
122
+ | `IP_ADDRESS` | `192.168.1.1`, `10.0.0.1` |
123
+ | `DATE` | `01/15/1990`, `2020-01-15`, `January 15, 2000` |
124
+ | `ZIP_CODE` | `10001`, `10001-1234` |
125
+
126
+ ### GLiNER Engine (zero-shot NER)
127
+
128
+ Built-in labels: `person`, `organization`, `location`, `address`, `date of birth`, `medical record number`, `account number`, `passport number`
129
+
130
+ Plus any labels you add via `custom_entities` in the config.
131
+
132
+ ## Redaction Strategies
133
+
134
+ | Strategy | Input | Output |
135
+ |----------|-------|--------|
136
+ | `token` | `Contact john@example.com` | `Contact [EMAIL_1]` |
137
+ | `mask` | `Contact john@example.com` | `Contact ****************` |
138
+ | `hash` | `Contact john@example.com` | `Contact [EMAIL_a1b2c3d4e5f6]` |
139
+
140
+ ## Configuration
141
+
142
+ | Option | Type | Default | Description |
143
+ |--------|------|---------|-------------|
144
+ | `enabled` | `boolean` | `true` | Enable/disable the plugin |
145
+ | `guardrail_mode` | `string` | `"redact"` | Default action: `"redact"`, `"block"`, or `"warn"` |
146
+ | `redactStrategy` | `string` | `"token"` | How to redact: `"token"`, `"mask"`, or `"hash"` |
147
+ | `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER |
148
+ | `confidence_threshold` | `number` | `0.5` | Minimum confidence for GLiNER detections (0-1) |
149
+ | `custom_entities` | `string[]` | `[]` | Custom entity labels for zero-shot detection |
150
+ | `entityActions` | `object` | `{}` | Per-entity-type action overrides |
151
+
152
+ ## OpenClaw Tools
153
+
154
+ ### `fogclaw_scan`
155
+
156
+ Scan text for PII and custom entities. Returns detected entities with types, positions, and confidence scores.
157
+
158
+ **Parameters:**
159
+ - `text` (required) — text to scan
160
+ - `custom_labels` (optional) — additional entity labels for zero-shot detection
161
+
162
+ ### `fogclaw_redact`
163
+
164
+ Scan and redact PII/custom entities from text. Returns sanitized text with entities replaced.
165
+
166
+ **Parameters:**
167
+ - `text` (required) — text to scan and redact
168
+ - `strategy` (optional) — `"token"`, `"mask"`, or `"hash"` (defaults to config)
169
+ - `custom_labels` (optional) — additional entity labels for zero-shot detection
170
+
171
+ ## Standalone Usage
172
+
173
+ FogClaw's core can also be used outside of OpenClaw:
174
+
175
+ ```typescript
176
+ import { Scanner, redact, loadConfig, DEFAULT_CONFIG } from "@datafog/fogclaw";
177
+
178
+ const scanner = new Scanner(DEFAULT_CONFIG);
179
+ await scanner.initialize();
180
+
181
+ // Scan for entities
182
+ const result = await scanner.scan("Contact john@example.com or call 555-123-4567");
183
+ console.log(result.entities);
184
+ // [
185
+ // { text: "john@example.com", label: "EMAIL", start: 8, end: 24, confidence: 1, source: "regex" },
186
+ // { text: "555-123-4567", label: "PHONE", start: 33, end: 45, confidence: 1, source: "regex" }
187
+ // ]
188
+
189
+ // Redact
190
+ const redacted = redact(result.text, result.entities, "token");
191
+ console.log(redacted.redacted_text);
192
+ // "Contact [EMAIL_1] or call [PHONE_1]"
193
+ ```
194
+
195
+ ## Development
196
+
197
+ ```bash
198
+ git clone https://github.com/DataFog/fogclaw.git
199
+ cd fogclaw
200
+ npm install
201
+ npm test # run tests
202
+ npm run build # compile TypeScript
203
+ npm run lint # type-check without emitting
204
+ ```
205
+
206
+ ## License
207
+
208
+ MIT
@@ -0,0 +1,4 @@
1
+ import type { FogClawConfig } from "./types.js";
2
+ /** Baseline FogClaw settings; loadConfig() spreads caller overrides on top of these values. */ export declare const DEFAULT_CONFIG: FogClawConfig;
3
+ /** Returns DEFAULT_CONFIG with `overrides` applied on top. Throws Error when guardrail_mode or redactStrategy is not an accepted value, when confidence_threshold is below 0 or above 1, or when any entityActions value is not a valid guardrail action. */ export declare function loadConfig(overrides: Partial<FogClawConfig>): FogClawConfig;
4
+ //# sourceMappingURL=config.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAmC,MAAM,YAAY,CAAC;AAKjF,eAAO,MAAM,cAAc,EAAE,aAQ5B,CAAC;AAEF,wBAAgB,UAAU,CAAC,SAAS,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,aAAa,CA8B3E"}
package/dist/config.js ADDED
@@ -0,0 +1,30 @@
/** Actions accepted for `guardrail_mode` and for every `entityActions` value. */
const VALID_GUARDRAIL_MODES = ["redact", "block", "warn"];

/** Strategies accepted for `redactStrategy`. */
const VALID_REDACT_STRATEGIES = ["token", "mask", "hash"];

/**
 * Settings used for every option the caller does not override.
 * Treat as read-only: loadConfig() copies the containers it hands out so
 * that callers cannot modify these defaults by accident.
 */
export const DEFAULT_CONFIG = {
    enabled: true,
    guardrail_mode: "redact",
    redactStrategy: "token",
    model: "onnx-community/gliner_large-v2.1",
    confidence_threshold: 0.5,
    custom_entities: [],
    entityActions: {},
};

/**
 * Builds a validated configuration from DEFAULT_CONFIG plus `overrides`.
 *
 * @param overrides Partial configuration; keys present here replace the
 *   defaults. A null/undefined `custom_entities` or `entityActions` falls
 *   back to an empty container.
 * @returns A new config object whose `custom_entities` array and
 *   `entityActions` map are fresh copies (never shared with DEFAULT_CONFIG
 *   or with the caller's objects).
 * @throws Error when `guardrail_mode` or `redactStrategy` is not an accepted
 *   value, when `confidence_threshold` is not within 0..1 (NaN and undefined
 *   included), or when an `entityActions` value is not a valid action.
 */
export function loadConfig(overrides) {
    const config = { ...DEFAULT_CONFIG, ...overrides };

    // Object spread is shallow: without these copies the result would share
    // its array/map with DEFAULT_CONFIG, and a later push() or assignment on
    // one loaded config would silently change the defaults for everyone.
    const entities = config.custom_entities ?? [];
    config.custom_entities = Array.isArray(entities) ? [...entities] : entities;
    config.entityActions = { ...(config.entityActions ?? {}) };

    if (!VALID_GUARDRAIL_MODES.includes(config.guardrail_mode)) {
        throw new Error(`Invalid guardrail_mode "${config.guardrail_mode}". Must be one of: ${VALID_GUARDRAIL_MODES.join(", ")}`);
    }
    if (!VALID_REDACT_STRATEGIES.includes(config.redactStrategy)) {
        throw new Error(`Invalid redactStrategy "${config.redactStrategy}". Must be one of: ${VALID_REDACT_STRATEGIES.join(", ")}`);
    }

    // Written as a negated "in range" test so that NaN and undefined, for
    // which every comparison is false, are rejected instead of slipping by.
    const threshold = config.confidence_threshold;
    if (!(threshold >= 0 && threshold <= 1)) {
        throw new Error(`confidence_threshold must be between 0 and 1, got ${config.confidence_threshold}`);
    }

    for (const [entityType, action] of Object.entries(config.entityActions)) {
        if (!VALID_GUARDRAIL_MODES.includes(action)) {
            throw new Error(`Invalid action "${action}" for entity type "${entityType}". Must be one of: ${VALID_GUARDRAIL_MODES.join(", ")}`);
        }
    }
    return config;
}
30
+ //# sourceMappingURL=config.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAEA,MAAM,qBAAqB,GAAsB,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;AAC7E,MAAM,uBAAuB,GAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AAE5E,MAAM,CAAC,MAAM,cAAc,GAAkB;IAC3C,OAAO,EAAE,IAAI;IACb,cAAc,EAAE,QAAQ;IACxB,cAAc,EAAE,OAAO;IACvB,KAAK,EAAE,kCAAkC;IACzC,oBAAoB,EAAE,GAAG;IACzB,eAAe,EAAE,EAAE;IACnB,aAAa,EAAE,EAAE;CAClB,CAAC;AAEF,MAAM,UAAU,UAAU,CAAC,SAAiC;IAC1D,MAAM,MAAM,GAAkB,EAAE,GAAG,cAAc,EAAE,GAAG,SAAS,EAAE,CAAC;IAElE,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC;QAC3D,MAAM,IAAI,KAAK,CACb,2BAA2B,MAAM,CAAC,cAAc,sBAAsB,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CACzG,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC;QAC7D,MAAM,IAAI,KAAK,CACb,2BAA2B,MAAM,CAAC,cAAc,sBAAsB,uBAAuB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC3G,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,oBAAoB,GAAG,CAAC,IAAI,MAAM,CAAC,oBAAoB,GAAG,CAAC,EAAE,CAAC;QACvE,MAAM,IAAI,KAAK,CACb,qDAAqD,MAAM,CAAC,oBAAoB,EAAE,CACnF,CAAC;IACJ,CAAC;IAED,KAAK,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;QACxE,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,KAAK,CACb,mBAAmB,MAAM,sBAAsB,UAAU,sBAAsB,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAClH,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,14 @@
1
+ import type { Entity } from "../types.js";
2
+ export declare class GlinerEngine { // Entity detector that runs a Gliner model (loaded from the "gliner" package) over text for a list of labels.
3
+ private model; // Gliner instance created by initialize(); null until then.
4
+ private modelPath; // Passed to Gliner as both the tokenizer path and the ONNX model path.
5
+ private threshold; // Forwarded to every inference call as its threshold option.
6
+ private customLabels; // Labels from setCustomLabels(); added after the built-in labels on each scan.
7
+ private initialized; // Set to true once the model has finished initializing.
8
+ constructor(modelPath: string, threshold?: number); // threshold falls back to 0.5 when omitted.
9
+ initialize(): Promise<void>; // Loads the model on CPU; returns immediately if already initialized; rejects with an Error naming the model path on failure.
10
+ setCustomLabels(labels: string[]): void; // Replaces the stored custom label list.
11
+ scan(text: string, extraLabels?: string[]): Promise<Entity[]>; // Resolves to [] for empty text; rejects if no model has been created; otherwise runs inference over the de-duplicated built-in + custom + extra labels and tags each result with source "gliner".
12
+ get isInitialized(): boolean; // Mirrors the private initialized flag.
13
+ }
14
+ //# sourceMappingURL=gliner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAc1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAuBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA+BnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
@@ -0,0 +1,75 @@
1
+ import { canonicalType } from "../types.js";
2
+ const DEFAULT_NER_LABELS = [ // Built-in labels sent to the model on every scan, ahead of custom and per-call labels.
3
+ "person",
4
+ "organization",
5
+ "location",
6
+ "address",
7
+ "date of birth",
8
+ "medical record number",
9
+ "account number",
10
+ "passport number",
11
+ ];
/**
 * Label-driven entity detector backed by the `gliner` package.
 *
 * Lifecycle: construct, await initialize() once, then call scan() as often
 * as needed. The `gliner` module is imported lazily inside initialize(), so
 * constructing the engine never loads the model.
 */
export class GlinerEngine {
    /** Ready-to-use Gliner instance; stays null until initialize() succeeds. */
    model = null;
    /** Used as both the tokenizer path and the ONNX model path. */
    modelPath;
    /** Forwarded to every inference call as its `threshold` option. */
    threshold;
    /** Labels added via setCustomLabels(); sent with every scan. */
    customLabels = [];
    /** True once the model has finished initializing. */
    initialized = false;

    /**
     * @param modelPath Model identifier/path handed to Gliner.
     * @param threshold Threshold forwarded to inference (default 0.5).
     */
    constructor(modelPath, threshold = 0.5) {
        this.modelPath = modelPath;
        this.threshold = threshold;
    }

    /**
     * Loads the model. Does nothing when the engine is already initialized.
     *
     * @throws Error naming the model path when the import, construction, or
     *   model initialization fails. The engine is left exactly as it was
     *   before the call, so scan() keeps reporting "not initialized".
     */
    async initialize() {
        if (this.initialized)
            return;
        try {
            const { Gliner } = await import("gliner");
            const model = new Gliner({
                tokenizerPath: this.modelPath,
                onnxSettings: {
                    modelPath: this.modelPath,
                    executionProvider: "cpu",
                },
                maxWidth: 12,
                modelType: "gliner",
            });
            await model.initialize();
            // Publish the instance only after it initialized successfully.
            // scan() guards on this.model, so assigning it earlier would let a
            // failed load leave behind a half-built model that scan() then uses.
            this.model = model;
            this.initialized = true;
        }
        catch (err) {
            throw new Error(`Failed to initialize GLiNER model "${this.modelPath}": ${err instanceof Error ? err.message : String(err)}`);
        }
    }

    /**
     * Replaces the custom label list sent with every scan.
     *
     * @param labels Labels to use; copied so later changes to the caller's
     *   array do not affect the engine.
     */
    setCustomLabels(labels) {
        this.customLabels = [...labels];
    }

    /**
     * Runs inference over `text` for the built-in, custom, and per-call labels.
     *
     * @param text Text to scan; an empty value short-circuits to [].
     * @param extraLabels Optional labels used for this call only.
     * @returns Entities with canonicalized labels and source "gliner".
     * @throws Error when no initialized model is available.
     */
    async scan(text, extraLabels) {
        if (!text)
            return [];
        if (!this.model) {
            throw new Error("GLiNER engine not initialized. Call initialize() first.");
        }
        const labels = [
            ...DEFAULT_NER_LABELS,
            ...this.customLabels,
            ...(extraLabels ?? []),
        ];
        // A label may appear in several lists; send each one only once.
        const uniqueLabels = [...new Set(labels)];
        const results = await this.model.inference(text, uniqueLabels, {
            threshold: this.threshold,
        });
        return results.map((r) => ({
            text: r.text,
            label: canonicalType(r.label),
            start: r.start,
            end: r.end,
            confidence: r.score,
            source: "gliner",
        }));
    }

    /** Whether initialize() has completed successfully. */
    get isInitialized() {
        return this.initialized;
    }
}
75
+ //# sourceMappingURL=gliner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC1C,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE;YAC7D,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAChB,CAAC,CAA6E,EAAE,EAAE,CAAC,CAAC;YAClF,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAA
a;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
@@ -0,0 +1,5 @@
1
+ import type { Entity } from "../types.js";
2
+ export declare class RegexEngine { // Pattern-based detector for EMAIL, PHONE, SSN, CREDIT_CARD, IP_ADDRESS, DATE and ZIP_CODE.
3
+ scan(text: string): Entity[]; // Returns every pattern match with confidence 1.0 and source "regex", sorted by start and then end offset.
4
+ }
5
+ //# sourceMappingURL=regex.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"regex.d.ts","sourceRoot":"","sources":["../../src/engines/regex.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AA4C1C,qBAAa,WAAW;IACtB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;CAyB7B"}
@@ -0,0 +1,54 @@
1
+ const PATTERNS = [ // Label/regex table scanned in order by RegexEngine.scan(); every regex is global, so scan() resets lastIndex before use.
2
+ {
3
+ label: "EMAIL", // local@domain.tld; the lookbehind stops a match from starting in the middle of a token.
4
+ pattern: /(?<![A-Za-z0-9._%+\-@])(?![A-Za-z_]{2,20}=)[A-Za-z0-9!#$%&*+\-/=^_`{|}~][A-Za-z0-9!#$%&'*+\-/=?^_`{|}~.]*@(?:\.?[A-Za-z0-9-]+\.)+[A-Za-z]{2,}(?=$|[^A-Za-z])/gi,
5
+ },
6
+ {
7
+ label: "PHONE", // 3-3-4 digit numbers with an optional 1 or +1 prefix, or numbers starting with + and a 1-3 digit code.
8
+ pattern: /(?<![A-Za-z0-9])(?:(?:(?:\+?1)[-.\s]?)?(?:\(\d{3}\)|\d{3})[-.\s]?\d{3}[-.\s]?\d{4}|\+\d{1,3}[\s\-.]?\d{1,4}(?:[\s\-.]?\d{2,4}){2,3})(?![-A-Za-z0-9])/gi,
9
+ },
10
+ {
11
+ label: "SSN", // Dashed 3-2-4 or nine bare digits; rejects a leading 000 or 666, a middle 00 and a trailing 0000.
12
+ pattern: /(?<!\d)(?:(?!000|666)\d{3}-(?!00)\d{2}-(?!0000)\d{4}|(?!000|666)\d{3}(?!00)\d{2}(?!0000)\d{4})(?!\d)/g,
13
+ },
14
+ {
15
+ label: "CREDIT_CARD", // 13 or 16 digits starting with 4, 16 digits starting 51-55, 15 digits starting 34 or 37; grouped forms allow dash or space separators.
16
+ pattern: /\b(?:4\d{12}(?:\d{3})?|5[1-5]\d{14}|3[47]\d{13}|(?:(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2})[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})|(?:3[47]\d{2}[-\s]?\d{6}[-\s]?\d{5}))\b/g,
17
+ },
18
+ {
19
+ label: "IP_ADDRESS", // Dotted quad with each part limited to 0-255.
20
+ pattern: /\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d))\b/g,
21
+ },
22
+ {
23
+ label: "DATE", // M/D/Y or M-D-Y with a 2- or 4-digit year, YYYY-M-D, or a month name followed by day, comma and a 19xx/20xx year.
24
+ pattern: /\b(?:(?:0?[1-9]|1[0-2])[/-](?:0?[1-9]|[12]\d|3[01])[/-](?:\d{2}|\d{4})|(?:\d{4})-(?:0?[1-9]|1[0-2])-(?:0?[1-9]|[12]\d|3[01])|(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+(?:0?[1-9]|[12]\d|3[01]),\s+(?:19|20)\d{2})\b/gi,
25
+ },
26
+ {
27
+ label: "ZIP_CODE", // Five digits with an optional dash and four-digit suffix.
28
+ pattern: /\b\d{5}(?:-\d{4})?\b/g,
29
+ },
30
+ ];
/**
 * Detects structured entities by running each entry of PATTERNS over the
 * input text.
 */
export class RegexEngine {
    /**
     * Collects every pattern match found in `text`.
     *
     * @param text Text to scan.
     * @returns One entity per match (confidence 1.0, source "regex"),
     *   ordered by start offset and then by end offset.
     */
    scan(text) {
        const found = PATTERNS.flatMap(({ label, pattern }) => {
            // The regexes are shared and global: rewind before each use so a
            // previous call cannot leave a stale search position behind.
            pattern.lastIndex = 0;
            const hits = [];
            for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
                const matched = hit[0];
                hits.push({
                    text: matched,
                    label,
                    start: hit.index,
                    end: hit.index + matched.length,
                    confidence: 1.0,
                    source: "regex",
                });
            }
            return hits;
        });
        // Deterministic ordering: earliest start first, shorter span first on ties.
        found.sort((left, right) => left.start !== right.start
            ? left.start - right.start
            : left.end - right.end);
        return found;
    }
}
54
+ //# sourceMappingURL=regex.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"regex.js","sourceRoot":"","sources":["../../src/engines/regex.ts"],"names":[],"mappings":"AAOA,MAAM,QAAQ,GAAiB;IAC7B;QACE,KAAK,EAAE,OAAO;QACd,OAAO,EACL,gKAAgK;KACnK;IACD;QACE,KAAK,EAAE,OAAO;QACd,OAAO,EACL,wJAAwJ;KAC3J;IACD;QACE,KAAK,EAAE,KAAK;QACZ,OAAO,EACL,uGAAuG;KAC1G;IACD;QACE,KAAK,EAAE,aAAa;QACpB,OAAO,EACL,iKAAiK;KACpK;IACD;QACE,KAAK,EAAE,YAAY;QACnB,OAAO,EACL,iIAAiI;KACpI;IACD;QACE,KAAK,EAAE,MAAM;QACb,OAAO,EACL,sUAAsU;KACzU;IACD;QACE,KAAK,EAAE,UAAU;QACjB,OAAO,EAAE,uBAAuB;KACjC;CACF,CAAC;AAEF,MAAM,OAAO,WAAW;IACtB,IAAI,CAAC,IAAY;QACf,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1C,2DAA2D;YAC3D,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;YAEtB,IAAI,KAA6B,CAAC;YAClC,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;gBAC7C,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;oBACd,KAAK;oBACL,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;oBAClC,UAAU,EAAE,GAAG;oBACf,MAAM,EAAE,OAAO;iBAChB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;QAE5D,OAAO,QAAQ,CAAC;IAClB,CAAC;CACF"}
@@ -0,0 +1,19 @@
1
+ export { Scanner } from "./scanner.js";
2
+ export { redact } from "./redactor.js";
3
+ export { loadConfig, DEFAULT_CONFIG } from "./config.js";
4
+ export type { Entity, FogClawConfig, ScanResult, RedactResult, RedactStrategy, GuardrailAction, } from "./types.js";
5
+ /**
6
+ * OpenClaw plugin definition.
7
+ *
8
+ * Registers:
9
+ * - `before_agent_start` hook for automatic PII guardrail
10
+ * - `fogclaw_scan` tool for on-demand entity detection
11
+ * - `fogclaw_redact` tool for on-demand redaction
12
+ */
13
+ declare const fogclaw: { // Default export: the plugin object handed to OpenClaw.
14
+ id: string; // Plugin identifier; the README readiness check prints it as fogclaw.
15
+ name: string; // Display name; the README readiness check prints it as FogClaw.
16
+ register(api: any): void; // Entry point that wires up the hook and tools listed in the doc comment above; NOTE(review): the shape of `api` is not visible in this declaration, confirm against the OpenClaw plugin API.
17
+ };
18
+ export default fogclaw;
19
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AACvC,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AACzD,YAAY,EACV,MAAM,EACN,aAAa,EACb,UAAU,EACV,YAAY,EACZ,cAAc,EACd,eAAe,GAChB,MAAM,YAAY,CAAC;AAEpB;;;;;;;GAOG;AACH,QAAA,MAAM,OAAO;;;kBAIG,GAAG;CA+LlB,CAAC;AAEF,eAAe,OAAO,CAAC"}