@datafog/fogclaw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/harness-docs.yml +30 -0
- package/AGENTS.md +28 -0
- package/LICENSE +21 -0
- package/README.md +208 -0
- package/dist/config.d.ts +4 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +30 -0
- package/dist/config.js.map +1 -0
- package/dist/engines/gliner.d.ts +14 -0
- package/dist/engines/gliner.d.ts.map +1 -0
- package/dist/engines/gliner.js +75 -0
- package/dist/engines/gliner.js.map +1 -0
- package/dist/engines/regex.d.ts +5 -0
- package/dist/engines/regex.d.ts.map +1 -0
- package/dist/engines/regex.js +54 -0
- package/dist/engines/regex.js.map +1 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +157 -0
- package/dist/index.js.map +1 -0
- package/dist/redactor.d.ts +3 -0
- package/dist/redactor.d.ts.map +1 -0
- package/dist/redactor.js +37 -0
- package/dist/redactor.js.map +1 -0
- package/dist/scanner.d.ts +11 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +77 -0
- package/dist/scanner.js.map +1 -0
- package/dist/types.d.ts +31 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +18 -0
- package/dist/types.js.map +1 -0
- package/docs/DATA.md +28 -0
- package/docs/DESIGN.md +17 -0
- package/docs/DOMAIN_DOCS.md +30 -0
- package/docs/FRONTEND.md +24 -0
- package/docs/OBSERVABILITY.md +25 -0
- package/docs/PLANS.md +171 -0
- package/docs/PRODUCT_SENSE.md +20 -0
- package/docs/RELIABILITY.md +60 -0
- package/docs/SECURITY.md +50 -0
- package/docs/design-docs/core-beliefs.md +17 -0
- package/docs/design-docs/index.md +8 -0
- package/docs/generated/README.md +36 -0
- package/docs/generated/memory.md +1 -0
- package/docs/plans/2026-02-16-fogclaw-design.md +172 -0
- package/docs/plans/2026-02-16-fogclaw-implementation.md +1606 -0
- package/docs/plans/README.md +15 -0
- package/docs/plans/active/2026-02-16-feat-openclaw-official-submission-plan.md +386 -0
- package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +318 -0
- package/docs/plans/active/2026-02-17-feat-submit-fogclaw-to-openclaw-plan.md +244 -0
- package/docs/plans/tech-debt-tracker.md +42 -0
- package/docs/plugins/fogclaw.md +95 -0
- package/docs/runbooks/address-review-findings.md +30 -0
- package/docs/runbooks/ci-failures.md +46 -0
- package/docs/runbooks/code-review.md +34 -0
- package/docs/runbooks/merge-change.md +28 -0
- package/docs/runbooks/pull-request.md +45 -0
- package/docs/runbooks/record-evidence.md +43 -0
- package/docs/runbooks/reproduce-bug.md +42 -0
- package/docs/runbooks/respond-to-feedback.md +42 -0
- package/docs/runbooks/review-findings.md +31 -0
- package/docs/runbooks/submit-openclaw-plugin.md +68 -0
- package/docs/runbooks/update-agents-md.md +59 -0
- package/docs/runbooks/update-domain-docs.md +42 -0
- package/docs/runbooks/validate-current-state.md +41 -0
- package/docs/runbooks/verify-release.md +69 -0
- package/docs/specs/2026-02-16-feat-openclaw-official-submission-spec.md +115 -0
- package/docs/specs/2026-02-17-feat-submit-fogclaw-to-openclaw.md +125 -0
- package/docs/specs/README.md +5 -0
- package/docs/specs/index.md +8 -0
- package/docs/spikes/README.md +8 -0
- package/fogclaw.config.example.json +15 -0
- package/openclaw.plugin.json +45 -0
- package/package.json +37 -0
- package/scripts/ci/he-docs-config.json +123 -0
- package/scripts/ci/he-docs-drift.sh +112 -0
- package/scripts/ci/he-docs-lint.sh +234 -0
- package/scripts/ci/he-plans-lint.sh +354 -0
- package/scripts/ci/he-runbooks-lint.sh +445 -0
- package/scripts/ci/he-specs-lint.sh +258 -0
- package/scripts/ci/he-spikes-lint.sh +249 -0
- package/scripts/runbooks/select-runbooks.sh +154 -0
- package/src/config.ts +46 -0
- package/src/engines/gliner.ts +88 -0
- package/src/engines/regex.ts +71 -0
- package/src/index.ts +223 -0
- package/src/redactor.ts +51 -0
- package/src/scanner.ts +90 -0
- package/src/types.ts +52 -0
- package/tests/config.test.ts +104 -0
- package/tests/gliner.test.ts +184 -0
- package/tests/plugin-smoke.test.ts +114 -0
- package/tests/redactor.test.ts +320 -0
- package/tests/regex.test.ts +345 -0
- package/tests/scanner.test.ts +199 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Harness Docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
types: [opened, synchronize, reopened]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
docs_lint:
|
|
9
|
+
name: Docs Lint
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
with:
|
|
14
|
+
fetch-depth: 0
|
|
15
|
+
- name: Lint harness docs
|
|
16
|
+
run: |
|
|
17
|
+
bash scripts/ci/he-docs-lint.sh
|
|
18
|
+
bash scripts/ci/he-specs-lint.sh
|
|
19
|
+
bash scripts/ci/he-plans-lint.sh
|
|
20
|
+
bash scripts/ci/he-spikes-lint.sh
|
|
21
|
+
|
|
22
|
+
docs_drift:
|
|
23
|
+
name: Docs Drift Gate
|
|
24
|
+
runs-on: ubuntu-latest
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
with:
|
|
28
|
+
fetch-depth: 0
|
|
29
|
+
- name: Enforce doc updates on relevant changes
|
|
30
|
+
run: bash scripts/ci/he-docs-drift.sh
|
package/AGENTS.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
## Start Here
|
|
4
|
+
|
|
5
|
+
This file is a map, not an encyclopedia.
|
|
6
|
+
|
|
7
|
+
The system of record is `docs/`. Keep durable knowledge (specs, plans, logs, decisions, checklists) there and link to it from here.
|
|
8
|
+
|
|
9
|
+
## Golden Principles
|
|
10
|
+
|
|
11
|
+
- Prove it works: never claim completion without running the most relevant validation (tests, build, or a small end-to-end check) or explicitly recording why it could not be run.
|
|
12
|
+
- Keep AGENTS.md minimal and stable; detailed procedure belongs in `docs/runbooks/`.
|
|
13
|
+
|
|
14
|
+
## Source Of Truth (Table Of Contents)
|
|
15
|
+
|
|
16
|
+
- Workflow contract + artifact rules: `docs/PLANS.md`
|
|
17
|
+
- Specs (intent): `docs/specs/`
|
|
18
|
+
- Spikes (investigation findings): `docs/spikes/`
|
|
19
|
+
- Plans (execution + evidence): `docs/plans/`
|
|
20
|
+
- Runbooks (process checklists): `docs/runbooks/`
|
|
21
|
+
- Generated context (scratchpad/reference): `docs/generated/`
|
|
22
|
+
- Architecture (if present): `ARCHITECTURE.md`
|
|
23
|
+
|
|
24
|
+
## Workflow (Phases)
|
|
25
|
+
|
|
26
|
+
intake -> spike (optional) -> plan -> implement -> review -> verify-release -> learn
|
|
27
|
+
|
|
28
|
+
If this file grows beyond a compact index, move detailed guidance into `docs/` and keep links here.
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 DataFog
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# FogClaw
|
|
2
|
+
|
|
3
|
+
An [OpenClaw](https://github.com/openclaw/openclaw) plugin for PII detection and custom entity redaction, powered by [DataFog](https://github.com/datafog/datafog-python).
|
|
4
|
+
|
|
5
|
+
FogClaw uses a dual-engine approach: battle-tested regex patterns for structured PII (emails, SSNs, credit cards, etc.) and [GLiNER](https://github.com/urchade/GLiNER) via ONNX for zero-shot named entity recognition — letting you redact not just PII but any custom terms, expressions, or entity types you define.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Automatic guardrail** — intercepts messages before they reach the LLM via OpenClaw's `before_agent_start` hook
|
|
10
|
+
- **On-demand tools** — `fogclaw_scan` and `fogclaw_redact` tools the agent can invoke explicitly
|
|
11
|
+
- **Dual detection engine** — regex for structured PII (<1ms), GLiNER for zero-shot NER (~50-200ms)
|
|
12
|
+
- **Custom entity types** — define any entity label (e.g., "project codename", "competitor name") and GLiNER detects them with zero training
|
|
13
|
+
- **Configurable actions** — per-entity-type behavior: `redact`, `block`, or `warn`
|
|
14
|
+
- **Multiple redaction strategies** — `token`, `mask`, or `hash`
|
|
15
|
+
- **Graceful degradation** — falls back to regex-only mode if GLiNER fails to load
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# From the OpenClaw CLI
|
|
21
|
+
openclaw plugins install @datafog/fogclaw
|
|
22
|
+
|
|
23
|
+
# Or manually
|
|
24
|
+
git clone https://github.com/DataFog/fogclaw.git ~/.openclaw/extensions/fogclaw
|
|
25
|
+
cd ~/.openclaw/extensions/fogclaw
|
|
26
|
+
npm install
|
|
27
|
+
npm run build
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
1. Copy the example config:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
cp fogclaw.config.example.json fogclaw.config.json
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
2. Edit `fogclaw.config.json` to your needs:
|
|
39
|
+
|
|
40
|
+
```json
|
|
41
|
+
{
|
|
42
|
+
"enabled": true,
|
|
43
|
+
"guardrail_mode": "redact",
|
|
44
|
+
"redactStrategy": "token",
|
|
45
|
+
"model": "onnx-community/gliner_large-v2.1",
|
|
46
|
+
"confidence_threshold": 0.5,
|
|
47
|
+
"custom_entities": ["project codename", "competitor name"],
|
|
48
|
+
"entityActions": {
|
|
49
|
+
"SSN": "block",
|
|
50
|
+
"CREDIT_CARD": "block",
|
|
51
|
+
"EMAIL": "redact",
|
|
52
|
+
"PHONE": "redact",
|
|
53
|
+
"PERSON": "warn"
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
3. Enable the plugin in your OpenClaw config and restart.
|
|
59
|
+
|
|
60
|
+
## Submission Readiness Evidence (Recommended)
|
|
61
|
+
|
|
62
|
+
These commands are the minimum evidence set for PR review:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
npm test
|
|
66
|
+
npm run build
|
|
67
|
+
npm run test:plugin-smoke
|
|
68
|
+
npm pkg get openclaw
|
|
69
|
+
npm run build
|
|
70
|
+
node --input-type=module - <<'NODE'
|
|
71
|
+
import plugin from './dist/index.js';
|
|
72
|
+
const result = plugin.register ? 'ok' : 'missing-register';
|
|
73
|
+
console.log(result, plugin.id, plugin.name);
|
|
74
|
+
NODE
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Expected output:
|
|
78
|
+
|
|
79
|
+
- All tests pass.
|
|
80
|
+
- `npm run build` exits with `0` and writes `dist/index.js`.
|
|
81
|
+
- `npm run test:plugin-smoke` passes and confirms hook/tool contracts.
|
|
82
|
+
- `npm pkg get openclaw` shows `{"extensions":["./dist/index.js"]}`.
|
|
83
|
+
- The inline node check prints `ok fogclaw FogClaw`.
|
|
84
|
+
|
|
85
|
+
## How It Works
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
Incoming message
|
|
89
|
+
|
|
|
90
|
+
v
|
|
91
|
+
+-----------+
|
|
92
|
+
| Regex Pass | emails, SSNs, phones, credit cards, IPs, dates, zips
|
|
93
|
+
| (<1ms) | confidence: 1.0
|
|
94
|
+
+-----+-----+
|
|
95
|
+
|
|
|
96
|
+
v
|
|
97
|
+
+-----------+
|
|
98
|
+
| GLiNER | persons, orgs, locations + your custom entities
|
|
99
|
+
| (ONNX) | confidence: 0.0-1.0
|
|
100
|
+
+-----+-----+
|
|
101
|
+
|
|
|
102
|
+
v
|
|
103
|
+
+-----------+
|
|
104
|
+
| Merge & | deduplicate overlapping spans, prefer higher confidence
|
|
105
|
+
| Normalize |
|
|
106
|
+
+-----+-----+
|
|
107
|
+
|
|
|
108
|
+
v
|
|
109
|
+
Apply action per entity type (redact / block / warn)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Detected Entity Types
|
|
113
|
+
|
|
114
|
+
### Regex Engine (structured PII)
|
|
115
|
+
|
|
116
|
+
| Type | Examples |
|
|
117
|
+
|------|----------|
|
|
118
|
+
| `EMAIL` | `john@example.com`, `user+tag@example.co.uk` |
|
|
119
|
+
| `PHONE` | `555-123-4567`, `(555) 123-4567`, `+44 20 7946 0958` |
|
|
120
|
+
| `SSN` | `123-45-6789` |
|
|
121
|
+
| `CREDIT_CARD` | Visa, Mastercard, Amex (with/without separators) |
|
|
122
|
+
| `IP_ADDRESS` | `192.168.1.1`, `10.0.0.1` |
|
|
123
|
+
| `DATE` | `01/15/1990`, `2020-01-15`, `January 15, 2000` |
|
|
124
|
+
| `ZIP_CODE` | `10001`, `10001-1234` |
|
|
125
|
+
|
|
126
|
+
### GLiNER Engine (zero-shot NER)
|
|
127
|
+
|
|
128
|
+
Built-in labels: `person`, `organization`, `location`, `address`, `date of birth`, `medical record number`, `account number`, `passport number`
|
|
129
|
+
|
|
130
|
+
Plus any labels you add via `custom_entities` in the config.
|
|
131
|
+
|
|
132
|
+
## Redaction Strategies
|
|
133
|
+
|
|
134
|
+
| Strategy | Input | Output |
|
|
135
|
+
|----------|-------|--------|
|
|
136
|
+
| `token` | `Contact john@example.com` | `Contact [EMAIL_1]` |
|
|
137
|
+
| `mask` | `Contact john@example.com` | `Contact ****************` |
|
|
138
|
+
| `hash` | `Contact john@example.com` | `Contact [EMAIL_a1b2c3d4e5f6]` |
|
|
139
|
+
|
|
140
|
+
## Configuration
|
|
141
|
+
|
|
142
|
+
| Option | Type | Default | Description |
|
|
143
|
+
|--------|------|---------|-------------|
|
|
144
|
+
| `enabled` | `boolean` | `true` | Enable/disable the plugin |
|
|
145
|
+
| `guardrail_mode` | `string` | `"redact"` | Default action: `"redact"`, `"block"`, or `"warn"` |
|
|
146
|
+
| `redactStrategy` | `string` | `"token"` | How to redact: `"token"`, `"mask"`, or `"hash"` |
|
|
147
|
+
| `model` | `string` | `"onnx-community/gliner_large-v2.1"` | HuggingFace model path for GLiNER |
|
|
148
|
+
| `confidence_threshold` | `number` | `0.5` | Minimum confidence for GLiNER detections (0-1) |
|
|
149
|
+
| `custom_entities` | `string[]` | `[]` | Custom entity labels for zero-shot detection |
|
|
150
|
+
| `entityActions` | `object` | `{}` | Per-entity-type action overrides |
|
|
151
|
+
|
|
152
|
+
## OpenClaw Tools
|
|
153
|
+
|
|
154
|
+
### `fogclaw_scan`
|
|
155
|
+
|
|
156
|
+
Scan text for PII and custom entities. Returns detected entities with types, positions, and confidence scores.
|
|
157
|
+
|
|
158
|
+
**Parameters:**
|
|
159
|
+
- `text` (required) — text to scan
|
|
160
|
+
- `custom_labels` (optional) — additional entity labels for zero-shot detection
|
|
161
|
+
|
|
162
|
+
### `fogclaw_redact`
|
|
163
|
+
|
|
164
|
+
Scan and redact PII/custom entities from text. Returns sanitized text with entities replaced.
|
|
165
|
+
|
|
166
|
+
**Parameters:**
|
|
167
|
+
- `text` (required) — text to scan and redact
|
|
168
|
+
- `strategy` (optional) — `"token"`, `"mask"`, or `"hash"` (defaults to config)
|
|
169
|
+
- `custom_labels` (optional) — additional entity labels for zero-shot detection
|
|
170
|
+
|
|
171
|
+
## Standalone Usage
|
|
172
|
+
|
|
173
|
+
FogClaw's core can also be used outside of OpenClaw:
|
|
174
|
+
|
|
175
|
+
```typescript
|
|
176
|
+
import { Scanner, redact, loadConfig, DEFAULT_CONFIG } from "@datafog/fogclaw";
|
|
177
|
+
|
|
178
|
+
const scanner = new Scanner(DEFAULT_CONFIG);
|
|
179
|
+
await scanner.initialize();
|
|
180
|
+
|
|
181
|
+
// Scan for entities
|
|
182
|
+
const result = await scanner.scan("Contact john@example.com or call 555-123-4567");
|
|
183
|
+
console.log(result.entities);
|
|
184
|
+
// [
|
|
185
|
+
// { text: "john@example.com", label: "EMAIL", start: 8, end: 24, confidence: 1, source: "regex" },
|
|
186
|
+
// { text: "555-123-4567", label: "PHONE", start: 33, end: 45, confidence: 1, source: "regex" }
|
|
187
|
+
// ]
|
|
188
|
+
|
|
189
|
+
// Redact
|
|
190
|
+
const redacted = redact(result.text, result.entities, "token");
|
|
191
|
+
console.log(redacted.redacted_text);
|
|
192
|
+
// "Contact [EMAIL_1] or call [PHONE_1]"
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## Development
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
git clone https://github.com/DataFog/fogclaw.git
|
|
199
|
+
cd fogclaw
|
|
200
|
+
npm install
|
|
201
|
+
npm test # run tests
|
|
202
|
+
npm run build # compile TypeScript
|
|
203
|
+
npm run lint # type-check without emitting
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT
|
package/dist/config.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAmC,MAAM,YAAY,CAAC;AAKjF,eAAO,MAAM,cAAc,EAAE,aAQ5B,CAAC;AAEF,wBAAgB,UAAU,CAAC,SAAS,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,aAAa,CA8B3E"}
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/** Actions accepted for `guardrail_mode` and for every value in `entityActions`. */
const VALID_GUARDRAIL_MODES = ["redact", "block", "warn"];
/** Strategies accepted for `redactStrategy`. */
const VALID_REDACT_STRATEGIES = ["token", "mask", "hash"];
/** Baseline configuration; `loadConfig` layers caller overrides on top of it. */
export const DEFAULT_CONFIG = {
    enabled: true,
    guardrail_mode: "redact",
    redactStrategy: "token",
    model: "onnx-community/gliner_large-v2.1",
    confidence_threshold: 0.5,
    custom_entities: [],
    entityActions: {},
};
/**
 * Merge `overrides` onto `DEFAULT_CONFIG` (shallow, key by key) and validate the result.
 *
 * @param overrides Partial configuration; omitted keys fall back to the defaults.
 * @returns A new config object. Its `custom_entities` array and `entityActions`
 *   object are fresh copies, so mutating them never leaks into `DEFAULT_CONFIG`
 *   or into the caller's `overrides`.
 * @throws Error when `guardrail_mode`, `redactStrategy`, `confidence_threshold`
 *   or any `entityActions` value is not an accepted value.
 */
export function loadConfig(overrides) {
    const config = { ...DEFAULT_CONFIG, ...overrides };
    if (!VALID_GUARDRAIL_MODES.includes(config.guardrail_mode)) {
        throw new Error(`Invalid guardrail_mode "${config.guardrail_mode}". Must be one of: ${VALID_GUARDRAIL_MODES.join(", ")}`);
    }
    if (!VALID_REDACT_STRATEGIES.includes(config.redactStrategy)) {
        throw new Error(`Invalid redactStrategy "${config.redactStrategy}". Must be one of: ${VALID_REDACT_STRATEGIES.join(", ")}`);
    }
    // A bare `< 0 || > 1` test lets NaN (and non-numeric values) through because
    // every comparison against NaN is false, so check the type explicitly.
    const threshold = config.confidence_threshold;
    if (typeof threshold !== "number" || Number.isNaN(threshold) || threshold < 0 || threshold > 1) {
        throw new Error(`confidence_threshold must be between 0 and 1, got ${threshold}`);
    }
    for (const [entityType, action] of Object.entries(config.entityActions)) {
        if (!VALID_GUARDRAIL_MODES.includes(action)) {
            throw new Error(`Invalid action "${action}" for entity type "${entityType}". Must be one of: ${VALID_GUARDRAIL_MODES.join(", ")}`);
        }
    }
    // Copy the mutable containers: the spread above is shallow, so without this
    // the result would share its array/object with DEFAULT_CONFIG or `overrides`.
    return {
        ...config,
        custom_entities: Array.isArray(config.custom_entities)
            ? [...config.custom_entities]
            : config.custom_entities,
        entityActions: { ...config.entityActions },
    };
}
|
|
30
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAEA,MAAM,qBAAqB,GAAsB,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;AAC7E,MAAM,uBAAuB,GAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;AAE5E,MAAM,CAAC,MAAM,cAAc,GAAkB;IAC3C,OAAO,EAAE,IAAI;IACb,cAAc,EAAE,QAAQ;IACxB,cAAc,EAAE,OAAO;IACvB,KAAK,EAAE,kCAAkC;IACzC,oBAAoB,EAAE,GAAG;IACzB,eAAe,EAAE,EAAE;IACnB,aAAa,EAAE,EAAE;CAClB,CAAC;AAEF,MAAM,UAAU,UAAU,CAAC,SAAiC;IAC1D,MAAM,MAAM,GAAkB,EAAE,GAAG,cAAc,EAAE,GAAG,SAAS,EAAE,CAAC;IAElE,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC;QAC3D,MAAM,IAAI,KAAK,CACb,2BAA2B,MAAM,CAAC,cAAc,sBAAsB,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CACzG,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC;QAC7D,MAAM,IAAI,KAAK,CACb,2BAA2B,MAAM,CAAC,cAAc,sBAAsB,uBAAuB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC3G,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,oBAAoB,GAAG,CAAC,IAAI,MAAM,CAAC,oBAAoB,GAAG,CAAC,EAAE,CAAC;QACvE,MAAM,IAAI,KAAK,CACb,qDAAqD,MAAM,CAAC,oBAAoB,EAAE,CACnF,CAAC;IACJ,CAAC;IAED,KAAK,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC;QACxE,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC5C,MAAM,IAAI,KAAK,CACb,mBAAmB,MAAM,sBAAsB,UAAU,sBAAsB,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAClH,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { Entity } from "../types.js";
|
|
2
|
+
/**
 * Zero-shot NER engine backed by a GLiNER model (declaration only; the
 * implementation lives in the compiled `gliner.js`).
 */
export declare class GlinerEngine {
|
|
3
|
+
/** Loaded model instance; the implementation starts it as `null`. */
private model;
|
|
4
|
+
/** Model path given to the constructor; the implementation passes it as both the tokenizer path and the ONNX model path. */
private modelPath;
|
|
5
|
+
/** Threshold forwarded to the model's inference call. */
private threshold;
|
|
6
|
+
/** Labels set via `setCustomLabels`, scanned for in addition to the built-in labels. */
private customLabels;
|
|
7
|
+
/** Set to `true` once `initialize()` has completed. */
private initialized;
|
|
8
|
+
/**
 * @param modelPath Model path used when the model is loaded.
 * @param threshold Inference threshold; the implementation defaults it to 0.5.
 */
constructor(modelPath: string, threshold?: number);
|
|
9
|
+
/**
 * Loads the model. Returns immediately if already initialized; load failures
 * are rethrown as an `Error` that names the model path.
 */
initialize(): Promise<void>;
|
|
10
|
+
/** Replaces the custom label list used by later `scan` calls. */
setCustomLabels(labels: string[]): void;
|
|
11
|
+
/**
 * Runs inference over `text` with the built-in, custom and `extraLabels`
 * labels (de-duplicated). Resolves to `[]` for empty text and rejects when
 * no model has been loaded.
 */
scan(text: string, extraLabels?: string[]): Promise<Entity[]>;
|
|
12
|
+
/** Whether `initialize()` has completed. */
get isInitialized(): boolean;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=gliner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gliner.d.ts","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAc1C,qBAAa,YAAY;IACvB,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAgB;IACpC,OAAO,CAAC,WAAW,CAAS;gBAEhB,SAAS,EAAE,MAAM,EAAE,SAAS,GAAE,MAAY;IAKhD,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAuBjC,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,IAAI;IAIjC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IA+BnE,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { canonicalType } from "../types.js";
|
|
2
|
+
/** Labels every scan asks the model for, before custom and per-call labels are added. */
const DEFAULT_NER_LABELS = [
    "person",
    "organization",
    "location",
    "address",
    "date of birth",
    "medical record number",
    "account number",
    "passport number",
];
/**
 * Zero-shot named-entity engine that wraps a lazily imported `gliner` model.
 */
export class GlinerEngine {
    model = null;
    modelPath;
    threshold;
    customLabels = [];
    initialized = false;
    /**
     * @param modelPath Path/identifier used for both the tokenizer and the ONNX model.
     * @param threshold Threshold passed to every inference call (default 0.5).
     */
    constructor(modelPath, threshold = 0.5) {
        this.modelPath = modelPath;
        this.threshold = threshold;
    }
    /**
     * Import `gliner`, build the model and initialize it. A second call after a
     * successful load is a no-op.
     *
     * @throws Error naming the model path when the import or the load fails.
     */
    async initialize() {
        if (this.initialized)
            return;
        try {
            const { Gliner } = await import("gliner");
            const model = new Gliner({
                tokenizerPath: this.modelPath,
                onnxSettings: {
                    modelPath: this.modelPath,
                    executionProvider: "cpu",
                },
                maxWidth: 12,
                modelType: "gliner",
            });
            await model.initialize();
            // Publish the model only after it loaded successfully; otherwise a failed
            // load would leave a half-initialized instance that scan() would accept.
            this.model = model;
            this.initialized = true;
        }
        catch (err) {
            throw new Error(`Failed to initialize GLiNER model "${this.modelPath}": ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    /**
     * Replace the custom labels used by subsequent scans.
     * The list is copied so later mutation of the caller's array has no effect.
     */
    setCustomLabels(labels) {
        this.customLabels = [...labels];
    }
    /**
     * Run inference over `text` for the built-in, custom and per-call labels.
     *
     * @param text Text to scan; empty text short-circuits to `[]`.
     * @param extraLabels Optional labels for this call only.
     * @returns Entities with `source: "gliner"`, the label mapped through
     *   `canonicalType`, and the model's score as `confidence`.
     * @throws Error when no model has been loaded via `initialize()`.
     */
    async scan(text, extraLabels) {
        if (!text)
            return [];
        if (!this.model) {
            throw new Error("GLiNER engine not initialized. Call initialize() first.");
        }
        // A Set removes labels that appear in more than one of the three lists.
        const uniqueLabels = [
            ...new Set([
                ...DEFAULT_NER_LABELS,
                ...this.customLabels,
                ...(extraLabels ?? []),
            ]),
        ];
        const results = await this.model.inference(text, uniqueLabels, {
            threshold: this.threshold,
        });
        return results.map((r) => ({
            text: r.text,
            label: canonicalType(r.label),
            start: r.start,
            end: r.end,
            confidence: r.score,
            source: "gliner",
        }));
    }
    /** Whether `initialize()` has completed successfully. */
    get isInitialized() {
        return this.initialized;
    }
}
|
|
75
|
+
//# sourceMappingURL=gliner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gliner.js","sourceRoot":"","sources":["../../src/engines/gliner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,MAAM,kBAAkB,GAAG;IACzB,QAAQ;IACR,cAAc;IACd,UAAU;IACV,SAAS;IACT,eAAe;IACf,uBAAuB;IACvB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,KAAK,GAAQ,IAAI,CAAC;IAClB,SAAS,CAAS;IAClB,SAAS,CAAS;IAClB,YAAY,GAAa,EAAE,CAAC;IAC5B,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAAiB,EAAE,YAAoB,GAAG;QACpD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,IAAI,CAAC;YACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC1C,IAAI,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC;gBACtB,aAAa,EAAE,IAAI,CAAC,SAAS;gBAC7B,YAAY,EAAE;oBACZ,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,iBAAiB,EAAE,KAAK;iBACzB;gBACD,QAAQ,EAAE,EAAE;gBACZ,SAAS,EAAE,QAAQ;aACpB,CAAC,CAAC;YACH,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC9B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CACb,sCAAsC,IAAI,CAAC,SAAS,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC7G,CAAC;QACJ,CAAC;IACH,CAAC;IAED,eAAe,CAAC,MAAgB;QAC9B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAY,EAAE,WAAsB;QAC7C,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,MAAM,MAAM,GAAG;YACb,GAAG,kBAAkB;YACrB,GAAG,IAAI,CAAC,YAAY;YACpB,GAAG,CAAC,WAAW,IAAI,EAAE,CAAC;SACvB,CAAC;QAEF,qBAAqB;QACrB,MAAM,YAAY,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAE1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE;YAC7D,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAChB,CAAC,CAA6E,EAAE,EAAE,CAAC,CAAC;YAClF,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;YAC7B,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;YACV,UAAU,EAAE,CAAC,CAAC,KAAK;YACnB,MAAM,EAAE,QAAiB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,IAAI,aAAa;
QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"regex.d.ts","sourceRoot":"","sources":["../../src/engines/regex.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AA4C1C,qBAAa,WAAW;IACtB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE;CAyB7B"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/** Label/pattern pairs for structured PII; every pattern carries the global flag. */
const PATTERNS = [
    {
        label: "EMAIL",
        pattern: /(?<![A-Za-z0-9._%+\-@])(?![A-Za-z_]{2,20}=)[A-Za-z0-9!#$%&*+\-/=^_`{|}~][A-Za-z0-9!#$%&'*+\-/=?^_`{|}~.]*@(?:\.?[A-Za-z0-9-]+\.)+[A-Za-z]{2,}(?=$|[^A-Za-z])/gi,
    },
    {
        label: "PHONE",
        pattern: /(?<![A-Za-z0-9])(?:(?:(?:\+?1)[-.\s]?)?(?:\(\d{3}\)|\d{3})[-.\s]?\d{3}[-.\s]?\d{4}|\+\d{1,3}[\s\-.]?\d{1,4}(?:[\s\-.]?\d{2,4}){2,3})(?![-A-Za-z0-9])/gi,
    },
    {
        label: "SSN",
        pattern: /(?<!\d)(?:(?!000|666)\d{3}-(?!00)\d{2}-(?!0000)\d{4}|(?!000|666)\d{3}(?!00)\d{2}(?!0000)\d{4})(?!\d)/g,
    },
    {
        label: "CREDIT_CARD",
        pattern: /\b(?:4\d{12}(?:\d{3})?|5[1-5]\d{14}|3[47]\d{13}|(?:(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2})[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})|(?:3[47]\d{2}[-\s]?\d{6}[-\s]?\d{5}))\b/g,
    },
    {
        label: "IP_ADDRESS",
        pattern: /\b(?:(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d)\.(?:25[0-5]|2[0-4]\d|1?\d?\d))\b/g,
    },
    {
        label: "DATE",
        pattern: /\b(?:(?:0?[1-9]|1[0-2])[/-](?:0?[1-9]|[12]\d|3[01])[/-](?:\d{2}|\d{4})|(?:\d{4})-(?:0?[1-9]|1[0-2])-(?:0?[1-9]|[12]\d|3[01])|(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s+(?:0?[1-9]|[12]\d|3[01]),\s+(?:19|20)\d{2})\b/gi,
    },
    {
        label: "ZIP_CODE",
        pattern: /\b\d{5}(?:-\d{4})?\b/g,
    },
];
/**
 * Append one entity to `sink` for every match of `pattern` in `text`.
 * The shared regex is rewound first because global regexes keep `lastIndex`
 * between calls.
 */
function collectMatches(sink, label, pattern, text) {
    pattern.lastIndex = 0;
    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
        const [matched] = hit;
        const begin = hit.index;
        sink.push({
            text: matched,
            label,
            start: begin,
            end: begin + matched.length,
            confidence: 1.0,
            source: "regex",
        });
    }
}
/** Orders entities by start offset, breaking ties by end offset. */
function byPosition(left, right) {
    if (left.start !== right.start) {
        return left.start - right.start;
    }
    return left.end - right.end;
}
/**
 * Pattern-based detector for structured PII.
 */
export class RegexEngine {
    /**
     * Run every entry of `PATTERNS` over `text`.
     *
     * @param text Text to scan.
     * @returns All matches as entities with confidence 1.0 and source "regex",
     *   sorted by position.
     */
    scan(text) {
        const found = [];
        PATTERNS.forEach((entry) => collectMatches(found, entry.label, entry.pattern, text));
        found.sort(byPosition);
        return found;
    }
}
|
|
54
|
+
//# sourceMappingURL=regex.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"regex.js","sourceRoot":"","sources":["../../src/engines/regex.ts"],"names":[],"mappings":"AAOA,MAAM,QAAQ,GAAiB;IAC7B;QACE,KAAK,EAAE,OAAO;QACd,OAAO,EACL,gKAAgK;KACnK;IACD;QACE,KAAK,EAAE,OAAO;QACd,OAAO,EACL,wJAAwJ;KAC3J;IACD;QACE,KAAK,EAAE,KAAK;QACZ,OAAO,EACL,uGAAuG;KAC1G;IACD;QACE,KAAK,EAAE,aAAa;QACpB,OAAO,EACL,iKAAiK;KACpK;IACD;QACE,KAAK,EAAE,YAAY;QACnB,OAAO,EACL,iIAAiI;KACpI;IACD;QACE,KAAK,EAAE,MAAM;QACb,OAAO,EACL,sUAAsU;KACzU;IACD;QACE,KAAK,EAAE,UAAU;QACjB,OAAO,EAAE,uBAAuB;KACjC;CACF,CAAC;AAEF,MAAM,OAAO,WAAW;IACtB,IAAI,CAAC,IAAY;QACf,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1C,2DAA2D;YAC3D,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;YAEtB,IAAI,KAA6B,CAAC;YAClC,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;gBAC7C,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;oBACd,KAAK;oBACL,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;oBAClC,UAAU,EAAE,GAAG;oBACf,MAAM,EAAE,OAAO;iBAChB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;QAE5D,OAAO,QAAQ,CAAC;IAClB,CAAC;CACF"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export { Scanner } from "./scanner.js";
|
|
2
|
+
export { redact } from "./redactor.js";
|
|
3
|
+
export { loadConfig, DEFAULT_CONFIG } from "./config.js";
|
|
4
|
+
export type { Entity, FogClawConfig, ScanResult, RedactResult, RedactStrategy, GuardrailAction, } from "./types.js";
|
|
5
|
+
/**
|
|
6
|
+
* OpenClaw plugin definition.
|
|
7
|
+
*
|
|
8
|
+
* Registers:
|
|
9
|
+
* - `before_agent_start` hook for automatic PII guardrail
|
|
10
|
+
* - `fogclaw_scan` tool for on-demand entity detection
|
|
11
|
+
* - `fogclaw_redact` tool for on-demand redaction
|
|
12
|
+
*/
|
|
13
|
+
declare const fogclaw: {
|
|
14
|
+
/** Plugin identifier; the README's inline check expects it to print as `fogclaw`. */
id: string;
|
|
15
|
+
/** Display name; the README's inline check expects it to print as `FogClaw`. */
name: string;
|
|
16
|
+
/**
 * Registration entry point called with the host's plugin API object.
 * NOTE(review): `api` is typed `any`, so its shape cannot be confirmed from this declaration.
 */
register(api: any): void;
|
|
17
|
+
};
|
|
18
|
+
export default fogclaw;
|
|
19
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AACvC,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AACzD,YAAY,EACV,MAAM,EACN,aAAa,EACb,UAAU,EACV,YAAY,EACZ,cAAc,EACd,eAAe,GAChB,MAAM,YAAY,CAAC;AAEpB;;;;;;;GAOG;AACH,QAAA,MAAM,OAAO;;;kBAIG,GAAG;CA+LlB,CAAC;AAEF,eAAe,OAAO,CAAC"}
|