pseudonym-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +354 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +40 -0
- package/dist/cli.js.map +1 -0
- package/dist/config/manager.d.ts +40 -0
- package/dist/config/manager.d.ts.map +1 -0
- package/dist/config/manager.js +75 -0
- package/dist/config/manager.js.map +1 -0
- package/dist/core/engine.d.ts +32 -0
- package/dist/core/engine.d.ts.map +1 -0
- package/dist/core/engine.js +110 -0
- package/dist/core/engine.js.map +1 -0
- package/dist/core/mapping-store.d.ts +24 -0
- package/dist/core/mapping-store.d.ts.map +1 -0
- package/dist/core/mapping-store.js +47 -0
- package/dist/core/mapping-store.js.map +1 -0
- package/dist/core/ollama-client.d.ts +21 -0
- package/dist/core/ollama-client.d.ts.map +1 -0
- package/dist/core/ollama-client.js +67 -0
- package/dist/core/ollama-client.js.map +1 -0
- package/dist/languages/en/rules.d.ts +3 -0
- package/dist/languages/en/rules.d.ts.map +1 -0
- package/dist/languages/en/rules.js +69 -0
- package/dist/languages/en/rules.js.map +1 -0
- package/dist/languages/pl/rules.d.ts +3 -0
- package/dist/languages/pl/rules.d.ts.map +1 -0
- package/dist/languages/pl/rules.js +44 -0
- package/dist/languages/pl/rules.js.map +1 -0
- package/dist/languages/types.d.ts +12 -0
- package/dist/languages/types.d.ts.map +1 -0
- package/dist/languages/types.js +2 -0
- package/dist/languages/types.js.map +1 -0
- package/dist/mcp/server.d.ts +4 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +93 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/setup/check-ollama.d.ts +15 -0
- package/dist/setup/check-ollama.d.ts.map +1 -0
- package/dist/setup/check-ollama.js +58 -0
- package/dist/setup/check-ollama.js.map +1 -0
- package/mcp-config.json +8 -0
- package/package.json +103 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Adrian Wolczuk
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
# pseudonym-mcp
|
|
2
|
+
|
|
3
|
+
Local privacy proxy for LLMs — pseudonymizes sensitive data before it reaches the cloud, restores it on the way back.
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/pseudonym-mcp)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
[](#)
|
|
8
|
+
[GDPR & AI compliance](#gdpr--ai-compliance)
|
|
9
|
+
[](#)
|
|
10
|
+
[](#)
|
|
11
|
+
|
|
12
|
+
Sits between your application and any cloud LLM (Claude, GPT-4, Gemini…). Replaces PII with opaque tokens locally before the prompt ever leaves your machine, then seamlessly restores original values in the response — so users never see the tags.
|
|
13
|
+
|
|
14
|
+
## What you get
|
|
15
|
+
|
|
16
|
+
- **Multi-language PII detection**: Built-in support for English (SSN, credit cards, US phone) and Polish (PESEL, IBAN, Polish phone). Extensible to any language.
|
|
17
|
+
- **Hybrid NER engine**: Regex for structured PII (SSN, credit cards, IBAN, email, phone) + local Ollama LLM for unstructured entities (names, organizations).
|
|
18
|
+
- **Zero-trust architecture**: All detection and substitution happens on your machine. No PII reaches a third-party API.
|
|
19
|
+
- **Session-keyed mapping store**: Tokens like `[PERSON:1]` map back to originals in an isolated session identified by a `session_id`. Reusing the same `session_id` across multiple round-trips preserves token coherence.
|
|
20
|
+
- **Auto-unmask**: Optional mode that automatically restores tokens in the LLM's response before returning it to the user.
|
|
21
|
+
- **Flexible engines**: Run `regex` only (no Ollama required), `llm` only, or `hybrid` (default).
|
|
22
|
+
- **Strict validation**: SSN area-number validation, credit card Luhn checksum, PESEL checksum — all configurable.
|
|
23
|
+
- **Graceful degradation**: If Ollama is unavailable, the regex phase still runs and no exception is thrown.
|
|
24
|
+
- **MCP-native**: Works with Claude Code, Claude Desktop, Cursor — any MCP-compatible client.
|
|
25
|
+
|
|
26
|
+
## ❌ Without / ✅ With
|
|
27
|
+
|
|
28
|
+
❌ **Without pseudonym-mcp:**
|
|
29
|
+
|
|
30
|
+
- Prompt: `"John Smith, SSN 123-45-6789, card 4111 1111 1111 1111"` → sent verbatim to OpenAI / Anthropic servers
|
|
31
|
+
- Every name, ID number, and credit card in your prompt is processed and potentially logged by the LLM provider
|
|
32
|
+
- A data breach at the provider's end exposes your users' real PII
|
|
33
|
+
- Sending personal data to a US-based LLM provider without explicit safeguards may violate GDPR Article 44 (international data transfers)
|
|
34
|
+
|
|
35
|
+
✅ **With pseudonym-mcp:**
|
|
36
|
+
|
|
37
|
+
- The same prompt becomes `"[PERSON:1], SSN [SSN:1], card [CREDIT_CARD:1]"` before it leaves your machine
|
|
38
|
+
- The LLM reasons about the structure and content without ever seeing the real values
|
|
39
|
+
- The response is automatically de-tokenized locally before reaching the user
|
|
40
|
+
- Your GDPR DPA can truthfully state: *personal data never left the local environment*
|
|
41
|
+
|
|
42
|
+
## GDPR & AI Compliance
|
|
43
|
+
|
|
44
|
+
pseudonym-mcp directly addresses the regulatory challenges of using cloud AI in data-sensitive contexts.
|
|
45
|
+
|
|
46
|
+
### Why this matters
|
|
47
|
+
|
|
48
|
+
The EU **General Data Protection Regulation (GDPR)** classifies names, national ID numbers (like SSN or PESEL), bank account numbers (IBAN), email addresses, credit card numbers, and phone numbers as **personal data** under Article 4(1). Sending this data to a cloud LLM provider constitutes **processing** under Article 4(2) and triggers a range of obligations:
|
|
49
|
+
|
|
50
|
+
| GDPR Article | Obligation | How pseudonym-mcp helps |
|
|
51
|
+
|---|---|---|
|
|
52
|
+
| Art. 5(1)(c) | **Data minimisation** — only necessary data should be processed | Strips PII before transmission; the LLM receives only what it needs to reason |
|
|
53
|
+
| Art. 25 | **Privacy by design and by default** | Pseudonymization layer is built into the MCP transport, not bolted on |
|
|
54
|
+
| Art. 32 | **Security of processing** — appropriate technical measures | Local token substitution is a recognized technical measure under Recital 83 |
|
|
55
|
+
| Art. 44 | **Transfers to third countries** — requires safeguards | If no personal data is transferred, Art. 44 restrictions do not apply |
|
|
56
|
+
| Art. 4(5) | **Pseudonymisation** — explicitly recognized as a protective measure | Tokens are opaque; re-identification requires access to the local mapping store |
|
|
57
|
+
|
|
58
|
+
> **Note:** Pseudonymisation under GDPR (Art. 4(5)) does not equal anonymisation — the data is still personal data in your system. However, it substantially reduces risk and demonstrates compliance with the accountability principle (Art. 5(2)).
|
|
59
|
+
|
|
60
|
+
### AI Act alignment
|
|
61
|
+
|
|
62
|
+
The EU **AI Act** (in force from 2024) places additional requirements on high-risk AI systems that process personal data. Using pseudonym-mcp as an intermediary layer:
|
|
63
|
+
|
|
64
|
+
- Reduces the risk classification of downstream LLM usage by ensuring the model never processes identifiable natural persons' data directly.
|
|
65
|
+
- Supports documentation requirements for AI system transparency and human oversight.
|
|
66
|
+
- Aligns with the principle of **technical robustness and safety** (Art. 15) by limiting PII exposure surface.
|
|
67
|
+
|
|
68
|
+
### US & international applicability
|
|
69
|
+
|
|
70
|
+
While GDPR originates in the EU, pseudonym-mcp is equally relevant for:
|
|
71
|
+
|
|
72
|
+
- **CCPA / CPRA** (California) — consumers have the right to know what personal information is collected; minimising data sent to third-party LLMs reduces disclosure surface.
|
|
73
|
+
- **HIPAA** (US healthcare) — PHI (Protected Health Information) must not be sent to non-BAA cloud providers; local pseudonymization allows LLM use without a BAA.
|
|
74
|
+
- **PCI DSS** (payment industry) — credit card numbers (PAN) must never be stored or transmitted in the clear; masking before LLM transit satisfies requirement 3.4.
|
|
75
|
+
- **SOC 2** — data handling controls are strengthened by demonstrating that PII is replaced before leaving the trust boundary.
|
|
76
|
+
- **PIPEDA** (Canada), **LGPD** (Brazil), **POPIA** (South Africa) — all require appropriate safeguards for cross-border personal data transfers.
|
|
77
|
+
|
|
78
|
+
### Sector-specific applicability
|
|
79
|
+
|
|
80
|
+
| Sector | Relevant regulation | PII types commonly handled |
|
|
81
|
+
|---|---|---|
|
|
82
|
+
| Healthcare | GDPR + HIPAA + national health data laws | Patient names, SSN, diagnoses |
|
|
83
|
+
| Banking & Finance | GDPR + PCI DSS + PSD2 + DORA | Credit cards, IBAN, SSN, PESEL |
|
|
84
|
+
| HR & Recruitment | GDPR Art. 9 (special categories) | Names, national IDs, contact details |
|
|
85
|
+
| Legal | GDPR + attorney-client privilege | Names, case numbers, personal details |
|
|
86
|
+
| Insurance | GDPR + Solvency II | Personal identifiers, health data |
|
|
87
|
+
| Public Sector (US) | CCPA + state privacy laws | SSN, driver's license numbers |
|
|
88
|
+
| Public Sector (PL) | GDPR + UODO + KRI | PESEL, NIP, REGON |
|
|
89
|
+
|
|
90
|
+
## How it works
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
Your App / Claude Desktop
|
|
94
|
+
│
|
|
95
|
+
│ prompt with PII
|
|
96
|
+
▼
|
|
97
|
+
┌─────────────────────────┐
|
|
98
|
+
│ pseudonym-mcp │
|
|
99
|
+
│ │
|
|
100
|
+
│ Phase 1: Regex NER │ ← SSN, CREDIT_CARD, EMAIL, PHONE (en)
|
|
101
|
+
│ │ ← PESEL, IBAN, EMAIL, PHONE (pl)
|
|
102
|
+
│ Phase 2: Ollama NER │ ← PERSON, ORG (local LLM)
|
|
103
|
+
│ MappingStore (session) │ ← [TAG:N] ↔ original value
|
|
104
|
+
└────────────┬────────────┘
|
|
105
|
+
│ sanitized prompt (no PII)
|
|
106
|
+
▼
|
|
107
|
+
Cloud LLM API
|
|
108
|
+
(Claude / GPT-4 / Gemini)
|
|
109
|
+
│
|
|
110
|
+
│ response with [TAG:N] tokens
|
|
111
|
+
▼
|
|
112
|
+
┌─────────────────────────┐
|
|
113
|
+
│ pseudonym-mcp │
|
|
114
|
+
│ unmask_text / revert │ ← tokens → originals
|
|
115
|
+
└────────────┬────────────┘
|
|
116
|
+
│ restored response
|
|
117
|
+
▼
|
|
118
|
+
Your App / User
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Token format
|
|
122
|
+
|
|
123
|
+
```
|
|
124
|
+
English (--lang en, default):
|
|
125
|
+
[PERSON:1] John Smith
|
|
126
|
+
[SSN:1] 123-45-6789
|
|
127
|
+
[CREDIT_CARD:1] 4111 1111 1111 1111
|
|
128
|
+
[ORG:1] Acme Corp
|
|
129
|
+
[EMAIL:1] john@acme.com
|
|
130
|
+
[PHONE:1] (555) 123-4567
|
|
131
|
+
|
|
132
|
+
Polish (--lang pl):
|
|
133
|
+
[PERSON:1] Jan Kowalski
|
|
134
|
+
[PESEL:1] 90010112318
|
|
135
|
+
[ORG:1] Auto-Lux
|
|
136
|
+
[IBAN:1] PL27114020040000300201355387
|
|
137
|
+
[EMAIL:1] jan@example.pl
|
|
138
|
+
[PHONE:1] +48 123 456 789
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
The mapping is stored in a session-scoped in-memory store. Each `mask_text` call returns a `session_id`; pass it back to `unmask_text` to restore originals.
|
|
142
|
+
|
|
143
|
+
## Quick Start
|
|
144
|
+
|
|
145
|
+
**Step 1** — Install the package:
|
|
146
|
+
|
|
147
|
+
```sh
|
|
148
|
+
npm install -g pseudonym-mcp
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
**Step 2** — (Optional) Pull an Ollama model for full hybrid NER:
|
|
152
|
+
|
|
153
|
+
```sh
|
|
154
|
+
ollama pull llama3
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Skip this step if you only need regex-based masking (`--engines regex`).
|
|
158
|
+
|
|
159
|
+
**Step 3** — Add to your MCP client (example for Claude Code):
|
|
160
|
+
|
|
161
|
+
```sh
|
|
162
|
+
claude mcp add pseudonym-mcp -- pseudonym-mcp --engines hybrid
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Restart your client. The `mask_text` and `unmask_text` tools appear automatically.
|
|
166
|
+
|
|
167
|
+
## Available Tools
|
|
168
|
+
|
|
169
|
+
| Tool | What it does | Example prompt |
|
|
170
|
+
|---|---|---|
|
|
171
|
+
| `mask_text` | Pseudonymize PII in text. Returns `masked_text` + `session_id`. | *"Use mask_text on this customer letter before summarizing it"* |
|
|
172
|
+
| `unmask_text` | Restore original values from a session. Pass the `session_id` returned by `mask_text`. | *"Use unmask_text with session_id X to restore the response"* |
|
|
173
|
+
|
|
174
|
+
### `mask_text` input
|
|
175
|
+
|
|
176
|
+
```json
|
|
177
|
+
{
|
|
178
|
+
"text": "John Smith (SSN: 123-45-6789) works at Acme Corp.",
|
|
179
|
+
"session_id": "optional — omit to create a new session"
|
|
180
|
+
}
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### `mask_text` output
|
|
184
|
+
|
|
185
|
+
```json
|
|
186
|
+
{
|
|
187
|
+
"session_id": "3f2a1b...",
|
|
188
|
+
"masked_text": "[PERSON:1] (SSN: [SSN:1]) works at [ORG:1].",
|
|
189
|
+
"auto_unmask": false
|
|
190
|
+
}
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### `unmask_text` input
|
|
194
|
+
|
|
195
|
+
```json
|
|
196
|
+
{
|
|
197
|
+
"text": "The case concerns [PERSON:1] at [ORG:1].",
|
|
198
|
+
"session_id": "3f2a1b..."
|
|
199
|
+
}
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Configuration
|
|
203
|
+
|
|
204
|
+
### `mcp-config.json` (read from the current working directory, or from the path given via `--config`)
|
|
205
|
+
|
|
206
|
+
```json
|
|
207
|
+
{
|
|
208
|
+
"lang": "en",
|
|
209
|
+
"engines": "hybrid",
|
|
210
|
+
"ollamaModel": "llama3",
|
|
211
|
+
"ollamaBaseUrl": "http://localhost:11434",
|
|
212
|
+
"autoUnmask": false,
|
|
213
|
+
"strictValidation": true
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
| Key | Values | Default | Description |
|
|
218
|
+
|---|---|---|---|
|
|
219
|
+
| `lang` | `en`, `pl` | `en` | Language pack for regex rules |
|
|
220
|
+
| `engines` | `regex` \| `llm` \| `hybrid` | `hybrid` | Which NER engines to run |
|
|
221
|
+
| `ollamaModel` | any Ollama model name | `llama3` | Local LLM for entity detection |
|
|
222
|
+
| `ollamaBaseUrl` | URL | `http://localhost:11434` | Ollama API endpoint |
|
|
223
|
+
| `autoUnmask` | `true` \| `false` | `false` | Auto-restore tokens in LLM responses |
|
|
224
|
+
| `strictValidation` | `true` \| `false` | `true` | Enable checksum / format validation (SSN area check, Luhn for cards, PESEL checksum) |
|
|
225
|
+
|
|
226
|
+
### CLI flags
|
|
227
|
+
|
|
228
|
+
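Most config keys can be overridden at startup with CLI flags, which take the highest priority (`strictValidation` has no CLI flag and can only be set in the config file):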
|
|
229
|
+
|
|
230
|
+
```sh
|
|
231
|
+
pseudonym-mcp --lang en --engines regex --ollama-model llama3 --auto-unmask
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
| Flag | Description |
|
|
235
|
+
|---|---|
|
|
236
|
+
| `--lang` | Language for regex rules: `en` or `pl` (default: `en`) |
|
|
237
|
+
| `--engines` | `regex`, `llm`, or `hybrid` (default: `hybrid`) |
|
|
238
|
+
| `--ollama-model` | Ollama model to use for NER |
|
|
239
|
+
| `--ollama-base-url` | Ollama base URL |
|
|
240
|
+
| `--config` | Path to a custom JSON config file |
|
|
241
|
+
| `--auto-unmask` | Enable automatic response de-tokenization |
|
|
242
|
+
|
|
243
|
+
### Claude Code
|
|
244
|
+
|
|
245
|
+
```sh
|
|
246
|
+
claude mcp add pseudonym-mcp -- pseudonym-mcp --engines hybrid
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Claude Desktop
|
|
250
|
+
|
|
251
|
+
Edit `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS; on Windows the file is `%APPDATA%\Claude\claude_desktop_config.json`):
|
|
252
|
+
|
|
253
|
+
```json
|
|
254
|
+
{
|
|
255
|
+
"mcpServers": {
|
|
256
|
+
"pseudonym-mcp": {
|
|
257
|
+
"command": "pseudonym-mcp",
|
|
258
|
+
"args": ["--engines", "hybrid"]
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### Cursor
|
|
265
|
+
|
|
266
|
+
Add to `~/.cursor/mcp.json`:
|
|
267
|
+
|
|
268
|
+
```json
|
|
269
|
+
{
|
|
270
|
+
"mcpServers": {
|
|
271
|
+
"pseudonym-mcp": {
|
|
272
|
+
"command": "pseudonym-mcp",
|
|
273
|
+
"args": ["--engines", "regex"]
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
## Supported PII types
|
|
280
|
+
|
|
281
|
+
### English (`--lang en`, default)
|
|
282
|
+
|
|
283
|
+
| Tag | Pattern | Validation |
|
|
284
|
+
|---|---|---|
|
|
285
|
+
| `SSN` | `XXX-XX-XXXX` (US Social Security Number) | Area number check (rejects 000, 666, 900+) |
|
|
286
|
+
| `CREDIT_CARD` | 13–19 digits (Visa, Mastercard, Amex, Discover) | Luhn checksum |
|
|
287
|
+
| `EMAIL` | RFC 5321-compatible | Format match |
|
|
288
|
+
| `PHONE` | `+1 (XXX) XXX-XXXX`, `XXX-XXX-XXXX`, `XXX.XXX.XXXX` | Format match |
|
|
289
|
+
| `PERSON` | Full names | Ollama NER (hybrid / llm engines) |
|
|
290
|
+
| `ORG` | Company / organization names | Ollama NER (hybrid / llm engines) |
|
|
291
|
+
|
|
292
|
+
### Polish (`--lang pl`)
|
|
293
|
+
|
|
294
|
+
| Tag | Pattern | Validation |
|
|
295
|
+
|---|---|---|
|
|
296
|
+
| `PESEL` | 11-digit national ID | Full checksum (weights `[1,3,7,9,1,3,7,9,1,3]`) |
|
|
297
|
+
| `IBAN` | `PL` + 26 digits, compact or spaced | Format match |
|
|
298
|
+
| `EMAIL` | RFC 5321-compatible | Format match |
|
|
299
|
+
| `PHONE` | `+48` / `0048` prefix, 9-digit mobile, landline `(XX) XXX-XX-XX` | Format match |
|
|
300
|
+
| `PERSON` | Full names | Ollama NER (hybrid / llm engines) |
|
|
301
|
+
| `ORG` | Company / organization names | Ollama NER (hybrid / llm engines) |
|
|
302
|
+
|
|
303
|
+
## Engine modes
|
|
304
|
+
|
|
305
|
+
| Mode | Requires Ollama | Detects structured PII | Detects names / orgs |
|
|
306
|
+
|---|---|---|---|
|
|
307
|
+
| `regex` | No | Yes | No |
|
|
308
|
+
| `llm` | Yes | No | Yes |
|
|
309
|
+
| `hybrid` (default) | Yes (graceful fallback) | Yes | Yes |
|
|
310
|
+
|
|
311
|
+
In `hybrid` mode, Ollama runs after the regex pass, so the local LLM receives text in which structured PII has already been replaced by tokens and never sees those raw values. If Ollama is unreachable, the server logs a warning to stderr and returns the regex-only masked text — no crash, no hang.
|
|
312
|
+
|
|
313
|
+
## Privacy & Security notes
|
|
314
|
+
|
|
315
|
+
- **No telemetry.** pseudonym-mcp makes no network requests except to the Ollama instance you configure (local by default). Communication with the MCP client happens over the local stdio transport, not over the network.
|
|
316
|
+
- **In-memory only.** The mapping store is never written to disk. Sessions are scoped to the server process lifetime.
|
|
317
|
+
- **Idempotent tokens.** The same original value always maps to the same token within a session (`[PERSON:1]` will not become `[PERSON:2]` for the same name on a second occurrence), preserving semantic coherence in LLM reasoning.
|
|
318
|
+
- **No model training.** The local Ollama model operates entirely offline. Your data is not used to train any model.
|
|
319
|
+
- **Strict validation by default.** Invalid SSNs (area 000/666/900+), failed-Luhn credit card numbers, and invalid-checksum PESELs are not masked, preventing false positives from OCR errors or random digit sequences.
|
|
320
|
+
|
|
321
|
+
## Development
|
|
322
|
+
|
|
323
|
+
```sh
|
|
324
|
+
git clone https://github.com/woladi/pseudonym-mcp
|
|
325
|
+
cd pseudonym-mcp
|
|
326
|
+
npm install
|
|
327
|
+
npm run build # tsc compile
|
|
328
|
+
npm test # vitest (77 tests, no Ollama required)
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
The test suite runs fully offline — Ollama calls are injected via constructor and mocked in all tests. No live LLM required.
|
|
332
|
+
|
|
333
|
+
### Adding a new language pack
|
|
334
|
+
|
|
335
|
+
1. Create `src/languages/<lang>/rules.ts`
|
|
336
|
+
2. Export an object that implements `LanguageRules` from `src/languages/types.ts`
|
|
337
|
+
3. Register it in the `LANGUAGE_MAP` in `src/core/engine.ts`
|
|
338
|
+
4. Pass `--lang <lang>` at startup
|
|
339
|
+
|
|
340
|
+
Each language pack defines an array of `PatternDef` entries with a `tag`, `regex`, and optional `validate` callback. See `src/languages/en/rules.ts` and `src/languages/pl/rules.ts` for examples.
|
|
341
|
+
|
|
342
|
+
## Contributing
|
|
343
|
+
|
|
344
|
+
Contributions are welcome. Please follow [Conventional Commits](https://www.conventionalcommits.org/) for commit messages — this project uses `release-it` with `@release-it/conventional-changelog` to automate releases.
|
|
345
|
+
|
|
346
|
+
Language pack contributions are especially welcome — German (Personalausweis, Steuer-ID), French (NIR, SIRET), Spanish (DNI/NIE) and others would significantly expand the tool's usefulness.
|
|
347
|
+
|
|
348
|
+
## Keyword index
|
|
349
|
+
|
|
350
|
+
> For discoverability: **AI privacy**, **LLM data privacy**, **PII masking**, **PII redaction**, **PII detection**, **data pseudonymization**, **GDPR LLM compliance**, **GDPR AI**, **EU AI Act**, **CCPA compliance**, **HIPAA AI**, **PCI DSS tokenization**, **SOC 2 data handling**, **personal data protection**, **sensitive data scrubbing**, **NER anonymization**, **named entity recognition privacy**, **Claude privacy layer**, **MCP privacy proxy**, **local AI processing**, **on-premise AI**, **zero-trust AI**, **data minimisation**, **privacy by design**, **SSN masking**, **credit card masking**, **Luhn validation**, **PESEL masking**, **Polish PII**, **RODO**, **UODO compliance**, **healthcare AI privacy**, **financial data redaction**, **PSD2 privacy**, **tokenization NLP**, **prompt sanitization**, **context window privacy**, **offline NER**, **Ollama privacy**, **local LLM privacy**, **cross-border data transfer**, **data protection by design**, **PIPEDA**, **LGPD**, **POPIA**.
|
|
351
|
+
|
|
352
|
+
## License
|
|
353
|
+
|
|
354
|
+
MIT — Adrian Wolczuk
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from 'commander';
|
|
3
|
+
import { ConfigManager } from './config/manager.js';
|
|
4
|
+
import { printOllamaStatus } from './setup/check-ollama.js';
|
|
5
|
+
import { startServer } from './mcp/server.js';
|
|
6
|
+
/** Engine modes accepted by `--engines` and by the `engines` config key. */
const VALID_ENGINES = ['regex', 'llm', 'hybrid'];
const program = new Command();
program
    .name('pseudonym-mcp')
    .description('MCP server that pseudonymizes sensitive data locally before it reaches a cloud LLM')
    .version('0.1.0')
    // Deliberately no commander-level default values. ConfigManager treats every
    // defined CLI value as an explicit override, so a commander default would
    // always win over mcp-config.json and make the file's values unreachable.
    // The built-in defaults live in ConfigManager; they are only repeated in the
    // help text here.
    .option('--lang <lang>', 'Language for regex rules: en | pl (default: en)')
    .option('--engines <mode>', 'Processing engines: regex | llm | hybrid (default: hybrid)')
    .option('--ollama-model <model>', 'Ollama model for LLM NER (default: llama3)')
    .option('--ollama-base-url <url>', 'Ollama base URL (default: http://localhost:11434)')
    .option('--config <path>', 'Path to a JSON config file (default: ./mcp-config.json)')
    .option('--auto-unmask', 'Automatically unmask tokens in LLM responses')
    .action(async (opts) => {
        // An unrecognised --engines value is reported and then ignored, so the
        // config file (or the built-in default) decides instead of the typo
        // being silently turned into "hybrid".
        let engines = opts.engines;
        if (engines !== undefined && !VALID_ENGINES.includes(engines)) {
            process.stderr.write(`Warning: unknown --engines value "${engines}" (expected regex | llm | hybrid); ignoring it\n`);
            engines = undefined;
        }
        // Flags that were not given stay undefined and therefore do not
        // override the config-file layer.
        ConfigManager.init({
            lang: opts.lang,
            engines,
            ollamaModel: opts.ollamaModel,
            ollamaBaseUrl: opts.ollamaBaseUrl,
            config: opts.config,
            autoUnmask: opts.autoUnmask,
        });
        const cfg = ConfigManager.getInstance().get();
        // Config-file values are now effective, so reject an invalid engine mode
        // before the server starts. The rejection reaches the catch handler below.
        if (!VALID_ENGINES.includes(cfg.engines)) {
            throw new Error(`invalid "engines" value ${JSON.stringify(cfg.engines)} (expected regex | llm | hybrid)`);
        }
        // Only the modes that use the LLM need the Ollama status report.
        if (cfg.engines === 'hybrid' || cfg.engines === 'llm') {
            await printOllamaStatus(cfg.ollamaBaseUrl, cfg.ollamaModel);
        }
        await startServer();
    });
// Report any startup failure on stderr and exit non-zero.
program.parseAsync(process.argv).catch((err) => {
    process.stderr.write(`Fatal: ${String(err)}\n`);
    process.exit(1);
});
|
|
40
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EAAE,aAAa,EAAmB,MAAM,qBAAqB,CAAA;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAA;AAC3D,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAA;AAE7C,MAAM,aAAa,GAAiB,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;AAE9D,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAA;AAE7B,OAAO;KACJ,IAAI,CAAC,eAAe,CAAC;KACrB,WAAW,CACV,oFAAoF,CACrF;KACA,OAAO,CAAC,OAAO,CAAC;KAChB,MAAM,CAAC,eAAe,EAAE,mCAAmC,EAAE,IAAI,CAAC;KAClE,MAAM,CACL,kBAAkB,EAClB,0CAA0C,EAC1C,QAAQ,CACT;KACA,MAAM,CAAC,wBAAwB,EAAE,0BAA0B,EAAE,QAAQ,CAAC;KACtE,MAAM,CACL,yBAAyB,EACzB,iBAAiB,EACjB,wBAAwB,CACzB;KACA,MAAM,CAAC,iBAAiB,EAAE,yDAAyD,CAAC;KACpF,MAAM,CAAC,eAAe,EAAE,8CAA8C,EAAE,KAAK,CAAC;KAC9E,MAAM,CACL,KAAK,EAAE,IAON,EAAE,EAAE;IACH,MAAM,OAAO,GAAe,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAqB,CAAC;QAC5E,CAAC,CAAE,IAAI,CAAC,OAAsB;QAC9B,CAAC,CAAC,QAAQ,CAAA;IAEZ,aAAa,CAAC,IAAI,CAAC;QACjB,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,OAAO;QACP,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,UAAU,EAAE,IAAI,CAAC,UAAU;KAC5B,CAAC,CAAA;IAEF,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,GAAG,EAAE,CAAA;IAE7C,IAAI,GAAG,CAAC,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,KAAK,KAAK,EAAE,CAAC;QACtD,MAAM,iBAAiB,CAAC,GAAG,CAAC,aAAa,EAAE,GAAG,CAAC,WAAW,CAAC,CAAA;IAC7D,CAAC;IAED,MAAM,WAAW,EAAE,CAAA;AACrB,CAAC,CACF,CAAA;AAEH,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,GAAY,EAAE,EAAE;IACtD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;IAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;AACjB,CAAC,CAAC,CAAA"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/** Which detection engines run: regex rules only, the Ollama LLM only, or both (hybrid). */
export type EngineMode = 'regex' | 'llm' | 'hybrid';
|
|
2
|
+
/** Fully resolved configuration: built-in defaults, overlaid by the JSON config file, overlaid by CLI arguments. */
export interface Config {
|
|
3
|
+
lang: string; // language pack for the regex rules; built-in default 'en'
|
|
4
|
+
engines: EngineMode; // which NER engines run; built-in default 'hybrid'
|
|
5
|
+
ollamaModel: string; // Ollama model name used for LLM NER; built-in default 'llama3'
|
|
6
|
+
ollamaBaseUrl: string; // Ollama API base URL; built-in default 'http://localhost:11434'
|
|
7
|
+
autoUnmask: boolean; // auto-restore tokens in LLM responses; built-in default false
|
|
8
|
+
strictValidation: boolean; // checksum / format validation of matches; built-in default true
|
|
9
|
+
}
|
|
10
|
+
/** Optional overrides from the command line; any field left undefined does not override the lower layers. */
export interface CliArgs {
|
|
11
|
+
lang?: string; // overrides Config.lang
|
|
12
|
+
engines?: EngineMode; // overrides Config.engines
|
|
13
|
+
ollamaModel?: string; // overrides Config.ollamaModel
|
|
14
|
+
ollamaBaseUrl?: string; // overrides Config.ollamaBaseUrl
|
|
15
|
+
config?: string; // path of the JSON config file to read instead of ./mcp-config.json; not itself a Config key
|
|
16
|
+
autoUnmask?: boolean; // overrides Config.autoUnmask
|
|
17
|
+
strictValidation?: boolean; // overrides Config.strictValidation
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Singleton configuration manager.
|
|
21
|
+
*
|
|
22
|
+
* Priority (highest to lowest): CLI args > mcp-config.json > built-in defaults
|
|
23
|
+
*
|
|
24
|
+
* Usage:
|
|
25
|
+
* ConfigManager.init(cliArgs) // once, in cli.ts
|
|
26
|
+
* ConfigManager.getInstance() // everywhere else
|
|
27
|
+
* ConfigManager.reset() // in tests, to reset between cases
|
|
28
|
+
*/
|
|
29
|
+
export declare class ConfigManager {
|
|
30
|
+
private static instance; // the current singleton, or null before first use and after reset()
|
|
31
|
+
private readonly config; // the merged result of defaults, config file and CLI args
|
|
32
|
+
private constructor();
|
|
33
|
+
/** Builds a fresh instance from cliArgs and installs it as the singleton, replacing any previous one. */
static init(cliArgs?: CliArgs): ConfigManager;
|
|
34
|
+
/** Returns the singleton; if none exists yet, creates one from the defaults and the config file only (no CLI args). */
static getInstance(): ConfigManager;
|
|
35
|
+
/** Reset the singleton — for use in tests only */
|
|
36
|
+
static reset(): void;
|
|
37
|
+
/** Returns a shallow copy of the current config to prevent mutation */
|
|
38
|
+
get(): Config;
|
|
39
|
+
}
|
|
40
|
+
//# sourceMappingURL=manager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"manager.d.ts","sourceRoot":"","sources":["../../src/config/manager.ts"],"names":[],"mappings":"AAGA,MAAM,MAAM,UAAU,GAAG,OAAO,GAAG,KAAK,GAAG,QAAQ,CAAA;AAEnD,MAAM,WAAW,MAAM;IACrB,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,UAAU,CAAA;IACnB,WAAW,EAAE,MAAM,CAAA;IACnB,aAAa,EAAE,MAAM,CAAA;IACrB,UAAU,EAAE,OAAO,CAAA;IACnB,gBAAgB,EAAE,OAAO,CAAA;CAC1B;AAED,MAAM,WAAW,OAAO;IACtB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,OAAO,CAAC,EAAE,UAAU,CAAA;IACpB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAA;CAC3B;AAWD;;;;;;;;;GASG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAC,QAAQ,CAA6B;IACpD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAQ;IAE/B,OAAO;IA+BP,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,OAAO,GAAG,aAAa;IAK7C,MAAM,CAAC,WAAW,IAAI,aAAa;IAOnC,kDAAkD;IAClD,MAAM,CAAC,KAAK,IAAI,IAAI;IAIpB,uEAAuE;IACvE,GAAG,IAAI,MAAM;CAGd"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { readFileSync, existsSync } from 'node:fs';
|
|
2
|
+
import { resolve } from 'node:path';
|
|
3
|
+
/** Built-in defaults: the lowest-priority configuration layer. */
const DEFAULTS = {
    lang: 'en',
    engines: 'hybrid',
    ollamaModel: 'llama3',
    ollamaBaseUrl: 'http://localhost:11434',
    autoUnmask: false,
    strictValidation: true,
};
/** Values accepted for the `engines` key. */
const ENGINE_MODES = ['regex', 'llm', 'hybrid'];
/**
 * Write a configuration diagnostic to stderr.
 * stderr is used so the message cannot mix with stdout, which is presumably
 * reserved for the MCP stdio transport (confirm in mcp/server.js).
 *
 * @param message - Human-readable description of the problem.
 */
function warnConfig(message) {
    process.stderr.write(`pseudonym-mcp: ${message}\n`);
}
/**
 * Read the JSON config file and return only the entries that are safe to merge.
 *
 * An entry is kept when its key is one of the DEFAULTS keys, its value has the
 * same primitive type as the matching default and, for `engines`, it is one of
 * ENGINE_MODES. Anything else is dropped: invalid values for known keys are
 * reported on stderr, unknown keys are ignored. An unreadable file, malformed
 * JSON or a non-object top-level value yields an empty object, so the caller
 * falls back to the defaults without throwing.
 *
 * @param configPath - Absolute path of the file to read.
 * @returns A plain object holding the validated subset of config keys.
 */
function readConfigFile(configPath) {
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(configPath, 'utf-8'));
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        warnConfig(`could not load config file ${configPath} (${reason}); using defaults`);
        return {};
    }
    // Spreading an array, string or null into the config would inject junk keys.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        warnConfig(`config file ${configPath} must contain a JSON object; using defaults`);
        return {};
    }
    const accepted = {};
    for (const key of Object.keys(DEFAULTS)) {
        if (!Object.prototype.hasOwnProperty.call(parsed, key)) {
            continue;
        }
        const value = parsed[key];
        const typeMatches = typeof value === typeof DEFAULTS[key];
        const modeIsKnown = key !== 'engines' || ENGINE_MODES.includes(value);
        if (typeMatches && modeIsKnown) {
            accepted[key] = value;
        }
        else {
            warnConfig(`ignoring invalid value for "${key}" in ${configPath}`);
        }
    }
    return accepted;
}
/**
 * Singleton configuration manager.
 *
 * Priority (highest to lowest): CLI args > mcp-config.json > built-in defaults
 *
 * Usage:
 *   ConfigManager.init(cliArgs)     // once, in cli.ts
 *   ConfigManager.getInstance()     // everywhere else
 *   ConfigManager.reset()           // in tests, to reset between cases
 */
export class ConfigManager {
    /** The current singleton, or null before first use and after reset(). */
    static instance = null;
    /** The merged configuration; exposed only as a copy through get(). */
    config;
    /**
     * Merge the three configuration layers.
     *
     * @param cliArgs - Optional CLI overrides. `cliArgs.config` selects the
     *   config file; when absent, `mcp-config.json` in the current working
     *   directory is used. Fields that are `undefined` do not override.
     */
    constructor(cliArgs = {}) {
        // Layer 1: defaults
        let cfg = { ...DEFAULTS };
        // Layer 2: JSON config file (validated; problems are reported, never thrown)
        const configPath = cliArgs.config
            ? resolve(cliArgs.config)
            : resolve(process.cwd(), 'mcp-config.json');
        if (existsSync(configPath)) {
            cfg = { ...cfg, ...readConfigFile(configPath) };
        }
        else if (cliArgs.config) {
            // Only an explicitly requested file is worth a warning; the
            // implicit ./mcp-config.json is optional.
            warnConfig(`config file not found: ${configPath}; using defaults`);
        }
        // Layer 3: CLI args override everything they define
        for (const key of Object.keys(DEFAULTS)) {
            if (cliArgs[key] !== undefined) {
                cfg[key] = cliArgs[key];
            }
        }
        this.config = cfg;
    }
    /**
     * Build a fresh instance from `cliArgs` and install it as the singleton,
     * replacing any previous one.
     *
     * @param cliArgs - Optional CLI overrides.
     * @returns The newly installed instance.
     */
    static init(cliArgs) {
        ConfigManager.instance = new ConfigManager(cliArgs);
        return ConfigManager.instance;
    }
    /**
     * Return the singleton, creating one from the defaults and the config file
     * (no CLI args) if none exists yet.
     */
    static getInstance() {
        if (!ConfigManager.instance) {
            ConfigManager.instance = new ConfigManager();
        }
        return ConfigManager.instance;
    }
    /** Reset the singleton — for use in tests only */
    static reset() {
        ConfigManager.instance = null;
    }
    /** Returns a shallow copy of the current config to prevent mutation */
    get() {
        return { ...this.config };
    }
}
|
|
75
|
+
//# sourceMappingURL=manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"manager.js","sourceRoot":"","sources":["../../src/config/manager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AAClD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAuBnC,MAAM,QAAQ,GAAW;IACvB,IAAI,EAAE,IAAI;IACV,OAAO,EAAE,QAAQ;IACjB,WAAW,EAAE,QAAQ;IACrB,aAAa,EAAE,wBAAwB;IACvC,UAAU,EAAE,KAAK;IACjB,gBAAgB,EAAE,IAAI;CACvB,CAAA;AAED;;;;;;;;;GASG;AACH,MAAM,OAAO,aAAa;IAChB,MAAM,CAAC,QAAQ,GAAyB,IAAI,CAAA;IACnC,MAAM,CAAQ;IAE/B,YAAoB,UAAmB,EAAE;QACvC,oBAAoB;QACpB,IAAI,GAAG,GAAW,EAAE,GAAG,QAAQ,EAAE,CAAA;QAEjC,4BAA4B;QAC5B,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM;YAC/B,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC;YACzB,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,iBAAiB,CAAC,CAAA;QAE7C,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC3B,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAA;gBAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAoB,CAAA;gBAC/C,GAAG,GAAG,EAAE,GAAG,GAAG,EAAE,GAAG,IAAI,EAAE,CAAA;YAC3B,CAAC;YAAC,MAAM,CAAC;gBACP,oDAAoD;YACtD,CAAC;QACH,CAAC;QAED,wCAAwC;QACxC,IAAI,OAAO,CAAC,IAAI,KAAK,SAAS;YAAE,GAAG,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAA;QACvD,IAAI,OAAO,CAAC,OAAO,KAAK,SAAS;YAAE,GAAG,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAA;QAChE,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS;YAAE,GAAG,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAA;QAC5E,IAAI,OAAO,CAAC,aAAa,KAAK,SAAS;YAAE,GAAG,CAAC,aAAa,GAAG,OAAO,CAAC,aAAa,CAAA;QAClF,IAAI,OAAO,CAAC,UAAU,KAAK,SAAS;YAAE,GAAG,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAA;QACzE,IAAI,OAAO,CAAC,gBAAgB,KAAK,SAAS;YACxC,GAAG,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAA;QAEjD,IAAI,CAAC,MAAM,GAAG,GAAG,CAAA;IACnB,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,OAAiB;QAC3B,aAAa,CAAC,QAAQ,GAAG,IAAI,aAAa,CAAC,OAAO,CAAC,CAAA;QACnD,OAAO,aAAa,CAAC,QAAQ,CAAA;IAC/B,CAAC;IAED,MAAM,CAAC,WAAW;QAChB,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,CAAC;YAC5B,aAAa,CAAC,QAAQ,GAAG,IAAI,aAAa,EAAE,CAAA;QAC9C,CAAC;QACD,OAAO,aAAa,CAAC,QAAQ,CAAA;IAC/B,CAAC;IAED,kDAAkD;IAClD,MAAM,CAAC,KAAK;QACV,aAAa,CAAC,QAAQ,GAAG,IAAI,CAAA;IAC/B,CAAC;IAED,uEAAuE;IACvE,GAAG;QACD,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAA;IAC3B,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { MappingStore } from './mapping-store.js';
|
|
2
|
+
import { OllamaClient } from './ollama-client.js';
|
|
3
|
+
/**
|
|
4
|
+
* Main orchestrator. Coordinates regex-based masking and optional Ollama LLM NER.
|
|
5
|
+
*
|
|
6
|
+
* @param store - Optional pre-constructed MappingStore (useful for session reuse)
|
|
7
|
+
* @param ollamaClientOverride - Pass an OllamaClient (or null) to override auto-creation.
|
|
8
|
+
* Used in tests to inject mocks without vi.mock hoisting.
|
|
9
|
+
*/
|
|
10
|
+
export declare class Engine {
|
|
11
|
+
private readonly store;
|
|
12
|
+
private readonly ollamaClient;
|
|
13
|
+
constructor(store?: MappingStore, ollamaClientOverride?: OllamaClient | null);
|
|
14
|
+
/** Returns the engine's MappingStore (presumably the one passed to, or created by, the constructor — confirm in engine.js). */
getStore(): MappingStore;
|
|
15
|
+
/**
|
|
16
|
+
* Pseudonymize sensitive data in the given text.
|
|
17
|
+
*
|
|
18
|
+
* Phase 1 (regex | hybrid): Apply pattern-based masking for structured data
|
|
19
|
+
* defined by the active language pack (e.g. SSN, credit card, PESEL, IBAN, email, phone).
|
|
20
|
+
* Phase 2 (llm | hybrid): Call Ollama NER to detect PERSON / ORG names.
|
|
21
|
+
* If Ollama is unavailable, this phase is skipped without throwing. NOTE(review): the README says a warning is logged to stderr, so "silently" may be inaccurate — confirm.
* @param text - Raw input text that may contain PII.
* @returns The text with detected values replaced by [TAG:N] tokens.
|
|
22
|
+
*/
|
|
23
|
+
process(text: string): Promise<string>;
|
|
24
|
+
private applyRegexRules;
|
|
25
|
+
private applyLlmNer;
|
|
26
|
+
/**
|
|
27
|
+
* Restore all [TAG:N] tokens in text to their original values.
|
|
28
|
+
* Tokens not found in the store are left unchanged.
|
|
29
|
+
*/
|
|
30
|
+
revert(text: string): string;
|
|
31
|
+
}
|
|
32
|
+
//# sourceMappingURL=engine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../../src/core/engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAqB,MAAM,oBAAoB,CAAA;AAWpE;;;;;;GAMG;AACH,qBAAa,MAAM;IACjB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAc;IACpC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAqB;gBAGhD,KAAK,CAAC,EAAE,YAAY,EACpB,oBAAoB,CAAC,EAAE,YAAY,GAAG,IAAI;IAgB5C,QAAQ,IAAI,YAAY;IAIxB;;;;;;;OAOG;IACG,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAoB5C,OAAO,CAAC,eAAe;YAuBT,WAAW;IA+BzB;;;OAGG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;CAK7B"}
|