pseudonym-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +354 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +40 -0
- package/dist/cli.js.map +1 -0
- package/dist/config/manager.d.ts +40 -0
- package/dist/config/manager.d.ts.map +1 -0
- package/dist/config/manager.js +75 -0
- package/dist/config/manager.js.map +1 -0
- package/dist/core/engine.d.ts +32 -0
- package/dist/core/engine.d.ts.map +1 -0
- package/dist/core/engine.js +110 -0
- package/dist/core/engine.js.map +1 -0
- package/dist/core/mapping-store.d.ts +24 -0
- package/dist/core/mapping-store.d.ts.map +1 -0
- package/dist/core/mapping-store.js +47 -0
- package/dist/core/mapping-store.js.map +1 -0
- package/dist/core/ollama-client.d.ts +21 -0
- package/dist/core/ollama-client.d.ts.map +1 -0
- package/dist/core/ollama-client.js +67 -0
- package/dist/core/ollama-client.js.map +1 -0
- package/dist/languages/en/rules.d.ts +3 -0
- package/dist/languages/en/rules.d.ts.map +1 -0
- package/dist/languages/en/rules.js +69 -0
- package/dist/languages/en/rules.js.map +1 -0
- package/dist/languages/pl/rules.d.ts +3 -0
- package/dist/languages/pl/rules.d.ts.map +1 -0
- package/dist/languages/pl/rules.js +44 -0
- package/dist/languages/pl/rules.js.map +1 -0
- package/dist/languages/types.d.ts +12 -0
- package/dist/languages/types.d.ts.map +1 -0
- package/dist/languages/types.js +2 -0
- package/dist/languages/types.js.map +1 -0
- package/dist/mcp/server.d.ts +4 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +93 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/setup/check-ollama.d.ts +15 -0
- package/dist/setup/check-ollama.d.ts.map +1 -0
- package/dist/setup/check-ollama.js +58 -0
- package/dist/setup/check-ollama.js.map +1 -0
- package/mcp-config.json +8 -0
- package/package.json +103 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { MappingStore } from './mapping-store.js';
|
|
2
|
+
import { OllamaClient } from './ollama-client.js';
|
|
3
|
+
import { ConfigManager } from '../config/manager.js';
|
|
4
|
+
import { EnglishRules } from '../languages/en/rules.js';
|
|
5
|
+
import { PolishRules } from '../languages/pl/rules.js';
|
|
6
|
+
// Lookup table from the configured language code to its regex rule set.
// Engine.process() falls back to EnglishRules when the code is not listed here.
const LANGUAGE_MAP = { en: EnglishRules, pl: PolishRules };
|
|
10
|
+
/**
 * Main orchestrator. Coordinates regex-based masking and optional Ollama LLM NER.
 *
 * @param store - Optional pre-constructed MappingStore (useful for session reuse)
 * @param ollamaClientOverride - Pass an OllamaClient (or null) to override auto-creation.
 *   Used in tests to inject mocks without vi.mock hoisting.
 */
export class Engine {
    store;
    ollamaClient;
    constructor(store, ollamaClientOverride) {
        this.store = store ?? new MappingStore();
        if (ollamaClientOverride !== undefined) {
            // Explicit injection (tests pass mock or null here)
            this.ollamaClient = ollamaClientOverride;
        }
        else {
            // No override: only build a client when the configured engine mode uses the LLM.
            const cfg = ConfigManager.getInstance().get();
            const needsLlm = cfg.engines === 'hybrid' || cfg.engines === 'llm';
            this.ollamaClient = needsLlm
                ? new OllamaClient({ baseUrl: cfg.ollamaBaseUrl, model: cfg.ollamaModel })
                : null;
        }
    }
    /** Returns the mapping store that holds this engine's token → original pairs. */
    getStore() {
        return this.store;
    }
    /**
     * Pseudonymize sensitive data in the given text.
     *
     * Phase 1 (regex | hybrid): Apply pattern-based masking for structured data
     *   using the rule set of the configured language (English when unknown).
     * Phase 2 (llm | hybrid): Call Ollama NER to detect PERSON / ORG names.
     *   Skipped when no client is configured; a failing call is reported on
     *   stderr and the text from phase 1 is returned unchanged.
     *
     * @param text - Raw input text.
     * @returns The text with detected values replaced by `[TAG:N]` tokens.
     */
    async process(text) {
        const cfg = ConfigManager.getInstance().get();
        const rules = LANGUAGE_MAP[cfg.lang] ?? EnglishRules;
        let result = text;
        if (cfg.engines === 'regex' || cfg.engines === 'hybrid') {
            result = this.applyRegexRules(result, rules, cfg.strictValidation);
        }
        if ((cfg.engines === 'llm' || cfg.engines === 'hybrid') &&
            this.ollamaClient !== null) {
            result = await this.applyLlmNer(result);
        }
        return result;
    }
    /**
     * Apply every pattern of a rule set in order, replacing matches with tokens.
     *
     * @param text - Text to scan.
     * @param rules - Rule set whose `patterns` are applied sequentially.
     * @param strictValidation - When true, a pattern's `validate` callback (if any)
     *   must accept the whitespace-stripped match; rejected matches stay as-is.
     * @returns The masked text.
     */
    applyRegexRules(text, rules, strictValidation) {
        let result = text;
        for (const patternDef of rules.patterns) {
            // Clone the regex to reset lastIndex — /g regexes are stateful
            const regex = new RegExp(patternDef.regex.source, patternDef.regex.flags);
            result = result.replace(regex, (match) => {
                if (patternDef.validate && strictValidation) {
                    const clean = match.replace(/\s/g, '');
                    if (!patternDef.validate(clean))
                        return match;
                }
                return this.store.add(patternDef.tag, match);
            });
        }
        return result;
    }
    /**
     * Ask the Ollama client for PERSON / ORG entities and replace every
     * occurrence of each entity value with its token.
     *
     * Existing `[TAG:N]` tokens are never rewritten, so an entity value such as
     * "EMAIL" or "ORG" cannot corrupt a token produced earlier.
     *
     * @param text - Text to scan (may already contain tokens from the regex phase).
     * @returns The masked text, or `text` unchanged when the client call fails
     *   or yields no entities.
     */
    async applyLlmNer(text) {
        let entities;
        try {
            entities = await this.ollamaClient.extractEntities(text);
        }
        catch (err) {
            process.stderr.write(`[pseudonym-mcp] Ollama NER failed (skipping LLM phase): ${String(err)}\n`);
            return text;
        }
        if (entities.length === 0)
            return text;
        // Trim first so the longest-first ordering is based on the strings that are
        // actually searched for. Longest-first prevents partial matches,
        // e.g. "Auto-Lux International" must be replaced before "Auto-Lux".
        const candidates = entities
            .map((entity) => ({ type: entity.type, value: entity.value.trim() }))
            .filter((entity) => entity.value.length > 0)
            .sort((a, b) => b.value.length - a.value.length);
        let result = text;
        for (const { type, value } of candidates) {
            // Escape regex special characters in the entity value
            const escaped = value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            // The first alternative consumes whole tokens so the entity alternative
            // can never match inside one; the capture group tells the two apart.
            const re = new RegExp(`(\\[[A-Z_]+:\\d+\\])|${escaped}`, 'g');
            result = result.replace(re, (match, token) => token !== undefined ? match : this.store.add(type, value));
        }
        return result;
    }
    /**
     * Restore all [TAG:N] tokens in text to their original values.
     * Tokens not found in the store are left unchanged.
     *
     * Tag names may contain underscores (e.g. `[CREDIT_CARD:1]`).
     *
     * @param text - Text containing tokens.
     * @returns The text with every known token replaced by its original value.
     */
    revert(text) {
        return text.replace(/\[[A-Z_]+:\d+\]/g, (token) => {
            return this.store.get(token) ?? token;
        });
    }
}
|
|
110
|
+
//# sourceMappingURL=engine.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"engine.js","sourceRoot":"","sources":["../../src/core/engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAqB,MAAM,oBAAoB,CAAA;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAEpD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAA;AACvD,OAAO,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAA;AAEtD,MAAM,YAAY,GAAkC;IAClD,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,WAAW;CAChB,CAAA;AAED;;;;;;GAMG;AACH,MAAM,OAAO,MAAM;IACA,KAAK,CAAc;IACnB,YAAY,CAAqB;IAElD,YACE,KAAoB,EACpB,oBAA0C;QAE1C,IAAI,CAAC,KAAK,GAAG,KAAK,IAAI,IAAI,YAAY,EAAE,CAAA;QAExC,IAAI,oBAAoB,KAAK,SAAS,EAAE,CAAC;YACvC,oDAAoD;YACpD,IAAI,CAAC,YAAY,GAAG,oBAAoB,CAAA;QAC1C,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,GAAG,EAAE,CAAA;YAC7C,MAAM,QAAQ,GAAG,GAAG,CAAC,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,KAAK,KAAK,CAAA;YAClE,IAAI,CAAC,YAAY,GAAG,QAAQ;gBAC1B,CAAC,CAAC,IAAI,YAAY,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,aAAa,EAAE,KAAK,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC;gBAC1E,CAAC,CAAC,IAAI,CAAA;QACV,CAAC;IACH,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAA;IACnB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,OAAO,CAAC,IAAY;QACxB,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,GAAG,EAAE,CAAA;QAC7C,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAA;QAEpD,IAAI,MAAM,GAAG,IAAI,CAAA;QAEjB,IAAI,GAAG,CAAC,OAAO,KAAK,OAAO,IAAI,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACxD,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,CAAC,gBAAgB,CAAC,CAAA;QACpE,CAAC;QAED,IACE,CAAC,GAAG,CAAC,OAAO,KAAK,KAAK,IAAI,GAAG,CAAC,OAAO,KAAK,QAAQ,CAAC;YACnD,IAAI,CAAC,YAAY,KAAK,IAAI,EAC1B,CAAC;YACD,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAA;QACzC,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAEO,eAAe,CACrB,IAAY,EACZ,KAAoB,EACpB,gBAAyB;QAEzB,IAAI,MAAM,GAAG,IAAI,CAAA;QAEjB,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACxC,+DAA+D;YAC/D,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;YAEzE,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;gBACvC,IAAI,UAAU,CAAC,QAAQ,IAAI,gBAAgB,EAAE,CAAC;oBAC5C,MAAM,KAAK,GAAG,
KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;oBACtC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC;wBAAE,OAAO,KAAK,CAAA;gBAC/C,CAAC;gBACD,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC9C,CAAC,CAAC,CAAA;QACJ,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,IAAY;QACpC,IAAI,QAAwB,CAAA;QAC5B,IAAI,CAAC;YACH,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAa,CAAC,eAAe,CAAC,IAAI,CAAC,CAAA;QAC3D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,2DAA2D,MAAM,CAAC,GAAG,CAAC,IAAI,CAC3E,CAAA;YACD,OAAO,IAAI,CAAA;QACb,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAA;QAEtC,IAAI,MAAM,GAAG,IAAI,CAAA;QAEjB,gDAAgD;QAChD,mEAAmE;QACnE,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAA;QAE5E,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CAAA;YAC/B,IAAI,CAAC,GAAG;gBAAE,SAAQ;YAClB,sDAAsD;YACtD,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAA;YAC1D,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAA;YACnC,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAA;QACrE,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,IAAY;QACjB,OAAO,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,CAAC,KAAK,EAAE,EAAE;YAC/C,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAA;QACvC,CAAC,CAAC,CAAA;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Session-scoped registry that pairs `[TAG:N]` tokens with the original
 * values they replaced.
 *
 * `add()` is idempotent per (tag, original) pair: registering the same value
 * under the same tag again yields the token issued the first time, so one
 * value is always represented by one token in the masked text.
 */
export declare class MappingStore {
    private readonly sessionId;
    private readonly store;
    private readonly counters;
    /** @param sessionId - Identifier for this store; optional. */
    constructor(sessionId?: string);
    /** Returns the identifier of this store. */
    getSessionId(): string;
    /**
     * Registers an original value and returns its `[TAG:N]` token.
     * A repeated (tag, original) pair returns the token already issued.
     */
    add(tag: string, original: string): string;
    /** Looks up the original value for a token; `undefined` when unknown. */
    get(token: string): string | undefined;
    /** Returns a read-only view of every token → original pair. */
    getAll(): ReadonlyMap<string, string>;
    /** Empties the store. */
    clear(): void;
}
|
|
24
|
+
//# sourceMappingURL=mapping-store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mapping-store.d.ts","sourceRoot":"","sources":["../../src/core/mapping-store.ts"],"names":[],"mappings":"AAEA;;;;;;GAMG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAQ;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA4B;IAClD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA4B;gBAEzC,SAAS,CAAC,EAAE,MAAM;IAI9B,YAAY,IAAI,MAAM;IAItB;;;;OAIG;IACH,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM;IAa1C,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS;IAItC,MAAM,IAAI,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC;IAIrC,KAAK,IAAI,IAAI;CAId"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
2
|
+
/**
 * Session-isolated store that maps [TAG:N] tokens to their original values.
 *
 * The `add()` method is idempotent: if the same original value is added
 * under the same tag more than once, the existing token is returned,
 * preserving reference coherence in the masked text.
 */
export class MappingStore {
    sessionId;
    /** token → original value */
    store = new Map();
    /** tag → last number issued for that tag */
    counters = new Map();
    /** tag → (original value → token); reverse index so add() needs no scan */
    tokensByTag = new Map();
    /**
     * @param sessionId - Identifier for this store; a random UUID is generated
     *   when omitted.
     */
    constructor(sessionId) {
        this.sessionId = sessionId ?? randomUUID();
    }
    /** Returns the identifier given to (or generated by) the constructor. */
    getSessionId() {
        return this.sessionId;
    }
    /**
     * Register an original value and return its token `[TAG:N]`.
     * If the same original+tag combination was already registered,
     * the existing token is returned (idempotent).
     *
     * @param tag - Tag name placed in the token, e.g. "EMAIL".
     * @param original - The value being replaced.
     * @returns The token that stands for `original` under `tag`.
     */
    add(tag, original) {
        let byOriginal = this.tokensByTag.get(tag);
        if (byOriginal === undefined) {
            byOriginal = new Map();
            this.tokensByTag.set(tag, byOriginal);
        }
        const existing = byOriginal.get(original);
        if (existing !== undefined) {
            return existing;
        }
        // Numbering is per tag and starts at 1.
        const n = (this.counters.get(tag) ?? 0) + 1;
        this.counters.set(tag, n);
        const token = `[${tag}:${n}]`;
        this.store.set(token, original);
        byOriginal.set(original, token);
        return token;
    }
    /**
     * @param token - A token such as "[EMAIL:1]".
     * @returns The original value, or `undefined` when the token is unknown.
     */
    get(token) {
        return this.store.get(token);
    }
    /** Returns the live token → original map (treat it as read-only). */
    getAll() {
        return this.store;
    }
    /** Removes every mapping and resets the per-tag numbering. */
    clear() {
        this.store.clear();
        this.counters.clear();
        this.tokensByTag.clear();
    }
}
|
|
47
|
+
//# sourceMappingURL=mapping-store.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mapping-store.js","sourceRoot":"","sources":["../../src/core/mapping-store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AAExC;;;;;;GAMG;AACH,MAAM,OAAO,YAAY;IACN,SAAS,CAAQ;IACjB,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAA;IACjC,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAA;IAErD,YAAY,SAAkB;QAC5B,IAAI,CAAC,SAAS,GAAG,SAAS,IAAI,UAAU,EAAE,CAAA;IAC5C,CAAC;IAED,YAAY;QACV,OAAO,IAAI,CAAC,SAAS,CAAA;IACvB,CAAC;IAED;;;;OAIG;IACH,GAAG,CAAC,GAAW,EAAE,QAAgB;QAC/B,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACxC,IAAI,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,UAAU,CAAC,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC;gBACvD,OAAO,KAAK,CAAA;YACd,CAAC;QACH,CAAC;QACD,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAA;QAC3C,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,IAAI,CAAC,GAAG,CAAA;QAC7B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;QAC/B,OAAO,KAAK,CAAA;IACd,CAAC;IAED,GAAG,CAAC,KAAa;QACf,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;IAC9B,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,CAAC,KAAK,CAAA;IACnB,CAAC;IAED,KAAK;QACH,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAA;QAClB,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAA;IACvB,CAAC;CACF"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/** One named entity reported by the NER call. */
export interface OllamaEntity {
    /** Entity category: a person name or an organization name. */
    type: 'PERSON' | 'ORG';
    /** The entity text as returned by the model. */
    value: string;
}
|
|
5
|
+
/** Construction options for OllamaClient. */
export interface OllamaClientOptions {
    /** Base URL of the Ollama server; "/api/chat" is appended to it. */
    baseUrl: string;
    /** Name of the model sent in the chat request. */
    model: string;
    /** Request timeout in milliseconds (optional). */
    timeoutMs?: number;
}
|
|
10
|
+
/**
 * Small client for the Ollama /api/chat endpoint.
 * It is passed into Engine so tests can substitute a mock.
 */
export declare class OllamaClient {
    private readonly baseUrl;
    private readonly model;
    private readonly timeoutMs;
    /** @param opts - Server URL, model name and optional timeout. */
    constructor(opts: OllamaClientOptions);
    /**
     * Sends `text` to the model and resolves with the PERSON / ORG entities
     * found in its reply.
     */
    extractEntities(text: string): Promise<OllamaEntity[]>;
}
|
|
21
|
+
//# sourceMappingURL=ollama-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ollama-client.d.ts","sourceRoot":"","sources":["../../src/core/ollama-client.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,GAAG,KAAK,CAAA;IACtB,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAQD;;;GAGG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAQ;IAChC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAQ;IAC9B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAQ;gBAEtB,IAAI,EAAE,mBAAmB;IAM/B,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;CAqD7D"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// System message that instructs the model to answer with a bare JSON array of entities.
const SYSTEM_PROMPT = `You are a Named Entity Recognition engine.
Extract all PERSON names and ORGANIZATION names from the user's text.
Return ONLY a JSON array of objects with this exact shape:
[{"type":"PERSON","value":"..."}, {"type":"ORG","value":"..."}]
Do not return any text outside the JSON array. If no entities are found, return [].`;
/**
 * Thin wrapper around the Ollama /api/chat endpoint.
 * Designed to be injected into Engine so it can be mocked in tests.
 */
export class OllamaClient {
    baseUrl;
    model;
    timeoutMs;
    /**
     * @param opts - `baseUrl` and `model` are used as given; `timeoutMs`
     *   defaults to 10 000 ms.
     */
    constructor(opts) {
        this.baseUrl = opts.baseUrl;
        this.model = opts.model;
        this.timeoutMs = opts.timeoutMs ?? 10_000;
    }
    /**
     * Ask the model for PERSON / ORG entities in `text`.
     *
     * @param text - Text sent as the user message.
     * @returns Entities whose `type` is 'PERSON' or 'ORG' and whose `value` is a
     *   non-blank string. An empty array is returned when the reply contains no
     *   parseable JSON array.
     * @throws Error when the server answers with a non-2xx status; errors from
     *   `fetch` / body reading (including an abort after `timeoutMs`) propagate.
     */
    async extractEntities(text) {
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
        let data;
        try {
            const res = await fetch(`${this.baseUrl}/api/chat`, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                signal: controller.signal,
                body: JSON.stringify({
                    model: this.model,
                    stream: false,
                    messages: [
                        { role: 'system', content: SYSTEM_PROMPT },
                        { role: 'user', content: text },
                    ],
                }),
            });
            if (!res.ok) {
                throw new Error(`Ollama returned HTTP ${res.status}`);
            }
            // Read the body while the abort timer is still armed: fetch() resolves
            // once headers arrive, so clearing the timer earlier would leave the
            // body read without any timeout.
            data = await res.json();
        }
        finally {
            clearTimeout(timeout);
        }
        const content = data?.message?.content ?? '[]';
        if (typeof content !== 'string')
            return [];
        // Robustly extract the JSON array even if the LLM wraps it in prose
        const jsonMatch = content.match(/\[[\s\S]*\]/);
        if (!jsonMatch)
            return [];
        let parsed;
        try {
            parsed = JSON.parse(jsonMatch[0]);
        }
        catch {
            return [];
        }
        if (!Array.isArray(parsed))
            return [];
        // Keep only well-formed entries; anything else the model invented is dropped.
        return parsed.filter((e) => typeof e === 'object' &&
            e !== null &&
            'type' in e &&
            'value' in e &&
            (e.type === 'PERSON' ||
                e.type === 'ORG') &&
            typeof e.value === 'string' &&
            e.value.trim().length > 0);
    }
}
|
|
67
|
+
//# sourceMappingURL=ollama-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ollama-client.js","sourceRoot":"","sources":["../../src/core/ollama-client.ts"],"names":[],"mappings":"AAWA,MAAM,aAAa,GAAG;;;;oFAI8D,CAAA;AAEpF;;;GAGG;AACH,MAAM,OAAO,YAAY;IACN,OAAO,CAAQ;IACf,KAAK,CAAQ;IACb,SAAS,CAAQ;IAElC,YAAY,IAAyB;QACnC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAA;QAC3B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAA;QACvB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,MAAM,CAAA;IAC3C,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,IAAY;QAChC,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;QACxC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC,CAAA;QAEpE,IAAI,GAAa,CAAA;QACjB,IAAI,CAAC;YACH,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,WAAW,EAAE;gBAC5C,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;gBAC/C,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,KAAK;oBACb,QAAQ,EAAE;wBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;wBAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;qBAChC;iBACF,CAAC;aACH,CAAC,CAAA;QACJ,CAAC;gBAAS,CAAC;YACT,YAAY,CAAC,OAAO,CAAC,CAAA;QACvB,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,CAAC,MAAM,EAAE,CAAC,CAAA;QACvD,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAuC,CAAA;QACrE,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,EAAE,OAAO,IAAI,IAAI,CAAA;QAE9C,oEAAoE;QACpE,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAA;QAC9C,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAA;QAEzB,IAAI,MAAiB,CAAA;QACrB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAc,CAAA;QAChD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAA;QACX,CAAC;QAED,OAAO,MAAM,CAAC,MAAM,CAClB,CAAC,CAAC,EAAqB,EAAE,CACvB,OAAO,CAAC,KAAK,QAAQ;YACrB,CAAC,KAAK,IAAI;YACV,MAAM,IAAI,CAAC;YACX,OAAO,IAAI,CAAC;YACZ,CAAE,CAA6B,CAAC,IAAI,KAAK,QAAQ;gBAC9C,CAA6B,CAAC,IAAI,KAAK,KAAK,CAAC;YAChD,OAAQ,CAA6B,CAAC,KAAK,KAAK,QAAQ;YACtD,CAA6B,CAAC,KAAgB,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CACrE,CAAA;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rules.d.ts","sourceRoot":"","sources":["../../../src/languages/en/rules.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAwChD,eAAO,MAAM,YAAY,EAAE,aA0B1B,CAAA"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
 * Validates a card number with the Luhn checksum.
 * Spaces and dashes are ignored; the remainder must be 13–19 digits.
 */
function luhnCheck(number) {
    const digits = number.replace(/[\s-]/g, '');
    if (!/^\d{13,19}$/.test(digits)) {
        return false;
    }
    // Walk right-to-left; every second digit is doubled, and a two-digit
    // product is reduced by 9 (same as summing its digits).
    const total = [...digits].reverse().reduce((sum, ch, idx) => {
        let value = Number(ch);
        if (idx % 2 === 1) {
            value *= 2;
            if (value > 9) {
                value -= 9;
            }
        }
        return sum + value;
    }, 0);
    return total % 10 === 0;
}
|
|
23
|
+
/**
 * Structural check for a US Social Security Number.
 * After removing spaces and dashes the value must be nine digits; area
 * 000, 666 and 900–999, group 00 and serial 0000 are rejected.
 */
function ssnValidate(ssn) {
    const parts = /^(\d{3})(\d{2})(\d{4})$/.exec(ssn.replace(/[\s-]/g, ''));
    if (parts === null) {
        return false;
    }
    const [area, group, serial] = parts.slice(1).map((part) => parseInt(part, 10));
    const areaAllowed = area !== 0 && area !== 666 && area < 900;
    return areaAllowed && group !== 0 && serial !== 0;
}
|
|
42
|
+
// English (US) rule set. Patterns are listed in the order they are applied.
export const EnglishRules = {
    patterns: [
        // US Social Security Number: XXX-XX-XXXX (with required dashes to avoid false positives)
        { tag: 'SSN', regex: /\b\d{3}-\d{2}-\d{4}\b/g, validate: ssnValidate },
        // 13–19 digits, optionally separated by spaces or dashes in groups of 4
        // Covers Visa (4xxx), Mastercard (5[1-5]xx, 2[2-7]xx), Amex (3[47]x), Discover (6xxx)
        { tag: 'CREDIT_CARD', regex: /\b(?:\d[ -]*?){13,19}\b/g, validate: luhnCheck },
        // Email address; no validator.
        { tag: 'EMAIL', regex: /\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b/g },
        // US/international phone formats:
        // +1 (XXX) XXX-XXXX, +1-XXX-XXX-XXXX, (XXX) XXX-XXXX, XXX-XXX-XXXX, XXX.XXX.XXXX
        { tag: 'PHONE', regex: /(?:\+1[\s.-]?)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}\b/g },
    ],
};
|
|
69
|
+
//# sourceMappingURL=rules.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rules.js","sourceRoot":"","sources":["../../../src/languages/en/rules.ts"],"names":[],"mappings":"AAEA;;;GAGG;AACH,SAAS,SAAS,CAAC,MAAc;IAC/B,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;IAC3C,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAA;IAE7C,IAAI,GAAG,GAAG,CAAC,CAAA;IACX,IAAI,MAAM,GAAG,KAAK,CAAA;IAClB,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,IAAI,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;QAC7B,IAAI,MAAM,EAAE,CAAC;YACX,KAAK,IAAI,CAAC,CAAA;YACV,IAAI,KAAK,GAAG,CAAC;gBAAE,KAAK,IAAI,CAAC,CAAA;QAC3B,CAAC;QACD,GAAG,IAAI,KAAK,CAAA;QACZ,MAAM,GAAG,CAAC,MAAM,CAAA;IAClB,CAAC;IACD,OAAO,GAAG,GAAG,EAAE,KAAK,CAAC,CAAA;AACvB,CAAC;AAED;;;GAGG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;IACvC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IACxC,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;IACjD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;IAClD,IAAI,IAAI,KAAK,CAAC,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,IAAI,GAAG;QAAE,OAAO,KAAK,CAAA;IAC3D,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IAC7B,IAAI,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IAC9B,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAkB;IACzC,QAAQ,EAAE;QACR;YACE,GAAG,EAAE,KAAK;YACV,yFAAyF;YACzF,KAAK,EAAE,wBAAwB;YAC/B,QAAQ,EAAE,WAAW;SACtB;QACD;YACE,GAAG,EAAE,aAAa;YAClB,wEAAwE;YACxE,sFAAsF;YACtF,KAAK,EAAE,0BAA0B;YACjC,QAAQ,EAAE,SAAS;SACpB;QACD;YACE,GAAG,EAAE,OAAO;YACZ,KAAK,EAAE,uDAAuD;SAC/D;QACD;YACE,GAAG,EAAE,OAAO;YACZ,kCAAkC;YAClC,iFAAiF;YACjF,KAAK,EAAE,qDAAqD;SAC7D;KACF;CACF,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rules.d.ts","sourceRoot":"","sources":["../../../src/languages/pl/rules.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAgBhD,eAAO,MAAM,WAAW,EAAE,aA4BzB,CAAA"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
 * Checks the control digit of a Polish PESEL number.
 * The first ten digits are weighted with [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
 * the eleventh digit must equal (10 - (weighted_sum % 10)) % 10.
 */
function peselChecksum(pesel) {
    if (!/^\d{11}$/.test(pesel)) {
        return false;
    }
    const weights = [1, 3, 7, 9, 1, 3, 7, 9, 1, 3];
    let weightedSum = 0;
    for (let i = 0; i < weights.length; i++) {
        weightedSum += weights[i] * Number(pesel[i]);
    }
    const expected = (10 - (weightedSum % 10)) % 10;
    return expected === Number(pesel[10]);
}
|
|
15
|
+
// Polish rule set. Patterns are listed in the order they are applied.
export const PolishRules = {
    patterns: [
        {
            tag: 'PESEL',
            // Matches exactly 11 consecutive digits NOT adjacent to another digit
            regex: /(?<!\d)\d{11}(?!\d)/g,
            validate: peselChecksum,
        },
        {
            tag: 'IBAN',
            // Matches PL + 26 digits (2 check + 24 BBAN), optionally space-separated every 4 chars
            // e.g. PL27114020040000300201355387 or PL 27 1140 2004 0000 3002 0135 5387
            // Structure: PL + \d{2} (check) + (\d{4}){6} (24 BBAN) = 26 total digits
            // The optional whitespace sits BEFORE each 4-digit group so the match ends
            // on the last digit and never swallows the whitespace that follows the IBAN.
            regex: /\bPL\s*\d{2}(?:\s*\d{4}){6}\b/gi,
        },
        {
            tag: 'EMAIL',
            regex: /\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b/g,
        },
        {
            tag: 'PHONE',
            // Three alternatives:
            // 1. International prefix: +48 or 0048, then 9 digits (with optional spaces/dashes)
            // 2. 9-digit mobile starting with 4–8 (Polish numbering plan)
            // 3. Landline with area code in parens: (XX) XXX-XX-XX
            regex: /(?:\+48|0048)[\s\-]?\d{3}[\s\-]?\d{3}[\s\-]?\d{3}|\b[4-8]\d{2}[\s\-]?\d{3}[\s\-]?\d{3}\b|\(\d{2}\)[\s\-]?\d{3}[\s\-]?\d{2}[\s\-]?\d{2}/g,
        },
    ],
};
|
|
44
|
+
//# sourceMappingURL=rules.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rules.js","sourceRoot":"","sources":["../../../src/languages/pl/rules.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,SAAS,aAAa,CAAC,KAAa;IAClC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IACzC,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;IAC9C,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IAC1C,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IACjE,MAAM,KAAK,GAAG,CAAC,EAAE,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAA;IACpC,OAAO,KAAK,KAAK,MAAM,CAAC,EAAE,CAAC,CAAA;AAC7B,CAAC;AAED,MAAM,CAAC,MAAM,WAAW,GAAkB;IACxC,QAAQ,EAAE;QACR;YACE,GAAG,EAAE,OAAO;YACZ,sEAAsE;YACtE,KAAK,EAAE,sBAAsB;YAC7B,QAAQ,EAAE,aAAa;SACxB;QACD;YACE,GAAG,EAAE,MAAM;YACX,uFAAuF;YACvF,2EAA2E;YAC3E,yEAAyE;YACzE,KAAK,EAAE,oCAAoC;SAC5C;QACD;YACE,GAAG,EAAE,OAAO;YACZ,KAAK,EAAE,uDAAuD;SAC/D;QACD;YACE,GAAG,EAAE,OAAO;YACZ,sBAAsB;YACtB,oFAAoF;YACpF,8DAA8D;YAC9D,uDAAuD;YACvD,KAAK,EAAE,yIAAyI;SACjJ;KACF;CACF,CAAA"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/** One regex-based masking rule. */
export interface PatternDef {
    /** Tag placed inside the replacement token, e.g. "PESEL" or "PHONE". */
    tag: string;
    /** Pattern that finds raw candidates; it must carry the 'g' flag. */
    regex: RegExp;
    /**
     * Optional check run on a candidate after matching (e.g. PESEL checksum).
     * It is given the match with whitespace removed.
     */
    validate?: (match: string) => boolean;
}
|
|
9
|
+
/** The set of masking rules for one language. */
export interface LanguageRules {
    /** Rules in the order they should be applied. */
    patterns: PatternDef[];
}
|
|
12
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/languages/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,UAAU;IACzB,iEAAiE;IACjE,GAAG,EAAE,MAAM,CAAA;IACX,6DAA6D;IAC7D,KAAK,EAAE,MAAM,CAAA;IACb,wGAAwG;IACxG,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAA;CACtC;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,UAAU,EAAE,CAAA;CACvB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/languages/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/mcp/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AAmBnE,wBAAgB,eAAe,IAAI,SAAS,CA4F3C;AAED,wBAAsB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC,CAIjD"}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { z } from 'zod';
import { Engine } from '../core/engine.js';
import { MappingStore } from '../core/mapping-store.js';
import { ConfigManager } from '../config/manager.js';
|
|
7
|
+
// Session registry: session_id → Engine (each Engine holds its own MappingStore)
const sessions = new Map();
/**
 * Returns the Engine registered for `sessionId`, creating and registering a
 * new one (backed by a MappingStore with that id) on first use.
 */
function getOrCreateEngine(sessionId) {
    const existing = sessions.get(sessionId);
    if (existing) {
        return existing;
    }
    const created = new Engine(new MappingStore(sessionId));
    sessions.set(sessionId, created);
    return created;
}
|
|
17
|
+
/**
 * Builds the MCP server and registers its two tools:
 *
 * - `mask_text`: runs the session's Engine over the text and answers with a
 *   JSON document holding `session_id`, `masked_text` and `auto_unmask`.
 * - `unmask_text`: restores tokens using the Engine registered under the
 *   given `session_id`; an unknown id yields an error result.
 *
 * @returns The configured (not yet connected) McpServer.
 */
export function createMcpServer() {
    const server = new McpServer({
        name: 'pseudonym-mcp',
        version: '0.1.0',
    });
    server.tool('mask_text', `Pseudonymize sensitive entities in text before sending to a cloud LLM.

Replaces PESEL numbers, phone numbers, IBANs, and email addresses via regex,
and person names and organization names via local Ollama NER — with opaque
tokens like [PESEL:1], [PERSON:2], [ORG:1].

Returns the masked text plus a session_id. Store the session_id to restore
the original values later using unmask_text.`, {
        text: z.string().describe('The text to pseudonymize'),
        session_id: z
            .string()
            .optional()
            .describe('Optional: reuse an existing session to preserve token numbering across multiple calls'),
    }, async ({ text, session_id }) => {
        // Use node:crypto's randomUUID rather than the global `crypto` object,
        // which is not exposed by default on older Node releases.
        const sid = session_id ?? randomUUID();
        const engine = getOrCreateEngine(sid);
        let maskedText;
        try {
            maskedText = await engine.process(text);
        }
        catch (err) {
            return {
                content: [{ type: 'text', text: `Error during masking: ${String(err)}` }],
                isError: true,
            };
        }
        const cfg = ConfigManager.getInstance().get();
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        session_id: sid,
                        masked_text: maskedText,
                        auto_unmask: cfg.autoUnmask,
                    }, null, 2),
                },
            ],
        };
    });
    server.tool('unmask_text', `Restore original sensitive values in text that was previously masked by mask_text.

Replaces tokens like [PESEL:1], [PERSON:2] with the original values stored
in the session identified by session_id.`, {
        text: z.string().describe('The text containing [TAG:N] tokens to restore'),
        session_id: z.string().describe('The session_id returned by mask_text'),
    }, async ({ text, session_id }) => {
        // Only look up: unmasking must never create a new (empty) session.
        const engine = sessions.get(session_id);
        if (!engine) {
            return {
                content: [
                    {
                        type: 'text',
                        text: `Error: session "${session_id}" not found. It may have expired or never existed.`,
                    },
                ],
                isError: true,
            };
        }
        const restored = engine.revert(text);
        return {
            content: [{ type: 'text', text: restored }],
        };
    });
    return server;
}
|
|
88
|
+
/**
 * Create the MCP server and attach it to a stdio transport.
 *
 * @returns A promise that settles when `server.connect` has completed.
 */
export async function startServer() {
    await createMcpServer().connect(new StdioServerTransport());
}
|
|
93
|
+
//# sourceMappingURL=server.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"server.js","sourceRoot":"","sources":["../../src/mcp/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAChF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AACvB,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAA;AAC1C,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAA;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAEpD,iFAAiF;AACjF,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAA;AAE1C,SAAS,iBAAiB,CAAC,SAAiB;IAC1C,IAAI,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;IACpC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC,CAAA;QAChD,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IACjC,CAAC;IACD,OAAO,MAAM,CAAA;AACf,CAAC;AAED,MAAM,UAAU,eAAe;IAC7B,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,IAAI,EAAE,eAAe;QACrB,OAAO,EAAE,OAAO;KACjB,CAAC,CAAA;IAEF,MAAM,CAAC,IAAI,CACT,WAAW,EACX;;;;;;;6CAOyC,EACzC;QACE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;QACrD,UAAU,EAAE,CAAC;aACV,MAAM,EAAE;aACR,QAAQ,EAAE;aACV,QAAQ,CACP,uFAAuF,CACxF;KACJ,EACD,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE;QAC7B,MAAM,GAAG,GAAG,UAAU,IAAI,MAAM,CAAC,UAAU,EAAE,CAAA;QAC7C,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAA;QAErC,IAAI,UAAkB,CAAA;QACtB,IAAI,CAAC;YACH,UAAU,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QACzC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,yBAAyB,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;gBAClF,OAAO,EAAE,IAAI;aACd,CAAA;QACH,CAAC;QAED,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,GAAG,EAAE,CAAA;QAE7C,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB;wBACE,UAAU,EAAE,GAAG;wBACf,WAAW,EAAE,UAAU;wBACvB,WAAW,EAAE,GAAG,CAAC,UAAU;qBAC5B,EACD,IAAI,EACJ,CAAC,CACF;iBACF;aACF;SACF,CAAA;IACH,CAAC,CACF,CAAA;IAED,MAAM,CAAC,IAAI,CACT,aAAa,EACb;;;yCAGqC,EACrC;QACE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;QAC1E,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;KACxE,EACD,KAAK,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE;QAC7B,MAAM,MAAM,GAAG,QAAQ,CAAC
,GAAG,CAAC,UAAU,CAAC,CAAA;QACvC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,mBAAmB,UAAU,oDAAoD;qBACxF;iBACF;gBACD,OAAO,EAAE,IAAI;aACd,CAAA;QACH,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACpC,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;SACrD,CAAA;IACH,CAAC,CACF,CAAA;IAED,OAAO,MAAM,CAAA;AACf,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,MAAM,MAAM,GAAG,eAAe,EAAE,CAAA;IAChC,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAA;IAC5C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;AACjC,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/** Result shape resolved by checkOllama. */
export interface OllamaCheckResult {
|
|
2
|
+
/** Whether Ollama was found to be running. */
running: boolean;
|
|
3
|
+
/** Whether the required model was found to be available. */
modelAvailable: boolean;
|
|
4
|
+
/** Optional message; presumably describes why the check failed — TODO confirm against check-ollama.js. */
error?: string;
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Check whether Ollama is running and whether the required model is available.
*
* @param baseUrl - presumably the base URL of the Ollama server — TODO confirm against the implementation.
* @param model - name of the required model to look for.
* @param timeoutMs - optional; judging by the name a timeout in milliseconds. The default is not visible in this declaration.
* @returns A promise resolving to an OllamaCheckResult.
|
|
8
|
+
*/
|
|
9
|
+
export declare function checkOllama(baseUrl: string, model: string, timeoutMs?: number): Promise<OllamaCheckResult>;
|
|
10
|
+
/**
|
|
11
|
+
* Print a user-friendly warning to stderr if Ollama is unavailable or
|
|
12
|
+
* the required model is missing. Never throws — errors are non-fatal.
*
* @param baseUrl - presumably the base URL of the Ollama server — TODO confirm against the implementation.
* @param model - name of the required model.
* @returns A promise that resolves with no value.
|
|
13
|
+
*/
|
|
14
|
+
export declare function printOllamaStatus(baseUrl: string, model: string): Promise<void>;
|
|
15
|
+
//# sourceMappingURL=check-ollama.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"check-ollama.d.ts","sourceRoot":"","sources":["../../src/setup/check-ollama.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,OAAO,CAAA;IAChB,cAAc,EAAE,OAAO,CAAA;IACvB,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,EACb,SAAS,SAAQ,GAChB,OAAO,CAAC,iBAAiB,CAAC,CAmC5B;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC,CAqBf"}
|