pseudonym-mcp 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/engine.d.ts +5 -0
- package/dist/core/engine.d.ts.map +1 -1
- package/dist/core/engine.js +59 -9
- package/dist/core/engine.js.map +1 -1
- package/dist/core/ollama-client.d.ts +1 -1
- package/dist/core/ollama-client.d.ts.map +1 -1
- package/dist/core/ollama-client.js +8 -3
- package/dist/core/ollama-client.js.map +1 -1
- package/dist/patterns/locale/pl/nip.js +4 -4
- package/dist/patterns/locale/pl/nip.js.map +1 -1
- package/dist/patterns/locale/pl/phone.js +2 -2
- package/dist/patterns/locale/pl/phone.js.map +1 -1
- package/package.json +1 -1
package/dist/core/engine.d.ts
CHANGED
|
@@ -23,6 +23,11 @@ export declare class Engine {
|
|
|
23
23
|
process(text: string, extraLiterals?: string[]): Promise<string>;
|
|
24
24
|
private applyCustomLiterals;
|
|
25
25
|
private applyRegexRules;
|
|
26
|
+
/**
|
|
27
|
+
* Split text into sentence-boundary chunks with 1-sentence overlap.
|
|
28
|
+
* Visible for testing.
|
|
29
|
+
*/
|
|
30
|
+
static splitIntoChunks(text: string, maxLen?: number): string[];
|
|
26
31
|
private applyLlmNer;
|
|
27
32
|
/**
|
|
28
33
|
* Restore all [TAG:N] tokens in text to their original values.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../../src/core/engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAqB,MAAM,oBAAoB,CAAA;AAWpE;;;;;;GAMG;AACH,qBAAa,MAAM;IACjB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAc;IACpC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAqB;gBAEtC,KAAK,CAAC,EAAE,YAAY,EAAE,oBAAoB,CAAC,EAAE,YAAY,GAAG,IAAI;IAe5E,QAAQ,IAAI,YAAY;IAIxB;;;;;;;OAOG;IACG,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAsBtE,OAAO,CAAC,mBAAmB;IAW3B,OAAO,CAAC,eAAe;
|
|
1
|
+
{"version":3,"file":"engine.d.ts","sourceRoot":"","sources":["../../src/core/engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAqB,MAAM,oBAAoB,CAAA;AAWpE;;;;;;GAMG;AACH,qBAAa,MAAM;IACjB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAc;IACpC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAqB;gBAEtC,KAAK,CAAC,EAAE,YAAY,EAAE,oBAAoB,CAAC,EAAE,YAAY,GAAG,IAAI;IAe5E,QAAQ,IAAI,YAAY;IAIxB;;;;;;;OAOG;IACG,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAsBtE,OAAO,CAAC,mBAAmB;IAW3B,OAAO,CAAC,eAAe;IAmBvB;;;OAGG;IACH,MAAM,CAAC,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,GAAE,MAAY,GAAG,MAAM,EAAE;YAmCtD,WAAW;IAkDzB;;;OAGG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;CAK7B"}
|
package/dist/core/engine.js
CHANGED
|
@@ -84,21 +84,71 @@ export class Engine {
|
|
|
84
84
|
}
|
|
85
85
|
return result;
|
|
86
86
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
87
|
+
/**
|
|
88
|
+
* Split text into sentence-boundary chunks with 1-sentence overlap.
|
|
89
|
+
* Visible for testing.
|
|
90
|
+
*/
|
|
91
|
+
static splitIntoChunks(text, maxLen = 800) {
|
|
92
|
+
// Split into sentences at . ! ? followed by whitespace
|
|
93
|
+
const sentences = [];
|
|
94
|
+
let lastIdx = 0;
|
|
95
|
+
const re = /[.!?][\s]+/g;
|
|
96
|
+
let match;
|
|
97
|
+
while ((match = re.exec(text)) !== null) {
|
|
98
|
+
const end = match.index + match[0].length;
|
|
99
|
+
sentences.push(text.slice(lastIdx, end));
|
|
100
|
+
lastIdx = end;
|
|
91
101
|
}
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
102
|
+
if (lastIdx < text.length) {
|
|
103
|
+
sentences.push(text.slice(lastIdx));
|
|
104
|
+
}
|
|
105
|
+
if (sentences.length === 0)
|
|
106
|
+
return [text];
|
|
107
|
+
// Build chunks with 1-sentence overlap
|
|
108
|
+
const chunks = [];
|
|
109
|
+
let current = '';
|
|
110
|
+
let lastSentence = '';
|
|
111
|
+
for (const sentence of sentences) {
|
|
112
|
+
if (current.length + sentence.length > maxLen && current.length > 0) {
|
|
113
|
+
chunks.push(current);
|
|
114
|
+
current = lastSentence; // overlap: start with last sentence of previous chunk
|
|
115
|
+
}
|
|
116
|
+
current += sentence;
|
|
117
|
+
lastSentence = sentence;
|
|
118
|
+
}
|
|
119
|
+
if (current)
|
|
120
|
+
chunks.push(current);
|
|
121
|
+
return chunks;
|
|
122
|
+
}
|
|
123
|
+
async applyLlmNer(text) {
|
|
124
|
+
const chunks = Engine.splitIntoChunks(text);
|
|
125
|
+
// Collect all entities across chunks, passing known entities as context
|
|
126
|
+
const allEntities = [];
|
|
127
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
128
|
+
let chunkEntities;
|
|
129
|
+
try {
|
|
130
|
+
chunkEntities = await this.ollamaClient.extractEntities(chunks[i], allEntities.length > 0 ? allEntities : undefined);
|
|
131
|
+
}
|
|
132
|
+
catch (err) {
|
|
133
|
+
process.stderr.write(`[pseudonym-mcp] Ollama NER failed on chunk ${i + 1}/${chunks.length} (skipping): ${String(err)}\n`);
|
|
134
|
+
continue;
|
|
135
|
+
}
|
|
136
|
+
// Deduplicate: add only entities not already known
|
|
137
|
+
for (const entity of chunkEntities) {
|
|
138
|
+
const val = entity.value.trim();
|
|
139
|
+
if (!val)
|
|
140
|
+
continue;
|
|
141
|
+
if (!allEntities.some((e) => e.value === val && e.type === entity.type)) {
|
|
142
|
+
allEntities.push(entity);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
95
145
|
}
|
|
96
|
-
if (
|
|
146
|
+
if (allEntities.length === 0)
|
|
97
147
|
return text;
|
|
98
148
|
let result = text;
|
|
99
149
|
// Sort longest-first to prevent partial matches
|
|
100
150
|
// e.g. "Auto-Lux International" must be replaced before "Auto-Lux"
|
|
101
|
-
const sorted = [...
|
|
151
|
+
const sorted = [...allEntities].sort((a, b) => b.value.length - a.value.length);
|
|
102
152
|
for (const entity of sorted) {
|
|
103
153
|
const val = entity.value.trim();
|
|
104
154
|
if (!val)
|
package/dist/core/engine.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"engine.js","sourceRoot":"","sources":["../../src/core/engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAqB,MAAM,oBAAoB,CAAA;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAEpD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAA;AACvD,OAAO,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAA;AAEtD,MAAM,YAAY,GAAkC;IAClD,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,WAAW;CAChB,CAAA;AAED;;;;;;GAMG;AACH,MAAM,OAAO,MAAM;IACA,KAAK,CAAc;IACnB,YAAY,CAAqB;IAElD,YAAY,KAAoB,EAAE,oBAA0C;QAC1E,IAAI,CAAC,KAAK,GAAG,KAAK,IAAI,IAAI,YAAY,EAAE,CAAA;QAExC,IAAI,oBAAoB,KAAK,SAAS,EAAE,CAAC;YACvC,oDAAoD;YACpD,IAAI,CAAC,YAAY,GAAG,oBAAoB,CAAA;QAC1C,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,GAAG,EAAE,CAAA;YAC7C,MAAM,QAAQ,GAAG,GAAG,CAAC,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,KAAK,KAAK,CAAA;YAClE,IAAI,CAAC,YAAY,GAAG,QAAQ;gBAC1B,CAAC,CAAC,IAAI,YAAY,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,aAAa,EAAE,KAAK,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC;gBAC1E,CAAC,CAAC,IAAI,CAAA;QACV,CAAC;IACH,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAA;IACnB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,OAAO,CAAC,IAAY,EAAE,aAAwB;QAClD,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,GAAG,EAAE,CAAA;QAC7C,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAA;QAEpD,IAAI,MAAM,GAAG,IAAI,CAAA;QAEjB,IAAI,GAAG,CAAC,OAAO,KAAK,OAAO,IAAI,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACxD,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,CAAC,gBAAgB,CAAC,CAAA;QACpE,CAAC;QAED,MAAM,WAAW,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,cAAc,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,CAAA;QAC7E,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;QACxD,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,OAAO,KAAK,KAAK,IAAI,GAAG,CAAC,OAAO,KAAK,QAAQ,CAAC,IAAI,IAAI,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;YACtF,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAA;QACzC,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAEO,mBAAmB,CAAC,IAAY,EAAE,QAAkB;QAC1D,IAAI,MAAM,GAAG,IAAI,CAAA;QACjB,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAA;QAChF,KAAK,MAAM,OAAO,IAAI,MAAM,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAA;YAC9D,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;YACpC,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAA;QACzE,CAAC;QACD,OAAO,MAAM,CAAA;IACf,CAAC;IAEO,eAAe,CAAC,IAAY,EAAE,KAAoB,EAAE,gBAAyB;QACnF,IAAI,MAAM,GAAG,IAAI,CAAA;QAEjB,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACxC,+DAA+D;YAC/D,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;YAEzE,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;gBACvC,IAAI,UAAU,CAAC,QAAQ,IAAI,gBAAgB,EAAE,CAAC;oBAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;oBACtC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC;wBAAE,OAAO,KAAK,CAAA;gBAC/C,CAAC;gBACD,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC9C,CAAC,CAAC,CAAA;QACJ,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,IAAY;QACpC,IAAI,
|
|
1
|
+
{"version":3,"file":"engine.js","sourceRoot":"","sources":["../../src/core/engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,YAAY,EAAqB,MAAM,oBAAoB,CAAA;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAEpD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAA;AACvD,OAAO,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAA;AAEtD,MAAM,YAAY,GAAkC;IAClD,EAAE,EAAE,YAAY;IAChB,EAAE,EAAE,WAAW;CAChB,CAAA;AAED;;;;;;GAMG;AACH,MAAM,OAAO,MAAM;IACA,KAAK,CAAc;IACnB,YAAY,CAAqB;IAElD,YAAY,KAAoB,EAAE,oBAA0C;QAC1E,IAAI,CAAC,KAAK,GAAG,KAAK,IAAI,IAAI,YAAY,EAAE,CAAA;QAExC,IAAI,oBAAoB,KAAK,SAAS,EAAE,CAAC;YACvC,oDAAoD;YACpD,IAAI,CAAC,YAAY,GAAG,oBAAoB,CAAA;QAC1C,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,GAAG,EAAE,CAAA;YAC7C,MAAM,QAAQ,GAAG,GAAG,CAAC,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,KAAK,KAAK,CAAA;YAClE,IAAI,CAAC,YAAY,GAAG,QAAQ;gBAC1B,CAAC,CAAC,IAAI,YAAY,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,aAAa,EAAE,KAAK,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC;gBAC1E,CAAC,CAAC,IAAI,CAAA;QACV,CAAC;IACH,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAA;IACnB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,OAAO,CAAC,IAAY,EAAE,aAAwB;QAClD,MAAM,GAAG,GAAG,aAAa,CAAC,WAAW,EAAE,CAAC,GAAG,EAAE,CAAA;QAC7C,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAA;QAEpD,IAAI,MAAM,GAAG,IAAI,CAAA;QAEjB,IAAI,GAAG,CAAC,OAAO,KAAK,OAAO,IAAI,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACxD,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,CAAC,gBAAgB,CAAC,CAAA;QACpE,CAAC;QAED,MAAM,WAAW,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,cAAc,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,CAAA;QAC7E,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;QACxD,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,OAAO,KAAK,KAAK,IAAI,GAAG,CAAC,OAAO,KAAK,QAAQ,CAAC,IAAI,IAAI,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;YACtF,MAAM,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAA;QACzC,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAEO,mBAAmB,CAAC,IAAY,EAAE,QAAkB;QAC1D,IAAI,MAAM,GAAG,IAAI,CAAA;QACjB,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAA;QAChF,KAAK,MAAM,OAAO,IAAI,MAAM,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAA;YAC9D,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;YACpC,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAA;QACzE,CAAC;QACD,OAAO,MAAM,CAAA;IACf,CAAC;IAEO,eAAe,CAAC,IAAY,EAAE,KAAoB,EAAE,gBAAyB;QACnF,IAAI,MAAM,GAAG,IAAI,CAAA;QAEjB,KAAK,MAAM,UAAU,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACxC,+DAA+D;YAC/D,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,EAAE,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;YAEzE,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;gBACvC,IAAI,UAAU,CAAC,QAAQ,IAAI,gBAAgB,EAAE,CAAC;oBAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;oBACtC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC;wBAAE,OAAO,KAAK,CAAA;gBAC/C,CAAC;gBACD,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;YAC9C,CAAC,CAAC,CAAA;QACJ,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,eAAe,CAAC,IAAY,EAAE,SAAiB,GAAG;QACvD,uDAAuD;QACvD,MAAM,SAAS,GAAa,EAAE,CAAA;QAC9B,IAAI,OAAO,GAAG,CAAC,CAAA;QACf,MAAM,EAAE,GAAG,aAAa,CAAA;QACxB,IAAI,KAA6B,CAAA;QAEjC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACxC,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;YACzC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAA;YACxC,OAAO,GAAG,GAAG,CAAA;QACf,CAAC;QACD,IAAI,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC1B,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAA;QACrC,CAAC;QACD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAA;QAEzC,uCAAuC;QACvC,MAAM,MAAM,GAAa,EAAE,CAAA;QAC3B,IAAI,OAAO,GAAG,EAAE,CAAA;QAChB,IAAI,YAAY,GAAG,EAAE,CAAA;QAErB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,IAAI,OAAO,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;gBACpB,OAAO,GAAG,YAAY,CAAA,CAAC,sDAAsD;YAC/E,CAAC;YACD,OAAO,IAAI,QAAQ,CAAA;YACnB,YAAY,GAAG,QAAQ,CAAA;QACzB,CAAC;QACD,IAAI,OAAO;YAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAEjC,OAAO,MAAM,CAAA;IACf,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,IAAY;QACpC,MAAM,MAAM,GAAG,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,CAAA;QAE3C,wEAAwE;QACxE,MAAM,WAAW,GAAmB,EAAE,CAAA;QAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,IAAI,aAA6B,CAAA;YACjC,IAAI,CAAC;gBACH,aAAa,GAAG,MAAM,IAAI,CAAC,YAAa,CAAC,eAAe,CACtD,MAAM,CAAC,CAAC,CAAC,EACT,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS,CACjD,CAAA;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,8CAA8C,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,gBAAgB,MAAM,CAAC,GAAG,CAAC,IAAI,CACpG,CAAA;gBACD,SAAQ;YACV,CAAC;YAED,mDAAmD;YACnD,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;gBACnC,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CAAA;gBAC/B,IAAI,CAAC,GAAG;oBAAE,SAAQ;gBAClB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,GAAG,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;oBACxE,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;gBAC1B,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAA;QAEzC,IAAI,MAAM,GAAG,IAAI,CAAA;QAEjB,gDAAgD;QAChD,mEAAmE;QACnE,MAAM,MAAM,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAA;QAE/E,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CAAA;YAC/B,IAAI,CAAC,GAAG;gBAAE,SAAQ;YAClB,sDAAsD;YACtD,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAA;YAC1D,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAA;YACnC,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAA;QACrE,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,IAAY;QACjB,OAAO,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,CAAC,KAAK,EAAE,EAAE;YAC/C,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,CAAA;QACvC,CAAC,CAAC,CAAA;IACJ,CAAC;CACF"}
|
|
@@ -16,6 +16,6 @@ export declare class OllamaClient {
|
|
|
16
16
|
private readonly model;
|
|
17
17
|
private readonly timeoutMs;
|
|
18
18
|
constructor(opts: OllamaClientOptions);
|
|
19
|
-
extractEntities(text: string): Promise<OllamaEntity[]>;
|
|
19
|
+
extractEntities(text: string, knownEntities?: OllamaEntity[]): Promise<OllamaEntity[]>;
|
|
20
20
|
}
|
|
21
21
|
//# sourceMappingURL=ollama-client.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ollama-client.d.ts","sourceRoot":"","sources":["../../src/core/ollama-client.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,GAAG,KAAK,CAAA;IACtB,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAQD;;;GAGG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAQ;IAChC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAQ;IAC9B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAQ;gBAEtB,IAAI,EAAE,mBAAmB;IAM/B,eAAe,
|
|
1
|
+
{"version":3,"file":"ollama-client.d.ts","sourceRoot":"","sources":["../../src/core/ollama-client.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,QAAQ,GAAG,KAAK,CAAA;IACtB,KAAK,EAAE,MAAM,CAAA;CACd;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAQD;;;GAGG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAQ;IAChC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAQ;IAC9B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAQ;gBAEtB,IAAI,EAAE,mBAAmB;IAM/B,eAAe,CACnB,IAAI,EAAE,MAAM,EACZ,aAAa,CAAC,EAAE,YAAY,EAAE,GAC7B,OAAO,CAAC,YAAY,EAAE,CAAC;CA2D3B"}
|
|
@@ -14,11 +14,16 @@ export class OllamaClient {
|
|
|
14
14
|
constructor(opts) {
|
|
15
15
|
this.baseUrl = opts.baseUrl;
|
|
16
16
|
this.model = opts.model;
|
|
17
|
-
this.timeoutMs = opts.timeoutMs ??
|
|
17
|
+
this.timeoutMs = opts.timeoutMs ?? 15_000;
|
|
18
18
|
}
|
|
19
|
-
async extractEntities(text) {
|
|
19
|
+
async extractEntities(text, knownEntities) {
|
|
20
20
|
const controller = new AbortController();
|
|
21
21
|
const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
|
|
22
|
+
let systemContent = SYSTEM_PROMPT;
|
|
23
|
+
if (knownEntities && knownEntities.length > 0) {
|
|
24
|
+
const list = knownEntities.map((e) => `"${e.value}" = ${e.type}`).join('; ');
|
|
25
|
+
systemContent += `\nPreviously identified entities (reuse these exact values if they appear again): ${list}`;
|
|
26
|
+
}
|
|
22
27
|
let res;
|
|
23
28
|
try {
|
|
24
29
|
res = await fetch(`${this.baseUrl}/api/chat`, {
|
|
@@ -29,7 +34,7 @@ export class OllamaClient {
|
|
|
29
34
|
model: this.model,
|
|
30
35
|
stream: false,
|
|
31
36
|
messages: [
|
|
32
|
-
{ role: 'system', content:
|
|
37
|
+
{ role: 'system', content: systemContent },
|
|
33
38
|
{ role: 'user', content: text },
|
|
34
39
|
],
|
|
35
40
|
}),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ollama-client.js","sourceRoot":"","sources":["../../src/core/ollama-client.ts"],"names":[],"mappings":"AAWA,MAAM,aAAa,GAAG;;;;oFAI8D,CAAA;AAEpF;;;GAGG;AACH,MAAM,OAAO,YAAY;IACN,OAAO,CAAQ;IACf,KAAK,CAAQ;IACb,SAAS,CAAQ;IAElC,YAAY,IAAyB;QACnC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAA;QAC3B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAA;QACvB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,MAAM,CAAA;IAC3C,CAAC;IAED,KAAK,CAAC,eAAe,
|
|
1
|
+
{"version":3,"file":"ollama-client.js","sourceRoot":"","sources":["../../src/core/ollama-client.ts"],"names":[],"mappings":"AAWA,MAAM,aAAa,GAAG;;;;oFAI8D,CAAA;AAEpF;;;GAGG;AACH,MAAM,OAAO,YAAY;IACN,OAAO,CAAQ;IACf,KAAK,CAAQ;IACb,SAAS,CAAQ;IAElC,YAAY,IAAyB;QACnC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAA;QAC3B,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAA;QACvB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,MAAM,CAAA;IAC3C,CAAC;IAED,KAAK,CAAC,eAAe,CACnB,IAAY,EACZ,aAA8B;QAE9B,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;QACxC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,SAAS,CAAC,CAAA;QAEpE,IAAI,aAAa,GAAG,aAAa,CAAA;QACjC,IAAI,aAAa,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,IAAI,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,KAAK,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAC5E,aAAa,IAAI,qFAAqF,IAAI,EAAE,CAAA;QAC9G,CAAC;QAED,IAAI,GAAa,CAAA;QACjB,IAAI,CAAC;YACH,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,WAAW,EAAE;gBAC5C,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;gBAC/C,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,KAAK;oBACb,QAAQ,EAAE;wBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;wBAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;qBAChC;iBACF,CAAC;aACH,CAAC,CAAA;QACJ,CAAC;gBAAS,CAAC;YACT,YAAY,CAAC,OAAO,CAAC,CAAA;QACvB,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,CAAC,MAAM,EAAE,CAAC,CAAA;QACvD,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAuC,CAAA;QACrE,MAAM,OAAO,GAAG,IAAI,EAAE,OAAO,EAAE,OAAO,IAAI,IAAI,CAAA;QAE9C,oEAAoE;QACpE,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAA;QAC9C,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAA;QAEzB,IAAI,MAAiB,CAAA;QACrB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAc,CAAA;QAChD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAA;QACX,CAAC;QAED,OAAO,MAAM,CAAC,MAAM,CAClB,CAAC,CAAC,EAAqB,EAAE,CACvB,OAAO,CAAC,KAAK,QAAQ;YACrB,CAAC,KAAK,IAAI;YACV,MAAM,IAAI,CAAC;YACX,OAAO,IAAI,CAAC;YACZ,CAAE,CAA6B,CAAC,IAAI,KAAK,QAAQ;gBAC9C,CAA6B,CAAC,IAAI,KAAK,KAAK,CAAC;YAChD,OAAQ,CAA6B,CAAC,KAAK,KAAK,QAAQ;YACtD,CAA6B,CAAC,KAAgB,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CACrE,CAAA;IACH,CAAC;CACF"}
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
* Check digit = weighted_sum % 11; valid if result equals last digit (and != 10)
|
|
5
5
|
*/
|
|
6
6
|
function nipChecksum(raw) {
|
|
7
|
-
const digits = raw.replace(/
|
|
8
|
-
if (
|
|
7
|
+
const digits = raw.replace(/\D/g, '');
|
|
8
|
+
if (digits.length !== 10)
|
|
9
9
|
return false;
|
|
10
10
|
const weights = [6, 5, 7, 2, 3, 4, 5, 6, 7];
|
|
11
11
|
const d = digits.split('').map(Number);
|
|
@@ -16,8 +16,8 @@ function nipChecksum(raw) {
|
|
|
16
16
|
export const nipRule = {
|
|
17
17
|
id: 'pl.nip',
|
|
18
18
|
entityType: 'NIP',
|
|
19
|
-
// 10 digits in XXX-XXX-XX-XX format (hyphens required)
|
|
20
|
-
pattern:
|
|
19
|
+
// Optional "NIP" label + 10 digits in XXX-XXX-XX-XX format (hyphens required)
|
|
20
|
+
pattern: /(?:NIP[\s:]+)?\b\d{3}-\d{3}-\d{2}-\d{2}\b/g,
|
|
21
21
|
locales: ['pl'],
|
|
22
22
|
engines: ['strict', 'paranoid'],
|
|
23
23
|
description: 'Polish tax identification number (NIP) — 10 digits with checksum',
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"nip.js","sourceRoot":"","sources":["../../../../src/patterns/locale/pl/nip.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,
|
|
1
|
+
{"version":3,"file":"nip.js","sourceRoot":"","sources":["../../../../src/patterns/locale/pl/nip.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;IACrC,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE;QAAE,OAAO,KAAK,CAAA;IACtC,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAA;IAC3C,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IACtC,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;IAC5D,MAAM,KAAK,GAAG,GAAG,GAAG,EAAE,CAAA;IACtB,OAAO,KAAK,KAAK,EAAE,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;AACvC,CAAC;AAED,MAAM,CAAC,MAAM,OAAO,GAAgB;IAClC,EAAE,EAAE,QAAQ;IACZ,UAAU,EAAE,KAAK;IACjB,8EAA8E;IAC9E,OAAO,EAAE,4CAA4C;IACrD,OAAO,EAAE,CAAC,IAAI,CAAC;IACf,OAAO,EAAE,CAAC,QAAQ,EAAE,UAAU,CAAC;IAC/B,WAAW,EAAE,kEAAkE;IAC/E,QAAQ,EAAE,WAAW;CACtB,CAAA"}
|
|
@@ -2,11 +2,11 @@ export const plPhoneRule = {
|
|
|
2
2
|
id: 'pl.phone',
|
|
3
3
|
entityType: 'PHONE',
|
|
4
4
|
// Four alternatives:
|
|
5
|
-
// 1. International prefix: +48 or 0048, then 9 digits
|
|
5
|
+
// 1. International prefix: +48 or 0048, then exactly 9 digits in any grouping
|
|
6
6
|
// 2. 9-digit mobile starting with 4–8 (Polish numbering plan)
|
|
7
7
|
// 3. Landline with area code in parens: (XX) XXX-XX-XX
|
|
8
8
|
// 4. Landline without prefix: 2-digit area code + 7 digits (XX XXX XX XX or XX XXXXXXX)
|
|
9
|
-
pattern: /(?:\+48|0048)[\s\-]
|
|
9
|
+
pattern: /(?:\+48|0048)[\s\-]?(?:\d[\s\-]?){8}\d(?!\d)|\b[4-8]\d{2}[\s\-]?\d{3}[\s\-]?\d{3}\b|\(\d{2}\)[\s\-]?\d{3}[\s\-]?\d{2}[\s\-]?\d{2}|\b[1-9]\d[\s\-]?\d{3}[\s\-]?\d{2}[\s\-]?\d{2}\b/g,
|
|
10
10
|
locales: ['pl'],
|
|
11
11
|
engines: ['balanced', 'strict', 'paranoid'],
|
|
12
12
|
description: 'Polish phone number (+48 / 0048 prefix, 9-digit mobile, landline)',
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"phone.js","sourceRoot":"","sources":["../../../../src/patterns/locale/pl/phone.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,WAAW,GAAgB;IACtC,EAAE,EAAE,UAAU;IACd,UAAU,EAAE,OAAO;IACnB,qBAAqB;IACrB,
|
|
1
|
+
{"version":3,"file":"phone.js","sourceRoot":"","sources":["../../../../src/patterns/locale/pl/phone.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,WAAW,GAAgB;IACtC,EAAE,EAAE,UAAU;IACd,UAAU,EAAE,OAAO;IACnB,qBAAqB;IACrB,8EAA8E;IAC9E,8DAA8D;IAC9D,uDAAuD;IACvD,wFAAwF;IACxF,OAAO,EACL,oLAAoL;IACtL,OAAO,EAAE,CAAC,IAAI,CAAC;IACf,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,UAAU,CAAC;IAC3C,WAAW,EAAE,mEAAmE;CACjF,CAAA"}
|
package/package.json
CHANGED