@besales/anonymizer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +81 -0
- package/dist/anonymizer.d.ts +31 -0
- package/dist/anonymizer.d.ts.map +1 -0
- package/dist/anonymizer.js +175 -0
- package/dist/anonymizer.js.map +1 -0
- package/dist/constants.d.ts +11 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +16 -0
- package/dist/constants.js.map +1 -0
- package/dist/detectors/contextual.detector.d.ts +10 -0
- package/dist/detectors/contextual.detector.d.ts.map +1 -0
- package/dist/detectors/contextual.detector.js +56 -0
- package/dist/detectors/contextual.detector.js.map +1 -0
- package/dist/detectors/detection-pipeline.d.ts +6 -0
- package/dist/detectors/detection-pipeline.d.ts.map +1 -0
- package/dist/detectors/detection-pipeline.js +102 -0
- package/dist/detectors/detection-pipeline.js.map +1 -0
- package/dist/detectors/detector.interface.d.ts +6 -0
- package/dist/detectors/detector.interface.d.ts.map +1 -0
- package/dist/detectors/detector.interface.js +3 -0
- package/dist/detectors/detector.interface.js.map +1 -0
- package/dist/detectors/dictionary.detector.d.ts +12 -0
- package/dist/detectors/dictionary.detector.d.ts.map +1 -0
- package/dist/detectors/dictionary.detector.js +222 -0
- package/dist/detectors/dictionary.detector.js.map +1 -0
- package/dist/detectors/regex.detector.d.ts +10 -0
- package/dist/detectors/regex.detector.d.ts.map +1 -0
- package/dist/detectors/regex.detector.js +41 -0
- package/dist/detectors/regex.detector.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +27 -0
- package/dist/index.js.map +1 -0
- package/dist/llm-protocol/response-validator.d.ts +5 -0
- package/dist/llm-protocol/response-validator.d.ts.map +1 -0
- package/dist/llm-protocol/response-validator.js +96 -0
- package/dist/llm-protocol/response-validator.js.map +1 -0
- package/dist/llm-protocol/system-prompt-builder.d.ts +2 -0
- package/dist/llm-protocol/system-prompt-builder.d.ts.map +1 -0
- package/dist/llm-protocol/system-prompt-builder.js +8 -0
- package/dist/llm-protocol/system-prompt-builder.js.map +1 -0
- package/dist/locales/index.d.ts +9 -0
- package/dist/locales/index.d.ts.map +1 -0
- package/dist/locales/index.js +15 -0
- package/dist/locales/index.js.map +1 -0
- package/dist/locales/locale.interface.d.ts +35 -0
- package/dist/locales/locale.interface.d.ts.map +1 -0
- package/dist/locales/locale.interface.js +3 -0
- package/dist/locales/locale.interface.js.map +1 -0
- package/dist/locales/ru/data/names.csv +1291 -0
- package/dist/locales/ru/data/stopwords.csv +236 -0
- package/dist/locales/ru/index.d.ts +3 -0
- package/dist/locales/ru/index.d.ts.map +1 -0
- package/dist/locales/ru/index.js +227 -0
- package/dist/locales/ru/index.js.map +1 -0
- package/dist/tokenizer/tokenizer.d.ts +13 -0
- package/dist/tokenizer/tokenizer.d.ts.map +1 -0
- package/dist/tokenizer/tokenizer.js +50 -0
- package/dist/tokenizer/tokenizer.js.map +1 -0
- package/dist/traversers/completions.traverser.d.ts +7 -0
- package/dist/traversers/completions.traverser.d.ts.map +1 -0
- package/dist/traversers/completions.traverser.js +111 -0
- package/dist/traversers/completions.traverser.js.map +1 -0
- package/dist/traversers/json.traverser.d.ts +12 -0
- package/dist/traversers/json.traverser.d.ts.map +1 -0
- package/dist/traversers/json.traverser.js +94 -0
- package/dist/traversers/json.traverser.js.map +1 -0
- package/dist/traversers/responses.traverser.d.ts +8 -0
- package/dist/traversers/responses.traverser.d.ts.map +1 -0
- package/dist/traversers/responses.traverser.js +86 -0
- package/dist/traversers/responses.traverser.js.map +1 -0
- package/dist/traversers/text.traverser.d.ts +6 -0
- package/dist/traversers/text.traverser.d.ts.map +1 -0
- package/dist/traversers/text.traverser.js +15 -0
- package/dist/traversers/text.traverser.js.map +1 -0
- package/dist/traversers/traverser.interface.d.ts +9 -0
- package/dist/traversers/traverser.interface.d.ts.map +1 -0
- package/dist/traversers/traverser.interface.js +3 -0
- package/dist/traversers/traverser.interface.js.map +1 -0
- package/dist/types/anonymize-result.interface.d.ts +23 -0
- package/dist/types/anonymize-result.interface.d.ts.map +1 -0
- package/dist/types/anonymize-result.interface.js +3 -0
- package/dist/types/anonymize-result.interface.js.map +1 -0
- package/dist/types/deanonymize-result.interface.d.ts +18 -0
- package/dist/types/deanonymize-result.interface.d.ts.map +1 -0
- package/dist/types/deanonymize-result.interface.js +3 -0
- package/dist/types/deanonymize-result.interface.js.map +1 -0
- package/dist/types/detected-entity.interface.d.ts +23 -0
- package/dist/types/detected-entity.interface.d.ts.map +1 -0
- package/dist/types/detected-entity.interface.js +3 -0
- package/dist/types/detected-entity.interface.js.map +1 -0
- package/dist/types/detection-context.d.ts +10 -0
- package/dist/types/detection-context.d.ts.map +1 -0
- package/dist/types/detection-context.js +12 -0
- package/dist/types/detection-context.js.map +1 -0
- package/dist/types/entity-type.d.ts +16 -0
- package/dist/types/entity-type.d.ts.map +1 -0
- package/dist/types/entity-type.js +18 -0
- package/dist/types/entity-type.js.map +1 -0
- package/dist/types/llm-message.interface.d.ts +23 -0
- package/dist/types/llm-message.interface.d.ts.map +1 -0
- package/dist/types/llm-message.interface.js +3 -0
- package/dist/types/llm-message.interface.js.map +1 -0
- package/dist/types/llm-validation.interface.d.ts +8 -0
- package/dist/types/llm-validation.interface.d.ts.map +1 -0
- package/dist/types/llm-validation.interface.js +3 -0
- package/dist/types/llm-validation.interface.js.map +1 -0
- package/dist/types/malformed-token.interface.d.ts +6 -0
- package/dist/types/malformed-token.interface.d.ts.map +1 -0
- package/dist/types/malformed-token.interface.js +3 -0
- package/dist/types/malformed-token.interface.js.map +1 -0
- package/dist/types/mapping-data.interface.d.ts +9 -0
- package/dist/types/mapping-data.interface.d.ts.map +1 -0
- package/dist/types/mapping-data.interface.js +12 -0
- package/dist/types/mapping-data.interface.js.map +1 -0
- package/package.json +48 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 BeSales
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# @besales/anonymizer
|
|
2
|
+
|
|
3
|
+
Stateless **обратимая** псевдонимизация PII для besales/animaly.
|
|
4
|
+
|
|
5
|
+
`detect → tokenize → mapping → detokenize`. Реальные `Иван Петров` / `+79161234567` →
|
|
6
|
+
токены `[PERSON_1]` / `[PHONE_1]` перед отправкой в LLM; обратная подстановка реальных
|
|
7
|
+
значений в ответе и в аргументах tool-call'ов (например, реальный email в поле CRM).
|
|
8
|
+
|
|
9
|
+
## Чем это НЕ является
|
|
10
|
+
|
|
11
|
+
- **Не сервис, а in-process библиотека.** Чистые функции, без БД/сети/фреймворков/очередей.
|
|
12
|
+
Вызывающий сам решает, где хранить `MappingData` (в этом проекте — request-scoped в памяти).
|
|
13
|
+
В отличие от `transcription-service`, анонимайзер на горячем пути каждого LLM-вызова —
|
|
14
|
+
сетевой round-trip недопустим.
|
|
15
|
+
- **Не lossy-скраббер.** В отличие от одностороннего `[EMAIL]`-маскирования, здесь маппинг
|
|
16
|
+
токен→значение обратим и идемпотентен (один PII → один токен).
|
|
17
|
+
|
|
18
|
+
## Установка
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
yarn add @besales/anonymizer
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Публичный пакет на npm (как `@besales/mcp`) — токены/`.npmrc` не нужны.
|
|
25
|
+
|
|
26
|
+
## Использование
|
|
27
|
+
|
|
28
|
+
```ts
|
|
29
|
+
import {
|
|
30
|
+
anonymizeMessages,
|
|
31
|
+
deanonymizeMessages,
|
|
32
|
+
buildSystemPromptInstruction,
|
|
33
|
+
getLocale,
|
|
34
|
+
ENTITY_TYPE,
|
|
35
|
+
DETECTION_CONTEXT,
|
|
36
|
+
createEmptyMapping,
|
|
37
|
+
} from '@besales/anonymizer';
|
|
38
|
+
|
|
39
|
+
// Outbound: реальные messages → токены (накапливаем mapping)
|
|
40
|
+
const { messages: anon, mappingData } = anonymizeMessages(
|
|
41
|
+
realMessages,
|
|
42
|
+
DETECTION_CONTEXT.CHAT_MESSAGE,
|
|
43
|
+
createEmptyMapping(),
|
|
44
|
+
// chat-профиль: исключаем AMOUNT/COMPANY (ложная токенизация сумм в продажах)
|
|
45
|
+
{ locale: 'ru', excludeEntityTypes: [ENTITY_TYPE.AMOUNT, ENTITY_TYPE.COMPANY] },
|
|
46
|
+
);
|
|
47
|
+
|
|
48
|
+
// Инструкция в system, чтобы модель сохраняла токены как есть
|
|
49
|
+
const instruction = getLocale('ru').systemPrompt.buildInstruction(
|
|
50
|
+
Object.keys(mappingData.tokens).map((k) => `[${k}]`),
|
|
51
|
+
);
|
|
52
|
+
|
|
53
|
+
// ... вызов LLM с anon + instruction ...
|
|
54
|
+
|
|
55
|
+
// Inbound: ответ модели (content + tool_calls.arguments) → реальные значения
|
|
56
|
+
const { messages: restored } = deanonymizeMessages(llmResponseMessages, mappingData);
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Для repair искажённых моделью токенов (`[EMAIL1]` → `[EMAIL_1]`) — `autoFixMalformedTokens`
|
|
60
|
+
перед деанонимизацией. Поддержка Responses API — `anonymizePayload` / `deanonymizePayload`.
|
|
61
|
+
|
|
62
|
+
## Детектируемые типы
|
|
63
|
+
|
|
64
|
+
`PERSON`, `PHONE`, `EMAIL`, `INN`, `PASSPORT`, `CARD`, `AMOUNT`, `DATE_OF_BIRTH`,
|
|
65
|
+
`ADDRESS`, `COMPANY`, `ACCOUNT`. Локали: `ru` (имена — словарь; phone/email/card —
|
|
66
|
+
языконезависимый regex). `excludeEntityTypes` сужает набор под конкретный профиль.
|
|
67
|
+
|
|
68
|
+
## Потребители
|
|
69
|
+
|
|
70
|
+
- `ai-animaly` — runtime-анонимизация чата (LLM-пайплайн).
|
|
71
|
+
- `prompt-services` — анонимизация CRM-диалогов в sandbox/ICP (план).
|
|
72
|
+
- `leak-engine` / dialogue-ingestion — анализ звонков/переписок (план).
|
|
73
|
+
|
|
74
|
+
## Разработка
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
yarn install
|
|
78
|
+
yarn build # tsc → dist + копирование CSV-словарей в dist/locales/ru/data
|
|
79
|
+
yarn test
|
|
80
|
+
yarn lint:errors
|
|
81
|
+
```
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { type Locale } from './locales';
|
|
2
|
+
import { AnonymizeMessagesResult, AnonymizePayloadResult, AnonymizeTextResult } from './types/anonymize-result.interface';
|
|
3
|
+
import { DeanonymizeMessagesResult, DeanonymizePayloadResult, DeanonymizeTextResult } from './types/deanonymize-result.interface';
|
|
4
|
+
import { DetectionContext } from './types/detection-context';
|
|
5
|
+
import { EntityType } from './types/entity-type';
|
|
6
|
+
import { CompletionsMessage } from './types/llm-message.interface';
|
|
7
|
+
import { MappingData } from './types/mapping-data.interface';
|
|
8
|
+
/**
 * Options accepted by `anonymizeText`.
 *
 * - `locale`: forwarded unchanged to the detection pipeline.
 * - `excludeEntityTypes`: forwarded unchanged to the detection pipeline
 *   (the package README describes it as narrowing the set of detected entity types).
 */
export interface AnonymizeTextOptions {
|
|
9
|
+
locale?: Locale;
|
|
10
|
+
excludeEntityTypes?: EntityType[];
|
|
11
|
+
}
|
|
12
|
+
/**
 * Options accepted by `anonymizeMessages`. Structurally the same fields as
 * `AnonymizeTextOptions`.
 *
 * - `locale`: forwarded unchanged to the detection pipeline for every extracted string.
 * - `excludeEntityTypes`: forwarded unchanged to the detection pipeline for every extracted string.
 */
export interface AnonymizeMessagesOptions {
|
|
13
|
+
locale?: Locale;
|
|
14
|
+
excludeEntityTypes?: EntityType[];
|
|
15
|
+
}
|
|
16
|
+
/**
 * Options accepted by `anonymizePayload`.
 *
 * - `targetPaths` / `excludePaths`: passed to the JSON traverser when strings are extracted.
 *   Supplying either one (any truthy value, including an empty array) forces the generic
 *   JSON traverser even if the payload looks like a Responses API payload.
 *   NOTE(review): the path syntax is defined by the JSON traverser, which is not visible here.
 * - `locale`: forwarded unchanged to the detection pipeline.
 * - `excludeEntityTypes`: forwarded unchanged to the detection pipeline.
 */
export interface AnonymizePayloadOptions {
|
|
17
|
+
targetPaths?: string[];
|
|
18
|
+
excludePaths?: string[];
|
|
19
|
+
locale?: Locale;
|
|
20
|
+
excludeEntityTypes?: EntityType[];
|
|
21
|
+
}
|
|
22
|
+
/**
 * Detects entities in a single string and replaces them with tokens.
 *
 * @param text - Text to anonymize. An empty string yields an empty result without running detection.
 * @param context - Detection context forwarded to the detection pipeline.
 * @param existingMapping - Optional mapping to extend. It is deep-cloned before use, so the
 *   object passed in is not modified by this function.
 * @param options - Optional locale and entity-type exclusions for detection.
 * @returns The tokenized text, the updated mapping, per-entity metadata and per-type counts.
 * @throws Error when `text.length` exceeds `MAX_TEXT_LENGTH`.
 */
export declare function anonymizeText(text: string, context: DetectionContext, existingMapping?: MappingData, options?: AnonymizeTextOptions): AnonymizeTextResult;
|
|
23
|
+
/**
 * Substitutes tokens in a string back to their original values using the given mapping.
 *
 * @param text - Text that may contain tokens.
 * @param mappingData - Mapping used for detokenization; its token keys also define the set of
 *   known tokens used when validating the text for malformed tokens.
 * @returns The restored text plus the tokens that could not be resolved and the malformed
 *   tokens reported by the response validator for the original (non-restored) text.
 */
export declare function deanonymizeText(text: string, mappingData: MappingData): DeanonymizeTextResult;
|
|
24
|
+
/**
 * Anonymizes every string the completions traverser extracts from a list of chat messages.
 *
 * Strings are processed one after another; each one is tokenized against the mapping produced
 * by the previous one, so a single mapping accumulates across the whole message list.
 *
 * @param messages - Messages to anonymize; the result is built by the traverser's `reassemble`.
 * @param context - Detection context forwarded to the detection pipeline.
 * @param existingMapping - Optional mapping to extend. It is deep-cloned before use.
 * @param options - Optional locale and entity-type exclusions for detection.
 * @returns Reassembled messages, the accumulated mapping, entity metadata (each carrying the
 *   path of the string it was found in) and per-type counts.
 */
export declare function anonymizeMessages(messages: CompletionsMessage[], context: DetectionContext, existingMapping?: MappingData, options?: AnonymizeMessagesOptions): AnonymizeMessagesResult;
|
|
25
|
+
/**
 * Restores original values in every string the completions traverser extracts from the messages.
 *
 * @param messages - Messages that may contain tokens.
 * @param mappingData - Mapping used for detokenization and as the source of known tokens.
 * @returns Reassembled messages plus de-duplicated unresolved tokens and malformed tokens
 *   collected over all extracted strings.
 */
export declare function deanonymizeMessages(messages: CompletionsMessage[], mappingData: MappingData): DeanonymizeMessagesResult;
|
|
26
|
+
/**
 * Anonymizes the strings of an arbitrary object payload.
 *
 * The Responses traverser is used when neither `targetPaths` nor `excludePaths` is supplied and
 * the payload has an `input`, `instructions` or `output` key; otherwise the generic JSON
 * traverser is used with the given options.
 *
 * Note the parameter order: unlike `anonymizeText` / `anonymizeMessages`, `options` comes
 * before `existingMapping` here.
 *
 * @param payload - Object to anonymize.
 * @param context - Detection context forwarded to the detection pipeline.
 * @param options - Optional path filters, locale and entity-type exclusions.
 * @param existingMapping - Optional mapping to extend. It is deep-cloned before use.
 * @returns The reassembled payload, the accumulated mapping, entity metadata and per-type counts.
 */
export declare function anonymizePayload<T extends Record<string, unknown>>(payload: T, context: DetectionContext, options?: AnonymizePayloadOptions, existingMapping?: MappingData): AnonymizePayloadResult<T>;
|
|
27
|
+
/**
 * Restores original values in the strings of an arbitrary object payload.
 *
 * Traverser selection follows the same rule as `anonymizePayload`: the Responses traverser is
 * used when no path filters are given and the payload has an `input`, `instructions` or
 * `output` key; otherwise the generic JSON traverser is used.
 *
 * @param payload - Object that may contain tokens.
 * @param mappingData - Mapping used for detokenization and as the source of known tokens.
 * @param options - Optional path filters passed to the JSON traverser.
 * @returns The reassembled payload plus de-duplicated unresolved and malformed tokens.
 */
export declare function deanonymizePayload<T extends Record<string, unknown>>(payload: T, mappingData: MappingData, options?: {
|
|
28
|
+
targetPaths?: string[];
|
|
29
|
+
excludePaths?: string[];
|
|
30
|
+
}): DeanonymizePayloadResult<T>;
|
|
31
|
+
//# sourceMappingURL=anonymizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"anonymizer.d.ts","sourceRoot":"","sources":["../src/anonymizer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,MAAM,EAAE,MAAM,WAAW,CAAC;AAOxC,OAAO,EACL,uBAAuB,EACvB,sBAAsB,EACtB,mBAAmB,EACpB,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EACL,yBAAyB,EACzB,wBAAwB,EACxB,qBAAqB,EACtB,MAAM,sCAAsC,CAAC;AAE9C,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AAEnE,OAAO,EAAE,WAAW,EAAsB,MAAM,gCAAgC,CAAC;AAWjF,MAAM,WAAW,oBAAoB;IAEnC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,kBAAkB,CAAC,EAAE,UAAU,EAAE,CAAC;CACnC;AAGD,MAAM,WAAW,wBAAwB;IAEvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,kBAAkB,CAAC,EAAE,UAAU,EAAE,CAAC;CACnC;AAGD,MAAM,WAAW,uBAAuB;IAEtC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAEvB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IAExB,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB,kBAAkB,CAAC,EAAE,UAAU,EAAE,CAAC;CACnC;AA0DD,wBAAgB,aAAa,CAC3B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,gBAAgB,EACzB,eAAe,CAAC,EAAE,WAAW,EAC7B,OAAO,CAAC,EAAE,oBAAoB,GAC7B,mBAAmB,CAqCrB;AAKD,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,WAAW,EAAE,WAAW,GACvB,qBAAqB,CAMvB;AAMD,wBAAgB,iBAAiB,CAC/B,QAAQ,EAAE,kBAAkB,EAAE,EAC9B,OAAO,EAAE,gBAAgB,EACzB,eAAe,CAAC,EAAE,WAAW,EAC7B,OAAO,CAAC,EAAE,wBAAwB,GACjC,uBAAuB,CAyCzB;AAKD,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,kBAAkB,EAAE,EAC9B,WAAW,EAAE,WAAW,GACvB,yBAAyB,CAkB3B;AAKD,wBAAgB,gBAAgB,CAAC,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAChE,OAAO,EAAE,CAAC,EACV,OAAO,EAAE,gBAAgB,EACzB,OAAO,CAAC,EAAE,uBAAuB,EACjC,eAAe,CAAC,EAAE,WAAW,GAC5B,sBAAsB,CAAC,CAAC,CAAC,CA4C3B;AAKD,wBAAgB,kBAAkB,CAAC,CAAC,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAClE,OAAO,EAAE,CAAC,EACV,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE;IAAE,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,EAAE,CAAA;CAAE,GAC5D,wBAAwB,CAAC,CAAC,CAAC,CAuB7B"}
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.anonymizeText = anonymizeText;
|
|
4
|
+
exports.deanonymizeText = deanonymizeText;
|
|
5
|
+
exports.anonymizeMessages = anonymizeMessages;
|
|
6
|
+
exports.deanonymizeMessages = deanonymizeMessages;
|
|
7
|
+
exports.anonymizePayload = anonymizePayload;
|
|
8
|
+
exports.deanonymizePayload = deanonymizePayload;
|
|
9
|
+
const constants_1 = require("./constants");
|
|
10
|
+
const detection_pipeline_1 = require("./detectors/detection-pipeline");
|
|
11
|
+
const response_validator_1 = require("./llm-protocol/response-validator");
|
|
12
|
+
const tokenizer_1 = require("./tokenizer/tokenizer");
|
|
13
|
+
const completions_traverser_1 = require("./traversers/completions.traverser");
|
|
14
|
+
const json_traverser_1 = require("./traversers/json.traverser");
|
|
15
|
+
const responses_traverser_1 = require("./traversers/responses.traverser");
|
|
16
|
+
const mapping_data_interface_1 = require("./types/mapping-data.interface");
|
|
17
|
+
// Module-level traverser instances, created once at load time and reused by every
// anonymize*/deanonymize* call in this module.
const completionsTraverser = new completions_traverser_1.CompletionsTraverser();
|
|
18
|
+
const jsonTraverser = new json_traverser_1.JsonTraverser();
|
|
19
|
+
const responsesTraverser = new responses_traverser_1.ResponsesTraverser();
|
|
20
|
+
/**
 * Builds the metadata record reported to callers for one detected entity.
 *
 * The entity value is normalized and looked up in `mapping.reverseIndex` to find the mapping
 * key registered for it; the key is wrapped in brackets to form the token. When no key is
 * registered for the value the token is reported as '[UNKNOWN]'.
 *
 * @param entity - Detected entity; `value`, `type`, `position`, `detectedBy` and `confidence` are read.
 * @param mapping - Mapping whose `reverseIndex` maps normalized values to mapping keys.
 * @param sourcePath - Path of the string the entity was found in, or null for plain text input.
 * @returns Metadata with the entity's type, token, position, detector, confidence and source path.
 */
function toMeta(entity, mapping, sourcePath) {
    const normalized = (0, tokenizer_1.normalizeValue)(entity.value);
    // Look at own properties only: a plain-object index would otherwise resolve values such as
    // "constructor" or "toString" through Object.prototype and yield a bogus, truthy "key".
    const key = Object.prototype.hasOwnProperty.call(mapping.reverseIndex, normalized)
        ? mapping.reverseIndex[normalized]
        : undefined;
    return {
        type: entity.type,
        token: key ? `[${key}]` : '[UNKNOWN]',
        position: entity.position,
        detectedBy: entity.detectedBy,
        confidence: entity.confidence,
        sourcePath,
    };
}
|
|
32
|
+
/**
 * Counts how many entities of each type are present.
 *
 * @param entities - Array of objects carrying a `type` field.
 * @returns A plain object mapping each encountered type to its number of occurrences.
 */
function countByType(entities) {
    return entities.reduce((tally, entity) => {
        const seenSoFar = tally[entity.type] ?? 0;
        tally[entity.type] = seenSoFar + 1;
        return tally;
    }, {});
}
|
|
39
|
+
/**
 * Lists every token known to a mapping in bracketed form.
 *
 * @param mapping - Mapping whose `tokens` object is keyed by mapping key (e.g. `EMAIL_1`).
 * @returns The keys of `mapping.tokens`, each wrapped in square brackets, in key order.
 */
function knownTokensFromMapping(mapping) {
    const bracketed = [];
    for (const mappingKey of Object.keys(mapping.tokens)) {
        bracketed.push(`[${mappingKey}]`);
    }
    return bracketed;
}
|
|
42
|
+
/**
 * Decides whether a payload should be handled by the Responses traverser.
 *
 * @param payload - Object payload to inspect.
 * @param options - Optional options; a truthy `targetPaths` or `excludePaths` always selects
 *   the generic JSON traverser (returns false).
 * @returns true when no path filters are given and the payload has an `input`,
 *   `instructions` or `output` key; false otherwise.
 */
function shouldUseResponsesTraverser(payload, options) {
    const hasPathFilters = Boolean(options?.targetPaths || options?.excludePaths);
    if (hasPathFilters) {
        return false;
    }
    // Keys are probed in the same order as before; `some` stops at the first hit.
    return ['input', 'instructions', 'output'].some((field) => field in payload);
}
|
|
49
|
+
/**
 * Detects entities in a single string and replaces them with tokens.
 *
 * @param text - Text to anonymize. An empty string short-circuits to an empty result.
 * @param context - Detection context forwarded to the detection pipeline.
 * @param existingMapping - Optional mapping to extend; deep-cloned so the caller's object is
 *   not modified here. A fresh empty mapping is created when omitted.
 * @param options - Optional `locale` and `excludeEntityTypes` forwarded to detection.
 * @returns `{ anonymized, mappingData, detectedEntities, stats }`.
 * @throws Error when `text.length` exceeds `MAX_TEXT_LENGTH`.
 */
function anonymizeText(text, context, existingMapping, options) {
    const limit = constants_1.MAX_TEXT_LENGTH;
    if (text.length > limit) {
        throw new Error(`Текст превышает максимальную длину: ${text.length} > ${limit}`);
    }
    // Both the empty-text path and the regular path start from the same working copy.
    const workingMapping = existingMapping
        ? structuredClone(existingMapping)
        : (0, mapping_data_interface_1.createEmptyMapping)();
    if (!text) {
        // Nothing to detect: return the (cloned or empty) mapping untouched.
        return {
            anonymized: '',
            mappingData: workingMapping,
            detectedEntities: [],
            stats: {},
        };
    }
    const found = (0, detection_pipeline_1.runDetectionPipeline)(text, context, options?.locale, options?.excludeEntityTypes);
    const outcome = (0, tokenizer_1.tokenize)(text, found, workingMapping);
    const finalMapping = outcome.updatedMapping;
    return {
        anonymized: outcome.tokenized,
        mappingData: finalMapping,
        detectedEntities: found.map((entity) => toMeta(entity, finalMapping, null)),
        stats: countByType(found),
    };
}
|
|
76
|
+
/**
 * Substitutes tokens in a string back to their original values.
 *
 * @param text - Text that may contain tokens.
 * @param mappingData - Mapping used for detokenization and as the source of known tokens.
 * @returns `{ restored, unresolvedTokens, malformedTokens }`; malformed tokens are reported
 *   by the response validator for the original text, not the restored one.
 */
function deanonymizeText(text, mappingData) {
    const detokenized = (0, tokenizer_1.detokenize)(text, mappingData);
    const validation = (0, response_validator_1.validateLlmResponse)(text, knownTokensFromMapping(mappingData));
    return {
        restored: detokenized.restored,
        unresolvedTokens: detokenized.unresolvedTokens,
        malformedTokens: validation.malformedTokens,
    };
}
|
|
82
|
+
/**
 * Anonymizes every string the completions traverser extracts from a list of chat messages.
 *
 * @param messages - Messages to anonymize.
 * @param context - Detection context forwarded to the detection pipeline.
 * @param existingMapping - Optional mapping to extend; deep-cloned before use.
 * @param options - Optional `locale` and `excludeEntityTypes` forwarded to detection.
 * @returns `{ messages, mappingData, detectedEntities, stats }`.
 */
function anonymizeMessages(messages, context, existingMapping, options) {
    const seedMapping = existingMapping ?? (0, mapping_data_interface_1.createEmptyMapping)();
    let runningMapping = structuredClone(seedMapping);
    const taggedEntities = [];
    const tokenizedByPath = new Map();
    for (const extracted of completionsTraverser.extractStrings(messages)) {
        const found = (0, detection_pipeline_1.runDetectionPipeline)(extracted.value, context, options?.locale, options?.excludeEntityTypes);
        // Each string is tokenized against the mapping left by the previous one.
        const outcome = (0, tokenizer_1.tokenize)(extracted.value, found, runningMapping);
        runningMapping = outcome.updatedMapping;
        tokenizedByPath.set(extracted.path, outcome.tokenized);
        // Remember where each entity came from so the metadata can carry its path.
        for (const entity of found) {
            taggedEntities.push({ ...entity, sourcePath: extracted.path });
        }
    }
    const rebuiltMessages = completionsTraverser.reassemble(messages, tokenizedByPath);
    // Metadata is resolved against the final mapping, after all strings were processed.
    const finalMapping = runningMapping;
    return {
        messages: rebuiltMessages,
        mappingData: finalMapping,
        detectedEntities: taggedEntities.map((entity) => toMeta(entity, finalMapping, entity.sourcePath)),
        stats: countByType(taggedEntities),
    };
}
|
|
104
|
+
/**
 * Restores original values in every string the completions traverser extracts from the messages.
 *
 * @param messages - Messages that may contain tokens.
 * @param mappingData - Mapping used for detokenization and as the source of known tokens.
 * @returns `{ messages, unresolvedTokens, malformedTokens }`.
 */
function deanonymizeMessages(messages, mappingData) {
    const extracted = completionsTraverser.extractStrings(messages);
    const known = knownTokensFromMapping(mappingData);
    const rebuild = (restoredByPath) => completionsTraverser.reassemble(messages, restoredByPath);
    const outcome = deanonymizeExtracted(extracted, mappingData, known, rebuild);
    return {
        messages: outcome.reassembled,
        unresolvedTokens: outcome.unresolvedTokens,
        malformedTokens: outcome.malformedTokens,
    };
}
|
|
114
|
+
/**
 * Anonymizes the strings of an arbitrary object payload.
 *
 * The Responses traverser is used when `shouldUseResponsesTraverser` accepts the payload;
 * otherwise the generic JSON traverser is used with the given options.
 *
 * @param payload - Object to anonymize.
 * @param context - Detection context forwarded to the detection pipeline.
 * @param options - Optional path filters, `locale` and `excludeEntityTypes`.
 * @param existingMapping - Optional mapping to extend; deep-cloned before use.
 * @returns `{ payload, mappingData, detectedEntities, stats }`.
 */
function anonymizePayload(payload, context, options, existingMapping) {
    let localMapping = structuredClone(existingMapping ?? (0, mapping_data_interface_1.createEmptyMapping)());
    // Decide the traverser once so extraction and reassembly are guaranteed to agree
    // (the paths produced by one traverser are only meaningful to that same traverser).
    const useResponsesTraverser = shouldUseResponsesTraverser(payload, options);
    const extractedStrings = useResponsesTraverser
        ? responsesTraverser.extractStrings(payload)
        : jsonTraverser.extractStrings(payload, options);
    const allEntities = [];
    const processedMap = new Map();
    for (const str of extractedStrings) {
        const entities = (0, detection_pipeline_1.runDetectionPipeline)(str.value, context, options?.locale, options?.excludeEntityTypes);
        // Each string is tokenized against the mapping left by the previous one.
        const { tokenized, updatedMapping } = (0, tokenizer_1.tokenize)(str.value, entities, localMapping);
        localMapping = updatedMapping;
        processedMap.set(str.path, tokenized);
        for (const entity of entities) {
            allEntities.push({ ...entity, sourcePath: str.path });
        }
    }
    const anonymizedPayload = useResponsesTraverser
        ? responsesTraverser.reassemble(payload, processedMap)
        : jsonTraverser.reassemble(payload, processedMap);
    return {
        payload: anonymizedPayload,
        mappingData: localMapping,
        // Metadata is resolved against the final mapping, after all strings were processed.
        detectedEntities: allEntities.map((e) => toMeta(e, localMapping, e.sourcePath)),
        stats: countByType(allEntities),
    };
}
|
|
140
|
+
/**
 * Restores original values in the strings of an arbitrary object payload.
 *
 * @param payload - Object that may contain tokens.
 * @param mappingData - Mapping used for detokenization and as the source of known tokens.
 * @param options - Optional `targetPaths` / `excludePaths`; when either is truthy the generic
 *   JSON traverser is used.
 * @returns `{ payload, unresolvedTokens, malformedTokens }`.
 */
function deanonymizePayload(payload, mappingData, options) {
    const knownTokens = knownTokensFromMapping(mappingData);
    // Decide the traverser once so extraction and reassembly are guaranteed to agree
    // (the paths produced by one traverser are only meaningful to that same traverser).
    const useResponsesTraverser = shouldUseResponsesTraverser(payload, options);
    const extractedStrings = useResponsesTraverser
        ? responsesTraverser.extractStrings(payload)
        : jsonTraverser.extractStrings(payload, options);
    const { unresolvedTokens, malformedTokens, reassembled } = deanonymizeExtracted(extractedStrings, mappingData, knownTokens, (processedMap) => useResponsesTraverser
        ? responsesTraverser.reassemble(payload, processedMap)
        : jsonTraverser.reassemble(payload, processedMap));
    return {
        payload: reassembled,
        unresolvedTokens,
        malformedTokens,
    };
}
|
|
154
|
+
/**
 * Shared de-anonymization loop for already-extracted strings.
 *
 * Every string is detokenized with the mapping and separately validated for malformed tokens;
 * the restored strings are handed to `reassembleFn` keyed by their path.
 *
 * @param extractedStrings - Items with `path` and `value` produced by a traverser.
 * @param mappingData - Mapping used for detokenization.
 * @param knownTokens - Bracketed tokens passed to the response validator.
 * @param reassembleFn - Callback that rebuilds the original structure from a path → string map.
 * @returns `{ reassembled, unresolvedTokens, malformedTokens }`; unresolved tokens are
 *   de-duplicated, malformed tokens are de-duplicated by their `found` text (first one wins).
 */
function deanonymizeExtracted(extractedStrings, mappingData, knownTokens, reassembleFn) {
    const restoredByPath = new Map();
    const unresolved = new Set();
    const malformedByText = new Map();
    for (const item of extractedStrings) {
        const detokenized = (0, tokenizer_1.detokenize)(item.value, mappingData);
        // Validation runs on the original string, not on the restored one.
        const validation = (0, response_validator_1.validateLlmResponse)(item.value, knownTokens);
        for (const token of detokenized.unresolvedTokens) {
            unresolved.add(token);
        }
        for (const malformed of validation.malformedTokens) {
            if (malformedByText.has(malformed.found)) {
                continue;
            }
            malformedByText.set(malformed.found, malformed);
        }
        restoredByPath.set(item.path, detokenized.restored);
    }
    const reassembled = reassembleFn(restoredByPath);
    return {
        reassembled,
        unresolvedTokens: Array.from(unresolved),
        malformedTokens: Array.from(malformedByText.values()),
    };
}
|
|
175
|
+
//# sourceMappingURL=anonymizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"anonymizer.js","sourceRoot":"","sources":["../src/anonymizer.ts"],"names":[],"mappings":";;AAsHA,sCA0CC;AAKD,0CASC;AAMD,8CA8CC;AAKD,kDAqBC;AAKD,4CAiDC;AAKD,gDA2BC;AAlVD,2CAA8C;AAC9C,uEAAsE;AAEtE,0EAAwE;AACxE,qDAA6E;AAC7E,8EAA0E;AAC1E,gEAA4D;AAC5D,0EAAsE;AAiBtE,2EAAiF;AAIjF,MAAM,oBAAoB,GAAG,IAAI,4CAAoB,EAAE,CAAC;AACxD,MAAM,aAAa,GAAG,IAAI,8BAAa,EAAE,CAAC;AAC1C,MAAM,kBAAkB,GAAG,IAAI,wCAAkB,EAAE,CAAC;AAkCpD,SAAS,MAAM,CACb,MAAsB,EACtB,OAAoB,EACpB,UAAyB;IAEzB,MAAM,UAAU,GAAG,IAAA,0BAAc,EAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,GAAG,GAAG,OAAO,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;IAC7C,OAAO;QACL,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,WAAW;QACrC,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,UAAU;KACX,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAClB,QAA0B;IAE1B,MAAM,KAAK,GAAwC,EAAE,CAAC;IACtD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IAC3C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,sBAAsB,CAAC,OAAoB;IAClD,OAAO,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC1D,CAAC;AAMD,SAAS,2BAA2B,CAClC,OAAgC,EAChC,OAAiC;IAGjC,IAAI,OAAO,EAAE,WAAW,IAAI,OAAO,EAAE,YAAY;QAAE,OAAO,KAAK,CAAC;IAChE,OAAO,CACL,OAAO,IAAI,OAAO;QAClB,cAAc,IAAI,OAAO;QACzB,QAAQ,IAAI,OAAO,CACpB,CAAC;AACJ,CAAC;AAQD,SAAgB,aAAa,CAC3B,IAAY,EACZ,OAAyB,EACzB,eAA6B,EAC7B,OAA8B;IAE9B,IAAI,IAAI,CAAC,MAAM,GAAG,2BAAe,EAAE,CAAC;QAClC,MAAM,IAAI,KAAK,CACb,uCAAuC,IAAI,CAAC,MAAM,MAAM,2BAAe,EAAE,CAC1E,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,OAAO,GAAG,eAAe;YAC7B,CAAC,CAAC,eAAe,CAAC,eAAe,CAAC;YAClC,CAAC,CAAC,IAAA,2CAAkB,GAAE,CAAC;QACzB,OAAO;YACL,UAAU,EAAE,EAAE;YACd,WAAW,EAAE,OAAO;YACpB,gBAAgB,EAAE,EAAE;YACpB,KAAK,EAAE,EAAE;SACV,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,eAAe;QAC7B,CAAC,CAAC,eAAe,CAAC,eAAe,CAAC;QAClC,CAAC,CAAC,IAAA,2CAAkB,GAAE,CAAC;IAEzB,MAAM,QAAQ,GAAG,IAAA,yCAAoB,
EACnC,IAAI,EACJ,OAAO,EACP,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,kBAAkB,CAC5B,CAAC;IACF,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,IAAA,oBAAQ,EAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAExE,OAAO;QACL,UAAU,EAAE,SAAS;QACrB,WAAW,EAAE,cAAc;QAC3B,gBAAgB,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,cAAc,EAAE,IAAI,CAAC,CAAC;QACtE,KAAK,EAAE,WAAW,CAAC,QAAQ,CAAC;KAC7B,CAAC;AACJ,CAAC;AAKD,SAAgB,eAAe,CAC7B,IAAY,EACZ,WAAwB;IAExB,MAAM,EAAE,QAAQ,EAAE,gBAAgB,EAAE,GAAG,IAAA,sBAAU,EAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IACrE,MAAM,WAAW,GAAG,sBAAsB,CAAC,WAAW,CAAC,CAAC;IACxD,MAAM,EAAE,eAAe,EAAE,GAAG,IAAA,wCAAmB,EAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAEnE,OAAO,EAAE,QAAQ,EAAE,gBAAgB,EAAE,eAAe,EAAE,CAAC;AACzD,CAAC;AAMD,SAAgB,iBAAiB,CAC/B,QAA8B,EAC9B,OAAyB,EACzB,eAA6B,EAC7B,OAAkC;IAElC,IAAI,YAAY,GAAG,eAAe,CAChC,eAAe,IAAI,IAAA,2CAAkB,GAAE,CACxC,CAAC;IACF,MAAM,gBAAgB,GAAG,oBAAoB,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IACvE,MAAM,WAAW,GAAgD,EAAE,CAAC;IACpE,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE/C,KAAK,MAAM,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACnC,MAAM,QAAQ,GAAG,IAAA,yCAAoB,EACnC,GAAG,CAAC,KAAK,EACT,OAAO,EACP,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,kBAAkB,CAC5B,CAAC;QACF,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,IAAA,oBAAQ,EAC5C,GAAG,CAAC,KAAK,EACT,QAAQ,EACR,YAAY,CACb,CAAC;QACF,YAAY,GAAG,cAAc,CAAC;QAC9B,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAEtC,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;YAC9B,WAAW,CAAC,IAAI,CAAC,EAAE,GAAG,MAAM,EAAE,UAAU,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,oBAAoB,CAAC,UAAU,CACxD,QAAQ,EACR,YAAY,CACb,CAAC;IAEF,OAAO;QACL,QAAQ,EAAE,kBAAkB;QAC5B,WAAW,EAAE,YAAY;QACzB,gBAAgB,EAAE,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACtC,MAAM,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,UAAU,CAAC,CACtC;QACD,KAAK,EAAE,WAAW,CAAC,WAAW,CAAC;KAChC,CAAC;AACJ,CAAC;AAKD,SAAgB,mBAAmB,CACjC,QAA8B,EAC9B,WAAwB;IAExB,MAAM,gBAAgB,GAAG,oBAAoB,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IACvE,MAAM,WAAW,GAAG,sBAAsB,CAAC,WAAW,CAAC,CAAC;IAExD,MAAM,EAAE,WAAW,EAAE,gBAAgB,EAAE,eAAe,EAAE,GACtD,oBAAoB,CAClB,gBAAgB,EAChB,WAAW,EACX,WAAW,EA
CX,CAAC,YAAY,EAAE,EAAE,CACf,oBAAoB,CAAC,UAAU,CAAC,QAAQ,EAAE,YAAY,CAAC,CAC1D,CAAC;IAEJ,OAAO;QACL,QAAQ,EAAE,WAAW;QACrB,gBAAgB;QAChB,eAAe;KAChB,CAAC;AACJ,CAAC;AAKD,SAAgB,gBAAgB,CAC9B,OAAU,EACV,OAAyB,EACzB,OAAiC,EACjC,eAA6B;IAE7B,IAAI,YAAY,GAAG,eAAe,CAChC,eAAe,IAAI,IAAA,2CAAkB,GAAE,CACxC,CAAC;IAEF,MAAM,gBAAgB,GAAG,2BAA2B,CAAC,OAAO,EAAE,OAAO,CAAC;QACpE,CAAC,CAAC,kBAAkB,CAAC,cAAc,CAAC,OAAO,CAAC;QAC5C,CAAC,CAAC,aAAa,CAAC,cAAc,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAEnD,MAAM,WAAW,GAAgD,EAAE,CAAC;IACpE,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE/C,KAAK,MAAM,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACnC,MAAM,QAAQ,GAAG,IAAA,yCAAoB,EACnC,GAAG,CAAC,KAAK,EACT,OAAO,EACP,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,kBAAkB,CAC5B,CAAC;QACF,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,IAAA,oBAAQ,EAC5C,GAAG,CAAC,KAAK,EACT,QAAQ,EACR,YAAY,CACb,CAAC;QACF,YAAY,GAAG,cAAc,CAAC;QAC9B,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAEtC,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;YAC9B,WAAW,CAAC,IAAI,CAAC,EAAE,GAAG,MAAM,EAAE,UAAU,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,MAAM,iBAAiB,GAAG,2BAA2B,CAAC,OAAO,EAAE,OAAO,CAAC;QACrE,CAAC,CAAC,kBAAkB,CAAC,UAAU,CAAC,OAAO,EAAE,YAAY,CAAC;QACtD,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;IAEpD,OAAO;QACL,OAAO,EAAE,iBAAsB;QAC/B,WAAW,EAAE,YAAY;QACzB,gBAAgB,EAAE,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CACtC,MAAM,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,UAAU,CAAC,CACtC;QACD,KAAK,EAAE,WAAW,CAAC,WAAW,CAAC;KAChC,CAAC;AACJ,CAAC;AAKD,SAAgB,kBAAkB,CAChC,OAAU,EACV,WAAwB,EACxB,OAA6D;IAE7D,MAAM,WAAW,GAAG,sBAAsB,CAAC,WAAW,CAAC,CAAC;IAExD,MAAM,gBAAgB,GAAG,2BAA2B,CAAC,OAAO,EAAE,OAAO,CAAC;QACpE,CAAC,CAAC,kBAAkB,CAAC,cAAc,CAAC,OAAO,CAAC;QAC5C,CAAC,CAAC,aAAa,CAAC,cAAc,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAEnD,MAAM,EAAE,gBAAgB,EAAE,eAAe,EAAE,WAAW,EAAE,GACtD,oBAAoB,CAClB,gBAAgB,EAChB,WAAW,EACX,WAAW,EACX,CAAC,YAAY,EAAE,EAAE,CACf,2BAA2B,CAAC,OAAO,EAAE,OAAO,CAAC;QAC3C,CAAC,CAAC,kBAAkB,CAAC,UAAU,CAAC,OAAO,EAAE,YAAY,CAAC;QACtD,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC,OAAO,EAAE,YAAY,CAAC,CACtD,CAAC;IAEJ,OAAO;QACL,OAAO,EAAE,WAA
gB;QACzB,gBAAgB;QAChB,eAAe;KAChB,CAAC;AACJ,CAAC;AAUD,SAAS,oBAAoB,CAC3B,gBAAmC,EACnC,WAAwB,EACxB,WAAqB,EACrB,YAAsD;IAEtD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC;IACxC,MAAM,YAAY,GAAG,IAAI,GAAG,EAA0B,CAAC;IACvD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE/C,KAAK,MAAM,GAAG,IAAI,gBAAgB,EAAE,CAAC;QACnC,MAAM,EAAE,QAAQ,EAAE,gBAAgB,EAAE,GAAG,IAAA,sBAAU,EAAC,GAAG,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QAC1E,MAAM,EAAE,eAAe,EAAE,GAAG,IAAA,wCAAmB,EAAC,GAAG,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QAExE,KAAK,MAAM,CAAC,IAAI,gBAAgB;YAAE,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACvD,KAAK,MAAM,CAAC,IAAI,eAAe,EAAE,CAAC;YAChC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;gBAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC/D,CAAC;QAED,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IACvC,CAAC;IAED,OAAO;QACL,WAAW,EAAE,YAAY,CAAC,YAAY,CAAC;QACvC,gBAAgB,EAAE,CAAC,GAAG,aAAa,CAAC;QACpC,eAAe,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;KAC5C,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
export declare const MAX_TEXT_LENGTH = 100000;
|
|
2
|
+
export declare const MIN_CONFIDENCE_THRESHOLD = 0.5;
|
|
3
|
+
export declare const TOKEN_TEMPLATE: (type: string, n: number) => string;
|
|
4
|
+
/** Formats the unbracketed form of a token: `(type, n)` becomes `type_n`, e.g. `EMAIL_3`. */
export declare const MAPPING_KEY_TEMPLATE: (type: string, n: number) => string;
|
|
5
|
+
/**
 * Global regex that finds well-formed tokens such as `[EMAIL_3]` (uppercase letters and
 * underscores, then `_` and digits); capture group 1 is the text between the brackets.
 * The instance is shared and has the `g` flag, so `.test()`/`.exec()` advance its `lastIndex`.
 */
export declare const TOKEN_PATTERN: RegExp;
|
|
6
|
+
/**
 * Anchored, non-global regex that matches a string consisting of exactly one token;
 * group 1 is the type part, group 2 is the numeric suffix.
 */
export declare const TOKEN_PARSE_PATTERN: RegExp;
|
|
7
|
+
/**
 * Global regex for token-like text that also accepts lowercase letters and one missing
 * bracket: either `[` is required and `]` optional (group 1), or `[` is optional and `]`
 * required (group 2). Well-formed tokens match it as well.
 */
export declare const MALFORMED_TOKEN_PATTERN: RegExp;
|
|
8
|
+
/**
 * Numeric limit with the value 2.
 * NOTE(review): no use site is visible here; presumably the largest edit distance accepted when matching a damaged token to a known one — confirm.
 */
export declare const MAX_LEVENSHTEIN_DISTANCE = 2;
|
|
9
|
+
/**
 * Numeric limit with the value 100.
 * NOTE(review): no use site is visible here; presumably a cap on distinct tokens per entity type — confirm.
 */
export declare const MAX_TOKENS_PER_TYPE = 100;
|
|
10
|
+
/**
 * Confidence threshold with the value 0.8.
 * NOTE(review): no use site is visible here; presumably the confidence needed before a token repair is applied automatically — confirm.
 */
export declare const AUTO_FIX_CONFIDENCE_THRESHOLD = 0.8;
|
|
11
|
+
//# sourceMappingURL=constants.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AACA,eAAO,MAAM,eAAe,SAAU,CAAC;AACvC,eAAO,MAAM,wBAAwB,MAAM,CAAC;AAS5C,eAAO,MAAM,cAAc,SAAU,MAAM,KAAK,MAAM,KAAG,MACvC,CAAC;AACnB,eAAO,MAAM,oBAAoB,SAAU,MAAM,KAAK,MAAM,KAAG,MAC/C,CAAC;AACjB,eAAO,MAAM,aAAa,QAAuB,CAAC;AAClD,eAAO,MAAM,mBAAmB,QAA0B,CAAC;AAC3D,eAAO,MAAM,uBAAuB,QACY,CAAC;AACjD,eAAO,MAAM,wBAAwB,IAAI,CAAC;AAG1C,eAAO,MAAM,mBAAmB,MAAM,CAAC;AACvC,eAAO,MAAM,6BAA6B,MAAM,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.AUTO_FIX_CONFIDENCE_THRESHOLD = exports.MAX_TOKENS_PER_TYPE = exports.MAX_LEVENSHTEIN_DISTANCE = exports.MALFORMED_TOKEN_PATTERN = exports.TOKEN_PARSE_PATTERN = exports.TOKEN_PATTERN = exports.MAPPING_KEY_TEMPLATE = exports.TOKEN_TEMPLATE = exports.MIN_CONFIDENCE_THRESHOLD = exports.MAX_TEXT_LENGTH = void 0;
|
|
4
|
+
// Numeric limit of 100,000.
// NOTE(review): no use site is visible in this module; presumably the maximum accepted input text length — confirm.
exports.MAX_TEXT_LENGTH = 100_000;
|
|
5
|
+
// Entities whose confidence is not >= this value are dropped by runDetectionPipeline after merging.
exports.MIN_CONFIDENCE_THRESHOLD = 0.5;
|
|
6
|
+
// Builds the bracketed placeholder for the n-th entity of a type, e.g. ('EMAIL', 3) -> '[EMAIL_3]'.
const TOKEN_TEMPLATE = (type, n) => {
    const key = `${type}_${n}`;
    return `[${key}]`;
};
|
|
7
|
+
exports.TOKEN_TEMPLATE = TOKEN_TEMPLATE;
|
|
8
|
+
// Builds the unbracketed form of a token, e.g. ('EMAIL', 3) -> 'EMAIL_3'.
const MAPPING_KEY_TEMPLATE = (type, n) => {
    const typePart = `${type}`;
    const indexPart = `${n}`;
    return typePart + '_' + indexPart;
};
|
|
9
|
+
exports.MAPPING_KEY_TEMPLATE = MAPPING_KEY_TEMPLATE;
|
|
10
|
+
// Finds well-formed tokens such as [EMAIL_3]; group 1 is the text between the brackets.
// The `g` flag makes this shared instance stateful: .test()/.exec() advance lastIndex,
// so callers using those methods should reset lastIndex or work on a copy.
exports.TOKEN_PATTERN = /\[([A-Z_]+_\d+)\]/g;
|
|
11
|
+
// Anchored and non-global: matches a string that is exactly one token.
// Group 1 is the type part, group 2 the numeric suffix.
exports.TOKEN_PARSE_PATTERN = /^\[([A-Z_]+)_(\d+)\]$/;
|
|
12
|
+
// Token-like text with relaxed rules: letters may be lowercase and one bracket may be missing.
// First alternative: '[' required, ']' optional (group 1). Second: '[' optional, ']' required (group 2).
// Well-formed tokens match too. Global flag: same lastIndex caveat as any shared /g regex.
exports.MALFORMED_TOKEN_PATTERN = /\[([A-Za-z_]+_\d+)\]?|\[?([A-Za-z_]+_\d+)\]/g;
|
|
13
|
+
// NOTE(review): no use site is visible in this module; presumably the largest edit distance
// accepted when matching a damaged token to a known one — confirm.
exports.MAX_LEVENSHTEIN_DISTANCE = 2;
|
|
14
|
+
// NOTE(review): no use site is visible in this module; presumably a cap on distinct tokens
// per entity type — confirm.
exports.MAX_TOKENS_PER_TYPE = 100;
|
|
15
|
+
// NOTE(review): no use site is visible in this module; presumably the confidence needed
// before a token repair is applied automatically — confirm.
exports.AUTO_FIX_CONFIDENCE_THRESHOLD = 0.8;
|
|
16
|
+
//# sourceMappingURL=constants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":";;;AACa,QAAA,eAAe,GAAG,OAAO,CAAC;AAC1B,QAAA,wBAAwB,GAAG,GAAG,CAAC;AASrC,MAAM,cAAc,GAAG,CAAC,IAAY,EAAE,CAAS,EAAU,EAAE,CAChE,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC;AADN,QAAA,cAAc,kBACR;AACZ,MAAM,oBAAoB,GAAG,CAAC,IAAY,EAAE,CAAS,EAAU,EAAE,CACtE,GAAG,IAAI,IAAI,CAAC,EAAE,CAAC;AADJ,QAAA,oBAAoB,wBAChB;AACJ,QAAA,aAAa,GAAG,oBAAoB,CAAC;AACrC,QAAA,mBAAmB,GAAG,uBAAuB,CAAC;AAC9C,QAAA,uBAAuB,GAClC,8CAA8C,CAAC;AACpC,QAAA,wBAAwB,GAAG,CAAC,CAAC;AAG7B,QAAA,mBAAmB,GAAG,GAAG,CAAC;AAC1B,QAAA,6BAA6B,GAAG,GAAG,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { DetectedEntity } from '../types/detected-entity.interface';
|
|
2
|
+
import { DetectionContext } from '../types/detection-context';
|
|
3
|
+
import { LocaleContextualRule } from '../locales/locale.interface';
|
|
4
|
+
import { DetectorPort } from './detector.interface';
|
|
5
|
+
/**
 * Detector that reports entities found near trigger phrases.
 *
 * For every rule whose `activeIn` list includes the current context, each match of the
 * rule's `triggerPattern` opens a search window after, before, or around the trigger
 * (bounded by `maxDistance` characters); the first `capturePattern` match inside that
 * window is returned as an entity with `detectedBy: 'contextual'`.
 */
export declare class ContextualDetector implements DetectorPort {
|
|
6
|
+
private readonly rules;
|
|
7
|
+
constructor(rules: LocaleContextualRule[]);
|
|
8
|
+
detect(text: string, context: DetectionContext): DetectedEntity[];
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=contextual.detector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contextual.detector.d.ts","sourceRoot":"","sources":["../../src/detectors/contextual.detector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAyB;gBAEnC,KAAK,EAAE,oBAAoB,EAAE;IAIzC,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,GAAG,cAAc,EAAE;CAiElE"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ContextualDetector = void 0;
|
|
4
|
+
class ContextualDetector {
|
|
5
|
+
rules;
|
|
6
|
+
constructor(rules) {
|
|
7
|
+
this.rules = rules;
|
|
8
|
+
}
|
|
9
|
+
detect(text, context) {
|
|
10
|
+
const activeRules = this.rules.filter((r) => r.activeIn.includes(context));
|
|
11
|
+
if (activeRules.length === 0)
|
|
12
|
+
return [];
|
|
13
|
+
const results = [];
|
|
14
|
+
for (const rule of activeRules) {
|
|
15
|
+
for (const triggerMatch of text.matchAll(rule.triggerPattern)) {
|
|
16
|
+
const triggerEnd = triggerMatch.index + triggerMatch[0].length;
|
|
17
|
+
let searchWindow;
|
|
18
|
+
let windowOffset;
|
|
19
|
+
if (rule.direction === 'after') {
|
|
20
|
+
const windowEnd = Math.min(text.length, triggerEnd + rule.maxDistance);
|
|
21
|
+
searchWindow = text.slice(triggerEnd, windowEnd);
|
|
22
|
+
windowOffset = triggerEnd;
|
|
23
|
+
}
|
|
24
|
+
else if (rule.direction === 'before') {
|
|
25
|
+
const windowStart = Math.max(0, triggerMatch.index - rule.maxDistance);
|
|
26
|
+
searchWindow = text.slice(windowStart, triggerMatch.index);
|
|
27
|
+
windowOffset = windowStart;
|
|
28
|
+
}
|
|
29
|
+
else {
|
|
30
|
+
const windowStart = Math.max(0, triggerMatch.index - rule.maxDistance);
|
|
31
|
+
const windowEnd = Math.min(text.length, triggerEnd + rule.maxDistance);
|
|
32
|
+
searchWindow = text.slice(windowStart, windowEnd);
|
|
33
|
+
windowOffset = windowStart;
|
|
34
|
+
}
|
|
35
|
+
const captureMatch = searchWindow.match(rule.capturePattern);
|
|
36
|
+
if (captureMatch && captureMatch[0].trim()) {
|
|
37
|
+
const value = captureMatch[0].trim();
|
|
38
|
+
const captureStart = windowOffset + captureMatch.index;
|
|
39
|
+
const trimLeading = captureMatch[0].length - captureMatch[0].trimStart().length;
|
|
40
|
+
const actualStart = captureStart + trimLeading;
|
|
41
|
+
const actualEnd = actualStart + value.length;
|
|
42
|
+
results.push({
|
|
43
|
+
type: rule.entityType,
|
|
44
|
+
value,
|
|
45
|
+
position: { start: actualStart, end: actualEnd },
|
|
46
|
+
detectedBy: 'contextual',
|
|
47
|
+
confidence: rule.confidence,
|
|
48
|
+
});
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
return results;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
exports.ContextualDetector = ContextualDetector;
|
|
56
|
+
//# sourceMappingURL=contextual.detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contextual.detector.js","sourceRoot":"","sources":["../../src/detectors/contextual.detector.ts"],"names":[],"mappings":";;;AAKA,MAAa,kBAAkB;IACZ,KAAK,CAAyB;IAE/C,YAAY,KAA6B;QACvC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED,MAAM,CAAC,IAAY,EAAE,OAAyB;QAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QAE3E,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAExC,MAAM,OAAO,GAAqB,EAAE,CAAC;QAErC,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,KAAK,MAAM,YAAY,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC;gBAC9D,MAAM,UAAU,GAAG,YAAY,CAAC,KAAM,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;gBAEhE,IAAI,YAAoB,CAAC;gBACzB,IAAI,YAAoB,CAAC;gBAEzB,IAAI,IAAI,CAAC,SAAS,KAAK,OAAO,EAAE,CAAC;oBAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,IAAI,CAAC,MAAM,EACX,UAAU,GAAG,IAAI,CAAC,WAAW,CAC9B,CAAC;oBACF,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;oBACjD,YAAY,GAAG,UAAU,CAAC;gBAC5B,CAAC;qBAAM,IAAI,IAAI,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;oBACvC,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,YAAY,CAAC,KAAM,GAAG,IAAI,CAAC,WAAW,CACvC,CAAC;oBACF,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,YAAY,CAAC,KAAM,CAAC,CAAC;oBAC5D,YAAY,GAAG,WAAW,CAAC;gBAC7B,CAAC;qBAAM,CAAC;oBAEN,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAC1B,CAAC,EACD,YAAY,CAAC,KAAM,GAAG,IAAI,CAAC,WAAW,CACvC,CAAC;oBACF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,IAAI,CAAC,MAAM,EACX,UAAU,GAAG,IAAI,CAAC,WAAW,CAC9B,CAAC;oBACF,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;oBAClD,YAAY,GAAG,WAAW,CAAC;gBAC7B,CAAC;gBAED,MAAM,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC7D,IAAI,YAAY,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;oBAC3C,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;oBACrC,MAAM,YAAY,GAAG,YAAY,GAAG,YAAY,CAAC,KAAM,CAAC;oBAExD,MAAM,WAAW,GACf,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,MAAM,CAAC;oBAC9D,MAAM,WAAW,GAAG,YAAY,GAAG,WAAW,CAAC;oBAC/C,MAAM,SAAS,GAAG,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC;oBAE7C,OAAO,CAAC,I
AAI,CAAC;wBACX,IAAI,EAAE,IAAI,CAAC,UAAU;wBACrB,KAAK;wBACL,QAAQ,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,EAAE,SAAS,EAAE;wBAChD,UAAU,EAAE,YAAY;wBACxB,UAAU,EAAE,IAAI,CAAC,UAAU;qBAC5B,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAxED,gDAwEC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { DetectedEntity } from '../types/detected-entity.interface';
|
|
2
|
+
import { DetectionContext } from '../types/detection-context';
|
|
3
|
+
import { EntityType } from '../types/entity-type';
|
|
4
|
+
/**
 * Runs the regex, dictionary and contextual detectors of the resolved locale over `text`,
 * merges overlapping results with `mergeEntities`, and returns the entities whose
 * confidence is at least `MIN_CONFIDENCE_THRESHOLD` and whose type is not excluded.
 *
 * @param text Text to scan.
 * @param context Detection context passed to every detector.
 * @param locale Optional locale identifier; resolution (including the default when omitted) is delegated to `getLocale`.
 * @param excludeEntityTypes Entity types to drop from the result; ignored when absent or empty.
 * @returns The merged and filtered entities.
 */
export declare function runDetectionPipeline(text: string, context: DetectionContext, locale?: string, excludeEntityTypes?: readonly EntityType[]): DetectedEntity[];
|
|
5
|
+
/**
 * Resolves overlapping entity spans and returns a new array; the input is not mutated.
 *
 * Entities are processed by ascending start (longer span first on ties). An entity fully
 * inside an already kept one is dropped, one that fully covers a kept one replaces it, and
 * a partial overlap is decided by detector priority (regex, then dictionary, then
 * contextual), then by longer span, then by higher confidence.
 *
 * @param entities Detected entities, in any order.
 * @returns Entities remaining after conflict resolution.
 */
export declare function mergeEntities(entities: DetectedEntity[]): DetectedEntity[];
|
|
6
|
+
//# sourceMappingURL=detection-pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detection-pipeline.d.ts","sourceRoot":"","sources":["../../src/detectors/detection-pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAC9D,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAgDlD,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,gBAAgB,EACzB,MAAM,CAAC,EAAE,MAAM,EACf,kBAAkB,CAAC,EAAE,SAAS,UAAU,EAAE,GACzC,cAAc,EAAE,CAqBlB;AASD,wBAAgB,aAAa,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,cAAc,EAAE,CA8D1E"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runDetectionPipeline = runDetectionPipeline;
|
|
4
|
+
exports.mergeEntities = mergeEntities;
|
|
5
|
+
const constants_1 = require("../constants");
|
|
6
|
+
const locales_1 = require("../locales");
|
|
7
|
+
const contextual_detector_1 = require("./contextual.detector");
|
|
8
|
+
const dictionary_detector_1 = require("./dictionary.detector");
|
|
9
|
+
const regex_detector_1 = require("./regex.detector");
|
|
10
|
+
// Rank of each detector kind, looked up by an entity's `detectedBy` value.
// pickWinner prefers the entity with the LOWER number when two spans partially overlap.
const DETECTOR_PRIORITY = {
|
|
11
|
+
regex: 1,
|
|
12
|
+
dictionary: 2,
|
|
13
|
+
contextual: 3,
|
|
14
|
+
};
|
|
15
|
+
// Module-level cache of built detector pipelines, keyed by locale code.
// Filled lazily by getPipeline; nothing in this module removes entries.
const PIPELINE_CACHE = new Map();
|
|
16
|
+
/**
 * Creates the three detectors for one locale definition.
 *
 * @param locale Locale object providing `regexRules`, `code`, `dictionary` and `contextualRules`.
 * @returns An object holding the detectors under `regex`, `dict` and `ctx`.
 */
function buildPipeline(locale) {
    const regex = new regex_detector_1.RegexDetector(locale.regexRules);
    const dict = new dictionary_detector_1.DictionaryDetector(locale.code, locale.dictionary);
    const ctx = new contextual_detector_1.ContextualDetector(locale.contextualRules);
    return { regex, dict, ctx };
}
|
|
23
|
+
/**
 * Returns the detector pipeline for a locale code, building and caching it on first use.
 *
 * @param localeCode Key used for both the cache lookup and the locale lookup.
 * @returns The cached or freshly built pipeline.
 */
function getPipeline(localeCode) {
    const cached = PIPELINE_CACHE.get(localeCode);
    if (cached) {
        return cached;
    }
    const built = buildPipeline((0, locales_1.getLocale)(localeCode));
    PIPELINE_CACHE.set(localeCode, built);
    return built;
}
|
|
31
|
+
/**
 * Runs every detector of the resolved locale over `text`, merges overlapping
 * results, then filters by confidence and excluded entity types.
 *
 * @param text Text to scan.
 * @param context Detection context passed to each detector.
 * @param locale Optional locale identifier, resolved through getLocale.
 * @param excludeEntityTypes Optional list of entity types to drop; ignored when empty.
 * @returns Entities that survive merging and filtering.
 */
function runDetectionPipeline(text, context, locale, excludeEntityTypes) {
    const resolvedLocale = (0, locales_1.getLocale)(locale);
    const detectors = getPipeline(resolvedLocale.code);
    // Collect in the fixed order regex -> dictionary -> contextual.
    const candidates = [];
    for (const detector of [detectors.regex, detectors.dict, detectors.ctx]) {
        for (const found of detector.detect(text, context)) {
            candidates.push(found);
        }
    }
    const merged = mergeEntities(candidates);
    let excluded = null;
    if (excludeEntityTypes && excludeEntityTypes.length > 0) {
        excluded = new Set(excludeEntityTypes);
    }
    return merged.filter((entity) => {
        // Written as a negated >= so a NaN or missing confidence is rejected.
        if (!(entity.confidence >= constants_1.MIN_CONFIDENCE_THRESHOLD)) {
            return false;
        }
        return excluded === null || !excluded.has(entity.type);
    });
}
|
|
46
|
+
/**
 * Resolves overlapping entity spans without mutating the input array.
 *
 * Entities are visited by ascending start, longer span first on equal starts.
 * Each one is compared with the first already kept entity it overlaps:
 * if it lies inside that entity it is dropped, if it covers that entity it
 * replaces it, and otherwise pickWinner chooses which of the two stays.
 *
 * @param entities Detected entities in any order.
 * @returns A new array with the conflicts resolved.
 */
function mergeEntities(entities) {
    if (entities.length <= 1) {
        return [...entities];
    }
    const spanLength = (e) => e.position.end - e.position.start;
    const overlaps = (a, b) => a.position.start < b.position.end && a.position.end > b.position.start;
    const covers = (outer, inner) => inner.position.start >= outer.position.start && inner.position.end <= outer.position.end;
    const ordered = Array.from(entities).sort((left, right) => {
        const byStart = left.position.start - right.position.start;
        return byStart !== 0 ? byStart : spanLength(right) - spanLength(left);
    });
    const kept = [];
    for (const candidate of ordered) {
        const clashAt = kept.findIndex((prior) => overlaps(candidate, prior));
        if (clashAt === -1) {
            kept.push(candidate);
            continue;
        }
        const prior = kept[clashAt];
        if (covers(prior, candidate)) {
            // Candidate is nested inside something already kept.
            continue;
        }
        kept[clashAt] = covers(candidate, prior)
            ? candidate
            : pickWinner(candidate, prior);
    }
    return kept;
}
|
|
89
|
+
/**
 * Chooses between two partially overlapping entities.
 *
 * Order of criteria: lower DETECTOR_PRIORITY rank, then longer span,
 * then higher confidence. A complete tie returns `a`.
 *
 * @param a First entity (mergeEntities passes the newly visited one).
 * @param b Second entity (mergeEntities passes the one already kept).
 * @returns Either `a` or `b`.
 */
function pickWinner(a, b) {
    const rankOfA = DETECTOR_PRIORITY[a.detectedBy];
    const rankOfB = DETECTOR_PRIORITY[b.detectedBy];
    if (rankOfA !== rankOfB) {
        return rankOfB > rankOfA ? a : b;
    }
    const span = (e) => e.position.end - e.position.start;
    const spanOfA = span(a);
    const spanOfB = span(b);
    if (spanOfA !== spanOfB) {
        return spanOfB < spanOfA ? a : b;
    }
    return b.confidence <= a.confidence ? a : b;
}
|
|
102
|
+
//# sourceMappingURL=detection-pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detection-pipeline.js","sourceRoot":"","sources":["../../src/detectors/detection-pipeline.ts"],"names":[],"mappings":";;AAmDA,oDA0BC;AASD,sCA8DC;AApJD,4CAAwD;AAIxD,wCAAuC;AAEvC,+DAA2D;AAC3D,+DAA2D;AAC3D,qDAAiD;AAGjD,MAAM,iBAAiB,GAAiD;IACtE,KAAK,EAAE,CAAC;IACR,UAAU,EAAE,CAAC;IACb,UAAU,EAAE,CAAC;CACd,CAAC;AASF,MAAM,cAAc,GAAG,IAAI,GAAG,EAA4B,CAAC;AAE3D,SAAS,aAAa,CAAC,MAAoB;IACzC,OAAO;QACL,KAAK,EAAE,IAAI,8BAAa,CAAC,MAAM,CAAC,UAAU,CAAC;QAC3C,IAAI,EAAE,IAAI,wCAAkB,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,CAAC;QAC5D,GAAG,EAAE,IAAI,wCAAkB,CAAC,MAAM,CAAC,eAAe,CAAC;KACpD,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,UAAkB;IACrC,IAAI,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC9C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,QAAQ,GAAG,aAAa,CAAC,IAAA,mBAAS,EAAC,UAAU,CAAC,CAAC,CAAC;QAChD,cAAc,CAAC,GAAG,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IAC3C,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAUD,SAAgB,oBAAoB,CAClC,IAAY,EACZ,OAAyB,EACzB,MAAe,EACf,kBAA0C;IAE1C,MAAM,GAAG,GAAG,IAAA,mBAAS,EAAC,MAAM,CAAC,CAAC;IAC9B,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAEvC,MAAM,GAAG,GAAG;QACV,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC;QACvC,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC;QACtC,GAAG,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC;KACtC,CAAC;IAEF,MAAM,MAAM,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;IAClC,MAAM,QAAQ,GACZ,kBAAkB,IAAI,kBAAkB,CAAC,MAAM,GAAG,CAAC;QACjD,CAAC,CAAC,IAAI,GAAG,CAAa,kBAAkB,CAAC;QACzC,CAAC,CAAC,IAAI,CAAC;IAEX,OAAO,MAAM,CAAC,MAAM,CAClB,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,UAAU,IAAI,oCAAwB;QACxC,CAAC,QAAQ,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAC/C,CAAC;AACJ,CAAC;AASD,SAAgB,aAAa,CAAC,QAA0B;IACtD,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,CAAC,GAAG,QAAQ,CAAC,CAAC;IAG/C,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACzC,MAAM,SAAS,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC;QACtD,IAAI,SAAS,KAAK,CAAC;YAAE,OAAO,SAAS,CAAC;QACtC,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK
,CAAC;QAC/C,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC;QAC/C,OAAO,IAAI,GAAG,IAAI,CAAC;IACrB,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAqB,EAAE,CAAC;IAEpC,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,IAAI,WAAW,GAAG,KAAK,CAAC;QACxB,IAAI,aAAa,GAAG,CAAC,CAAC,CAAC;QAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YAG3B,IACE,MAAM,CAAC,QAAQ,CAAC,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG;gBAC7C,MAAM,CAAC,QAAQ,CAAC,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAC7C,CAAC;gBACD,WAAW,GAAG,IAAI,CAAC;gBACnB,aAAa,GAAG,CAAC,CAAC;gBAClB,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACpB,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC;QAGvC,IACE,MAAM,CAAC,QAAQ,CAAC,KAAK,IAAI,QAAQ,CAAC,QAAQ,CAAC,KAAK;YAChD,MAAM,CAAC,QAAQ,CAAC,GAAG,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,EAC5C,CAAC;YACD,SAAS;QACX,CAAC;QAGD,IACE,QAAQ,CAAC,QAAQ,CAAC,KAAK,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK;YAChD,QAAQ,CAAC,QAAQ,CAAC,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,EAC5C,CAAC;YACD,MAAM,CAAC,aAAa,CAAC,GAAG,MAAM,CAAC;YAC/B,SAAS;QACX,CAAC;QAGD,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC5C,MAAM,CAAC,aAAa,CAAC,GAAG,MAAM,CAAC;IACjC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,UAAU,CAAC,CAAiB,EAAE,CAAiB;IACtD,MAAM,SAAS,GAAG,iBAAiB,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;IAClD,MAAM,SAAS,GAAG,iBAAiB,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;IAGlD,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;QAC5B,OAAO,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC;IAC/C,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC;IAE/C,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC9C,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { DetectedEntity } from '../types/detected-entity.interface';
|
|
2
|
+
import { DetectionContext } from '../types/detection-context';
|
|
3
|
+
/**
 * Contract shared by the detectors: given a text and the current detection context,
 * return the entities found in that text. ContextualDetector and DictionaryDetector
 * are declared as implementing it.
 */
export interface DetectorPort {
|
|
4
|
+
detect(text: string, context: DetectionContext): DetectedEntity[];
|
|
5
|
+
}
|
|
6
|
+
//# sourceMappingURL=detector.interface.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detector.interface.d.ts","sourceRoot":"","sources":["../../src/detectors/detector.interface.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAM9D,MAAM,WAAW,YAAY;IAO3B,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,GAAG,cAAc,EAAE,CAAC;CACnE"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detector.interface.js","sourceRoot":"","sources":["../../src/detectors/detector.interface.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { DetectedEntity } from '../types/detected-entity.interface';
|
|
2
|
+
import { DetectionContext } from '../types/detection-context';
|
|
3
|
+
import { LocaleDictionaryConfig } from '../locales/locale.interface';
|
|
4
|
+
import { DetectorPort } from './detector.interface';
|
|
5
|
+
/**
 * Takes no arguments and returns nothing.
 * NOTE(review): the implementation is not visible here; presumably clears the dictionary detector's module-level cache (e.g. for tests) — confirm.
 */
export declare function resetDictionaryCache(): void;
|
|
6
|
+
/**
 * Detector constructed from a locale code and that locale's dictionary configuration;
 * implements the common DetectorPort contract.
 * NOTE(review): the implementation is not visible here; the `_context` parameter name suggests the detection context is not used by `detect` — confirm.
 */
export declare class DictionaryDetector implements DetectorPort {
|
|
7
|
+
private readonly localeCode;
|
|
8
|
+
private readonly config;
|
|
9
|
+
constructor(localeCode: string, config: LocaleDictionaryConfig);
|
|
10
|
+
detect(text: string, _context: DetectionContext): DetectedEntity[];
|
|
11
|
+
}
|
|
12
|
+
//# sourceMappingURL=dictionary.detector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dictionary.detector.d.ts","sourceRoot":"","sources":["../../src/detectors/dictionary.detector.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAE9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AACrE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AA+DpD,wBAAgB,oBAAoB,IAAI,IAAI,CAE3C;AAwCD,qBAAa,kBAAmB,YAAW,YAAY;IAEnD,OAAO,CAAC,QAAQ,CAAC,UAAU;IAC3B,OAAO,CAAC,QAAQ,CAAC,MAAM;gBADN,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,sBAAsB;IAGjD,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,gBAAgB,GAAG,cAAc,EAAE;CA6GnE"}
|