@besales/anonymizer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +81 -0
- package/dist/anonymizer.d.ts +31 -0
- package/dist/anonymizer.d.ts.map +1 -0
- package/dist/anonymizer.js +175 -0
- package/dist/anonymizer.js.map +1 -0
- package/dist/constants.d.ts +11 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +16 -0
- package/dist/constants.js.map +1 -0
- package/dist/detectors/contextual.detector.d.ts +10 -0
- package/dist/detectors/contextual.detector.d.ts.map +1 -0
- package/dist/detectors/contextual.detector.js +56 -0
- package/dist/detectors/contextual.detector.js.map +1 -0
- package/dist/detectors/detection-pipeline.d.ts +6 -0
- package/dist/detectors/detection-pipeline.d.ts.map +1 -0
- package/dist/detectors/detection-pipeline.js +102 -0
- package/dist/detectors/detection-pipeline.js.map +1 -0
- package/dist/detectors/detector.interface.d.ts +6 -0
- package/dist/detectors/detector.interface.d.ts.map +1 -0
- package/dist/detectors/detector.interface.js +3 -0
- package/dist/detectors/detector.interface.js.map +1 -0
- package/dist/detectors/dictionary.detector.d.ts +12 -0
- package/dist/detectors/dictionary.detector.d.ts.map +1 -0
- package/dist/detectors/dictionary.detector.js +222 -0
- package/dist/detectors/dictionary.detector.js.map +1 -0
- package/dist/detectors/regex.detector.d.ts +10 -0
- package/dist/detectors/regex.detector.d.ts.map +1 -0
- package/dist/detectors/regex.detector.js +41 -0
- package/dist/detectors/regex.detector.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +27 -0
- package/dist/index.js.map +1 -0
- package/dist/llm-protocol/response-validator.d.ts +5 -0
- package/dist/llm-protocol/response-validator.d.ts.map +1 -0
- package/dist/llm-protocol/response-validator.js +96 -0
- package/dist/llm-protocol/response-validator.js.map +1 -0
- package/dist/llm-protocol/system-prompt-builder.d.ts +2 -0
- package/dist/llm-protocol/system-prompt-builder.d.ts.map +1 -0
- package/dist/llm-protocol/system-prompt-builder.js +8 -0
- package/dist/llm-protocol/system-prompt-builder.js.map +1 -0
- package/dist/locales/index.d.ts +9 -0
- package/dist/locales/index.d.ts.map +1 -0
- package/dist/locales/index.js +15 -0
- package/dist/locales/index.js.map +1 -0
- package/dist/locales/locale.interface.d.ts +35 -0
- package/dist/locales/locale.interface.d.ts.map +1 -0
- package/dist/locales/locale.interface.js +3 -0
- package/dist/locales/locale.interface.js.map +1 -0
- package/dist/locales/ru/data/names.csv +1291 -0
- package/dist/locales/ru/data/stopwords.csv +236 -0
- package/dist/locales/ru/index.d.ts +3 -0
- package/dist/locales/ru/index.d.ts.map +1 -0
- package/dist/locales/ru/index.js +227 -0
- package/dist/locales/ru/index.js.map +1 -0
- package/dist/tokenizer/tokenizer.d.ts +13 -0
- package/dist/tokenizer/tokenizer.d.ts.map +1 -0
- package/dist/tokenizer/tokenizer.js +50 -0
- package/dist/tokenizer/tokenizer.js.map +1 -0
- package/dist/traversers/completions.traverser.d.ts +7 -0
- package/dist/traversers/completions.traverser.d.ts.map +1 -0
- package/dist/traversers/completions.traverser.js +111 -0
- package/dist/traversers/completions.traverser.js.map +1 -0
- package/dist/traversers/json.traverser.d.ts +12 -0
- package/dist/traversers/json.traverser.d.ts.map +1 -0
- package/dist/traversers/json.traverser.js +94 -0
- package/dist/traversers/json.traverser.js.map +1 -0
- package/dist/traversers/responses.traverser.d.ts +8 -0
- package/dist/traversers/responses.traverser.d.ts.map +1 -0
- package/dist/traversers/responses.traverser.js +86 -0
- package/dist/traversers/responses.traverser.js.map +1 -0
- package/dist/traversers/text.traverser.d.ts +6 -0
- package/dist/traversers/text.traverser.d.ts.map +1 -0
- package/dist/traversers/text.traverser.js +15 -0
- package/dist/traversers/text.traverser.js.map +1 -0
- package/dist/traversers/traverser.interface.d.ts +9 -0
- package/dist/traversers/traverser.interface.d.ts.map +1 -0
- package/dist/traversers/traverser.interface.js +3 -0
- package/dist/traversers/traverser.interface.js.map +1 -0
- package/dist/types/anonymize-result.interface.d.ts +23 -0
- package/dist/types/anonymize-result.interface.d.ts.map +1 -0
- package/dist/types/anonymize-result.interface.js +3 -0
- package/dist/types/anonymize-result.interface.js.map +1 -0
- package/dist/types/deanonymize-result.interface.d.ts +18 -0
- package/dist/types/deanonymize-result.interface.d.ts.map +1 -0
- package/dist/types/deanonymize-result.interface.js +3 -0
- package/dist/types/deanonymize-result.interface.js.map +1 -0
- package/dist/types/detected-entity.interface.d.ts +23 -0
- package/dist/types/detected-entity.interface.d.ts.map +1 -0
- package/dist/types/detected-entity.interface.js +3 -0
- package/dist/types/detected-entity.interface.js.map +1 -0
- package/dist/types/detection-context.d.ts +10 -0
- package/dist/types/detection-context.d.ts.map +1 -0
- package/dist/types/detection-context.js +12 -0
- package/dist/types/detection-context.js.map +1 -0
- package/dist/types/entity-type.d.ts +16 -0
- package/dist/types/entity-type.d.ts.map +1 -0
- package/dist/types/entity-type.js +18 -0
- package/dist/types/entity-type.js.map +1 -0
- package/dist/types/llm-message.interface.d.ts +23 -0
- package/dist/types/llm-message.interface.d.ts.map +1 -0
- package/dist/types/llm-message.interface.js +3 -0
- package/dist/types/llm-message.interface.js.map +1 -0
- package/dist/types/llm-validation.interface.d.ts +8 -0
- package/dist/types/llm-validation.interface.d.ts.map +1 -0
- package/dist/types/llm-validation.interface.js +3 -0
- package/dist/types/llm-validation.interface.js.map +1 -0
- package/dist/types/malformed-token.interface.d.ts +6 -0
- package/dist/types/malformed-token.interface.d.ts.map +1 -0
- package/dist/types/malformed-token.interface.js +3 -0
- package/dist/types/malformed-token.interface.js.map +1 -0
- package/dist/types/mapping-data.interface.d.ts +9 -0
- package/dist/types/mapping-data.interface.d.ts.map +1 -0
- package/dist/types/mapping-data.interface.js +12 -0
- package/dist/types/mapping-data.interface.js.map +1 -0
- package/package.json +48 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
value
|
|
2
|
+
август
|
|
3
|
+
австралия
|
|
4
|
+
австрия
|
|
5
|
+
агентство
|
|
6
|
+
азербайджан
|
|
7
|
+
азия
|
|
8
|
+
академия
|
|
9
|
+
акционер
|
|
10
|
+
анадырь
|
|
11
|
+
аналитик
|
|
12
|
+
апрель
|
|
13
|
+
апрельский
|
|
14
|
+
аргентина
|
|
15
|
+
армения
|
|
16
|
+
архангельск
|
|
17
|
+
астрахань
|
|
18
|
+
банк
|
|
19
|
+
барнаул
|
|
20
|
+
беларусь
|
|
21
|
+
белгород
|
|
22
|
+
бельгия
|
|
23
|
+
биробиджан
|
|
24
|
+
благовещенск
|
|
25
|
+
болгария
|
|
26
|
+
больница
|
|
27
|
+
бразилия
|
|
28
|
+
брянск
|
|
29
|
+
бульвар
|
|
30
|
+
бухгалтер
|
|
31
|
+
великобритания
|
|
32
|
+
венгрия
|
|
33
|
+
владивосток
|
|
34
|
+
владимир
|
|
35
|
+
волгоград
|
|
36
|
+
воронеж
|
|
37
|
+
воскресенье
|
|
38
|
+
восток
|
|
39
|
+
вторник
|
|
40
|
+
генеральный
|
|
41
|
+
германия
|
|
42
|
+
город
|
|
43
|
+
греция
|
|
44
|
+
грузия
|
|
45
|
+
группа
|
|
46
|
+
дальний восток
|
|
47
|
+
дания
|
|
48
|
+
декабрь
|
|
49
|
+
департамент
|
|
50
|
+
деревня
|
|
51
|
+
директор
|
|
52
|
+
европа
|
|
53
|
+
екатеринбург
|
|
54
|
+
заказчик
|
|
55
|
+
запад
|
|
56
|
+
иваново
|
|
57
|
+
ижевск
|
|
58
|
+
инвестор
|
|
59
|
+
индия
|
|
60
|
+
инженер
|
|
61
|
+
институт
|
|
62
|
+
интернет
|
|
63
|
+
иркутск
|
|
64
|
+
испания
|
|
65
|
+
италия
|
|
66
|
+
июль
|
|
67
|
+
июнь
|
|
68
|
+
июньский
|
|
69
|
+
кавказ
|
|
70
|
+
казань
|
|
71
|
+
казахстан
|
|
72
|
+
калининград
|
|
73
|
+
канада
|
|
74
|
+
кемерово
|
|
75
|
+
киргизия
|
|
76
|
+
киров
|
|
77
|
+
китай
|
|
78
|
+
клиент
|
|
79
|
+
клиника
|
|
80
|
+
комитет
|
|
81
|
+
компания
|
|
82
|
+
консультант
|
|
83
|
+
корпорация
|
|
84
|
+
край
|
|
85
|
+
краснодар
|
|
86
|
+
красноярск
|
|
87
|
+
красноярская
|
|
88
|
+
курск
|
|
89
|
+
ленинградская
|
|
90
|
+
липецк
|
|
91
|
+
магадан
|
|
92
|
+
магнитогорск
|
|
93
|
+
май
|
|
94
|
+
майский
|
|
95
|
+
маркетолог
|
|
96
|
+
март
|
|
97
|
+
мартовский
|
|
98
|
+
махачкала
|
|
99
|
+
мексика
|
|
100
|
+
менеджер
|
|
101
|
+
министерство
|
|
102
|
+
молдова
|
|
103
|
+
москва
|
|
104
|
+
москвич
|
|
105
|
+
московская
|
|
106
|
+
мурманск
|
|
107
|
+
набережная
|
|
108
|
+
набережные челны
|
|
109
|
+
нальчик
|
|
110
|
+
нарьян-мар
|
|
111
|
+
нидерланды
|
|
112
|
+
нижний новгород
|
|
113
|
+
нижний тагил
|
|
114
|
+
новокузнецк
|
|
115
|
+
новосибирск
|
|
116
|
+
новосибирская
|
|
117
|
+
норвегия
|
|
118
|
+
ноябрь
|
|
119
|
+
область
|
|
120
|
+
общество
|
|
121
|
+
округ
|
|
122
|
+
октябрь
|
|
123
|
+
омск
|
|
124
|
+
организация
|
|
125
|
+
оренбург
|
|
126
|
+
отдел
|
|
127
|
+
партнер
|
|
128
|
+
пенза
|
|
129
|
+
переулок
|
|
130
|
+
пермь
|
|
131
|
+
петербуржец
|
|
132
|
+
петрозаводск
|
|
133
|
+
петропавловск-камчатский
|
|
134
|
+
платформа
|
|
135
|
+
площадь
|
|
136
|
+
поволжье
|
|
137
|
+
подмосковье
|
|
138
|
+
подрядчик
|
|
139
|
+
покупатель
|
|
140
|
+
польша
|
|
141
|
+
понедельник
|
|
142
|
+
португалия
|
|
143
|
+
поставщик
|
|
144
|
+
почта
|
|
145
|
+
предприятие
|
|
146
|
+
президент
|
|
147
|
+
приложение
|
|
148
|
+
продавец
|
|
149
|
+
продукт
|
|
150
|
+
проект
|
|
151
|
+
проспект
|
|
152
|
+
пятница
|
|
153
|
+
разработчик
|
|
154
|
+
район
|
|
155
|
+
республика
|
|
156
|
+
решение
|
|
157
|
+
россия
|
|
158
|
+
россиянин
|
|
159
|
+
россиянка
|
|
160
|
+
ростов-на-дону
|
|
161
|
+
руководитель
|
|
162
|
+
румыния
|
|
163
|
+
рязань
|
|
164
|
+
салехард
|
|
165
|
+
самара
|
|
166
|
+
санкт-петербург
|
|
167
|
+
саратов
|
|
168
|
+
свердловская
|
|
169
|
+
севастополь
|
|
170
|
+
север
|
|
171
|
+
село
|
|
172
|
+
сентябрь
|
|
173
|
+
сербия
|
|
174
|
+
сервис
|
|
175
|
+
сибирь
|
|
176
|
+
симферополь
|
|
177
|
+
система
|
|
178
|
+
служба
|
|
179
|
+
сотрудник
|
|
180
|
+
сочи
|
|
181
|
+
специалист
|
|
182
|
+
среда
|
|
183
|
+
ставрополь
|
|
184
|
+
суббота
|
|
185
|
+
сша
|
|
186
|
+
сыктывкар
|
|
187
|
+
таджикистан
|
|
188
|
+
тверь
|
|
189
|
+
телефон
|
|
190
|
+
технолог
|
|
191
|
+
технология
|
|
192
|
+
тольятти
|
|
193
|
+
томск
|
|
194
|
+
тула
|
|
195
|
+
туркменистан
|
|
196
|
+
турция
|
|
197
|
+
тюменская
|
|
198
|
+
тюмень
|
|
199
|
+
узбекистан
|
|
200
|
+
украина
|
|
201
|
+
улан-удэ
|
|
202
|
+
улица
|
|
203
|
+
ульяновск
|
|
204
|
+
университет
|
|
205
|
+
управление
|
|
206
|
+
урал
|
|
207
|
+
уфа
|
|
208
|
+
учредитель
|
|
209
|
+
февраль
|
|
210
|
+
февральский
|
|
211
|
+
финляндия
|
|
212
|
+
фирма
|
|
213
|
+
франция
|
|
214
|
+
хабаровск
|
|
215
|
+
ханты-мансийск
|
|
216
|
+
холдинг
|
|
217
|
+
хорватия
|
|
218
|
+
центр
|
|
219
|
+
чебоксары
|
|
220
|
+
челябинск
|
|
221
|
+
четверг
|
|
222
|
+
чехия
|
|
223
|
+
чита
|
|
224
|
+
швейцария
|
|
225
|
+
швеция
|
|
226
|
+
школа
|
|
227
|
+
шоссе
|
|
228
|
+
экономист
|
|
229
|
+
юг
|
|
230
|
+
южно-сахалинск
|
|
231
|
+
юрист
|
|
232
|
+
якутск
|
|
233
|
+
январский
|
|
234
|
+
январь
|
|
235
|
+
япония
|
|
236
|
+
ярославль
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/locales/ru/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAiFnD,eAAO,MAAM,SAAS,EAAE,YA2JvB,CAAC"}
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.RU_LOCALE = void 0;
|
|
37
|
+
const path = __importStar(require("path"));
|
|
38
|
+
const detection_context_1 = require("../../types/detection-context");
|
|
39
|
+
const entity_type_1 = require("../../types/entity-type");
|
|
40
|
+
// ---------------------------------------------------------------------------
// Detection constants for the 'ru' locale.
// Every *_PATTERN carries the `g` flag; every *_CONTEXT is a case-insensitive
// keyword regex that the matching rule in RU_LOCALE pairs with its pattern.
// NOTE(review): how the detector applies context patterns/radii is not visible
// in this file — confirm against the regex detector.
// ---------------------------------------------------------------------------
// Phone: "+7" or "8", then digit groups of 3-3-2-2 with optional space/hyphen
// separators and optional parentheses around the first group.
const PHONE_PATTERN = /(?:\+7|8)[\s-]?\(?[\d]{3}\)?[\s-]?[\d]{3}[\s-]?[\d]{2}[\s-]?[\d]{2}/g;
// E-mail: local part up to 64 chars, domain up to 253 chars, alphabetic TLD of 2-63 chars.
const EMAIL_PATTERN = /[a-zA-Z0-9._%+-]{1,64}@[a-zA-Z0-9.-]{1,253}\.[a-zA-Z]{2,63}/g;
// INN: a standalone run of exactly 10 or 12 digits.
const INN_PATTERN = /\b\d{10}(?:\d{2})?\b/g;
// INN context keyword: the letters "инн" in any case.
const INN_CONTEXT = /[иИ][нН]{2}/i;
// Card number: four groups of four digits, optionally separated by a space or hyphen.
const CARD_PATTERN = /\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g;
// Passport: 2 digits, optional whitespace, 2 digits, whitespace, 6 digits.
const PASSPORT_PATTERN = /\b\d{2}\s?\d{2}\s+\d{6}\b/g;
// Passport context keywords ("passport", "series", "document" in Russian).
const PASSPORT_CONTEXT = /паспорт|серия|документ/i;
// Date written as dd.mm.yyyy or dd/mm/yyyy (separators may be mixed).
const DOB_PATTERN = /\b\d{2}[./]\d{2}[./]\d{4}\b/g;
// Date-of-birth context: the stem "рожд", the abbreviation "д.р." (dots/space optional), or "born".
const DOB_CONTEXT = /рожд|д\.?\s?р\.?|born/i;
// Amount: a digit, lazily up to 20 more digits/whitespace/dots/commas, then a
// currency or magnitude marker (руб, ₽, $, €, K/к, млн, тыс, or a literal "000").
// The trailing group requires an ASCII word boundary OR a following whitespace,
// end of input, or a character outside а-я/ё/a-z.
const AMOUNT_PATTERN = /\b\d[\d\s.,]{0,20}?\s*(?:руб|₽|\$|€|[KкКk]|млн|тыс|000)(?:\b|(?=\s|$|[^а-яёa-z]))/gi;
// Amount context: stems for deal, budget, receipt, payment, cost, price, sum.
const AMOUNT_CONTEXT = /сделк|бюджет|чек|оплат|стоимост|цен|сумм|платёж|платеж/i;
// Bank account: a standalone run of exactly 20 digits.
const ACCOUNT_PATTERN = /\b\d{20}\b/g;
// Account context: "счет"/"счёт", "р/с" or "р.с", and the stem "расчетн"/"расчётн".
const ACCOUNT_CONTEXT = /сч[её]т|р[/.]с|расч[её]тн/i;
// Context radii, one per context-dependent rule.
// NOTE(review): presumably measured in characters around the match — confirm in the detector.
const CONTEXT_RADIUS_INN = 30;
const CONTEXT_RADIUS_PASSPORT = 50;
const CONTEXT_RADIUS_DOB = 50;
const CONTEXT_RADIUS_AMOUNT = 80;
const CONTEXT_RADIUS_ACCOUNT = 50;
// Confidence scores assigned to entities produced by each regex rule.
// CONFIDENCE_HIGH is shared by the rules that have no context pattern (phone, e-mail, card).
const CONFIDENCE_HIGH = 0.95;
const CONFIDENCE_INN = 0.90;
const CONFIDENCE_PASSPORT = 0.85;
const CONFIDENCE_DOB = 0.85;
const CONFIDENCE_AMOUNT = 0.80;
const CONFIDENCE_ACCOUNT = 0.90;
// Upper bound on how many active tokens buildRuInstruction lists as examples.
const MAX_EXAMPLE_TOKENS = 5;
|
|
65
|
+
/**
 * Builds the Russian-language system-prompt fragment that tells the LLM to
 * keep placeholder tokens untouched.
 *
 * @param {string[]} activeTokens - Placeholder tokens currently in use.
 * @returns {string} An empty string when there are no tokens; otherwise the
 *   instruction block, listing at most MAX_EXAMPLE_TOKENS tokens as examples.
 */
function buildRuInstruction(activeTokens) {
    const hasTokens = activeTokens.length !== 0;
    if (!hasTokens) {
        return '';
    }
    // Only the first few tokens are shown as examples.
    const sampleTokens = activeTokens.slice(0, MAX_EXAMPLE_TOKENS);
    const sampleList = sampleTokens.join(', ');
    return `---
ВАЖНО: В тексте используются токены-заполнители для персональных данных.
Ты ОБЯЗАН сохранять их ТОЧНО в оригинальном формате: [ТИП_НОМЕР]
Примеры токенов в этом диалоге: ${sampleList}
Правила:
- НЕ заменяй токены на реальные данные или описания
- НЕ изменяй регистр, скобки или формат токенов
- Используй токены как есть в своих ответах
- Если нужно упомянуть человека — используй его токен, например [PERSON_1]
---`;
}
|
|
80
|
+
/**
 * Locale definition for Russian ('ru').
 *
 * Bundles four things: regex-based detection rules, trigger/capture based
 * contextual rules, dictionary file locations, and the system-prompt builder.
 * NOTE(review): the consumers of this object (detectors, prompt builder) are
 * not visible here; field semantics below are read off the values only.
 */
exports.RU_LOCALE = {
    code: 'ru',
    // Regex rules. Rules with `contextPattern: null` carry no context keyword;
    // the others pair the pattern with a keyword regex, a radius and a direction.
    regexRules: [
        {
            type: entity_type_1.ENTITY_TYPE.PHONE,
            pattern: PHONE_PATTERN,
            contextPattern: null,
            contextRadius: 0,
            contextDirection: 'both',
            confidence: CONFIDENCE_HIGH,
        },
        {
            type: entity_type_1.ENTITY_TYPE.EMAIL,
            pattern: EMAIL_PATTERN,
            contextPattern: null,
            contextRadius: 0,
            contextDirection: 'both',
            confidence: CONFIDENCE_HIGH,
        },
        {
            // The only rule whose context direction is 'before'.
            type: entity_type_1.ENTITY_TYPE.INN,
            pattern: INN_PATTERN,
            contextPattern: INN_CONTEXT,
            contextRadius: CONTEXT_RADIUS_INN,
            contextDirection: 'before',
            confidence: CONFIDENCE_INN,
        },
        {
            type: entity_type_1.ENTITY_TYPE.CARD,
            pattern: CARD_PATTERN,
            contextPattern: null,
            contextRadius: 0,
            contextDirection: 'both',
            confidence: CONFIDENCE_HIGH,
        },
        {
            type: entity_type_1.ENTITY_TYPE.PASSPORT,
            pattern: PASSPORT_PATTERN,
            contextPattern: PASSPORT_CONTEXT,
            contextRadius: CONTEXT_RADIUS_PASSPORT,
            contextDirection: 'both',
            confidence: CONFIDENCE_PASSPORT,
        },
        {
            type: entity_type_1.ENTITY_TYPE.DATE_OF_BIRTH,
            pattern: DOB_PATTERN,
            contextPattern: DOB_CONTEXT,
            contextRadius: CONTEXT_RADIUS_DOB,
            contextDirection: 'both',
            confidence: CONFIDENCE_DOB,
        },
        {
            type: entity_type_1.ENTITY_TYPE.AMOUNT,
            pattern: AMOUNT_PATTERN,
            contextPattern: AMOUNT_CONTEXT,
            contextRadius: CONTEXT_RADIUS_AMOUNT,
            contextDirection: 'both',
            confidence: CONFIDENCE_AMOUNT,
        },
        {
            type: entity_type_1.ENTITY_TYPE.ACCOUNT,
            pattern: ACCOUNT_PATTERN,
            contextPattern: ACCOUNT_CONTEXT,
            contextRadius: CONTEXT_RADIUS_ACCOUNT,
            contextDirection: 'both',
            confidence: CONFIDENCE_ACCOUNT,
        },
    ],
    // Contextual rules. Each lists the detection contexts it is active in, a
    // trigger regex, a capture regex, a direction (always 'after' here), a
    // maximum distance and a confidence. Rule ids go C1-C4 and C6; there is no C5.
    contextualRules: [
        {
            // C1: amounts after deal/budget/receipt/payment/cost words.
            id: 'C1',
            activeIn: [
                detection_context_1.DETECTION_CONTEXT.SALES_CALL_TRANSCRIPT,
                detection_context_1.DETECTION_CONTEXT.LEAK_ANALYSIS,
                detection_context_1.DETECTION_CONTEXT.TRAINING_SCENARIO,
            ],
            entityType: entity_type_1.ENTITY_TYPE.AMOUNT,
            triggerPattern: /сделк[аи]|бюджет|чек|оплат|стоимост/gi,
            capturePattern: /\d[\d\s,.]*\s*(?:руб|₽|тыс|млн|[KkКк])?/,
            direction: 'after',
            maxDistance: 50,
            confidence: 0.70,
        },
        {
            // C2: company names after "компания"/"организация" or a legal-form
            // abbreviation (ООО, ИП, АО, ЗАО). The trigger is case-insensitive
            // and has no word-boundary anchors.
            id: 'C2',
            activeIn: [
                detection_context_1.DETECTION_CONTEXT.SALES_CALL_TRANSCRIPT,
                detection_context_1.DETECTION_CONTEXT.OUTREACH_MESSAGE,
                detection_context_1.DETECTION_CONTEXT.LEAK_ANALYSIS,
                detection_context_1.DETECTION_CONTEXT.TRAINING_SCENARIO,
            ],
            entityType: entity_type_1.ENTITY_TYPE.COMPANY,
            triggerPattern: /компани[яию]|организаци[яию]|ООО|ИП|АО|ЗАО/gi,
            capturePattern: /[А-ЯЁ"][а-яёА-ЯЁ\s\-"]+/,
            direction: 'after',
            maxDistance: 30,
            confidence: 0.70,
        },
        {
            // C3: two or three capitalized Cyrillic words after a greeting.
            id: 'C3',
            activeIn: [
                detection_context_1.DETECTION_CONTEXT.OUTREACH_MESSAGE,
                detection_context_1.DETECTION_CONTEXT.TRAINING_SCENARIO,
            ],
            entityType: entity_type_1.ENTITY_TYPE.PERSON,
            triggerPattern: /уважаем\S*|здравствуйте|добрый день/gi,
            capturePattern: /[А-ЯЁ][а-яё]+\s+[А-ЯЁ][а-яё]+(?:\s+[А-ЯЁ][а-яё]+)?/,
            direction: 'after',
            maxDistance: 50,
            confidence: 0.75,
        },
        {
            // C4: one or two capitalized Cyrillic words after client/customer/buyer.
            id: 'C4',
            activeIn: [
                detection_context_1.DETECTION_CONTEXT.SALES_CALL_TRANSCRIPT,
                detection_context_1.DETECTION_CONTEXT.CHAT_MESSAGE,
                detection_context_1.DETECTION_CONTEXT.TRAINING_SCENARIO,
            ],
            entityType: entity_type_1.ENTITY_TYPE.PERSON,
            triggerPattern: /клиент|заказчик|покупатель/gi,
            capturePattern: /[А-ЯЁ][а-яё]+(?:\s+[А-ЯЁ][а-яё]+)?/,
            direction: 'after',
            maxDistance: 30,
            confidence: 0.65,
        },
        {
            // C6: amounts after revenue/income/loss stems; active only for leak analysis.
            id: 'C6',
            activeIn: [
                detection_context_1.DETECTION_CONTEXT.LEAK_ANALYSIS,
            ],
            entityType: entity_type_1.ENTITY_TYPE.AMOUNT,
            triggerPattern: /выручк|доход|убыт|потер/gi,
            capturePattern: /\d[\d\s,.]*\s*(?:руб|₽|тыс|млн|[KkКк])?/,
            direction: 'after',
            maxDistance: 80,
            confidence: 0.70,
        },
    ],
    // Dictionary data files are resolved relative to this compiled module.
    dictionary: {
        namesPath: path.join(__dirname, 'data', 'names.csv'),
        stopwordsPath: path.join(__dirname, 'data', 'stopwords.csv'),
        // Splits on any run of characters that is not a Cyrillic/Latin letter or a hyphen.
        tokenSplitPattern: /[^а-яёА-ЯЁa-zA-Z-]+/,
    },
    systemPrompt: {
        buildInstruction: buildRuInstruction,
    },
};
|
|
227
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/locales/ru/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,2CAA6B;AAC7B,qEAAkE;AAClE,yDAAsD;AAMtD,MAAM,aAAa,GACjB,sEAAsE,CAAC;AAGzE,MAAM,aAAa,GACjB,8DAA8D,CAAC;AAGjE,MAAM,WAAW,GAAG,uBAAuB,CAAC;AAC5C,MAAM,WAAW,GAAG,cAAc,CAAC;AAGnC,MAAM,YAAY,GAAG,6CAA6C,CAAC;AAGnE,MAAM,gBAAgB,GAAG,4BAA4B,CAAC;AACtD,MAAM,gBAAgB,GAAG,yBAAyB,CAAC;AAGnD,MAAM,WAAW,GAAG,8BAA8B,CAAC;AACnD,MAAM,WAAW,GAAG,wBAAwB,CAAC;AAG7C,MAAM,cAAc,GAClB,qFAAqF,CAAC;AACxF,MAAM,cAAc,GAClB,yDAAyD,CAAC;AAG5D,MAAM,eAAe,GAAG,aAAa,CAAC;AACtC,MAAM,eAAe,GAAG,4BAA4B,CAAC;AAErD,MAAM,kBAAkB,GAAG,EAAE,CAAC;AAC9B,MAAM,uBAAuB,GAAG,EAAE,CAAC;AACnC,MAAM,kBAAkB,GAAG,EAAE,CAAC;AAC9B,MAAM,qBAAqB,GAAG,EAAE,CAAC;AACjC,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAElC,MAAM,eAAe,GAAG,IAAI,CAAC;AAC7B,MAAM,cAAc,GAAG,IAAI,CAAC;AAC5B,MAAM,mBAAmB,GAAG,IAAI,CAAC;AACjC,MAAM,cAAc,GAAG,IAAI,CAAC;AAC5B,MAAM,iBAAiB,GAAG,IAAI,CAAC;AAC/B,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAYhC,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAE7B,SAAS,kBAAkB,CAAC,YAAsB;IAChD,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEzC,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEtE,OAAO;;;kCAGyB,QAAQ;;;;;;IAMtC,CAAC;AACL,CAAC;AAIY,QAAA,SAAS,GAAiB;IACrC,IAAI,EAAE,IAAI;IAEV,UAAU,EAAE;QACV;YACE,IAAI,EAAE,yBAAW,CAAC,KAAK;YACvB,OAAO,EAAE,aAAa;YACtB,cAAc,EAAE,IAAI;YACpB,aAAa,EAAE,CAAC;YAChB,gBAAgB,EAAE,MAAM;YACxB,UAAU,EAAE,eAAe;SAC5B;QACD;YACE,IAAI,EAAE,yBAAW,CAAC,KAAK;YACvB,OAAO,EAAE,aAAa;YACtB,cAAc,EAAE,IAAI;YACpB,aAAa,EAAE,CAAC;YAChB,gBAAgB,EAAE,MAAM;YACxB,UAAU,EAAE,eAAe;SAC5B;QACD;YACE,IAAI,EAAE,yBAAW,CAAC,GAAG;YACrB,OAAO,EAAE,WAAW;YACpB,cAAc,EAAE,WAAW;YAC3B,aAAa,EAAE,kBAAkB;YACjC,gBAAgB,EAAE,QAAQ;YAC1B,UAAU,EAAE,cAAc;SAC3B;QACD;YACE,IAAI,EAAE,yBAAW,CAAC,IAAI;YACtB,OAAO,EAAE,YAAY;YACrB,cAAc,EAAE,IAAI;YACpB,aAAa,EAAE,CAAC;YAChB,gBAAgB,EAAE,MAAM;YACxB,UAAU,EAAE,eAAe;SAC5B;QACD;YACE,IAAI,EAAE,yBAAW,CAAC,QAAQ;YAC1B,OAAO,EAAE,gBAAgB;YACzB,cAAc,EAAE,gBAAgB;YAChC,aAAa,EAAE,uBAA
uB;YACtC,gBAAgB,EAAE,MAAM;YACxB,UAAU,EAAE,mBAAmB;SAChC;QACD;YACE,IAAI,EAAE,yBAAW,CAAC,aAAa;YAC/B,OAAO,EAAE,WAAW;YACpB,cAAc,EAAE,WAAW;YAC3B,aAAa,EAAE,kBAAkB;YACjC,gBAAgB,EAAE,MAAM;YACxB,UAAU,EAAE,cAAc;SAC3B;QACD;YACE,IAAI,EAAE,yBAAW,CAAC,MAAM;YACxB,OAAO,EAAE,cAAc;YACvB,cAAc,EAAE,cAAc;YAC9B,aAAa,EAAE,qBAAqB;YACpC,gBAAgB,EAAE,MAAM;YACxB,UAAU,EAAE,iBAAiB;SAC9B;QACD;YACE,IAAI,EAAE,yBAAW,CAAC,OAAO;YACzB,OAAO,EAAE,eAAe;YACxB,cAAc,EAAE,eAAe;YAC/B,aAAa,EAAE,sBAAsB;YACrC,gBAAgB,EAAE,MAAM;YACxB,UAAU,EAAE,kBAAkB;SAC/B;KACF;IAED,eAAe,EAAE;QAEf;YACE,EAAE,EAAE,IAAI;YACR,QAAQ,EAAE;gBACR,qCAAiB,CAAC,qBAAqB;gBACvC,qCAAiB,CAAC,aAAa;gBAC/B,qCAAiB,CAAC,iBAAiB;aACpC;YACD,UAAU,EAAE,yBAAW,CAAC,MAAM;YAC9B,cAAc,EAAE,uCAAuC;YACvD,cAAc,EAAE,yCAAyC;YACzD,SAAS,EAAE,OAAO;YAClB,WAAW,EAAE,EAAE;YACf,UAAU,EAAE,IAAI;SACjB;QAED;YACE,EAAE,EAAE,IAAI;YACR,QAAQ,EAAE;gBACR,qCAAiB,CAAC,qBAAqB;gBACvC,qCAAiB,CAAC,gBAAgB;gBAClC,qCAAiB,CAAC,aAAa;gBAC/B,qCAAiB,CAAC,iBAAiB;aACpC;YACD,UAAU,EAAE,yBAAW,CAAC,OAAO;YAC/B,cAAc,EAAE,8CAA8C;YAC9D,cAAc,EAAE,yBAAyB;YACzC,SAAS,EAAE,OAAO;YAClB,WAAW,EAAE,EAAE;YACf,UAAU,EAAE,IAAI;SACjB;QAED;YACE,EAAE,EAAE,IAAI;YACR,QAAQ,EAAE;gBACR,qCAAiB,CAAC,gBAAgB;gBAClC,qCAAiB,CAAC,iBAAiB;aACpC;YACD,UAAU,EAAE,yBAAW,CAAC,MAAM;YAC9B,cAAc,EAAE,uCAAuC;YACvD,cAAc,EAAE,oDAAoD;YACpE,SAAS,EAAE,OAAO;YAClB,WAAW,EAAE,EAAE;YACf,UAAU,EAAE,IAAI;SACjB;QAED;YACE,EAAE,EAAE,IAAI;YACR,QAAQ,EAAE;gBACR,qCAAiB,CAAC,qBAAqB;gBACvC,qCAAiB,CAAC,YAAY;gBAC9B,qCAAiB,CAAC,iBAAiB;aACpC;YACD,UAAU,EAAE,yBAAW,CAAC,MAAM;YAC9B,cAAc,EAAE,8BAA8B;YAC9C,cAAc,EAAE,oCAAoC;YACpD,SAAS,EAAE,OAAO;YAClB,WAAW,EAAE,EAAE;YACf,UAAU,EAAE,IAAI;SACjB;QAED;YACE,EAAE,EAAE,IAAI;YACR,QAAQ,EAAE;gBACR,qCAAiB,CAAC,aAAa;aAChC;YACD,UAAU,EAAE,yBAAW,CAAC,MAAM;YAC9B,cAAc,EAAE,2BAA2B;YAC3C,cAAc,EAAE,yCAAyC;YACzD,SAAS,EAAE,OAAO;YAClB,WAAW,EAAE,EAAE;YACf,UAAU,EAAE,IAAI;SACjB;KACF;IAED,UAAU,EAAE;QACV,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,CAAC;QACpD,aAAa,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,EAAE,eAAe,CAAC;QAC5D,iBAAiB,EAAE,qBAAqB;KACzC;IAED,Y
AAY,EAAE;QACZ,gBAAgB,EAAE,kBAAkB;KACrC;CACF,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { DetectedEntity } from '../types/detected-entity.interface';
|
|
2
|
+
import { MappingData } from '../types/mapping-data.interface';
|
|
3
|
+
/** Result of `tokenize`. */
export interface TokenizeResult {
    /** The input text with each detected entity replaced by a `[KEY]` placeholder. */
    tokenized: string;
    /** A copy of the mapping passed in, extended with any newly created tokens. */
    updatedMapping: MappingData;
}
|
|
7
|
+
/**
 * Canonicalizes an entity value for reverse-index lookup: lower-cases it,
 * trims it, and collapses each whitespace run into a single space.
 */
export declare function normalizeValue(value: string): string;
/**
 * Replaces every entity span in `text` with a bracketed placeholder token.
 * Values with the same normalized form reuse one token. The mapping passed in
 * is copied, not mutated. Throws an `Error` when a type's token counter
 * exceeds the configured per-type maximum.
 */
export declare function tokenize(text: string, entities: DetectedEntity[], existingMapping: MappingData): TokenizeResult;
/**
 * Restores original values for placeholder tokens found in `text`.
 * Tokens with no entry in `mappingData.tokens` are left in place and reported
 * in `unresolvedTokens`.
 */
export declare function detokenize(text: string, mappingData: MappingData): {
    restored: string;
    unresolvedTokens: string[];
};
|
|
13
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/tokenizer/tokenizer.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,cAAc,EAAE,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAE,WAAW,EAAE,MAAM,iCAAiC,CAAC;AAE9D,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,WAAW,CAAC;CAC7B;AAMD,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAEpD;AAMD,wBAAgB,QAAQ,CACtB,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,cAAc,EAAE,EAC1B,eAAe,EAAE,WAAW,GAC3B,cAAc,CA8ChB;AAKD,wBAAgB,UAAU,CACxB,IAAI,EAAE,MAAM,EACZ,WAAW,EAAE,WAAW,GACvB;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,gBAAgB,EAAE,MAAM,EAAE,CAAA;CAAE,CAgBlD"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeValue = normalizeValue;
|
|
4
|
+
exports.tokenize = tokenize;
|
|
5
|
+
exports.detokenize = detokenize;
|
|
6
|
+
const constants_1 = require("../constants");
|
|
7
|
+
/**
 * Canonicalizes an entity value so that differently cased or spaced spellings
 * map to the same reverse-index key.
 *
 * @param {string} value - Raw entity text.
 * @returns {string} The value lower-cased, trimmed, with every whitespace run
 *   collapsed into a single space.
 */
function normalizeValue(value) {
    const lowered = value.toLowerCase();
    const trimmed = lowered.trim();
    const collapsed = trimmed.replace(/\s+/g, ' ');
    return collapsed;
}
|
|
10
|
+
/**
 * Replaces every detected entity in `text` with a bracketed placeholder token
 * (`[KEY]`) and records the token-to-value mapping.
 *
 * Entities whose normalized value is already in the reverse index reuse the
 * existing token; otherwise a new key is built from the entity type and the
 * next per-type counter.
 *
 * Entity spans are assumed not to overlap; overlapping spans are not
 * reconciled here.
 *
 * @param {string} text - Source text; `entity.position` offsets refer to it.
 * @param {Array<{type: string, value: string, position: {start: number, end: number}}>} entities
 *   Detected entities to replace.
 * @param {{tokens: Object, reverseIndex: Object, nextCounters: Object}} existingMapping
 *   Mapping accumulated so far. It is deep-copied and never mutated.
 * @returns {{tokenized: string, updatedMapping: Object}} The rewritten text
 *   and the extended copy of the mapping.
 * @throws {Error} When a type's counter exceeds `MAX_TOKENS_PER_TYPE`.
 */
function tokenize(text, entities, existingMapping) {
    // Deep copy (JSON round-trip) so the caller's mapping stays untouched.
    const mapping = JSON.parse(JSON.stringify(existingMapping));
    if (entities.length === 0) {
        return { tokenized: text, updatedMapping: mapping };
    }
    // Process right-to-left so replacing a span never shifts the offsets of
    // spans that are still to be processed.
    const sorted = [...entities].sort((a, b) => b.position.start - a.position.start);
    let result = text;
    for (const entity of sorted) {
        const normalized = normalizeValue(entity.value);
        // Look up OWN properties only. A plain-object read would otherwise
        // resolve values such as "constructor" or "__proto__" to built-ins
        // inherited from Object.prototype and treat them as an existing token.
        let tokenKey = Object.prototype.hasOwnProperty.call(mapping.reverseIndex, normalized)
            ? mapping.reverseIndex[normalized]
            : undefined;
        if (!tokenKey) {
            const counter = mapping.nextCounters[entity.type] ?? 1;
            if (counter > constants_1.MAX_TOKENS_PER_TYPE) {
                throw new Error(`Превышен лимит токенов для типа ${entity.type}: максимум ${constants_1.MAX_TOKENS_PER_TYPE}`);
            }
            tokenKey = (0, constants_1.MAPPING_KEY_TEMPLATE)(entity.type, counter);
            mapping.tokens[tokenKey] = entity.value;
            mapping.reverseIndex[normalized] = tokenKey;
            mapping.nextCounters[entity.type] = counter + 1;
        }
        const tokenText = `[${tokenKey}]`;
        result =
            result.slice(0, entity.position.start) +
                tokenText +
                result.slice(entity.position.end);
    }
    return { tokenized: result, updatedMapping: mapping };
}
|
|
38
|
+
/**
 * Restores original values for the placeholder tokens found in `text`.
 *
 * @param {string} text - Text that may contain placeholder tokens matching
 *   `TOKEN_PATTERN`.
 * @param {{tokens: Object}} mappingData - Mapping whose `tokens` record maps a
 *   token key (the pattern's first capture group) to the original value.
 * @returns {{restored: string, unresolvedTokens: string[]}} The text with known
 *   tokens substituted, plus every matched placeholder that had no mapping
 *   entry (left unchanged in the text), in order of appearance.
 */
function detokenize(text, mappingData) {
    const unresolvedTokens = [];
    // Replacer callback: its return value is inserted verbatim.
    const restoreToken = (placeholder, tokenKey) => {
        const value = mappingData.tokens[tokenKey];
        if (value === undefined) {
            unresolvedTokens.push(placeholder);
            return placeholder;
        }
        return value;
    };
    const restored = text.replace(constants_1.TOKEN_PATTERN, restoreToken);
    return { restored, unresolvedTokens };
}
|
|
50
|
+
//# sourceMappingURL=tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/tokenizer/tokenizer.ts"],"names":[],"mappings":";;AAiBA,wCAEC;AAMD,4BAkDC;AAKD,gCAmBC;AAnGD,4CAIsB;AAatB,SAAgB,cAAc,CAAC,KAAa;IAC1C,OAAO,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AACzD,CAAC;AAMD,SAAgB,QAAQ,CACtB,IAAY,EACZ,QAA0B,EAC1B,eAA4B;IAI5B,MAAM,OAAO,GAAgB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC,CAAC;IAEzE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,CAAC;IACtD,CAAC;IAGD,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAC/B,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAC9C,CAAC;IAEF,IAAI,MAAM,GAAG,IAAI,CAAC;IAElB,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAChD,IAAI,QAAQ,GAAG,OAAO,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;QAEhD,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAEvD,IAAI,OAAO,GAAG,+BAAmB,EAAE,CAAC;gBAClC,MAAM,IAAI,KAAK,CACb,mCAAmC,MAAM,CAAC,IAAI,cAAc,+BAAmB,EAAE,CAClF,CAAC;YACJ,CAAC;YAED,QAAQ,GAAG,IAAA,gCAAoB,EAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACtD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC;YACxC,OAAO,CAAC,YAAY,CAAC,UAAU,CAAC,GAAG,QAAQ,CAAC;YAC5C,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,OAAO,GAAG,CAAC,CAAC;QAClD,CAAC;QAID,MAAM,SAAS,GAAG,IAAI,QAAQ,GAAG,CAAC;QAElC,MAAM;YACJ,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;gBACtC,SAAS;gBACT,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;IACtC,CAAC;IAED,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,cAAc,EAAE,OAAO,EAAE,CAAC;AACxD,CAAC;AAKD,SAAgB,UAAU,CACxB,IAAY,EACZ,WAAwB;IAExB,MAAM,gBAAgB,GAAa,EAAE,CAAC;IAEtC,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAC3B,yBAAa,EACb,CAAC,KAAa,EAAE,GAAW,EAAE,EAAE;QAC7B,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACzC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,OAAO,QAAQ,CAAC;QAClB,CAAC;QACD,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,OAAO,KAAK,CAAC;IACf,CAAC,
CACF,CAAC;IAEF,OAAO,EAAE,QAAQ,EAAE,gBAAgB,EAAE,CAAC;AACxC,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { CompletionsMessage } from '../types/llm-message.interface';
|
|
2
|
+
import { TraverserPort, ExtractedString } from './traverser.interface';
|
|
3
|
+
/**
 * Traverser for an array of `CompletionsMessage` objects.
 */
export declare class CompletionsTraverser implements TraverserPort<CompletionsMessage[]> {
    /**
     * Collects the strings found in `messages`, each tagged with a path such as
     * `messages[0].content` or `messages[0].content[1].text`. Covers non-empty
     * string content, non-empty `text` parts of array content, and tool-call
     * `function.arguments` (walked as parsed JSON, or taken as the raw string
     * when it does not parse).
     */
    extractStrings(messages: CompletionsMessage[]): ExtractedString[];
    /**
     * Returns a clone of `messages` in which strings are replaced by the entry
     * of `processed` stored under the same path.
     * NOTE(review): only the content/text handling is visible in this excerpt —
     * confirm how tool-call arguments are reassembled.
     */
    reassemble(messages: CompletionsMessage[], processed: Map<string, string>): CompletionsMessage[];
}
|
|
7
|
+
//# sourceMappingURL=completions.traverser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"completions.traverser.d.ts","sourceRoot":"","sources":["../../src/traversers/completions.traverser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,gCAAgC,CAAC;AACpE,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAOvE,qBAAa,oBACX,YAAW,aAAa,CAAC,kBAAkB,EAAE,CAAC;IAE9C,cAAc,CAAC,QAAQ,EAAE,kBAAkB,EAAE,GAAG,eAAe,EAAE;IA4CjE,UAAU,CACR,QAAQ,EAAE,kBAAkB,EAAE,EAC9B,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAC7B,kBAAkB,EAAE;CAyCxB"}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CompletionsTraverser = void 0;
|
|
4
|
+
/**
 * Walks chat-completions messages, pulling out text fragments keyed by a
 * path string and later writing processed replacements back in.
 */
class CompletionsTraverser {
    /**
     * Collect every non-empty text fragment from the given messages.
     *
     * Looks at string content, `text` parts of array content, and the
     * `function.arguments` of each tool call (parsed as JSON when possible,
     * otherwise taken as one raw string).
     *
     * @param {Array<object>} messages - Messages to scan.
     * @returns {Array<{path: string, value: string}>} Fragments in visit order.
     */
    extractStrings(messages) {
        const found = [];
        messages.forEach((message, msgIdx) => {
            const prefix = `messages[${msgIdx}]`;
            const body = message.content;
            if (typeof body === 'string') {
                if (body.length > 0) {
                    found.push({ path: `${prefix}.content`, value: body });
                }
            }
            else if (Array.isArray(body)) {
                body.forEach((part, partIdx) => {
                    const hasText = part.type === 'text' && part.text && part.text.length > 0;
                    if (!hasText) {
                        return;
                    }
                    found.push({
                        path: `${prefix}.content[${partIdx}].text`,
                        value: part.text,
                    });
                });
            }
            const calls = message.tool_calls;
            if (!calls) {
                return;
            }
            calls.forEach((call, callIdx) => {
                if (!call.function?.arguments) {
                    return;
                }
                const argsPath = `${prefix}.tool_calls[${callIdx}].function.arguments`;
                try {
                    // Structured arguments are walked value by value.
                    extractFromValue(JSON.parse(call.function.arguments), argsPath, found);
                }
                catch {
                    // Not valid JSON: treat the whole arguments string as one fragment.
                    found.push({ path: argsPath, value: call.function.arguments });
                }
            });
        });
        return found;
    }
    /**
     * Build a deep copy of `messages` with processed fragments substituted.
     *
     * @param {Array<object>} messages - Original messages; left unmodified.
     * @param {Map<string, string>} processed - Replacement text keyed by path.
     * @returns {Array<object>} The cloned messages with replacements applied.
     */
    reassemble(messages, processed) {
        const copy = structuredClone(messages);
        copy.forEach((message, msgIdx) => {
            const prefix = `messages[${msgIdx}]`;
            const wholePath = `${prefix}.content`;
            if (typeof message.content === 'string' && processed.has(wholePath)) {
                message.content = processed.get(wholePath);
            }
            if (Array.isArray(message.content)) {
                message.content.forEach((part, partIdx) => {
                    const textPath = `${prefix}.content[${partIdx}].text`;
                    if (part.type !== 'text' || !processed.has(textPath)) {
                        return;
                    }
                    part.text = processed.get(textPath);
                });
            }
            if (!message.tool_calls) {
                return;
            }
            message.tool_calls.forEach((call, callIdx) => {
                const argsPath = `${prefix}.tool_calls[${callIdx}].function.arguments`;
                if (!call.function?.arguments) {
                    return;
                }
                try {
                    const rebuiltArgs = reassembleValue(JSON.parse(call.function.arguments), argsPath, processed);
                    call.function.arguments = JSON.stringify(rebuiltArgs);
                }
                catch {
                    // Arguments were not parseable JSON: swap the raw string if one was processed.
                    if (processed.has(argsPath)) {
                        call.function.arguments = processed.get(argsPath);
                    }
                }
            });
        });
        return copy;
    }
}
|
|
77
|
+
exports.CompletionsTraverser = CompletionsTraverser;
|
|
78
|
+
/**
 * Recursively collect every non-empty string inside a parsed JSON value.
 *
 * Array elements extend the path with `[index]`, object properties with
 * `.key`. Anything that is not a string, array, or non-null object is ignored.
 *
 * @param {*} value - Value to inspect.
 * @param {string} path - Path that identifies `value`.
 * @param {Array<{path: string, value: string}>} result - Output list, appended to in place.
 * @returns {void}
 */
function extractFromValue(value, path, result) {
    switch (typeof value) {
        case 'string':
            if (value.length > 0) {
                result.push({ path, value });
            }
            return;
        case 'object':
            break;
        default:
            return;
    }
    if (value === null) {
        return;
    }
    if (Array.isArray(value)) {
        value.forEach((element, idx) => extractFromValue(element, `${path}[${idx}]`, result));
        return;
    }
    Object.entries(value).forEach(([prop, child]) => {
        extractFromValue(child, `${path}.${prop}`, result);
    });
}
|
|
95
|
+
/**
 * Rebuild a parsed JSON value, replacing each string whose path is a key of
 * `processed` with the mapped text.
 *
 * Paths are formed as `[index]` for array elements and `.key` for object
 * properties. Arrays and objects are rebuilt as new containers; other
 * values are returned unchanged.
 *
 * @param {*} value - Parsed JSON value to rebuild.
 * @param {string} path - Path that identifies `value`.
 * @param {Map<string, string>} processed - Replacement text keyed by path.
 * @returns {*} The rebuilt value.
 */
function reassembleValue(value, path, processed) {
    if (typeof value === 'string') {
        return processed.has(path) ? processed.get(path) : value;
    }
    if (Array.isArray(value)) {
        return value.map((item, i) => reassembleValue(item, `${path}[${i}]`, processed));
    }
    if (value !== null && typeof value === 'object') {
        // Object.fromEntries defines own data properties, so a "__proto__" key
        // produced by JSON.parse is kept as ordinary data. Assigning it with
        // `obj[key] = ...` would invoke the prototype setter and drop the key
        // from the re-serialized output.
        return Object.fromEntries(
            Object.entries(value).map(([key, val]) => [
                key,
                reassembleValue(val, `${path}.${key}`, processed),
            ]),
        );
    }
    return value;
}
|
|
111
|
+
//# sourceMappingURL=completions.traverser.js.map
|