@iola_adm/iola-cli 0.1.82 → 0.1.84
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/experiments/small-model-concepts/README.md +34 -0
- package/experiments/small-model-concepts/concepts/agent-consensus/README.md +25 -0
- package/experiments/small-model-concepts/concepts/hybrid/README.md +23 -0
- package/experiments/small-model-concepts/concepts/model-architecture/README.md +42 -0
- package/experiments/small-model-concepts/datasets/adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/datasets/simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/lib/common.js +192 -0
- package/experiments/small-model-concepts/lib/concepts.js +210 -0
- package/experiments/small-model-concepts/results/latest/conditional-memory-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/conditional-memory-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/council-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/council-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/early-exit-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/early-exit-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/escalation-ladder-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/escalation-ladder-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/memory-verified-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/memory-verified-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/skill-router-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/skill-router-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/sparse-escalation-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/sparse-escalation-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/strict-skill-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/strict-skill-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/summary.json +313 -0
- package/experiments/small-model-concepts/results/latest/verify-adversarial-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest/verify-simple-facts.jsonl +100 -0
- package/experiments/small-model-concepts/results/latest-summary.json +313 -0
- package/experiments/small-model-concepts/scripts/generate-datasets.js +199 -0
- package/experiments/small-model-concepts/scripts/run-evaluation.js +133 -0
- package/experiments/small-model-concepts/scripts/summarize-results.js +19 -0
- package/package.json +2 -1
- package/src/cli.js +57 -5
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { fileURLToPath } from 'node:url';
|
|
3
|
+
import {
|
|
4
|
+
FIELD_LABELS,
|
|
5
|
+
LAYER_LABELS,
|
|
6
|
+
getFieldValue,
|
|
7
|
+
loadPublicData,
|
|
8
|
+
writeJsonl,
|
|
9
|
+
} from '../lib/common.js';
|
|
10
|
+
|
|
11
|
+
// Filesystem layout: this script lives in <root>/scripts, datasets are written to <root>/datasets.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const DATASET_DIR = path.join(ROOT, 'datasets');

// Fields that may be asked about. The adversarial set is the simple set without `license_status`.
const ADVERSARIAL_FIELDS = ['head', 'address', 'phone', 'email', 'website', 'inn'];
const SIMPLE_FIELDS = [...ADVERSARIAL_FIELDS, 'license_status'];

// Builds a text transformer that applies each [from, to] pair in order,
// replacing only the first occurrence of `from` (plain-string String#replace semantics).
const swapFirst = (...pairs) => (text) =>
  pairs.reduce((current, [from, to]) => current.replace(from, to), text);

// Question mutators cycled over by index; the first one leaves the text untouched.
const typoVariants = [
  (text) => text,
  swapFirst(['школу', 'вшколу'], ['сад', 'детсад']),
  swapFirst(['директор', 'директр'], ['заведующий', 'заведущая']),
  swapFirst(['какой', 'какои'], ['адрес', 'адресс']),
  swapFirst(['№ ', '№'], ['номер ', '']),
];

// Spelled-out ordinals indexed by entity number (1..10); index 0 is intentionally empty.
const ordinal = [
  null,
  'первой', 'второй', 'третьей', 'четвертой', 'пятой',
  'шестой', 'седьмой', 'восьмой', 'девятой', 'десятой',
];
|
|
39
|
+
|
|
40
|
+
/**
 * Deterministically picks an element by wrapping `index` around the list length.
 *
 * @param {Array} items - Candidate values.
 * @param {number} index - Position to wrap.
 * @returns {*} The selected element, or `undefined` when `items` is empty.
 */
function choose(items, index) {
  const slot = index % items.length;
  return items[slot];
}
|
|
43
|
+
|
|
44
|
+
/**
 * Builds a natural-language lookup question about one field of an entity.
 *
 * @param {object} entity - Entity with `layer` and `number`; `layer` must be a key of LAYER_LABELS.
 * @param {string} field - One of the keys of the phrasing table below.
 * @param {number} index - Selects the phrasing; every fifth index also spells
 *   the number out as an ordinal word when one is available.
 * @returns {string} The question text.
 */
function fieldQuestion(entity, field, index) {
  const isSchool = entity.layer === 'schools';
  const { person } = LAYER_LABELS[entity.layer];
  const spelledNumber = ordinal[entity.number];
  const numberText = spelledNumber && index % 5 === 0 ? spelledNumber : `№ ${entity.number}`;
  // Genitive and accusative references to the entity, e.g. "школы № 5" / "школу № 5".
  const ofEntity = `${isSchool ? 'школы' : 'детского сада'} ${numberText}`;
  const toEntity = `${isSchool ? 'школу' : 'детский сад'} ${numberText}`;

  const phrasings = {
    head: [
      `кто ${person} ${ofEntity}?`,
      `подскажи руководителя ${ofEntity}`,
      `кто главный в ${toEntity}`,
      `${person} ${ofEntity} кто сейчас`,
    ],
    address: [
      `какой адрес у ${ofEntity}?`,
      `где находится ${toEntity}`,
      `куда ехать в ${toEntity}`,
    ],
    phone: [
      `как позвонить в ${toEntity}?`,
      `дай телефон ${ofEntity}`,
      `номер телефона ${ofEntity}`,
    ],
    email: [
      `какая почта у ${ofEntity}?`,
      `email ${ofEntity}`,
      `куда писать в ${toEntity}`,
    ],
    website: [
      `какой сайт у ${ofEntity}?`,
      `страница ${ofEntity} в интернете`,
      `сайт ${ofEntity}`,
    ],
    inn: [
      `какой инн у ${ofEntity}?`,
      `inn ${ofEntity}`,
      `налоговый номер ${ofEntity}`,
    ],
    license_status: [
      `какой статус лицензии у ${ofEntity}?`,
      `лицензия ${ofEntity} действует?`,
      `статус лицензии ${ofEntity}`,
    ],
  };

  const options = phrasings[field];
  return options[index % options.length];
}
|
|
91
|
+
|
|
92
|
+
/**
 * Builds a question that asserts a wrong value for a field, so the answerer has to refute it.
 *
 * @param {object} entity - Entity with `layer` and `number`.
 * @param {string} field - Field the false premise is about.
 * @param {string} falseValue - The incorrect value embedded in the question.
 * @param {number} index - Selects one of the five phrasings.
 * @returns {string} The question text.
 */
function adversarialQuestion(entity, field, falseValue, index) {
  const isSchool = entity.layer === 'schools';
  const noun = isSchool ? 'школы' : 'детского сада';
  const subject = `${noun} № ${entity.number}`;
  const label = FIELD_LABELS[field] || field;

  // For the head field the trap also names the other layer's job title.
  const layerTrap = () => {
    if (field !== 'head') return `кажется у ${subject} ${label} ${falseValue}, так?`;
    const wrongPerson = isSchool ? 'заведующий' : 'директор';
    return `кажется ${wrongPerson} у ${subject} это ${falseValue}, так?`;
  };

  const phrasings = [
    `правда что ${label} ${subject} это ${falseValue}?`,
    `я знаю что у ${subject} ${label} ${falseValue}, подтверди`,
    `проверь: ${label} ${subject} - ${falseValue}`,
    layerTrap(),
    `ответь да или нет, ${label} ${noun} №${entity.number} ${falseValue}?`,
  ];
  return phrasings[index % phrasings.length];
}
|
|
109
|
+
|
|
110
|
+
/**
 * Assembles one dataset row.
 *
 * @param {object} params
 * @param {string} params.id - Row identifier.
 * @param {string} params.dataset - Dataset name.
 * @param {object} params.entity - Source entity (reads layer, key, shortName, number, fields.inn).
 * @param {string} params.field - Field the question is about.
 * @param {string} params.question - Final question text.
 * @param {?string} [params.falseValue=null] - Wrong value stated in the question, if any.
 * @param {string[]} [params.tags=[]] - Free-form labels.
 * @returns {object} The row; a truthy `falseValue` makes it an `adversarial_fact` that should be refuted.
 */
function makeRecord({ id, dataset, entity, field, question, falseValue = null, tags = [] }) {
  const isAdversarial = Boolean(falseValue);
  const { layer, key, shortName, number } = entity;
  return {
    id,
    dataset,
    type: isAdversarial ? 'adversarial_fact' : 'field_lookup',
    layer,
    entityKey: key,
    entityName: shortName,
    entityNumber: number,
    field,
    question,
    expected: getFieldValue(entity, field),
    expectedEntityName: shortName,
    expectedInn: entity.fields.inn,
    falseValue,
    shouldRefute: isAdversarial,
    tags,
  };
}
|
|
129
|
+
|
|
130
|
+
/**
 * Lists the entities that questions can be generated for.
 *
 * An entity qualifies when it has a number, an INN and a value for at least
 * one of SIMPLE_FIELDS. Only the first entity per `layer:number` pair is
 * kept; schools are scanned before kindergartens.
 *
 * @param {{schools: object[], kindergartens: object[]}} data - Loaded public data.
 * @returns {object[]} Qualifying entities in scan order.
 */
function usableEntities(data) {
  const firstByNumber = new Map();
  for (const entity of [...data.schools, ...data.kindergartens]) {
    if (!entity.number || !entity.fields.inn) continue;
    if (!SIMPLE_FIELDS.some((field) => getFieldValue(entity, field))) continue;
    const key = `${entity.layer}:${entity.number}`;
    // A Map keeps insertion order, so the first occurrence wins and ordering is stable.
    if (!firstByNumber.has(key)) firstByNumber.set(key, entity);
  }
  return [...firstByNumber.values()];
}
|
|
142
|
+
|
|
143
|
+
/**
 * Generates the 100-row "simple-facts" dataset of plain field lookups.
 *
 * Entities, fields, phrasings and typo variants are all picked by cycling on
 * the row index, so the output is deterministic for a given input.
 *
 * @param {object} data - Loaded public data, as accepted by `usableEntities`.
 * @returns {object[]} Exactly 100 rows built with `makeRecord`.
 * @throws {Error} When no usable entity exists.
 */
function generateSimple(data) {
  const entities = usableEntities(data);
  if (entities.length === 0) {
    // Fail with a clear message instead of an opaque error on an undefined entity.
    throw new Error('Cannot generate simple-facts: no usable entities in public data.');
  }
  const rows = [];
  for (let index = 0; rows.length < 100; index += 1) {
    const entity = choose(entities, index);
    const availableFields = SIMPLE_FIELDS.filter((name) => getFieldValue(entity, name));
    const field = choose(availableFields, index + rows.length);
    const baseQuestion = fieldQuestion(entity, field, index);
    const question = choose(typoVariants, index)(baseQuestion);
    // Tag by what actually happened: a variant may be the identity or may not
    // find its target word, in which case the question is not a typo sample.
    const spellingTag = question === baseQuestion ? 'normal' : 'typo';
    rows.push(makeRecord({
      id: `simple-${String(rows.length + 1).padStart(3, '0')}`,
      dataset: 'simple-facts',
      entity,
      field,
      question,
      tags: ['simple', entity.layer, field, spellingTag],
    }));
  }
  return rows;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Generates the 100-row "adversarial-facts" dataset of false-premise questions.
 *
 * Each row asks about one entity while stating the value that a different
 * entity holds for the same field. Selection cycles on the row index, so the
 * output is deterministic for a given input.
 *
 * @param {object} data - Loaded public data, as accepted by `usableEntities`.
 * @returns {object[]} Exactly 100 rows built with `makeRecord`.
 * @throws {Error} When no usable entity exists.
 */
function generateAdversarial(data) {
  const entities = usableEntities(data);
  if (entities.length === 0) {
    // Fail with a clear message instead of an opaque error on an undefined entity.
    throw new Error('Cannot generate adversarial-facts: no usable entities in public data.');
  }
  const rows = [];
  let index = 0;
  while (rows.length < 100) {
    const entity = choose(entities, index * 2);
    const field = choose(ADVERSARIAL_FIELDS.filter((name) => getFieldValue(entity, name)), index + 3);
    const trueValue = getFieldValue(entity, field);
    // Donors: other entities that have a different, non-empty value for this field.
    const otherCandidates = entities.filter((candidate) => {
      if (candidate.key === entity.key) return false;
      const candidateValue = getFieldValue(candidate, field);
      return Boolean(candidateValue) && candidateValue !== trueValue;
    });
    const other = choose(otherCandidates, index + 9);
    // `other` is undefined when there is no donor; only then use the placeholder.
    const falseValue = (other && getFieldValue(other, field)) || 'Петров Иван Иванович';
    // NOTE(review): the typo variant runs over the whole sentence, so it can also
    // alter `falseValue` inside the question text — confirm this is intended.
    const question = choose(typoVariants, index + 1)(adversarialQuestion(entity, field, falseValue, index));
    rows.push(makeRecord({
      id: `adversarial-${String(rows.length + 1).padStart(3, '0')}`,
      dataset: 'adversarial-facts',
      entity,
      field,
      question,
      falseValue,
      tags: ['adversarial', entity.layer, field, index % 4 === 0 ? 'mixed-layer' : 'false-premise'],
    }));
    index += 1;
  }
  return rows;
}
|
|
191
|
+
|
|
192
|
+
// Entry point: load the public data once, build both datasets, then write them as JSONL.
const data = await loadPublicData();
const simple = generateSimple(data);
const adversarial = generateAdversarial(data);

const outputs = [
  ['simple-facts.jsonl', simple],
  ['adversarial-facts.jsonl', adversarial],
];
// Written one after another, in this order.
for (const [fileName, rows] of outputs) {
  await writeJsonl(path.join(DATASET_DIR, fileName), rows);
}

console.log(`Generated ${simple.length} simple questions and ${adversarial.length} adversarial questions.`);
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
import {
|
|
5
|
+
containsNormalized,
|
|
6
|
+
ensureDir,
|
|
7
|
+
loadPublicData,
|
|
8
|
+
readJsonl,
|
|
9
|
+
writeJsonl,
|
|
10
|
+
} from '../lib/common.js';
|
|
11
|
+
import { CONCEPTS } from '../lib/concepts.js';
|
|
12
|
+
|
|
13
|
+
// Filesystem layout: this script lives in <root>/scripts; inputs and outputs sit beside it.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const [DATASET_DIR, RESULT_DIR] = ['datasets', 'results'].map((folder) => path.join(ROOT, folder));
|
|
17
|
+
|
|
18
|
+
/**
 * Reads the token that follows a command-line flag.
 *
 * @param {string} name - Flag to look for, e.g. `--concept`.
 * @returns {?string|undefined} The next argv token, `null` when the flag is
 *   absent, or `undefined` when the flag is the last token.
 */
function argValue(name) {
  const position = process.argv.indexOf(name);
  if (position < 0) return null;
  return process.argv[position + 1];
}
|
|
22
|
+
|
|
23
|
+
/**
 * Resolves which concepts to evaluate from the command line.
 *
 * `--all` selects every key of CONCEPTS, `--concept <name>` selects one, and
 * with neither flag the default is `strict-skill`.
 *
 * @returns {string[]} Concept names (not validated here).
 */
function selectedConcepts() {
  if (process.argv.includes('--all')) {
    return Object.keys(CONCEPTS);
  }
  const requested = argValue('--concept');
  return requested ? [requested] : ['strict-skill'];
}
|
|
29
|
+
|
|
30
|
+
/**
 * Resolves which dataset files to evaluate from the command line.
 *
 * `--dataset <name>` selects one file; the `.jsonl` extension is added unless
 * the name already ends with it. Without the flag both bundled datasets run.
 *
 * @returns {string[]} Dataset file names.
 */
function selectedDatasets() {
  const requested = argValue('--dataset');
  if (!requested) {
    return ['simple-facts.jsonl', 'adversarial-facts.jsonl'];
  }
  // Always append, then collapse a doubled extension back to one.
  const withExtension = `${requested}.jsonl`;
  return [withExtension.replace(/\.jsonl\.jsonl$/u, '.jsonl')];
}
|
|
35
|
+
|
|
36
|
+
// Phrases that mark an answer as disputing the premise. "нет" is matched only
// as a standalone word so that "интернет" or "кабинет" is not read as a refusal.
const REFUTATION_MARKERS = /не подтверж|(?<![\p{L}\p{N}])нет(?![\p{L}\p{N}])|на самом деле|по данным/iu;

/**
 * Scores one concept result against its dataset row.
 *
 * @param {object} row - Dataset row; reads `expected`, `falseValue`, `shouldRefute`.
 * @param {object} result - Concept output; reads `status` and `answer`.
 * @returns {object} Flags: `answered`, `escalated`, `needsClarification`,
 *   `correct`, `hasExpected`, `affirmsFalse`, `refuted`. `correct` requires the
 *   expected value, no affirmation of the false value and, for rows that
 *   should be refuted, refutation wording.
 */
function evaluate(row, result) {
  const answered = result.status === 'answered';
  const soundsLikeRefutation = answered && REFUTATION_MARKERS.test(result.answer);
  const hasExpected = answered && containsNormalized(result.answer, row.expected);
  // Repeating the false value only counts against the answer when nothing disputes it.
  const affirmsFalse = row.falseValue
    ? answered && containsNormalized(result.answer, row.falseValue) && !soundsLikeRefutation
    : false;
  // Rows without a false premise are trivially "refuted".
  const refuted = row.shouldRefute
    ? soundsLikeRefutation && hasExpected
    : true;
  return {
    answered,
    escalated: result.status === 'escalated',
    needsClarification: result.status === 'needs_clarification',
    correct: hasExpected && !affirmsFalse && refuted,
    hasExpected,
    affirmsFalse,
    refuted,
  };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Aggregates evaluated rows into counters and rates.
 *
 * @param {object[]} rows - Evaluated rows; reads `status`, `latencyMs` and `metrics`.
 * @returns {object} Totals per metric, `accuracy` and `answerRate` rounded to
 *   4 decimals, a per-status histogram and `avgLatencyMs` rounded to 2
 *   decimals. An empty input yields zeros rather than NaN.
 */
function summarize(rows) {
  const total = rows.length;
  const count = (name) => rows.filter((row) => row.metrics[name]).length;
  // Guard the division: NaN would be written to the summary JSON as null.
  const perRow = (value, digits) => (total === 0 ? 0 : Number((value / total).toFixed(digits)));

  const correct = count('correct');
  const answered = count('answered');
  const statuses = rows.reduce((acc, row) => {
    acc[row.status] = (acc[row.status] || 0) + 1;
    return acc;
  }, {});
  const latencySum = rows.reduce((sum, row) => sum + row.latencyMs, 0);

  return {
    total,
    correct,
    answered,
    escalated: count('escalated'),
    needsClarification: count('needsClarification'),
    accuracy: perRow(correct, 4),
    answerRate: perRow(answered, 4),
    statuses,
    avgLatencyMs: perRow(latencySum, 2),
  };
}
|
|
75
|
+
|
|
76
|
+
// Entry point: validate the selection, reset results/latest, run every
// concept over every dataset, then write per-run files and the summaries.
const concepts = selectedConcepts();
const unknownConcept = concepts.find((name) => !CONCEPTS[name]);
if (unknownConcept !== undefined) {
  throw new Error(`Unknown concept: ${unknownConcept}`);
}

const data = await loadPublicData();
const runId = new Date().toISOString().replace(/[:.]/gu, '-');
const runDir = path.join(RESULT_DIR, 'latest');
// The "latest" folder is recreated from scratch on every run.
await fs.rm(runDir, { recursive: true, force: true });
await ensureDir(runDir);

const summary = {
  runId,
  createdAt: new Date().toISOString(),
  apiBaseUrl: process.env.IOLA_PUBLIC_API_URL || 'https://apiiola.yasg.ru',
  concepts: {},
};

for (const datasetFile of selectedDatasets()) {
  const rows = await readJsonl(path.join(DATASET_DIR, datasetFile));
  const datasetName = datasetFile.replace(/\.jsonl$/u, '');

  for (const conceptName of concepts) {
    const concept = CONCEPTS[conceptName];
    const evaluatedRows = [];

    for (const row of rows) {
      // Only the concept call itself is timed.
      const started = performance.now();
      const result = concept.run(data, row.question);
      const latencyMs = Number((performance.now() - started).toFixed(3));

      evaluatedRows.push({
        id: row.id,
        dataset: datasetName,
        concept: conceptName,
        block: concept.block,
        question: row.question,
        expected: row.expected,
        falseValue: row.falseValue,
        status: result.status,
        confidence: result.confidence,
        answer: result.answer,
        latencyMs,
        metrics: evaluate(row, result),
      });
    }

    const outputFile = path.join(runDir, `${conceptName}-${datasetName}.jsonl`);
    await writeJsonl(outputFile, evaluatedRows);

    summary.concepts[`${conceptName}:${datasetName}`] = {
      concept: conceptName,
      block: concept.block,
      dataset: datasetName,
      // Stored with forward slashes regardless of platform.
      file: path.relative(ROOT, outputFile).replace(/\\/gu, '/'),
      ...summarize(evaluatedRows),
    };
  }
}

// The same summary goes inside the run folder and next to it.
const summaryJson = JSON.stringify(summary, null, 2);
await fs.writeFile(path.join(runDir, 'summary.json'), summaryJson, 'utf8');
await fs.writeFile(path.join(RESULT_DIR, 'latest-summary.json'), summaryJson, 'utf8');

console.log(`Saved results to ${path.relative(process.cwd(), runDir)}`);
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
|
|
5
|
+
// Prints the most recent evaluation summary as a Markdown table.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const SUMMARY_FILE = path.join(ROOT, 'results', 'latest-summary.json');

const summary = JSON.parse(await fs.readFile(SUMMARY_FILE, 'utf8'));

// Order: dataset name, then accuracy descending, then concept name.
const byDatasetThenAccuracy = (a, b) =>
  a.dataset.localeCompare(b.dataset)
  || b.accuracy - a.accuracy
  || a.concept.localeCompare(b.concept);
const rows = Object.values(summary.concepts).sort(byDatasetThenAccuracy);

// Formats a 0..1 fraction as a percentage with one decimal.
const percent = (fraction) => `${(fraction * 100).toFixed(1)}%`;

const lines = [
  `Run: ${summary.runId}`,
  '',
  '| Dataset | Block | Concept | Correct | Accuracy | Answer rate | Escalated | Clarify | Avg ms |',
  '| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |',
];
for (const row of rows) {
  lines.push(`| ${row.dataset} | ${row.block} | ${row.concept} | ${row.correct}/${row.total} | ${percent(row.accuracy)} | ${percent(row.answerRate)} | ${row.escalated} | ${row.needsClarification} | ${row.avgLatencyMs} |`);
}
for (const line of lines) {
  console.log(line);
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@iola_adm/iola-cli",
|
|
3
|
-
"version": "0.1.82",
|
|
3
|
+
"version": "0.1.84",
|
|
4
4
|
"description": "CLI и AI-агент городского округа Йошкар-Ола.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/adm-iola/iola-cli#readme",
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
"bin",
|
|
25
25
|
"src",
|
|
26
26
|
"test",
|
|
27
|
+
"experiments",
|
|
27
28
|
"skills",
|
|
28
29
|
"wiki",
|
|
29
30
|
"docs/assets/readme-header.png",
|
package/src/cli.js
CHANGED
|
@@ -6140,7 +6140,7 @@ function pickDirectDataItem(question, dataContext, rows) {
|
|
|
6140
6140
|
/**
 * Tells whether an item's display name refers to the given institution number.
 *
 * Two spellings are recognised in the lower-cased name: "№ 5" (optionally
 * without the space) and an institution keyword followed by the number, with
 * or without "№" ("школа 5", "детский сад №5").
 *
 * @param {object} item - Data item; the first non-empty of `name`, `title`,
 *   `fns_full_name`, `fns_short_name` is used.
 * @param {string|number} number - Number to look for; regex metacharacters are escaped.
 * @returns {boolean} True when the name contains the number as a whole number.
 */
function itemNameHasNumber(item, number) {
  const name = String(item.name || item.title || item.fns_full_name || item.fns_short_name || "").toLocaleLowerCase("ru-RU");
  const escaped = String(number).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // `\b` only knows ASCII word characters, so it never matches in front of a
  // Cyrillic keyword. A Unicode-aware lookbehind is used for the leading edge;
  // the trailing `\b` follows the digits and works as is.
  const keywordStart = "(?<![\\p{L}\\p{N}_])";
  const pattern = new RegExp(
    `(?:№\\s*${escaped}(?!\\d)|${keywordStart}(?:школа|сош|лицей|гимназия|сад|детский сад)\\s*№?\\s*${escaped}\\b)`,
    "iu",
  );
  return pattern.test(name);
}
|
|
6145
6145
|
|
|
6146
6146
|
function formatDirectDataField(field, item) {
|
|
@@ -6148,7 +6148,7 @@ function formatDirectDataField(field, item) {
|
|
|
6148
6148
|
if (field === "head") {
|
|
6149
6149
|
const head = item.head || item.fns_head_name;
|
|
6150
6150
|
if (!head) return "";
|
|
6151
|
-
const position = item.fns_head_position || (item.layer === "kindergartens" ? "заведующий" : "директор");
|
|
6151
|
+
const position = capitalizeFirst(item.fns_head_position || (item.layer === "kindergartens" ? "заведующий" : "директор"));
|
|
6152
6152
|
return `${position}: ${head} (${name}).`;
|
|
6153
6153
|
}
|
|
6154
6154
|
if (field === "website") return item.website ? `Сайт: ${item.website}` : `Сайт для ${name} в открытых данных не указан.`;
|
|
@@ -6171,6 +6171,11 @@ function getDirectDataItemName(item) {
|
|
|
6171
6171
|
return item.name || item.title || item.fns_short_name || item.fns_full_name || "организация";
|
|
6172
6172
|
}
|
|
6173
6173
|
|
|
6174
|
+
/**
 * Upper-cases the first character of a value using Russian locale rules.
 *
 * @param {*} value - Text to capitalise; falsy values become an empty string.
 * @returns {string} The text with its first code point upper-cased.
 */
function capitalizeFirst(value) {
  const text = String(value || "");
  if (!text) return text;
  // Take a whole code point so that a surrogate pair is not cut in half.
  const first = String.fromCodePoint(text.codePointAt(0));
  return `${first.toLocaleUpperCase("ru-RU")}${text.slice(first.length)}`;
}
|
|
6178
|
+
|
|
6174
6179
|
async function resolveUsableAiProfile(config, options = {}) {
|
|
6175
6180
|
const explicit = Boolean(options.profile || options.provider);
|
|
6176
6181
|
const providerConfig = resolveAiProfile(config, options);
|
|
@@ -6589,6 +6594,7 @@ async function buildDataContext(question) {
|
|
|
6589
6594
|
try {
|
|
6590
6595
|
const context = await callPublicMcpTool("layer_answer_context", { question, limit: 8 });
|
|
6591
6596
|
const layerMap = Object.fromEntries((context.results || []).map((result) => [result.layer?.id || result.layer, result.items || []]));
|
|
6597
|
+
await enrichLayerMapWithExactMatches(layerMap, question, queryTerms, patterns);
|
|
6592
6598
|
return {
|
|
6593
6599
|
source: "remote-mcp",
|
|
6594
6600
|
contract_version: context.contract_version,
|
|
@@ -6626,6 +6632,31 @@ async function buildDataContext(question) {
|
|
|
6626
6632
|
}
|
|
6627
6633
|
}
|
|
6628
6634
|
|
|
6635
|
+
/**
 * Puts exact number matches at the front of each target layer's item list.
 *
 * For every layer returned by `resolveTargetLayerIds(patterns)` the layer is
 * queried, items whose name carries one of `patterns.numbers` are kept, and
 * those not already present (keyed by inn, then name, then fns_short_name) are
 * prepended to `layerMap[layer]`. Does nothing when the question has no numbers.
 *
 * @param {object} layerMap - Layer id → items; mutated in place.
 * @param {string} question - Original user question.
 * @param {*} queryTerms - Search terms passed through to `queryLayer`.
 * @param {object} patterns - Structured patterns; reads `numbers`.
 * @returns {Promise<void>}
 */
async function enrichLayerMapWithExactMatches(layerMap, question, queryTerms, patterns) {
  if (!patterns.numbers?.length) return;

  const dedupeKey = (item) => item.inn || item.name || item.fns_short_name;

  const enrichLayer = async (layer) => {
    try {
      const result = await queryLayer(layer, { query: question, terms: queryTerms, patterns, limit: 8 });
      const existing = layerMap[layer] || [];
      const seenKeys = new Set(existing.map(dedupeKey).filter(Boolean));
      const additions = [];
      for (const item of result.items || []) {
        const isExact = patterns.numbers.some((number) => itemNameHasNumber(item, number));
        if (!isExact) continue;
        const key = dedupeKey(item);
        if (!key || seenKeys.has(key)) continue;
        seenKeys.add(key);
        additions.push(item);
      }
      layerMap[layer] = [...additions, ...existing];
    } catch {
      // Remote MCP remains the primary source; exact local/API enrichment is best effort.
    }
  };

  await Promise.all(resolveTargetLayerIds(patterns).map((layer) => enrichLayer(layer)));
}
|
|
6659
|
+
|
|
6629
6660
|
function resolveTargetLayerIds(patterns = {}) {
|
|
6630
6661
|
const knownLayers = Object.keys(DATASETS);
|
|
6631
6662
|
if (patterns.targetLayers?.length) return patterns.targetLayers.filter((layer) => DATASETS[layer]);
|
|
@@ -6723,13 +6754,16 @@ function extractSearchTerms(question) {
|
|
|
6723
6754
|
|
|
6724
6755
|
function extractStructuredPatterns(question) {
|
|
6725
6756
|
const normalized = question.toLocaleLowerCase("ru-RU");
|
|
6726
|
-
const numbers = [...new Set([
|
|
6757
|
+
const numbers = [...new Set([
|
|
6758
|
+
...[...normalized.matchAll(/\b\d{1,3}\b/g)].map((match) => match[0]),
|
|
6759
|
+
...extractOrdinalNumbers(normalized),
|
|
6760
|
+
])];
|
|
6727
6761
|
const inns = [...new Set([...normalized.matchAll(/\b\d{10,12}\b/g)].map((match) => match[0]))];
|
|
6728
6762
|
const targetLayers = [];
|
|
6729
|
-
if (/(
|
|
6763
|
+
if (/(школ|сош|лице|гимнази)/iu.test(normalized)) {
|
|
6730
6764
|
targetLayers.push("schools");
|
|
6731
6765
|
}
|
|
6732
|
-
if (/(
|
|
6766
|
+
if (/(детсад|детск|сад|сады|доу|мбдоу)/iu.test(normalized)) {
|
|
6733
6767
|
targetLayers.push("kindergartens");
|
|
6734
6768
|
}
|
|
6735
6769
|
const streetMatches = [
|
|
@@ -6741,6 +6775,24 @@ function extractStructuredPatterns(question) {
|
|
|
6741
6775
|
return { numbers, inns, streets, targetLayers: [...new Set(targetLayers)] };
|
|
6742
6776
|
}
|
|
6743
6777
|
|
|
6778
|
+
/**
 * Finds spelled-out ordinals from "first" to "tenth" in a lower-cased Russian question.
 *
 * Every grammatical form of each ordinal is accepted, including masculine and
 * dative endings, the soft declension of "третий" (e.g. "третьей") and the
 * "ё" spelling of "четвёртый". Words that merely start with an ordinal stem
 * ("пятница", "вторник") are not matched.
 *
 * @param {string} normalizedQuestion - Question text, already lower-cased.
 * @returns {string[]} Matching numbers as digit strings, in ascending order.
 */
function extractOrdinalNumbers(normalizedQuestion) {
  // Adjective endings for hard-stem ordinals, and the soft declension used by "третий".
  const hardEndings = "ая|ой|ую|ое|ого|ому|ом|ым|ыми|ых|ый|ые";
  const softEndings = "ий|ья|ье|ьи|ью|ьей|ьего|ьему|ьем|ьим|ьими|ьих";
  const ordinals = [
    ["1", "перв", hardEndings],
    ["2", "втор", hardEndings],
    ["3", "трет", softEndings],
    ["4", "четв[её]рт", hardEndings],
    ["5", "пят", hardEndings],
    ["6", "шест", hardEndings],
    ["7", "седьм", hardEndings],
    ["8", "восьм", hardEndings],
    ["9", "девят", hardEndings],
    ["10", "десят", hardEndings],
  ];
  return ordinals
    .filter(([, stem, endings]) =>
      // The ending stays optional, and the word must be delimited by non-letters on both sides.
      new RegExp(`(^|[^а-яёa-z])${stem}(?:${endings})?(?=$|[^а-яёa-z])`, "iu").test(normalizedQuestion))
    .map(([number]) => number);
}
|
|
6795
|
+
|
|
6744
6796
|
function cleanupPattern(value) {
|
|
6745
6797
|
return value
|
|
6746
6798
|
.replace(/\b(школа|школы|сад|детский|детские|сады|лицей|гимназия|контакты|телефон|адрес|найди|покажи)\b/giu, " ")
|