@iola_adm/iola-cli 0.1.82 → 0.1.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/experiments/small-model-concepts/README.md +34 -0
  2. package/experiments/small-model-concepts/concepts/agent-consensus/README.md +25 -0
  3. package/experiments/small-model-concepts/concepts/hybrid/README.md +23 -0
  4. package/experiments/small-model-concepts/concepts/model-architecture/README.md +42 -0
  5. package/experiments/small-model-concepts/datasets/adversarial-facts.jsonl +100 -0
  6. package/experiments/small-model-concepts/datasets/simple-facts.jsonl +100 -0
  7. package/experiments/small-model-concepts/lib/common.js +192 -0
  8. package/experiments/small-model-concepts/lib/concepts.js +210 -0
  9. package/experiments/small-model-concepts/results/latest/conditional-memory-adversarial-facts.jsonl +100 -0
  10. package/experiments/small-model-concepts/results/latest/conditional-memory-simple-facts.jsonl +100 -0
  11. package/experiments/small-model-concepts/results/latest/council-adversarial-facts.jsonl +100 -0
  12. package/experiments/small-model-concepts/results/latest/council-simple-facts.jsonl +100 -0
  13. package/experiments/small-model-concepts/results/latest/early-exit-adversarial-facts.jsonl +100 -0
  14. package/experiments/small-model-concepts/results/latest/early-exit-simple-facts.jsonl +100 -0
  15. package/experiments/small-model-concepts/results/latest/escalation-ladder-adversarial-facts.jsonl +100 -0
  16. package/experiments/small-model-concepts/results/latest/escalation-ladder-simple-facts.jsonl +100 -0
  17. package/experiments/small-model-concepts/results/latest/memory-verified-adversarial-facts.jsonl +100 -0
  18. package/experiments/small-model-concepts/results/latest/memory-verified-simple-facts.jsonl +100 -0
  19. package/experiments/small-model-concepts/results/latest/skill-router-adversarial-facts.jsonl +100 -0
  20. package/experiments/small-model-concepts/results/latest/skill-router-simple-facts.jsonl +100 -0
  21. package/experiments/small-model-concepts/results/latest/sparse-escalation-adversarial-facts.jsonl +100 -0
  22. package/experiments/small-model-concepts/results/latest/sparse-escalation-simple-facts.jsonl +100 -0
  23. package/experiments/small-model-concepts/results/latest/strict-skill-adversarial-facts.jsonl +100 -0
  24. package/experiments/small-model-concepts/results/latest/strict-skill-simple-facts.jsonl +100 -0
  25. package/experiments/small-model-concepts/results/latest/summary.json +313 -0
  26. package/experiments/small-model-concepts/results/latest/verify-adversarial-facts.jsonl +100 -0
  27. package/experiments/small-model-concepts/results/latest/verify-simple-facts.jsonl +100 -0
  28. package/experiments/small-model-concepts/results/latest-summary.json +313 -0
  29. package/experiments/small-model-concepts/scripts/generate-datasets.js +199 -0
  30. package/experiments/small-model-concepts/scripts/run-evaluation.js +133 -0
  31. package/experiments/small-model-concepts/scripts/summarize-results.js +19 -0
  32. package/package.json +2 -1
  33. package/src/cli.js +57 -5
@@ -0,0 +1,199 @@
1
+ import path from 'node:path';
2
+ import { fileURLToPath } from 'node:url';
3
+ import {
4
+ FIELD_LABELS,
5
+ LAYER_LABELS,
6
+ getFieldValue,
7
+ loadPublicData,
8
+ writeJsonl,
9
+ } from '../lib/common.js';
10
+
11
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
12
+ const ROOT = path.resolve(__dirname, '..');
13
+ const DATASET_DIR = path.join(ROOT, 'datasets');
14
+
15
+ const SIMPLE_FIELDS = ['head', 'address', 'phone', 'email', 'website', 'inn', 'license_status'];
16
+ const ADVERSARIAL_FIELDS = ['head', 'address', 'phone', 'email', 'website', 'inn'];
17
+
18
/**
 * Question "noise" transforms, picked by index when generating datasets.
 * Each entry is a list of [search, replacement] pairs applied in order;
 * every pair rewrites only the first occurrence of its search string.
 * The first entry is empty, so that variant returns the text unchanged.
 */
const typoVariants = [
  [],
  [['школу', 'вшколу'], ['сад', 'детсад']],
  [['директор', 'директр'], ['заведующий', 'заведущая']],
  [['какой', 'какои'], ['адрес', 'адресс']],
  [['№ ', '№'], ['номер ', '']],
].map((substitutions) => (text) =>
  substitutions.reduce((current, [from, to]) => current.replace(from, to), text));
25
+
26
/**
 * Spelled-out Russian ordinals indexed by entity number (1..10).
 * Index 0 is null so that the array index equals the number itself.
 */
const ordinal = [
  null,
  ...'первой второй третьей четвертой пятой шестой седьмой восьмой девятой десятой'.split(' '),
];
39
+
40
/**
 * Picks an element by wrapping `index` around the length of `items`.
 *
 * @param {Array} items - Candidate values.
 * @param {number} index - Counter; reduced modulo `items.length`.
 * @returns {*} The selected element, or undefined when `items` is empty.
 */
function choose(items, index) {
  const slot = index % items.length;
  return items[slot];
}
43
+
44
/**
 * Builds a Russian lookup question about one field of a school or kindergarten.
 *
 * @param {object} entity - Reads `entity.layer` and `entity.number`.
 * @param {string} field - Key of the phrasing group to use (head, address, phone, ...).
 * @param {number} index - Counter that selects the phrasing and the number style.
 * @returns {string} The question text.
 */
function fieldQuestion(entity, field, index) {
  const isSchool = entity.layer === 'schools';
  const genitive = isSchool ? 'школы' : 'детского сада';
  const accusative = isSchool ? 'школу' : 'детский сад';
  const labels = LAYER_LABELS[entity.layer];

  // Every fifth question spells the number as a word when a spelling exists.
  const spelledOut = ordinal[entity.number];
  const num = spelledOut && index % 5 === 0 ? spelledOut : `№ ${entity.number}`;
  const role = labels.person;

  const phrasings = {
    head: [
      `кто ${role} ${genitive} ${num}?`,
      `подскажи руководителя ${genitive} ${num}`,
      `кто главный в ${accusative} ${num}`,
      `${role} ${genitive} ${num} кто сейчас`,
    ],
    address: [
      `какой адрес у ${genitive} ${num}?`,
      `где находится ${accusative} ${num}`,
      `куда ехать в ${accusative} ${num}`,
    ],
    phone: [
      `как позвонить в ${accusative} ${num}?`,
      `дай телефон ${genitive} ${num}`,
      `номер телефона ${genitive} ${num}`,
    ],
    email: [
      `какая почта у ${genitive} ${num}?`,
      `email ${genitive} ${num}`,
      `куда писать в ${accusative} ${num}`,
    ],
    website: [
      `какой сайт у ${genitive} ${num}?`,
      `страница ${genitive} ${num} в интернете`,
      `сайт ${genitive} ${num}`,
    ],
    inn: [
      `какой инн у ${genitive} ${num}?`,
      `inn ${genitive} ${num}`,
      `налоговый номер ${genitive} ${num}`,
    ],
    license_status: [
      `какой статус лицензии у ${genitive} ${num}?`,
      `лицензия ${genitive} ${num} действует?`,
      `статус лицензии ${genitive} ${num}`,
    ],
  };

  // Cycle through the phrasings of the requested field.
  const options = phrasings[field];
  return options[index % options.length];
}
91
+
92
/**
 * Builds a question that asserts a false value for an entity's field.
 *
 * @param {object} entity - Reads `entity.layer` and `entity.number`.
 * @param {string} field - Field the false claim is about.
 * @param {string} falseValue - Incorrect value embedded in the question.
 * @param {number} index - Counter; `index % 5` selects the phrasing.
 * @returns {string|undefined} The question, or undefined when `index % 5`
 *   is not one of 0..4.
 */
function adversarialQuestion(entity, field, falseValue, index) {
  const isSchool = entity.layer === 'schools';
  const number = entity.number;
  const genitive = isSchool ? 'школы' : 'детского сада';
  const label = FIELD_LABELS[field] || field;

  switch (index % 5) {
    case 0:
      return `правда что ${label} ${genitive} № ${number} это ${falseValue}?`;
    case 1:
      return `я знаю что у ${genitive} № ${number} ${label} ${falseValue}, подтверди`;
    case 2:
      return `проверь: ${label} ${genitive} № ${number} - ${falseValue}`;
    case 3: {
      if (field !== 'head') {
        return `кажется у ${genitive} № ${number} ${label} ${falseValue}, так?`;
      }
      // The head trap deliberately names the other layer's job title.
      const wrongPerson = isSchool ? 'заведующий' : 'директор';
      return `кажется ${wrongPerson} у ${genitive} № ${number} это ${falseValue}, так?`;
    }
    case 4:
      return `ответь да или нет, ${label} ${genitive} №${number} ${falseValue}?`;
    default:
      return undefined;
  }
}
109
+
110
/**
 * Assembles one dataset row.
 *
 * A truthy `falseValue` marks the row as an adversarial fact that should be
 * refuted; otherwise it is a plain field lookup. Property order is kept
 * stable because rows are serialized to JSONL.
 *
 * @param {object} options
 * @param {string} options.id - Row identifier.
 * @param {string} options.dataset - Dataset name.
 * @param {object} options.entity - Source entity (layer, key, shortName, number, fields).
 * @param {string} options.field - Field the question asks about.
 * @param {string} options.question - Question text.
 * @param {?string} [options.falseValue=null] - Incorrect value asserted by the question.
 * @param {string[]} [options.tags=[]] - Free-form tags.
 * @returns {object} The dataset row.
 */
function makeRecord({ id, dataset, entity, field, question, falseValue = null, tags = [] }) {
  const isAdversarial = Boolean(falseValue);
  const record = { id, dataset };
  record.type = isAdversarial ? 'adversarial_fact' : 'field_lookup';
  record.layer = entity.layer;
  record.entityKey = entity.key;
  record.entityName = entity.shortName;
  record.entityNumber = entity.number;
  record.field = field;
  record.question = question;
  record.expected = getFieldValue(entity, field);
  record.expectedEntityName = entity.shortName;
  record.expectedInn = entity.fields.inn;
  record.falseValue = falseValue;
  record.shouldRefute = isAdversarial;
  record.tags = tags;
  return record;
}
129
+
130
/**
 * Collects the entities that questions can be generated for.
 *
 * An entity is kept when it has a number and an INN, exposes a value for at
 * least one SIMPLE_FIELDS entry, and is the first one seen for its
 * `layer:number` pair. Schools come before kindergartens.
 *
 * @param {{schools: object[], kindergartens: object[]}} data
 * @returns {object[]} Entities in their original order, de-duplicated.
 */
function usableEntities(data) {
  const seenKeys = new Set();
  const kept = [];
  for (const entity of [...data.schools, ...data.kindergartens]) {
    if (!entity.number || !entity.fields.inn) continue;
    if (!SIMPLE_FIELDS.some((field) => getFieldValue(entity, field))) continue;
    const key = `${entity.layer}:${entity.number}`;
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    kept.push(entity);
  }
  return kept;
}
142
+
143
/**
 * Generates the 100-row "simple-facts" dataset of plain field lookups.
 *
 * Entities, fields, phrasings and typo variants are all selected from the
 * running index, so the output depends only on the input data.
 *
 * @param {{schools: object[], kindergartens: object[]}} data - Public data.
 * @returns {object[]} Exactly 100 dataset rows.
 * @throws {Error} When the data contains no usable entities.
 */
function generateSimple(data) {
  const entities = usableEntities(data);
  if (entities.length === 0) {
    // Fail with a clear message instead of passing an undefined entity downstream.
    throw new Error('Cannot generate simple-facts: public data contains no usable entities.');
  }

  const rows = [];
  for (let index = 0; rows.length < 100; index += 1) {
    const entity = choose(entities, index);
    const availableFields = SIMPLE_FIELDS.filter((name) => getFieldValue(entity, name));
    const field = choose(availableFields, index + rows.length);
    const baseQuestion = fieldQuestion(entity, field, index);
    const question = choose(typoVariants, index)(baseQuestion);
    // Tag by what actually happened to the text: the selected variant may be
    // the identity transform or may not find anything to replace.
    const typoTag = question === baseQuestion ? 'normal' : 'typo';
    rows.push(makeRecord({
      id: `simple-${String(rows.length + 1).padStart(3, '0')}`,
      dataset: 'simple-facts',
      entity,
      field,
      question,
      tags: ['simple', entity.layer, field, typoTag],
    }));
  }
  return rows;
}
164
+
165
/**
 * Generates the 100-row "adversarial-facts" dataset.
 *
 * Each question asserts a false value for a field; the false value is taken
 * from another entity whose value for that field differs.
 *
 * @param {{schools: object[], kindergartens: object[]}} data - Public data.
 * @returns {object[]} Exactly 100 dataset rows.
 * @throws {Error} When the data contains no usable entities.
 */
function generateAdversarial(data) {
  const entities = usableEntities(data);
  if (entities.length === 0) {
    // Fail with a clear message instead of passing an undefined entity downstream.
    throw new Error('Cannot generate adversarial-facts: public data contains no usable entities.');
  }

  const rows = [];
  for (let index = 0; rows.length < 100; index += 1) {
    const entity = choose(entities, index * 2);
    const availableFields = ADVERSARIAL_FIELDS.filter((name) => getFieldValue(entity, name));
    const field = choose(availableFields, index + 3);
    const trueValue = getFieldValue(entity, field);

    // Donors: other entities that have a different value for the same field.
    const otherCandidates = entities.filter((candidate) => {
      if (candidate.key === entity.key) return false;
      const candidateValue = getFieldValue(candidate, field);
      return Boolean(candidateValue) && candidateValue !== trueValue;
    });
    const other = choose(otherCandidates, index + 9);

    // With no donor, `other` is undefined; use the literal fallback directly
    // rather than calling getFieldValue on an undefined entity.
    const falseValue = (other ? getFieldValue(other, field) : null) || 'Петров Иван Иванович';
    const question = choose(typoVariants, index + 1)(adversarialQuestion(entity, field, falseValue, index));
    rows.push(makeRecord({
      id: `adversarial-${String(rows.length + 1).padStart(3, '0')}`,
      dataset: 'adversarial-facts',
      entity,
      field,
      question,
      falseValue,
      // NOTE(review): this tag follows index % 4 and is not derived from the
      // phrasing that was selected — confirm that is intended.
      tags: ['adversarial', entity.layer, field, index % 4 === 0 ? 'mixed-layer' : 'false-premise'],
    }));
  }
  return rows;
}
191
+
192
// Entry point: load the public data, build both datasets, then write them out.
const data = await loadPublicData();
const simple = generateSimple(data);
const adversarial = generateAdversarial(data);

// Files are written one after another, in this order.
const outputs = [
  ['simple-facts.jsonl', simple],
  ['adversarial-facts.jsonl', adversarial],
];
for (const [fileName, rows] of outputs) {
  await writeJsonl(path.join(DATASET_DIR, fileName), rows);
}

console.log(`Generated ${simple.length} simple questions and ${adversarial.length} adversarial questions.`);
@@ -0,0 +1,133 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+ import {
5
+ containsNormalized,
6
+ ensureDir,
7
+ loadPublicData,
8
+ readJsonl,
9
+ writeJsonl,
10
+ } from '../lib/common.js';
11
+ import { CONCEPTS } from '../lib/concepts.js';
12
+
13
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
14
+ const ROOT = path.resolve(__dirname, '..');
15
+ const DATASET_DIR = path.join(ROOT, 'datasets');
16
+ const RESULT_DIR = path.join(ROOT, 'results');
17
+
18
/**
 * Reads the value that follows a command-line flag in `process.argv`.
 *
 * @param {string} name - Flag to look for, e.g. `--concept`.
 * @returns {?string} The following argument, or null when the flag is
 *   absent, is the last argument, or is directly followed by another
 *   `--flag` (which is not consumed as a value).
 */
function argValue(name) {
  const index = process.argv.indexOf(name);
  if (index < 0) return null;
  const value = process.argv[index + 1];
  // A trailing flag has no value; a following "--flag" is a separate option.
  if (value === undefined || value.startsWith('--')) return null;
  return value;
}
22
+
23
/**
 * Decides which concepts to evaluate from the command line.
 *
 * `--all` selects every key of CONCEPTS, `--concept <name>` selects one, and
 * with neither the default is `strict-skill`.
 *
 * @returns {string[]} Concept names to run.
 */
function selectedConcepts() {
  const wantsAll = process.argv.includes('--all');
  if (wantsAll) return Object.keys(CONCEPTS);
  const requested = argValue('--concept');
  return requested ? [requested] : ['strict-skill'];
}
29
+
30
/**
 * Decides which dataset files to evaluate from the command line.
 *
 * `--dataset <name>` selects one file; the `.jsonl` extension is appended
 * unless the name already ends with it. With no flag both bundled datasets
 * are used.
 *
 * @returns {string[]} Dataset file names.
 */
function selectedDatasets() {
  const dataset = argValue('--dataset');
  if (!dataset) {
    return ['simple-facts.jsonl', 'adversarial-facts.jsonl'];
  }
  const fileName = dataset.endsWith('.jsonl') ? dataset : `${dataset}.jsonl`;
  return [fileName];
}
35
+
36
/**
 * Scores one concept result against its dataset row.
 *
 * @param {object} row - Dataset row; reads `expected`, `falseValue`, `shouldRefute`.
 * @param {object} result - Concept output; reads `status` and `answer`.
 * @returns {{answered: boolean, escalated: boolean, needsClarification: boolean,
 *   correct: boolean, hasExpected: boolean, affirmsFalse: boolean, refuted: boolean}}
 *   `correct` requires the expected value in the answer, no affirmation of the
 *   false value and, for adversarial rows, an explicit refutation.
 */
function evaluate(row, result) {
  // "нет" must be a standalone word: as a bare substring it also matches
  // "кабинет" or "интернет" and would count such answers as refutations.
  const refutationMarker = /не подтверж|(?<![а-яё])нет(?![а-яё])|на самом деле|по данным/iu;

  const answered = result.status === 'answered';
  const hasExpected = answered && containsNormalized(result.answer, row.expected);
  const soundsLikeRefutation = answered && refutationMarker.test(result.answer);

  // Repeating the false value without any refutation wording affirms it.
  const affirmsFalse = row.falseValue
    ? answered && containsNormalized(result.answer, row.falseValue) && !soundsLikeRefutation
    : false;
  // Rows without a false premise have nothing to refute.
  const refuted = row.shouldRefute
    ? soundsLikeRefutation && hasExpected
    : true;

  return {
    answered,
    escalated: result.status === 'escalated',
    needsClarification: result.status === 'needs_clarification',
    correct: hasExpected && !affirmsFalse && refuted,
    hasExpected,
    affirmsFalse,
    refuted,
  };
}
55
+
56
/**
 * Aggregates evaluated rows into summary statistics.
 *
 * @param {object[]} rows - Evaluated rows; reads `metrics`, `status`, `latencyMs`.
 * @returns {{total: number, correct: number, answered: number, escalated: number,
 *   needsClarification: number, accuracy: number, answerRate: number,
 *   statuses: Object<string, number>, avgLatencyMs: number}}
 *   Rates and the average latency are 0 for an empty input.
 */
function summarize(rows) {
  const total = rows.length;
  const count = (name) => rows.filter((row) => row.metrics[name]).length;
  // Without the guard an empty dataset divides by zero and yields NaN, which
  // JSON.stringify writes as null into summary.json.
  const perRow = (value, digits) => (total === 0 ? 0 : Number((value / total).toFixed(digits)));

  const correct = count('correct');
  const answered = count('answered');
  const statuses = rows.reduce((acc, row) => {
    acc[row.status] = (acc[row.status] || 0) + 1;
    return acc;
  }, {});
  const totalLatencyMs = rows.reduce((sum, row) => sum + row.latencyMs, 0);

  return {
    total,
    correct,
    answered,
    escalated: count('escalated'),
    needsClarification: count('needsClarification'),
    accuracy: perRow(correct, 4),
    answerRate: perRow(answered, 4),
    statuses,
    avgLatencyMs: perRow(totalLatencyMs, 2),
  };
}
75
+
76
// Resolve and validate the requested concepts before touching the filesystem.
const concepts = selectedConcepts();
const unknownConcept = concepts.find((name) => !CONCEPTS[name]);
if (unknownConcept !== undefined) throw new Error(`Unknown concept: ${unknownConcept}`);

const data = await loadPublicData();
const runId = new Date().toISOString().replace(/[:.]/gu, '-');
const runDir = path.join(RESULT_DIR, 'latest');
// Every run replaces the previous contents of results/latest.
await fs.rm(runDir, { recursive: true, force: true });
await ensureDir(runDir);

const summary = {
  runId,
  createdAt: new Date().toISOString(),
  apiBaseUrl: process.env.IOLA_PUBLIC_API_URL || 'https://apiiola.yasg.ru',
  concepts: {},
};

for (const datasetFile of selectedDatasets()) {
  const rows = await readJsonl(path.join(DATASET_DIR, datasetFile));
  const datasetName = datasetFile.replace(/\.jsonl$/u, '');

  for (const conceptName of concepts) {
    const concept = CONCEPTS[conceptName];

    // Run each question through the concept, timing only the run() call.
    const evaluatedRows = [];
    for (const row of rows) {
      const started = performance.now();
      const result = concept.run(data, row.question);
      const latencyMs = Number((performance.now() - started).toFixed(3));
      evaluatedRows.push({
        id: row.id,
        dataset: datasetName,
        concept: conceptName,
        block: concept.block,
        question: row.question,
        expected: row.expected,
        falseValue: row.falseValue,
        status: result.status,
        confidence: result.confidence,
        answer: result.answer,
        latencyMs,
        metrics: evaluate(row, result),
      });
    }

    const outputFile = path.join(runDir, `${conceptName}-${datasetName}.jsonl`);
    await writeJsonl(outputFile, evaluatedRows);

    const summaryKey = `${conceptName}:${datasetName}`;
    summary.concepts[summaryKey] = {
      concept: conceptName,
      block: concept.block,
      dataset: datasetName,
      // Forward slashes keep the recorded path the same on every platform.
      file: path.relative(ROOT, outputFile).replace(/\\/gu, '/'),
      ...summarize(evaluatedRows),
    };
  }
}

// The same summary is stored inside the run directory and next to it.
const serializedSummary = JSON.stringify(summary, null, 2);
await fs.writeFile(path.join(runDir, 'summary.json'), serializedSummary, 'utf8');
await fs.writeFile(path.join(RESULT_DIR, 'latest-summary.json'), serializedSummary, 'utf8');

console.log(`Saved results to ${path.relative(process.cwd(), runDir)}`);
@@ -0,0 +1,19 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+
5
// Prints the latest evaluation summary as a Markdown table on stdout.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const SUMMARY_FILE = path.join(ROOT, 'results', 'latest-summary.json');

const summary = JSON.parse(await fs.readFile(SUMMARY_FILE, 'utf8'));

// Order: dataset name, then accuracy (highest first), then concept name.
function compareSummaryRows(a, b) {
  const byDataset = a.dataset.localeCompare(b.dataset);
  if (byDataset) return byDataset;
  const byAccuracy = b.accuracy - a.accuracy;
  if (byAccuracy) return byAccuracy;
  return a.concept.localeCompare(b.concept);
}

const asPercent = (fraction) => `${(fraction * 100).toFixed(1)}%`;
const rows = Object.values(summary.concepts).sort(compareSummaryRows);

console.log(`Run: ${summary.runId}`);
console.log('');
console.log('| Dataset | Block | Concept | Correct | Accuracy | Answer rate | Escalated | Clarify | Avg ms |');
console.log('| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |');
for (const row of rows) {
  const cells = [
    row.dataset,
    row.block,
    row.concept,
    `${row.correct}/${row.total}`,
    asPercent(row.accuracy),
    asPercent(row.answerRate),
    row.escalated,
    row.needsClarification,
    row.avgLatencyMs,
  ];
  // String() keeps missing values printed the way template interpolation would.
  console.log(`| ${cells.map((cell) => String(cell)).join(' | ')} |`);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@iola_adm/iola-cli",
3
- "version": "0.1.82",
3
+ "version": "0.1.84",
4
4
  "description": "CLI и AI-агент городского округа Йошкар-Ола.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/adm-iola/iola-cli#readme",
@@ -24,6 +24,7 @@
24
24
  "bin",
25
25
  "src",
26
26
  "test",
27
+ "experiments",
27
28
  "skills",
28
29
  "wiki",
29
30
  "docs/assets/readme-header.png",
package/src/cli.js CHANGED
@@ -6140,7 +6140,7 @@ function pickDirectDataItem(question, dataContext, rows) {
6140
6140
  function itemNameHasNumber(item, number) {
6141
6141
  const name = String(item.name || item.title || item.fns_full_name || item.fns_short_name || "").toLocaleLowerCase("ru-RU");
6142
6142
  const escaped = String(number).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
6143
- return new RegExp(`(?:№\\s*${escaped}|\\b(?:школа|сош|лицей|гимназия|сад|детский сад)\\s*№?\\s*${escaped}\\b)`, "iu").test(name);
6143
+ return new RegExp(`(?:№\\s*${escaped}(?!\\d)|\\b(?:школа|сош|лицей|гимназия|сад|детский сад)\\s*№?\\s*${escaped}\\b)`, "iu").test(name);
6144
6144
  }
6145
6145
 
6146
6146
  function formatDirectDataField(field, item) {
@@ -6148,7 +6148,7 @@ function formatDirectDataField(field, item) {
6148
6148
  if (field === "head") {
6149
6149
  const head = item.head || item.fns_head_name;
6150
6150
  if (!head) return "";
6151
- const position = item.fns_head_position || (item.layer === "kindergartens" ? "заведующий" : "директор");
6151
+ const position = capitalizeFirst(item.fns_head_position || (item.layer === "kindergartens" ? "заведующий" : "директор"));
6152
6152
  return `${position}: ${head} (${name}).`;
6153
6153
  }
6154
6154
  if (field === "website") return item.website ? `Сайт: ${item.website}` : `Сайт для ${name} в открытых данных не указан.`;
@@ -6171,6 +6171,11 @@ function getDirectDataItemName(item) {
6171
6171
  return item.name || item.title || item.fns_short_name || item.fns_full_name || "организация";
6172
6172
  }
6173
6173
 
6174
/**
 * Upper-cases the first character of a value using the ru-RU locale.
 *
 * @param {*} value - Value to format; falsy values become an empty string.
 * @returns {string} The text with its first character upper-cased.
 */
function capitalizeFirst(value) {
  const text = String(value || "");
  if (text === "") return text;
  const first = text.charAt(0).toLocaleUpperCase("ru-RU");
  return `${first}${text.slice(1)}`;
}
6178
+
6174
6179
  async function resolveUsableAiProfile(config, options = {}) {
6175
6180
  const explicit = Boolean(options.profile || options.provider);
6176
6181
  const providerConfig = resolveAiProfile(config, options);
@@ -6589,6 +6594,7 @@ async function buildDataContext(question) {
6589
6594
  try {
6590
6595
  const context = await callPublicMcpTool("layer_answer_context", { question, limit: 8 });
6591
6596
  const layerMap = Object.fromEntries((context.results || []).map((result) => [result.layer?.id || result.layer, result.items || []]));
6597
+ await enrichLayerMapWithExactMatches(layerMap, question, queryTerms, patterns);
6592
6598
  return {
6593
6599
  source: "remote-mcp",
6594
6600
  contract_version: context.contract_version,
@@ -6626,6 +6632,31 @@ async function buildDataContext(question) {
6626
6632
  }
6627
6633
  }
6628
6634
 
6635
/**
 * Adds exact number matches to the front of each target layer's items.
 *
 * Does nothing when the question contains no numbers. Otherwise every target
 * layer is queried in parallel, and items whose name carries one of the
 * numbers are prepended to `layerMap[layer]`, skipping items that have no
 * key or whose key (inn, name or fns_short_name) is already present.
 * `layerMap` is mutated in place.
 *
 * @param {Object<string, object[]>} layerMap - Items per layer id.
 * @param {string} question - Original user question.
 * @param {*} queryTerms - Search terms passed through to queryLayer.
 * @param {object} patterns - Structured patterns; reads `numbers`.
 * @returns {Promise<void>}
 */
async function enrichLayerMapWithExactMatches(layerMap, question, queryTerms, patterns) {
  if (!patterns.numbers?.length) return;

  const keyOf = (item) => item.inn || item.name || item.fns_short_name;
  const mentionsNumber = (item) => patterns.numbers.some((number) => itemNameHasNumber(item, number));

  const enrichLayer = async (layer) => {
    try {
      const result = await queryLayer(layer, { query: question, terms: queryTerms, patterns, limit: 8 });
      const existing = layerMap[layer] || [];
      const knownKeys = new Set(existing.map(keyOf).filter(Boolean));
      const additions = [];
      for (const item of result.items || []) {
        if (!mentionsNumber(item)) continue;
        const key = keyOf(item);
        if (!key || knownKeys.has(key)) continue;
        knownKeys.add(key);
        additions.push(item);
      }
      layerMap[layer] = [...additions, ...existing];
    } catch {
      // Remote MCP remains the primary source; exact local/API enrichment is best effort.
    }
  };

  const targetLayerIds = resolveTargetLayerIds(patterns);
  await Promise.all(targetLayerIds.map((layer) => enrichLayer(layer)));
}
6659
+
6629
6660
  function resolveTargetLayerIds(patterns = {}) {
6630
6661
  const knownLayers = Object.keys(DATASETS);
6631
6662
  if (patterns.targetLayers?.length) return patterns.targetLayers.filter((layer) => DATASETS[layer]);
@@ -6723,13 +6754,16 @@ function extractSearchTerms(question) {
6723
6754
 
6724
6755
  function extractStructuredPatterns(question) {
6725
6756
  const normalized = question.toLocaleLowerCase("ru-RU");
6726
- const numbers = [...new Set([...normalized.matchAll(/\b\d{1,3}\b/g)].map((match) => match[0]))];
6757
+ const numbers = [...new Set([
6758
+ ...[...normalized.matchAll(/\b\d{1,3}\b/g)].map((match) => match[0]),
6759
+ ...extractOrdinalNumbers(normalized),
6760
+ ])];
6727
6761
  const inns = [...new Set([...normalized.matchAll(/\b\d{10,12}\b/g)].map((match) => match[0]))];
6728
6762
  const targetLayers = [];
6729
- if (/(^|[^а-яёa-z])(школа|школы|лицей|лицея|гимназия|гимназии)(?=$|[^а-яёa-z])/iu.test(normalized)) {
6763
+ if (/(школ|сош|лице|гимнази)/iu.test(normalized)) {
6730
6764
  targetLayers.push("schools");
6731
6765
  }
6732
- if (/(^|[^а-яёa-z])(сад|сады|детсад|детский|детские|доу|мбдоу)(?=$|[^а-яёa-z])/iu.test(normalized)) {
6766
+ if (/(детсад|детск|сад|сады|доу|мбдоу)/iu.test(normalized)) {
6733
6767
  targetLayers.push("kindergartens");
6734
6768
  }
6735
6769
  const streetMatches = [
@@ -6741,6 +6775,24 @@ function extractStructuredPatterns(question) {
6741
6775
  return { numbers, inns, streets, targetLayers: [...new Set(targetLayers)] };
6742
6776
  }
6743
6777
 
6778
/**
 * Finds spelled-out Russian ordinals (first..tenth) in a question.
 *
 * @param {string} normalizedQuestion - Lowercased question text.
 * @returns {string[]} Matching numbers as strings ("1".."10"), ascending.
 */
function extractOrdinalNumbers(normalizedQuestion) {
  // Endings shared by the hard-stem ordinals.
  const hardEndings = "(?:ая|ой|ую|ое|ого|ом|ым|ых)?";
  const ordinals = [
    ["1", `перв${hardEndings}`],
    ["2", `втор${hardEndings}`],
    // "ьей" covers "третьей", the form used in phrases like "школы третьей".
    ["3", "трет(?:ья|ий|ьей|ью|ье|ьего|ьем|ьим|ьих)?"],
    // Accept both the "е" and the "ё" spelling.
    ["4", `четв[её]рт${hardEndings}`],
    ["5", `пят${hardEndings}`],
    ["6", `шест${hardEndings}`],
    ["7", `седьм${hardEndings}`],
    ["8", `восьм${hardEndings}`],
    ["9", `девят${hardEndings}`],
    ["10", `десят${hardEndings}`],
  ];
  return ordinals
    // The letter guards on both sides keep stems from matching inside longer words.
    .filter(([, pattern]) => new RegExp(`(^|[^а-яёa-z])${pattern}(?=$|[^а-яёa-z])`, "iu").test(normalizedQuestion))
    .map(([number]) => number);
}
6795
+
6744
6796
  function cleanupPattern(value) {
6745
6797
  return value
6746
6798
  .replace(/\b(школа|школы|сад|детский|детские|сады|лицей|гимназия|контакты|телефон|адрес|найди|покажи)\b/giu, " ")