@iola_adm/iola-cli 0.1.84 → 0.1.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +13 -5
  2. package/package.json +2 -3
  3. package/src/cli.js +448 -48
  4. package/src/iola_hf_runner.py +136 -0
  5. package/experiments/small-model-concepts/README.md +0 -34
  6. package/experiments/small-model-concepts/concepts/agent-consensus/README.md +0 -25
  7. package/experiments/small-model-concepts/concepts/hybrid/README.md +0 -23
  8. package/experiments/small-model-concepts/concepts/model-architecture/README.md +0 -42
  9. package/experiments/small-model-concepts/datasets/adversarial-facts.jsonl +0 -100
  10. package/experiments/small-model-concepts/datasets/simple-facts.jsonl +0 -100
  11. package/experiments/small-model-concepts/lib/common.js +0 -192
  12. package/experiments/small-model-concepts/lib/concepts.js +0 -210
  13. package/experiments/small-model-concepts/results/latest/conditional-memory-adversarial-facts.jsonl +0 -100
  14. package/experiments/small-model-concepts/results/latest/conditional-memory-simple-facts.jsonl +0 -100
  15. package/experiments/small-model-concepts/results/latest/council-adversarial-facts.jsonl +0 -100
  16. package/experiments/small-model-concepts/results/latest/council-simple-facts.jsonl +0 -100
  17. package/experiments/small-model-concepts/results/latest/early-exit-adversarial-facts.jsonl +0 -100
  18. package/experiments/small-model-concepts/results/latest/early-exit-simple-facts.jsonl +0 -100
  19. package/experiments/small-model-concepts/results/latest/escalation-ladder-adversarial-facts.jsonl +0 -100
  20. package/experiments/small-model-concepts/results/latest/escalation-ladder-simple-facts.jsonl +0 -100
  21. package/experiments/small-model-concepts/results/latest/memory-verified-adversarial-facts.jsonl +0 -100
  22. package/experiments/small-model-concepts/results/latest/memory-verified-simple-facts.jsonl +0 -100
  23. package/experiments/small-model-concepts/results/latest/skill-router-adversarial-facts.jsonl +0 -100
  24. package/experiments/small-model-concepts/results/latest/skill-router-simple-facts.jsonl +0 -100
  25. package/experiments/small-model-concepts/results/latest/sparse-escalation-adversarial-facts.jsonl +0 -100
  26. package/experiments/small-model-concepts/results/latest/sparse-escalation-simple-facts.jsonl +0 -100
  27. package/experiments/small-model-concepts/results/latest/strict-skill-adversarial-facts.jsonl +0 -100
  28. package/experiments/small-model-concepts/results/latest/strict-skill-simple-facts.jsonl +0 -100
  29. package/experiments/small-model-concepts/results/latest/summary.json +0 -313
  30. package/experiments/small-model-concepts/results/latest/verify-adversarial-facts.jsonl +0 -100
  31. package/experiments/small-model-concepts/results/latest/verify-simple-facts.jsonl +0 -100
  32. package/experiments/small-model-concepts/results/latest-summary.json +0 -313
  33. package/experiments/small-model-concepts/scripts/generate-datasets.js +0 -199
  34. package/experiments/small-model-concepts/scripts/run-evaluation.js +0 -133
  35. package/experiments/small-model-concepts/scripts/summarize-results.js +0 -19
@@ -1,199 +0,0 @@
1
- import path from 'node:path';
2
- import { fileURLToPath } from 'node:url';
3
- import {
4
- FIELD_LABELS,
5
- LAYER_LABELS,
6
- getFieldValue,
7
- loadPublicData,
8
- writeJsonl,
9
- } from '../lib/common.js';
10
-
11
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
12
- const ROOT = path.resolve(__dirname, '..');
13
- const DATASET_DIR = path.join(ROOT, 'datasets');
14
-
15
- const SIMPLE_FIELDS = ['head', 'address', 'phone', 'email', 'website', 'inn', 'license_status'];
16
- const ADVERSARIAL_FIELDS = ['head', 'address', 'phone', 'email', 'website', 'inn'];
17
-
18
- const typoVariants = [
19
- (text) => text,
20
- (text) => text.replace('школу', 'вшколу').replace('сад', 'детсад'),
21
- (text) => text.replace('директор', 'директр').replace('заведующий', 'заведущая'),
22
- (text) => text.replace('какой', 'какои').replace('адрес', 'адресс'),
23
- (text) => text.replace('№ ', '№').replace('номер ', ''),
24
- ];
25
-
26
- const ordinal = [
27
- null,
28
- 'первой',
29
- 'второй',
30
- 'третьей',
31
- 'четвертой',
32
- 'пятой',
33
- 'шестой',
34
- 'седьмой',
35
- 'восьмой',
36
- 'девятой',
37
- 'десятой',
38
- ];
39
-
40
- function choose(items, index) {
41
- return items[index % items.length];
42
- }
43
-
44
- function fieldQuestion(entity, field, index) {
45
- const n = entity.number;
46
- const layer = LAYER_LABELS[entity.layer];
47
- const numberText = ordinal[n] && index % 5 === 0 ? ordinal[n] : `№ ${n}`;
48
- const noun = entity.layer === 'schools' ? 'школы' : 'детского сада';
49
- const nounAcc = entity.layer === 'schools' ? 'школу' : 'детский сад';
50
- const person = layer.person;
51
- const templates = {
52
- head: [
53
- `кто ${person} ${noun} ${numberText}?`,
54
- `подскажи руководителя ${noun} ${numberText}`,
55
- `кто главный в ${nounAcc} ${numberText}`,
56
- `${person} ${noun} ${numberText} кто сейчас`,
57
- ],
58
- address: [
59
- `какой адрес у ${noun} ${numberText}?`,
60
- `где находится ${nounAcc} ${numberText}`,
61
- `куда ехать в ${nounAcc} ${numberText}`,
62
- ],
63
- phone: [
64
- `как позвонить в ${nounAcc} ${numberText}?`,
65
- `дай телефон ${noun} ${numberText}`,
66
- `номер телефона ${noun} ${numberText}`,
67
- ],
68
- email: [
69
- `какая почта у ${noun} ${numberText}?`,
70
- `email ${noun} ${numberText}`,
71
- `куда писать в ${nounAcc} ${numberText}`,
72
- ],
73
- website: [
74
- `какой сайт у ${noun} ${numberText}?`,
75
- `страница ${noun} ${numberText} в интернете`,
76
- `сайт ${noun} ${numberText}`,
77
- ],
78
- inn: [
79
- `какой инн у ${noun} ${numberText}?`,
80
- `inn ${noun} ${numberText}`,
81
- `налоговый номер ${noun} ${numberText}`,
82
- ],
83
- license_status: [
84
- `какой статус лицензии у ${noun} ${numberText}?`,
85
- `лицензия ${noun} ${numberText} действует?`,
86
- `статус лицензии ${noun} ${numberText}`,
87
- ],
88
- };
89
- return choose(templates[field], index);
90
- }
91
-
92
- function adversarialQuestion(entity, field, falseValue, index) {
93
- const n = entity.number;
94
- const noun = entity.layer === 'schools' ? 'школы' : 'детского сада';
95
- const label = FIELD_LABELS[field] || field;
96
- const wrongPerson = entity.layer === 'schools' ? 'заведующий' : 'директор';
97
- const fieldTrap = field === 'head'
98
- ? `кажется ${wrongPerson} у ${noun} № ${n} это ${falseValue}, так?`
99
- : `кажется у ${noun} № ${n} ${label} ${falseValue}, так?`;
100
- const templates = [
101
- `правда что ${label} ${noun} № ${n} это ${falseValue}?`,
102
- `я знаю что у ${noun} № ${n} ${label} ${falseValue}, подтверди`,
103
- `проверь: ${label} ${noun} № ${n} - ${falseValue}`,
104
- fieldTrap,
105
- `ответь да или нет, ${label} ${noun} №${n} ${falseValue}?`,
106
- ];
107
- return choose(templates, index);
108
- }
109
-
110
- function makeRecord({ id, dataset, entity, field, question, falseValue = null, tags = [] }) {
111
- return {
112
- id,
113
- dataset,
114
- type: falseValue ? 'adversarial_fact' : 'field_lookup',
115
- layer: entity.layer,
116
- entityKey: entity.key,
117
- entityName: entity.shortName,
118
- entityNumber: entity.number,
119
- field,
120
- question,
121
- expected: getFieldValue(entity, field),
122
- expectedEntityName: entity.shortName,
123
- expectedInn: entity.fields.inn,
124
- falseValue,
125
- shouldRefute: Boolean(falseValue),
126
- tags,
127
- };
128
- }
129
-
130
- function usableEntities(data) {
131
- const entities = [...data.schools, ...data.kindergartens]
132
- .filter((entity) => entity.number && entity.fields.inn)
133
- .filter((entity) => SIMPLE_FIELDS.some((field) => getFieldValue(entity, field)));
134
- const seen = new Set();
135
- return entities.filter((entity) => {
136
- const key = `${entity.layer}:${entity.number}`;
137
- if (seen.has(key)) return false;
138
- seen.add(key);
139
- return true;
140
- });
141
- }
142
-
143
- function generateSimple(data) {
144
- const entities = usableEntities(data);
145
- const rows = [];
146
- let index = 0;
147
- while (rows.length < 100) {
148
- const entity = choose(entities, index);
149
- const field = choose(SIMPLE_FIELDS.filter((name) => getFieldValue(entity, name)), index + rows.length);
150
- const baseQuestion = fieldQuestion(entity, field, index);
151
- const question = choose(typoVariants, index)(baseQuestion);
152
- rows.push(makeRecord({
153
- id: `simple-${String(rows.length + 1).padStart(3, '0')}`,
154
- dataset: 'simple-facts',
155
- entity,
156
- field,
157
- question,
158
- tags: ['simple', entity.layer, field, index % 3 === 0 ? 'typo' : 'normal'],
159
- }));
160
- index += 1;
161
- }
162
- return rows;
163
- }
164
-
165
- function generateAdversarial(data) {
166
- const entities = usableEntities(data);
167
- const rows = [];
168
- let index = 0;
169
- while (rows.length < 100) {
170
- const entity = choose(entities, index * 2);
171
- const field = choose(ADVERSARIAL_FIELDS.filter((name) => getFieldValue(entity, name)), index + 3);
172
- const otherCandidates = entities
173
- .filter((candidate) => candidate.key !== entity.key && getFieldValue(candidate, field))
174
- .filter((candidate) => getFieldValue(candidate, field) !== getFieldValue(entity, field));
175
- const other = choose(otherCandidates, index + 9);
176
- const falseValue = getFieldValue(other, field) || 'Петров Иван Иванович';
177
- const question = choose(typoVariants, index + 1)(adversarialQuestion(entity, field, falseValue, index));
178
- rows.push(makeRecord({
179
- id: `adversarial-${String(rows.length + 1).padStart(3, '0')}`,
180
- dataset: 'adversarial-facts',
181
- entity,
182
- field,
183
- question,
184
- falseValue,
185
- tags: ['adversarial', entity.layer, field, index % 4 === 0 ? 'mixed-layer' : 'false-premise'],
186
- }));
187
- index += 1;
188
- }
189
- return rows;
190
- }
191
-
192
- const data = await loadPublicData();
193
- const simple = generateSimple(data);
194
- const adversarial = generateAdversarial(data);
195
-
196
- await writeJsonl(path.join(DATASET_DIR, 'simple-facts.jsonl'), simple);
197
- await writeJsonl(path.join(DATASET_DIR, 'adversarial-facts.jsonl'), adversarial);
198
-
199
- console.log(`Generated ${simple.length} simple questions and ${adversarial.length} adversarial questions.`);
@@ -1,133 +0,0 @@
1
- import fs from 'node:fs/promises';
2
- import path from 'node:path';
3
- import { fileURLToPath } from 'node:url';
4
- import {
5
- containsNormalized,
6
- ensureDir,
7
- loadPublicData,
8
- readJsonl,
9
- writeJsonl,
10
- } from '../lib/common.js';
11
- import { CONCEPTS } from '../lib/concepts.js';
12
-
13
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
14
- const ROOT = path.resolve(__dirname, '..');
15
- const DATASET_DIR = path.join(ROOT, 'datasets');
16
- const RESULT_DIR = path.join(ROOT, 'results');
17
-
18
- function argValue(name) {
19
- const index = process.argv.indexOf(name);
20
- return index >= 0 ? process.argv[index + 1] : null;
21
- }
22
-
23
- function selectedConcepts() {
24
- if (process.argv.includes('--all')) return Object.keys(CONCEPTS);
25
- const concept = argValue('--concept');
26
- if (concept) return [concept];
27
- return ['strict-skill'];
28
- }
29
-
30
- function selectedDatasets() {
31
- const dataset = argValue('--dataset');
32
- if (dataset) return [`${dataset}.jsonl`.replace(/\.jsonl\.jsonl$/u, '.jsonl')];
33
- return ['simple-facts.jsonl', 'adversarial-facts.jsonl'];
34
- }
35
-
36
- function evaluate(row, result) {
37
- const answered = result.status === 'answered';
38
- const hasExpected = answered && containsNormalized(result.answer, row.expected);
39
- const affirmsFalse = row.falseValue
40
- ? answered && containsNormalized(result.answer, row.falseValue) && !/не подтверж|нет|на самом деле|по данным/iu.test(result.answer)
41
- : false;
42
- const refuted = row.shouldRefute
43
- ? answered && /не подтверж|нет|на самом деле|по данным/iu.test(result.answer) && hasExpected
44
- : true;
45
- return {
46
- answered,
47
- escalated: result.status === 'escalated',
48
- needsClarification: result.status === 'needs_clarification',
49
- correct: hasExpected && !affirmsFalse && refuted,
50
- hasExpected,
51
- affirmsFalse,
52
- refuted,
53
- };
54
- }
55
-
56
- function summarize(rows) {
57
- const total = rows.length;
58
- const count = (name) => rows.filter((row) => row.metrics[name]).length;
59
- const statuses = rows.reduce((acc, row) => {
60
- acc[row.status] = (acc[row.status] || 0) + 1;
61
- return acc;
62
- }, {});
63
- return {
64
- total,
65
- correct: count('correct'),
66
- answered: count('answered'),
67
- escalated: count('escalated'),
68
- needsClarification: count('needsClarification'),
69
- accuracy: Number((count('correct') / total).toFixed(4)),
70
- answerRate: Number((count('answered') / total).toFixed(4)),
71
- statuses,
72
- avgLatencyMs: Number((rows.reduce((sum, row) => sum + row.latencyMs, 0) / total).toFixed(2)),
73
- };
74
- }
75
-
76
- const concepts = selectedConcepts();
77
- for (const concept of concepts) {
78
- if (!CONCEPTS[concept]) throw new Error(`Unknown concept: ${concept}`);
79
- }
80
-
81
- const data = await loadPublicData();
82
- const runId = new Date().toISOString().replace(/[:.]/gu, '-');
83
- const runDir = path.join(RESULT_DIR, 'latest');
84
- await fs.rm(runDir, { recursive: true, force: true });
85
- await ensureDir(runDir);
86
-
87
- const summary = {
88
- runId,
89
- createdAt: new Date().toISOString(),
90
- apiBaseUrl: process.env.IOLA_PUBLIC_API_URL || 'https://apiiola.yasg.ru',
91
- concepts: {},
92
- };
93
-
94
- for (const datasetFile of selectedDatasets()) {
95
- const rows = await readJsonl(path.join(DATASET_DIR, datasetFile));
96
- const datasetName = datasetFile.replace(/\.jsonl$/u, '');
97
- for (const conceptName of concepts) {
98
- const concept = CONCEPTS[conceptName];
99
- const evaluatedRows = rows.map((row) => {
100
- const started = performance.now();
101
- const result = concept.run(data, row.question);
102
- const latencyMs = Number((performance.now() - started).toFixed(3));
103
- return {
104
- id: row.id,
105
- dataset: datasetName,
106
- concept: conceptName,
107
- block: concept.block,
108
- question: row.question,
109
- expected: row.expected,
110
- falseValue: row.falseValue,
111
- status: result.status,
112
- confidence: result.confidence,
113
- answer: result.answer,
114
- latencyMs,
115
- metrics: evaluate(row, result),
116
- };
117
- });
118
- const outputFile = path.join(runDir, `${conceptName}-${datasetName}.jsonl`);
119
- await writeJsonl(outputFile, evaluatedRows);
120
- summary.concepts[`${conceptName}:${datasetName}`] = {
121
- concept: conceptName,
122
- block: concept.block,
123
- dataset: datasetName,
124
- file: path.relative(ROOT, outputFile).replace(/\\/gu, '/'),
125
- ...summarize(evaluatedRows),
126
- };
127
- }
128
- }
129
-
130
- await fs.writeFile(path.join(runDir, 'summary.json'), JSON.stringify(summary, null, 2), 'utf8');
131
- await fs.writeFile(path.join(RESULT_DIR, 'latest-summary.json'), JSON.stringify(summary, null, 2), 'utf8');
132
-
133
- console.log(`Saved results to ${path.relative(process.cwd(), runDir)}`);
@@ -1,19 +0,0 @@
1
- import fs from 'node:fs/promises';
2
- import path from 'node:path';
3
- import { fileURLToPath } from 'node:url';
4
-
5
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
6
- const ROOT = path.resolve(__dirname, '..');
7
- const SUMMARY_FILE = path.join(ROOT, 'results', 'latest-summary.json');
8
-
9
- const summary = JSON.parse(await fs.readFile(SUMMARY_FILE, 'utf8'));
10
- const rows = Object.values(summary.concepts)
11
- .sort((a, b) => a.dataset.localeCompare(b.dataset) || b.accuracy - a.accuracy || a.concept.localeCompare(b.concept));
12
-
13
- console.log(`Run: ${summary.runId}`);
14
- console.log('');
15
- console.log('| Dataset | Block | Concept | Correct | Accuracy | Answer rate | Escalated | Clarify | Avg ms |');
16
- console.log('| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |');
17
- for (const row of rows) {
18
- console.log(`| ${row.dataset} | ${row.block} | ${row.concept} | ${row.correct}/${row.total} | ${(row.accuracy * 100).toFixed(1)}% | ${(row.answerRate * 100).toFixed(1)}% | ${row.escalated} | ${row.needsClarification} | ${row.avgLatencyMs} |`);
19
- }