@adia-ai/a2ui-retrieval 0.6.4 → 0.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/CHANGELOG.md +25 -0
  2. package/domain-router.js +362 -117
  3. package/embedding/chunk-embedding-retriever.js +47 -79
  4. package/embedding/embedding-provider.js +35 -71
  5. package/embedding/index.js +2 -10
  6. package/feedback/dialog-recorder.js +61 -145
  7. package/feedback/feedback-analyzer.js +46 -102
  8. package/feedback/feedback-store.js +91 -107
  9. package/feedback/feedback.js +36 -117
  10. package/feedback/gap-registry.js +40 -82
  11. package/feedback/index.js +14 -12
  12. package/index.d.ts +4 -0
  13. package/index.js +53 -16
  14. package/intent/clarity.js +61 -129
  15. package/intent/decomposer.js +51 -143
  16. package/intent/index.js +18 -14
  17. package/intent/intent-alignment.js +79 -150
  18. package/intent/intent-categorizer.js +34 -62
  19. package/intent/intent-gate.js +43 -102
  20. package/intent/prompt-analyzer.js +68 -126
  21. package/package.json +4 -2
  22. package/wiring-catalog.js +95 -146
  23. package/embedding/chunk-embedding-retriever.ts +0 -156
  24. package/embedding/embedding-provider.ts +0 -111
  25. package/embedding/index.ts +0 -10
  26. package/feedback/dialog-recorder.ts +0 -172
  27. package/feedback/feedback-analyzer.ts +0 -250
  28. package/feedback/feedback-store.ts +0 -229
  29. package/feedback/feedback.ts +0 -201
  30. package/feedback/gap-registry.ts +0 -137
  31. package/feedback/index.ts +0 -14
  32. package/intent/clarity.ts +0 -224
  33. package/intent/decomposer.ts +0 -229
  34. package/intent/index.ts +0 -20
  35. package/intent/intent-alignment.ts +0 -267
  36. package/intent/intent-categorizer.ts +0 -104
  37. package/intent/intent-gate.ts +0 -151
  38. package/intent/prompt-analyzer.ts +0 -231
@@ -1,172 +0,0 @@
1
- /**
2
- * Dialog Recorder — write every generation turn to disk for debugging,
3
- * regression analysis, and training-data bootstrapping.
4
- *
5
- * Output: logs/dialogs/<sessionId>/<NNN>-<turnId>.json
6
- * - <sessionId>: storeId / executionId of the multi-turn session, or
7
- * 'standalone-<isodate>' for one-shot generations.
8
- * - <NNN>: zero-padded turn index within the session (000, 001, …)
9
- * - <turnId>: short slug from the intent + a random suffix for uniqueness
10
- *
11
- * Gated by ADIA_LOG_DIALOGS=1 — opt-in, zero overhead when disabled. Browser
12
- * environment is also a no-op (no fs); only Node-side generation paths log.
13
- */
14
-
15
// Ambient declaration: `process` is not in scope under "types": []
declare const process: { versions?: { node?: string }; env?: Record<string, string | undefined> } | undefined;

// True only when a Node runtime is detected (process.versions.node is set).
const IS_NODE = typeof process !== 'undefined' && Boolean(process.versions?.node);
// Raw value of the ADIA_LOG_DIALOGS switch; never read outside Node.
const _dialogFlag = IS_NODE ? process?.env?.['ADIA_LOG_DIALOGS'] : undefined;
// Recording is opt-in: the switch must be exactly '1' or 'true'.
const ENABLED = _dialogFlag === '1' || _dialogFlag === 'true';
20
-
21
// Lazy-loaded Node modules (only when ENABLED)
let _fs: typeof import('node:fs/promises') | null = null;
let _path: typeof import('node:path') | null = null;
let _url: typeof import('node:url') | null = null;
let _logsRoot: string | null = null;
// Single shared initialisation. Every caller awaits the same promise, so no
// caller can observe `_fs` set while `_path` / `_logsRoot` are still null.
let _modulesReady: Promise<void> | null = null;

/**
 * Load the Node built-ins used by the recorder and resolve the logs root.
 *
 * Safe to call repeatedly and concurrently: the work runs once and all
 * callers share its outcome. If loading fails the cached promise is cleared,
 * so a later call retries instead of re-throwing a stale error.
 *
 * @returns A promise that settles once `_fs`, `_path`, `_url` and `_logsRoot`
 *          are all populated.
 */
async function _ensureModules(): Promise<void> {
  if (!_modulesReady) {
    _modulesReady = (async () => {
      const [fsMod, pathMod, urlMod] = await Promise.all([
        import(/* @vite-ignore */ 'node:fs/promises'),
        import(/* @vite-ignore */ 'node:path'),
        import(/* @vite-ignore */ 'node:url'),
      ]);
      // logs/ lives at the repo root: packages/a2ui/retrieval/feedback → up 4 → repo root
      const here = pathMod.dirname(urlMod.fileURLToPath(import.meta.url));
      const logsRoot = pathMod.resolve(here, '..', '..', '..', '..', 'logs', 'dialogs');
      // Publish everything together, only after all of it is available.
      _fs = fsMod;
      _path = pathMod;
      _url = urlMod;
      _logsRoot = logsRoot;
    })().catch((err: unknown) => {
      _modulesReady = null;
      throw err;
    });
  }
  await _modulesReady;
}
36
-
37
// Per-session bookkeeping, kept in memory only:
//  - _sessionMetaWritten: session ids for which a _session.json write was started
//  - _turnCounter: next turn index to hand out for each session id
const _sessionMetaWritten: Set<string> = new Set();
const _turnCounter: Map<string, number> = new Map();
40
-
41
/** Prompt-analysis summary attached to a turn. */
type _TurnAnalysis = {
  steelman?: string;
  concepts?: string[];
  impliedComponents?: string[];
  styleHints?: string[];
};

/** One retrieved pattern; `score` takes precedence over `confidence` when the turn is written. */
type _TurnPattern = {
  name?: string;
  score?: number;
  confidence?: number;
  keywords?: string[] | null;
};

/**
 * Everything a caller may hand to `recordTurn`. Every field is optional;
 * fields typed `unknown` are serialised as given.
 */
export type TurnRecord = {
  // Identity / request
  sessionId?: string;
  intent?: string;
  mode?: string;
  engine?: string;
  model?: string;
  // Pipeline-side reasoning
  analysis?: _TurnAnalysis;
  currentCanvas?: unknown;
  patterns?: Array<_TurnPattern>;
  // LLM I/O
  systemPrompt?: string;
  rawLLMResponse?: string;
  // Result
  messages?: unknown[];
  validation?: unknown;
  drift?: unknown;
  suggestions?: string[];
  // Telemetry
  timing?: unknown;
  tokens?: unknown;
  engineDebug?: unknown;
  isIteration?: boolean;
};
66
-
67
/**
 * Map a session id to a single, safe directory name.
 *
 * Anything other than ASCII letters, digits, '.', '_' and '-' is replaced by
 * '_', so path separators can never introduce extra path segments. Names made
 * only of dots (including '' , '.' and '..') would resolve to the logs root or
 * its parent, so they are replaced by a fixed placeholder.
 */
function _sessionDirName(sessionId: string): string {
  const cleaned = String(sessionId).replace(/[^A-Za-z0-9._-]+/g, '_');
  return /^\.*$/.test(cleaned) ? '_session' : cleaned;
}

/**
 * Record one generation turn. Safe to call unconditionally — when the env var
 * is unset, this is a no-op that returns immediately.
 *
 * The turn is written to `<logsRoot>/<session dir>/<NNN>-<slug>-<rand>.json`.
 * The first turn seen for a session in this process also writes
 * `_session.json` with the originating intent / engine / mode / model /
 * analysis. The session directory name is a sanitised form of the session id;
 * the unmodified id is still stored inside the JSON files.
 *
 * Never throws after the enabled check: any failure is logged with
 * `console.warn` and reported as `null`.
 *
 * @param record Data captured for this turn; every field is optional.
 * @returns The path written, or null when logging is disabled or the write failed.
 */
export async function recordTurn(record: TurnRecord): Promise<string | null> {
  if (!ENABLED) return null;

  try {
    await _ensureModules();

    const sessionId = record.sessionId ?? `standalone-${new Date().toISOString().replace(/[:.]/g, '-')}`;
    const turnIdx = _turnCounter.get(sessionId) ?? 0;
    _turnCounter.set(sessionId, turnIdx + 1);

    // Short, filesystem-friendly slug from the intent; the random suffix keeps names unique.
    const slug = String(record.intent ?? 'turn')
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '')
      .slice(0, 32) || 'turn';
    const rand = Math.random().toString(36).slice(2, 6);
    const fileName = `${String(turnIdx).padStart(3, '0')}-${slug}-${rand}.json`;

    // The id may come from outside this module: never let it address a
    // directory outside the logs root.
    const sessionDir = _path!.join(_logsRoot!, _sessionDirName(sessionId));
    await _fs!.mkdir(sessionDir, { recursive: true });

    if (!_sessionMetaWritten.has(sessionId) && turnIdx === 0) {
      // Marked before the write so concurrent first turns do not both write it.
      _sessionMetaWritten.add(sessionId);
      const sessionMeta = {
        sessionId,
        startedAt: new Date().toISOString(),
        originIntent: record.intent ?? null,
        engine: record.engine ?? null,
        mode: record.mode ?? null,
        model: record.model ?? null,
        originAnalysis: record.analysis ? {
          steelman: record.analysis.steelman ?? null,
          concepts: record.analysis.concepts ?? [],
          impliedComponents: record.analysis.impliedComponents ?? [],
          styleHints: record.analysis.styleHints ?? [],
        } : null,
      };
      await _fs!.writeFile(
        _path!.join(sessionDir, '_session.json'),
        JSON.stringify(sessionMeta, null, 2) + '\n',
      );
    }

    const payload = {
      // ── Identity ────────────────────────────────────────────────
      sessionId,
      turnIndex: turnIdx,
      timestamp: new Date().toISOString(),
      isIteration: !!record.isIteration,

      // ── Request ─────────────────────────────────────────────────
      intent: record.intent,
      mode: record.mode ?? null,
      engine: record.engine ?? null,
      model: record.model ?? null,
      currentCanvas: record.currentCanvas ?? null,

      // ── Pipeline-side reasoning ─────────────────────────────────
      analysis: record.analysis ?? null,
      // Only the first 10 patterns are kept, reduced to name / score / keywords.
      patterns: (record.patterns ?? []).slice(0, 10).map(p => ({
        name: p.name,
        score: p.score ?? p.confidence ?? null,
        keywords: p.keywords ?? null,
      })),

      // ── LLM I/O ─────────────────────────────────────────────────
      systemPrompt: record.systemPrompt ?? null,
      rawLLMResponse: record.rawLLMResponse ?? null,

      // ── Result ──────────────────────────────────────────────────
      messages: record.messages ?? [],
      validation: record.validation ?? null,
      drift: record.drift ?? null,
      suggestions: record.suggestions ?? [],

      // ── Telemetry ───────────────────────────────────────────────
      timing: record.timing ?? null,
      tokens: record.tokens ?? null,
      engineDebug: record.engineDebug ?? null,
    };

    const filePath = _path!.join(sessionDir, fileName);
    await _fs!.writeFile(filePath, JSON.stringify(payload, null, 2) + '\n');
    return filePath;
  } catch (err: unknown) {
    // Logging must NEVER break a generation. Swallow + warn once per turn.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn('[dialog-recorder] failed to record turn:', reason);
    return null;
  }
}
163
-
164
/**
 * Report whether turn capture is active, so callers can skip expensive
 * capture work when it is not.
 *
 * @returns true when dialog logging is enabled, or when running under Node
 *          with a non-empty A2UI_COMPOSE_TRACE environment variable.
 */
export function isRecording(): boolean {
  if (ENABLED) return true;

  // ENABLED was gated on Node at module init, but the A2UI_COMPOSE_TRACE
  // bypass reads `process` directly. Re-check here so browser callers get
  // `false` instead of `ReferenceError: process is not defined`.
  if (typeof process === 'undefined' || !process.versions?.node) {
    return false;
  }
  return Boolean(process?.env?.['A2UI_COMPOSE_TRACE']);
}
@@ -1,250 +0,0 @@
1
- /**
2
- * Feedback Analyzer
3
- *
4
- * Reads JSONL feedback files, aggregates by intent category,
5
- * and surfaces weak intents, promotion candidates, and pattern gaps.
6
- *
7
- * Usage:
8
- * import { FeedbackAnalyzer } from './feedback-analyzer.js';
9
- * const analyzer = new FeedbackAnalyzer();
10
- * const entries = await analyzer.readRange(30);
11
- * const aggregated = analyzer.aggregateByIntent(entries);
12
- * const weak = analyzer.findWeakIntents(aggregated);
13
- */
14
-
15
- import { feedbackStore } from './feedback-store.js';
16
- import { categorizeIntent } from '../intent/intent-categorizer.js';
17
-
18
// `process` is not in scope under "types": []
declare const process: { versions?: { node?: string } } | undefined;

// Lazy top-level import pattern for Node-only modules
let fs: typeof import('node:fs/promises') | null = null;
let path: typeof import('node:path') | null = null;
// Resolved inside the guarded block below; stays null outside Node.
let resolvedFeedbackDir: string | null = null;
const IS_NODE = typeof process !== 'undefined' && process.versions?.node;
if (IS_NODE) {
  try {
    fs = await import(/* @vite-ignore */ 'node:fs/promises');
    path = await import(/* @vite-ignore */ 'node:path');
    const { fileURLToPath } = await import(/* @vite-ignore */ 'node:url');
    // fileURLToPath decodes percent-escapes (e.g. %20) and yields a native
    // path on Windows; `new URL(...).pathname` does neither.
    // packages/a2ui/retrieval/feedback → up 3 → packages → a2ui/corpus/feedback
    resolvedFeedbackDir = path.join(
      path.dirname(fileURLToPath(import.meta.url)),
      '..', '..', '..', 'a2ui/corpus', 'feedback',
    );
  } catch {
    // Node builtins unavailable, or this module was not loaded from a file: URL.
    // FEEDBACK_DIR stays null and all feedback I/O becomes a no-op.
  }
}

// Directory holding the per-day JSONL feedback files, or null when unavailable.
const FEEDBACK_DIR: string | null = resolvedFeedbackDir;
38
-
39
/** One parsed JSONL line. `_file` is added by `readRange` to record the source file. */
type FeedbackEntry = Record<string, unknown> & { type?: string; executionId?: string; intent?: string; patternMatch?: boolean; score?: number; rating?: number; _file?: string };

/** Public per-category summary produced by `aggregateByIntent`. */
type AggregatedBucket = {
  count: number;
  avgScore: number;
  avgRating: number;
  patternMatchRate: number;
  sampleIntents: string[];
  entries: FeedbackEntry[];
};

/** Running totals used while aggregating; converted to AggregatedBucket at the end. */
type InternalBucket = {
  count: number;
  totalScore: number;
  totalRating: number;
  ratingCount: number;
  patternMatchCount: number;
  entries: FeedbackEntry[];
  sampleIntents: string[];
};

/** Row shape returned by `findPromotionCandidates` and `findPatternGaps`. */
type CategoryReport = { category: string; count: number; avgScore: number; avgRating: number; patternMatchRate: number; sampleIntents: string[] };

const JSONL_SUFFIX = '.jsonl';
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/**
 * UTC calendar dates (YYYY-MM-DD) for `now` and the `days - 1` days before it.
 *
 * Stepping is done in fixed 24-hour increments on the timestamp, so the
 * result cannot repeat or skip a date when the local zone changes offset
 * (DST). The strings are compared against file names, which are matched as
 * `toISOString()` dates, i.e. UTC.
 */
function recentUtcDates(days: number, now: Date = new Date()): Set<string> {
  const dates = new Set<string>();
  for (let i = 0; i < days; i++) {
    dates.add(new Date(now.getTime() - i * MS_PER_DAY).toISOString().slice(0, 10));
  }
  return dates;
}

/** Project a bucket onto the flat report row shared by the finder methods. */
function toCategoryReport(category: string, data: AggregatedBucket): CategoryReport {
  return {
    category,
    count: data.count,
    avgScore: data.avgScore,
    avgRating: data.avgRating,
    patternMatchRate: data.patternMatchRate,
    sampleIntents: data.sampleIntents,
  };
}

export class FeedbackAnalyzer {
  /**
   * Read JSONL feedback files for the last N days.
   *
   * Only files named `<YYYY-MM-DD>.jsonl` whose date falls in the window are
   * read, in ascending name order. Unreadable files and lines that are not a
   * JSON object are skipped.
   *
   * @param days Size of the window, counting today (UTC). Defaults to 30.
   * @returns All parsed entries, each tagged with `_file`; empty when the
   *          filesystem or feedback directory is unavailable.
   */
  async readRange(days = 30): Promise<FeedbackEntry[]> {
    if (!fs || !FEEDBACK_DIR) return [];

    const entries: FeedbackEntry[] = [];
    const wantedDates = recentUtcDates(days);

    try {
      const files = await fs.readdir(FEEDBACK_DIR);
      const jsonlFiles = files
        .filter(f => f.endsWith(JSONL_SUFFIX))
        // Strip the extension as a suffix, not as the first occurrence.
        .filter(f => wantedDates.has(f.slice(0, -JSONL_SUFFIX.length)))
        .sort();

      for (const file of jsonlFiles) {
        try {
          const content = await fs.readFile(path!.join(FEEDBACK_DIR, file), 'utf8');
          const lines = content.trim().split('\n').filter(Boolean);
          for (const line of lines) {
            try {
              const parsed: unknown = JSON.parse(line);
              // Valid JSON that is not an object (null, number, array…) is not an entry.
              if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) continue;
              const entry = parsed as FeedbackEntry;
              entry['_file'] = file;
              entries.push(entry);
            } catch {
              // Skip malformed lines
            }
          }
        } catch {
          // Skip unreadable files
        }
      }
    } catch {
      // Feedback dir doesn't exist yet
    }

    return entries;
  }

  /**
   * Aggregate feedback entries by intent category.
   *
   * Entries with `type === 'execution'` are grouped by the category returned
   * from `categorizeIntent`; entries with `type === 'rating'` are attached to
   * executions sharing their `executionId`. A missing score or rating
   * contributes 0 to the respective total.
   *
   * @param entries Entries as returned by `readRange`.
   * @returns Map from category to its summary: avgScore rounded to an integer,
   *          avgRating to one decimal, patternMatchRate as an integer percentage.
   */
  aggregateByIntent(entries: FeedbackEntry[]): Map<string, AggregatedBucket> {
    const buckets = new Map<string, InternalBucket>();

    const executions = entries.filter(e => e.type === 'execution');
    const ratings = entries.filter(e => e.type === 'rating');

    // Index ratings by executionId for fast lookup
    const ratingsByExecId = new Map<string, FeedbackEntry[]>();
    for (const r of ratings) {
      if (!r.executionId) continue;
      const existing = ratingsByExecId.get(r.executionId);
      if (existing) {
        existing.push(r);
      } else {
        ratingsByExecId.set(r.executionId, [r]);
      }
    }

    for (const exec of executions) {
      // Entries come from disk: do not trust `intent` to be a string.
      const intent = typeof exec.intent === 'string' ? exec.intent : '';
      const { category } = categorizeIntent(intent);

      let bucket = buckets.get(category);
      if (!bucket) {
        bucket = {
          count: 0,
          totalScore: 0,
          totalRating: 0,
          ratingCount: 0,
          patternMatchCount: 0,
          entries: [],
          sampleIntents: [],
        };
        buckets.set(category, bucket);
      }

      bucket.count++;
      bucket.totalScore += exec.score ?? 0;
      bucket.entries.push(exec);

      if (exec.patternMatch) {
        bucket.patternMatchCount++;
      }

      // Collect unique sample intents (up to 5), compared case-insensitively
      if (bucket.sampleIntents.length < 5 && intent) {
        const intentLower = intent.toLowerCase();
        if (!bucket.sampleIntents.some(s => s.toLowerCase() === intentLower)) {
          bucket.sampleIntents.push(intent);
        }
      }

      // Attach ratings
      const execRatings = ratingsByExecId.get(exec.executionId ?? '') ?? [];
      for (const r of execRatings) {
        bucket.totalRating += (r.rating ?? 0);
        bucket.ratingCount++;
      }
    }

    // Compute averages
    const result = new Map<string, AggregatedBucket>();
    for (const [category, bucket] of buckets) {
      result.set(category, {
        count: bucket.count,
        avgScore: bucket.count > 0 ? Math.round(bucket.totalScore / bucket.count) : 0,
        avgRating: bucket.ratingCount > 0 ? Math.round((bucket.totalRating / bucket.ratingCount) * 10) / 10 : 0,
        patternMatchRate: bucket.count > 0 ? Math.round((bucket.patternMatchCount / bucket.count) * 100) : 0,
        sampleIntents: bucket.sampleIntents,
        entries: bucket.entries,
      });
    }

    return result;
  }

  /**
   * Find intent categories with weak performance.
   *
   * @param aggregated Output of `aggregateByIntent`.
   * @param threshold  Categories with avgScore strictly below this are weak. Defaults to 60.
   * @returns Weak categories, lowest avgScore first.
   */
  findWeakIntents(aggregated: Map<string, AggregatedBucket>, threshold = 60): Array<{ category: string; count: number; avgScore: number; avgRating: number; sampleIntents: string[] }> {
    return [...aggregated]
      .filter(([, data]) => data.avgScore < threshold)
      .map(([category, data]) => ({
        category,
        count: data.count,
        avgScore: data.avgScore,
        avgRating: data.avgRating,
        sampleIntents: data.sampleIntents,
      }))
      .sort((a, b) => a.avgScore - b.avgScore);
  }

  /**
   * Find intent categories ready for pattern promotion.
   * Criteria: avgScore >= 95, avgRating >= 4, count >= 3
   *
   * @param aggregated Output of `aggregateByIntent`.
   * @returns Candidates, highest avgScore first.
   */
  findPromotionCandidates(aggregated: Map<string, AggregatedBucket>): Array<{ category: string; count: number; avgScore: number; avgRating: number; patternMatchRate: number; sampleIntents: string[] }> {
    return [...aggregated]
      .filter(([, data]) => data.avgScore >= 95 && data.avgRating >= 4 && data.count >= 3)
      .map(([category, data]) => toCategoryReport(category, data))
      .sort((a, b) => b.avgScore - a.avgScore);
  }

  /**
   * Find intent categories with no pattern match AND low scores — gaps in pattern coverage.
   * Criteria: patternMatchRate === 0 and avgScore < 70.
   *
   * @param aggregated Output of `aggregateByIntent`.
   * @returns Gaps, lowest avgScore first.
   */
  findPatternGaps(aggregated: Map<string, AggregatedBucket>): Array<{ category: string; count: number; avgScore: number; avgRating: number; patternMatchRate: number; sampleIntents: string[] }> {
    return [...aggregated]
      .filter(([, data]) => data.patternMatchRate === 0 && data.avgScore < 70)
      .map(([category, data]) => toCategoryReport(category, data))
      .sort((a, b) => a.avgScore - b.avgScore);
  }
}
248
-
249
- // Export feedbackStore re-export so the module stays importable
250
- export { feedbackStore };
@@ -1,229 +0,0 @@
1
- /**
2
- * Persistent Feedback Store
3
- *
4
- * Writes execution metadata, ratings, LLM self-critique, and gap signals
5
- * to JSONL files on disk. One file per day. Browser-safe (no-ops if no fs).
6
- *
7
- * Usage:
8
- * import { feedbackStore } from './feedback-store.js';
9
- * feedbackStore.logExecution({ executionId, intent, model, domain, ... });
10
- * feedbackStore.logRating({ executionId, rating, ... });
11
- * feedbackStore.logGap({ type: 'pattern', description: '...' });
12
- * const recent = await feedbackStore.readRecent(50);
13
- */
14
-
15
// `process` is not in scope under "types": []
declare const process: { versions?: { node?: string } } | undefined;

// Lazy top-level import pattern for Node-only modules
let fs: typeof import('node:fs/promises') | null = null;
let path: typeof import('node:path') | null = null;
// Resolved inside the guarded block below; stays null outside Node.
let resolvedFeedbackDir: string | null = null;
const IS_NODE = typeof process !== 'undefined' && process.versions?.node;
if (IS_NODE) {
  try {
    fs = await import(/* @vite-ignore */ 'node:fs/promises');
    path = await import(/* @vite-ignore */ 'node:path');
    const { fileURLToPath } = await import(/* @vite-ignore */ 'node:url');
    // fileURLToPath decodes percent-escapes (e.g. %20) and yields a native
    // path on Windows; `new URL(...).pathname` does neither.
    // packages/a2ui/retrieval/feedback → up 3 → packages → a2ui/corpus/feedback
    resolvedFeedbackDir = path.join(
      path.dirname(fileURLToPath(import.meta.url)),
      '..', '..', '..', 'a2ui/corpus', 'feedback',
    );
  } catch {
    // Node builtins unavailable, or this module was not loaded from a file: URL.
    // FEEDBACK_DIR stays null and every store operation becomes a no-op.
  }
}

// Directory holding the per-day JSONL feedback files, or null when unavailable.
const FEEDBACK_DIR: string | null = resolvedFeedbackDir;
35
-
36
/**
 * Path of the JSONL file for the current day.
 *
 * The day is the date part of `Date#toISOString()`, so it is a UTC date.
 *
 * @returns `<FEEDBACK_DIR>/<YYYY-MM-DD>.jsonl`, or null when `path` was not loaded.
 */
function todayFile(): string | null {
  if (!path) {
    return null;
  }
  const stamp = new Date().toISOString().slice(0, 10);
  return path.join(FEEDBACK_DIR!, `${stamp}.jsonl`);
}
40
-
41
/**
 * Append one entry, serialised as a single JSON line, to today's feedback file.
 *
 * Does nothing when the filesystem module or feedback directory is
 * unavailable. The directory is created on demand. Failures (including
 * serialisation errors) are reported with `console.warn` and never thrown.
 *
 * @param entry Value to serialise with `JSON.stringify`.
 */
async function append(entry: unknown): Promise<void> {
  if (!(fs && FEEDBACK_DIR)) return;

  try {
    await fs.mkdir(FEEDBACK_DIR, { recursive: true });
    const target = todayFile()!;
    const serialized = `${JSON.stringify(entry)}\n`;
    await fs.appendFile(target, serialized);
  } catch (failure) {
    console.warn('FeedbackStore: write failed', (failure as Error).message);
  }
}
50
-
51
/** Line written by `logExecution`. */
type ExecutionEntry = {
  type: 'execution';
  timestamp: string;
  executionId: string;
  intent: string;
  model: string;
  domain: string;
  mode: string;
  patternMatch: boolean | undefined;
  patternConfidence: number | undefined;
  score: number | undefined;
  componentCount: number | undefined;
  tokenCount: number | undefined;
  meta: unknown;
};

/** Line written by `logRating`. */
type RatingEntry = {
  type: 'rating';
  timestamp: string;
  executionId: string;
  rating: number;
  intent: string;
};

/** Line written by `logPatternSave`. */
type PatternSaveEntry = {
  type: 'pattern_save';
  timestamp: string;
  executionId: string;
  patternName: string;
  intent: string;
};

/** Line written by `logGap`. */
type GapEntry = {
  type: 'gap';
  gapType: string;
  timestamp: string;
  description: string | undefined;
  source: string | undefined;
  executionId: string | undefined;
};

type AnyEntry = ExecutionEntry | RatingEntry | PatternSaveEntry | GapEntry | Record<string, unknown>;

/** Read the `type` discriminator of an entry without assuming its shape. */
function entryType(entry: AnyEntry): unknown {
  return (entry as Record<string, unknown>)['type'];
}

/**
 * Group the descriptions of all gap entries by their `gapType`.
 *
 * A Map is used while grouping so that keys read from disk which collide with
 * `Object.prototype` members (`constructor`, `__proto__`, …) are treated as
 * ordinary keys.
 */
function summarizeGaps(entries: AnyEntry[]): Record<string, string[]> {
  const grouped = new Map<string, string[]>();
  for (const entry of entries) {
    if (entryType(entry) !== 'gap') continue;
    const gap = entry as GapEntry;
    const key = String(gap.gapType);
    const bucket = grouped.get(key);
    if (bucket) {
      bucket.push(gap.description ?? '');
    } else {
      grouped.set(key, [gap.description ?? '']);
    }
  }
  return Object.fromEntries(grouped);
}

export const feedbackStore = {
  /**
   * Log a completed generation execution.
   * `messages` is accepted for caller convenience but is not stored.
   */
  async logExecution({
    executionId, intent, model, domain, mode,
    patternMatch, patternConfidence,
    score, componentCount, tokenCount,
    meta,
  }: {
    executionId: string;
    intent: string;
    model: string;
    domain: string;
    mode: string;
    patternMatch?: boolean;
    patternConfidence?: number;
    score?: number;
    componentCount?: number;
    tokenCount?: number;
    meta?: unknown;
    messages?: unknown; // optional, not stored
  }): Promise<void> {
    await append({
      type: 'execution',
      timestamp: new Date().toISOString(),
      executionId, intent, model, domain, mode,
      patternMatch, patternConfidence,
      score, componentCount, tokenCount,
      meta: meta ?? null,
    });
  },

  /** Log a user rating (👍/👎). */
  async logRating({ executionId, rating, intent }: { executionId: string; rating: number; intent: string }): Promise<void> {
    await append({
      type: 'rating',
      timestamp: new Date().toISOString(),
      executionId, rating, intent,
    });
  },

  /** Log a pattern save action. */
  async logPatternSave({ executionId, patternName, intent }: { executionId: string; patternName: string; intent: string }): Promise<void> {
    await append({
      type: 'pattern_save',
      timestamp: new Date().toISOString(),
      executionId, patternName, intent,
    });
  },

  /** Log a training gap identified by LLM meta or heuristics. */
  async logGap({ type, description, source, executionId }: { type: string; description?: string; source?: string; executionId?: string }): Promise<void> {
    await append({
      type: 'gap',
      gapType: type, // 'pattern' | 'domain' | 'component' | 'prompt'
      timestamp: new Date().toISOString(),
      description, source, executionId,
    });
  },

  /**
   * Read recent feedback entries (Node only).
   *
   * Files are visited newest name first and, within a file, last line first,
   * until `limit` entries have been collected. Unreadable files and lines that
   * are not a JSON object are skipped rather than discarding everything read
   * so far.
   *
   * @param limit Maximum number of entries to return. Defaults to 100.
   * @returns Up to `limit` entries; empty when the filesystem or the feedback
   *          directory is unavailable.
   */
  async readRecent(limit = 100): Promise<AnyEntry[]> {
    if (!fs || !FEEDBACK_DIR) return [];

    let files: string[];
    try {
      files = (await fs.readdir(FEEDBACK_DIR))
        .filter(f => f.endsWith('.jsonl'))
        .sort()
        .reverse();
    } catch {
      // Feedback dir doesn't exist yet
      return [];
    }

    const entries: AnyEntry[] = [];
    for (const file of files) {
      if (entries.length >= limit) break;

      let content: string;
      try {
        content = await fs.readFile(path!.join(FEEDBACK_DIR, file), 'utf8');
      } catch {
        // Skip unreadable files; older files may still be fine.
        continue;
      }

      const lines = content.trim().split('\n').filter(Boolean).reverse();
      for (const line of lines) {
        if (entries.length >= limit) break;
        try {
          const parsed: unknown = JSON.parse(line);
          // `null`, numbers, strings and arrays are valid JSON but not entries.
          if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
            entries.push(parsed as AnyEntry);
          }
        } catch {
          // Skip malformed lines
        }
      }
    }
    return entries;
  },

  /**
   * Get gap summary — aggregate gap signals for training improvement.
   *
   * @returns Gap descriptions from the 500 most recent entries, grouped by
   *          gapType; a missing description is recorded as ''.
   */
  async getGapSummary(): Promise<Record<string, string[]>> {
    return summarizeGaps(await this.readRecent(500));
  },

  /**
   * Get quality metrics — aggregate from recent executions.
   *
   * All figures are derived from a single read of the 500 most recent
   * entries. Missing scores and token counts count as 0. Ratings >= 4 are
   * treated as thumbs-up; `thumbUpRate` is an integer percentage.
   *
   * @returns Totals only when there are no executions; otherwise totals plus
   *          the per-domain breakdown and the gap summary.
   */
  async getQualityMetrics(): Promise<{
    executions: number;
    avgScore: number;
    avgTokens: number;
    thumbUpRate: number;
    byDomain?: Record<string, { count: number; avgScore: number }>;
    gaps?: Record<string, string[]>;
  }> {
    const entries = await this.readRecent(500);
    const executions = entries.filter(e => entryType(e) === 'execution') as ExecutionEntry[];
    const ratings = entries.filter(e => entryType(e) === 'rating') as RatingEntry[];

    if (executions.length === 0) return { executions: 0, avgScore: 0, avgTokens: 0, thumbUpRate: 0 };

    const avgScore = executions.reduce((s, e) => s + (e.score ?? 0), 0) / executions.length;
    const avgTokens = executions.reduce((s, e) => s + (e.tokenCount ?? 0), 0) / executions.length;
    const thumbsUp = ratings.filter(r => r.rating >= 4).length;
    const thumbsDown = ratings.filter(r => r.rating < 4).length;
    const thumbUpRate = (thumbsUp + thumbsDown) > 0 ? thumbsUp / (thumbsUp + thumbsDown) : 0;

    // Per-domain breakdown. A Map keeps domain names such as 'constructor'
    // from resolving to Object.prototype members.
    const byDomain = new Map<string, { count: number; totalScore: number }>();
    for (const e of executions) {
      const d = e.domain || 'unknown';
      let totals = byDomain.get(d);
      if (!totals) {
        totals = { count: 0, totalScore: 0 };
        byDomain.set(d, totals);
      }
      totals.count++;
      totals.totalScore += e.score ?? 0;
    }

    return {
      executions: executions.length,
      avgScore: Math.round(avgScore),
      avgTokens: Math.round(avgTokens),
      thumbUpRate: Math.round(thumbUpRate * 100),
      byDomain: Object.fromEntries(
        [...byDomain].map(([d, v]) => [d, { count: v.count, avgScore: Math.round(v.totalScore / v.count) }] as const),
      ),
      // Reuse the entries already in memory instead of re-reading the log.
      gaps: summarizeGaps(entries),
    };
  },
};