@adia-ai/a2ui-retrieval 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,179 @@
1
+ /**
2
+ * Dialog Recorder — write every generation turn to disk for debugging,
3
+ * regression analysis, and training-data bootstrapping.
4
+ *
5
+ * Output: logs/dialogs/<sessionId>/<NNN>-<turnId>.json
6
+ * - <sessionId>: storeId / executionId of the multi-turn session, or
7
+ * 'standalone-<isodate>' for one-shot generations.
8
+ * - <NNN>: zero-padded turn index within the session (000, 001, …)
9
+ * - <turnId>: short slug from the intent + a random suffix for uniqueness
10
+ *
11
+ * Each file contains a flat JSON object with everything the pipeline knew at
12
+ * generation time: intent, mode, model, the analyzer's structured signals,
13
+ * pattern matches, the FULL system prompt, the raw LLM response, parsed A2UI
14
+ * messages, validation, drift, suggestions, and timing/token telemetry.
15
+ *
16
+ * Gated by ADIA_LOG_DIALOGS=1 — opt-in, zero overhead when disabled. Browser
17
+ * environment is also a no-op (no fs); only Node-side generation paths log.
18
+ *
19
+ * Use cases:
20
+ * - replay a single bad turn locally without paying the LLM cost again
21
+ * - diff two turns visually after wiring the headless renderer
22
+ * - bootstrap an eval set from real dogfood — every turn becomes a labeled example
23
+ * - regression detection on prompt / corpus / catalog changes
24
+ */
25
+
26
// True only inside a Node process (one that reports `process.versions.node`).
const IS_NODE = typeof process !== 'undefined' && Boolean(process.versions?.node);
// Opt-in switch: ADIA_LOG_DIALOGS must be exactly '1' or 'true', and we must be in Node.
const ENABLED = IS_NODE && ['1', 'true'].includes(process.env.ADIA_LOG_DIALOGS);

// Lazily populated by _ensureModules(); all stay null until the first recorded turn.
let _fs = null;       // node:fs/promises
let _path = null;     // node:path
let _url = null;      // node:url
let _logsRoot = null; // absolute path of the logs/dialogs directory
33
+
34
/**
 * Lazily load the Node built-ins the recorder needs and resolve the log root.
 *
 * All module handles are published together, synchronously, once every import
 * has resolved. `_logsRoot` is the readiness flag: a concurrent caller can
 * therefore never observe `_fs` set while `_path` / `_logsRoot` are still null
 * (which previously made the second of two overlapping turns fail).
 *
 * @returns {Promise<void>}
 */
async function _ensureModules() {
  if (_logsRoot) return;

  const [fsModule, pathModule, urlModule] = await Promise.all([
    import(/* @vite-ignore */ 'node:fs/promises'),
    import(/* @vite-ignore */ 'node:path'),
    import(/* @vite-ignore */ 'node:url'),
  ]);

  // logs/ lives at the repo root: packages/a2ui/retrieval → up 3 → repo root
  const moduleDir = pathModule.dirname(urlModule.fileURLToPath(import.meta.url));

  _fs = fsModule;
  _path = pathModule;
  _url = urlModule;
  _logsRoot = pathModule.resolve(moduleDir, '..', '..', '..', 'logs', 'dialogs');
}
43
+
44
// Next turn index for each session, held in memory. Using a counter (rather
// than timestamps) keeps on-disk ordering correct even when automated probes
// produce several turns within the same millisecond.
const _turnCounter = new Map();

// Sessions whose `_session.json` header has already been written by this
// process. After a restart mid-session the header is simply written again,
// which is harmless because its content is idempotent.
const _sessionMetaWritten = new Set();
51
+
52
/**
 * Record one generation turn. Safe to call unconditionally — when the env var
 * is unset, this is a no-op that returns immediately.
 *
 * The session id is caller-supplied, so it is reduced to a single safe path
 * segment before being used as a directory name; the untouched id is still
 * stored inside the written JSON.
 *
 * @param {object} record
 * @param {string} record.sessionId — multi-turn session handle (storeId / executionId)
 * @param {string} record.intent — user's prompt this turn
 * @param {string} [record.mode] — instant | pro | thinking | stream
 * @param {string} [record.engine] — monolithic | zettel | <custom>
 * @param {string} [record.model] — LLM model id
 * @param {object} [record.analysis] — output of analyzePrompt() (concepts, steelman, ...)
 * @param {object} [record.currentCanvas] — { components } or { messages } provided by caller
 * @param {object[]} [record.patterns] — patterns retrieved by searchBlocks()
 * @param {string} [record.systemPrompt] — full system prompt sent to LLM
 * @param {string} [record.rawLLMResponse] — raw LLM text (pre-parse)
 * @param {object[]} [record.messages] — parsed A2UI messages (the result)
 * @param {object} [record.validation] — validateSchema() result
 * @param {object} [record.drift] — getDriftMetrics() result
 * @param {string[]} [record.suggestions] — follow-up suggestions
 * @param {object} [record.timing] — { totalMs, llmMs, ... }
 * @param {object} [record.tokens] — { input, output }
 * @param {object} [record.engineDebug] — engine-specific extras (strategy, composition, fragmentsUsed for zettel; raw stage reports, etc.)
 * @param {boolean} [record.isIteration] — derived from executionId / currentCanvas presence
 * @returns {Promise<string|null>} the path written, or null when logging is disabled or the write failed
 */
export async function recordTurn(record) {
  if (!ENABLED) return null;

  try {
    await _ensureModules();

    const sessionId = record.sessionId || `standalone-${new Date().toISOString().replace(/[:.]/g, '-')}`;

    // Directory name derived from the id: anything outside [A-Za-z0-9._-]
    // (path separators included) collapses to '_', and leading dots are
    // replaced so '.' / '..' can never be produced. This keeps every write
    // inside _logsRoot and also tolerates non-string ids.
    const sessionDirName = String(sessionId)
      .replace(/[^A-Za-z0-9._-]+/g, '_')
      .replace(/^\.+/, '_');

    // Counters are keyed by directory so ids that map to the same directory
    // share one turn sequence and one header file.
    const turnIdx = _turnCounter.get(sessionDirName) ?? 0;
    _turnCounter.set(sessionDirName, turnIdx + 1);

    const slug = String(record.intent || 'turn')
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '')
      .slice(0, 32) || 'turn';
    const rand = Math.random().toString(36).slice(2, 6);
    const fileName = `${String(turnIdx).padStart(3, '0')}-${slug}-${rand}.json`;

    const sessionDir = _path.join(_logsRoot, sessionDirName);
    await _fs.mkdir(sessionDir, { recursive: true });

    // Write per-session header on the first turn so a directory listing is
    // immediately legible — origin intent, model, mode, first-turn analysis,
    // start time. Future headless renderer reads this for its session list UI.
    if (!_sessionMetaWritten.has(sessionDirName) && turnIdx === 0) {
      _sessionMetaWritten.add(sessionDirName);
      const sessionMeta = {
        sessionId,
        startedAt: new Date().toISOString(),
        originIntent: record.intent || null,
        engine: record.engine || null,
        mode: record.mode || null,
        model: record.model || null,
        // The analyzer's enriched brief + concept tags from turn 0 — these
        // characterize the session's intent space, not just the latest turn.
        originAnalysis: record.analysis ? {
          steelman: record.analysis.steelman || null,
          concepts: record.analysis.concepts || [],
          impliedComponents: record.analysis.impliedComponents || [],
          styleHints: record.analysis.styleHints || [],
        } : null,
      };
      await _fs.writeFile(
        _path.join(sessionDir, '_session.json'),
        JSON.stringify(sessionMeta, null, 2) + '\n',
      );
    }

    const payload = {
      // ── Identity ────────────────────────────────────────────────
      sessionId,
      turnIndex: turnIdx,
      timestamp: new Date().toISOString(),
      isIteration: !!record.isIteration,

      // ── Request ─────────────────────────────────────────────────
      intent: record.intent,
      mode: record.mode || null,
      engine: record.engine || null,
      model: record.model || null,
      currentCanvas: record.currentCanvas || null,

      // ── Pipeline-side reasoning ─────────────────────────────────
      analysis: record.analysis || null,
      // Only the top 10 patterns are kept, reduced to name/score/keywords.
      patterns: (record.patterns || []).slice(0, 10).map((p) => ({
        name: p.name,
        score: p.score ?? p.confidence ?? null,
        keywords: p.keywords || null,
      })),

      // ── LLM I/O (the largest fields) ────────────────────────────
      systemPrompt: record.systemPrompt || null,
      rawLLMResponse: record.rawLLMResponse || null,

      // ── Result ──────────────────────────────────────────────────
      messages: record.messages || [],
      validation: record.validation || null,
      drift: record.drift || null,
      suggestions: record.suggestions || [],

      // ── Telemetry ───────────────────────────────────────────────
      timing: record.timing || null,
      tokens: record.tokens || null,
      // Engine-specific extras (zettel: strategy/composition/fragmentsUsed,
      // monolithic: nothing yet — but reserved).
      engineDebug: record.engineDebug || null,
    };

    const filePath = _path.join(sessionDir, fileName);
    await _fs.writeFile(filePath, JSON.stringify(payload, null, 2) + '\n');
    return filePath;
  } catch (err) {
    // Logging must NEVER break a generation. Swallow + warn once per turn.
    // `err` may be a non-Error value, so don't assume `.message` exists.
    console.warn('[dialog-recorder] failed to record turn:', err?.message ?? err);
    return null;
  }
}
175
+
176
/**
 * Report whether dialog recording is switched on for this process.
 * Callers can use this to skip expensive capture work when it would be discarded.
 *
 * @returns {boolean}
 */
export function isRecording() {
  const recording = ENABLED;
  return recording;
}
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Domain Router — Intent-to-domain classification via keyword matching.
3
+ *
4
+ * Five domains: forms, data, layout, agent, navigation.
5
+ * Each has signal keywords weighted for matching confidence.
6
+ */
7
+
8
// Domain table: for each domain, the substrings that signal it (`keywords`)
// and the component names suggested when it wins (`components`).
// Insertion order matters: classifyIntent breaks ties in favour of the
// domain listed first.
const domains = {
  // Data entry, auth, settings and confirmation flows.
  forms: {
    keywords: [
      'login', 'signup', 'sign up', 'register', 'input', 'field', 'form',
      'validation', 'submit', 'password', 'email', 'checkbox', 'toggle',
      'radio', 'select', 'dropdown', 'upload', 'otp', 'text field',
      'date picker', 'slider', 'textarea', 'required', 'label',
      'settings', 'configure', 'preference', 'margin', 'bleed',
      'preview', 'approve', 'approval', 'photo', 'design system',
      'button', 'cancel', 'action', 'reset',
      'color picker', 'theme', 'swatch', 'two factor', 'authentication',
      'verification', 'verify', 'subscribe', 'newsletter', 'survey',
      'rbac', 'role', 'roles', 'permission', 'permissions', 'mfa', 'two-factor',
      'session', 'sessions', 'device', 'devices', 'revoke', 'signing',
      'destructive', 'confirm', 'confirmation', 'type to', 'type-to',
      'mapping', 'mapper', 'csv', 'import',
    ],
    components: [
      'Input', 'CheckBox', 'Toggle', 'Switch', 'Slider', 'Select', 'Radio',
      'TextArea', 'Upload', 'OtpInput', 'CalendarPicker', 'ColorPicker',
      'Range', 'Button',
    ],
  },

  // Tables, charts, dashboards and other record-oriented views.
  data: {
    keywords: [
      'table', 'chart', 'stat', 'metric', 'dashboard', 'graph', 'report',
      'analytics', 'data', 'list', 'grid', 'sort', 'filter', 'paginate',
      'pagination', 'row', 'column', 'progress', 'sparkline', 'kpi',
      'kanban', 'board', 'activity', 'feed', 'timeline', 'leaderboard',
      'inventory', 'project', 'task', 'tasks', 'team', 'calendar',
      'tracker', 'tracking', 'monitor', 'monitoring', 'weather',
      'pricing', 'product', 'bookmark', 'music', 'player',
      'shopping', 'cart', 'order', 'invoice', 'receipt', 'checkout',
      'inbox', 'notification', 'changelog', 'permission',
      'cohort', 'retention', 'heatmap', 'funnel', 'conversion',
      'audit', 'log', 'logs', 'viewer', 'audit log',
      'integration', 'integrations', 'marketplace',
      'webhook', 'webhooks', 'endpoint', 'delivery',
      'usage', 'quota', 'meter', 'limit', 'limits',
      'incident', 'status page', 'uptime',
      'release', 'releases', 'release notes',
      'flag', 'flags', 'feature flag', 'rollout',
      'api key', 'api keys', 'token', 'tokens',
      'object inspector', 'inspector', 'record',
      'virtualized', 'virtual scroll', 'sticky',
      'column manager', 'reorder',
      'saved view', 'saved views', 'view',
      'bulk', 'bulk action', 'selection',
    ],
    components: [
      'Table', 'Chart', 'Stat', 'Progress', 'Timeline', 'List', 'Grid',
      'Badge', 'Pagination', 'Avatar', 'Skeleton',
    ],
  },

  // Page structure, containers, overlays and content sections.
  layout: {
    keywords: [
      'page', 'grid', 'sidebar', 'header', 'footer', 'hero', 'section',
      'column', 'row', 'card', 'panel', 'divider', 'stack', 'block',
      'container', 'layout', 'spacing', 'gap', 'toolbar', 'image', 'embed',
      'gallery', 'profile', 'testimonial', 'feature', 'cta', 'landing',
      'swiper', 'carousel', 'slideshow', 'slides', 'slider',
      'wizard', 'steps', 'stepper', 'accordion', 'faq',
      'toast', 'alert', 'banner', 'badge', 'tag', 'code', 'snippet',
      'skeleton', 'loading', 'empty', 'error', 'onboarding',
      'avatar', 'popover', 'tooltip', 'modal', 'dialog', 'drawer',
      'breadcrumb', 'recipe', 'blog', 'post', 'comparison',
      'three-pane', 'three pane', 'list-detail', 'master-detail',
      'split pane', 'bento', 'workspace', 'switcher', 'org',
      'shell', 'app shell',
    ],
    components: [
      'Row', 'Column', 'Grid', 'Card', 'Divider', 'Tabs', 'Tab',
      'Accordion', 'Toolbar', 'Drawer', 'Modal', 'Pane',
      'Image', 'Embed', 'Tag', 'Kbd', 'Code', 'Swiper',
    ],
  },

  // Chat / LLM-driven surfaces.
  agent: {
    keywords: [
      'chat', 'message', 'stream', 'ai', 'prompt', 'generate', 'conversation',
      'assistant', 'bot', 'agent', 'llm', 'response', 'streaming', 'a2ui',
      'surface', 'render', 'dynamic', 'empty', 'error', 'loading',
    ],
    components: [
      'Stream', 'Card', 'Text', 'Button', 'Column', 'Row', 'Alert', 'Toast',
      'EmptyState', 'Skeleton', 'Tooltip', 'Popover',
    ],
  },

  // Menus, tabs, routing and command surfaces.
  navigation: {
    keywords: [
      'menu', 'nav', 'breadcrumb', 'tab', 'tabs', 'sidebar', 'link', 'route',
      'router', 'navigate', 'page', 'sitemap', 'command palette', 'command',
      'segmented', 'breadcrumbs',
    ],
    components: [
      'Nav', 'Breadcrumb', 'Tabs', 'Tab', 'Menu', 'Command',
      'SegmentedControl', 'Segment', 'Pagination',
    ],
  },
};
107
+
108
/**
 * Classify an intent string into a domain.
 *
 * A domain's score is the number of its keywords that occur as substrings of
 * the lower-cased text. The highest score wins; ties go to the domain declared
 * first, and 'layout' is returned when nothing matches. Non-string input is
 * treated as empty text instead of throwing.
 *
 * @param {string} text — Natural language intent
 * @returns {{ domain: string, confidence: number, matchedSignals: string[], suggestedComponents: string[] }}
 *   `suggestedComponents` is a copy, so callers may mutate it freely.
 */
export function classifyIntent(text) {
  const lower = typeof text === 'string' ? text.toLowerCase() : '';

  let bestDomain = 'layout'; // default fallback
  let bestMatches = [];

  for (const [domain, config] of Object.entries(domains)) {
    const matched = config.keywords.filter((keyword) => lower.includes(keyword));
    // Strict '>' keeps the earliest domain on a tie.
    if (matched.length > bestMatches.length) {
      bestDomain = domain;
      bestMatches = matched;
    }
  }

  // Confidence: matches divided by 30% of the domain's keyword count (with a
  // floor of 3 so tiny keyword lists don't saturate on one hit), capped at 1.
  const bestScore = bestMatches.length;
  const totalKeywords = domains[bestDomain].keywords.length;
  const confidence = bestScore > 0
    ? Math.min(1, bestScore / Math.max(3, totalKeywords * 0.3))
    : 0;

  return {
    domain: bestDomain,
    confidence: Math.round(confidence * 100) / 100,
    matchedSignals: bestMatches,
    // Copy so a caller mutating the result cannot corrupt the shared table.
    suggestedComponents: [...domains[bestDomain].components],
  };
}
155
+
156
/**
 * Get domain configuration (keywords + components) for a given domain.
 *
 * Only names declared directly in the domain table resolve; inherited
 * Object.prototype members such as 'toString' or 'constructor' yield null.
 *
 * @param {string} domain — Domain name
 * @returns {object|null} the domain's config object, or null when unknown
 */
export function getDomain(domain) {
  const isKnown = Object.prototype.hasOwnProperty.call(domains, domain);
  return isKnown ? domains[domain] : null;
}
165
+
166
/**
 * List the names of every configured domain, in declaration order.
 *
 * @returns {string[]}
 */
export function getAllDomains() {
  return Object.entries(domains).map(([name]) => name);
}
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Pluggable embedding provider. Two implementations:
3
+ *
4
+ * voyage() — Voyage AI, voyage-3-lite by default (512 dims).
5
+ * Env: VOYAGE_API_KEY. Anthropic-recommended; generous free tier.
6
+ *
7
+ * openai() — OpenAI text-embedding-3-small (1536 dims).
8
+ * Env: OPENAI_API_KEY. Cheap ($0.02/1M tokens).
9
+ *
10
+ * detectProvider() picks Voyage if available, else OpenAI, else null.
11
+ * A null provider means "embeddings unavailable" — callers should fall back
12
+ * to keyword retrieval cleanly.
13
+ *
14
+ * Both implementations batch inputs to minimize round-trips. Every provider
15
+ * returns a Promise<Float32Array[]> of the same length as the input.
16
+ */
17
+
18
// REST endpoints that the two embedding providers POST to.
const VOYAGE_URL = 'https://api.voyageai.com/v1/embeddings'; // Voyage AI
const OPENAI_URL = 'https://api.openai.com/v1/embeddings';   // OpenAI
20
+
21
/**
 * Look up a configuration value by name.
 *
 * Order of precedence: `process.env[key]`, then `import.meta.env['VITE_' + key]`,
 * then `import.meta.env[key]`. Empty or missing values are skipped.
 *
 * @param {string} key — variable name without any `VITE_` prefix
 * @returns {string} the first non-empty value found, or '' when none is set
 */
function getEnv(key) {
  const fromProcess = typeof process !== 'undefined' ? process.env?.[key] : undefined;
  if (fromProcess) return fromProcess;

  try {
    const bundlerEnv = import.meta.env;
    const fromBundler = bundlerEnv?.[`VITE_${key}`] || bundlerEnv?.[key];
    if (fromBundler) return fromBundler;
  } catch {
    // Reading import.meta.env failed; fall through to "not set".
  }

  return '';
}
30
+
31
/**
 * Voyage AI embedding provider.
 *
 * The returned embedder sends all texts in one request, checks that the
 * response carries exactly one embedding per input, and orders the results by
 * the response's `index` field so output[i] always corresponds to texts[i].
 *
 * @param {object} [opts]
 * @param {string} [opts.apiKey] — defaults to env VOYAGE_API_KEY
 * @param {string} [opts.model] — voyage-3-lite | voyage-3 | voyage-code-3
 * @param {string} [opts.inputType] — value sent as `input_type`; defaults to
 *   'document' (the previous hard-coded behaviour). Pass 'query' when
 *   embedding search queries.
 * @returns {(texts: string[]) => Promise<Float32Array[]>} embedder, or null if no key.
 *   The embedder rejects on a non-2xx status or a malformed response.
 */
export function voyage({ apiKey, model = 'voyage-3-lite', inputType = 'document' } = {}) {
  const key = apiKey || getEnv('VOYAGE_API_KEY');
  if (!key) return null;

  return async function embed(texts) {
    if (!Array.isArray(texts) || texts.length === 0) return [];

    const res = await fetch(VOYAGE_URL, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'authorization': `Bearer ${key}`,
      },
      body: JSON.stringify({ input: texts, model, input_type: inputType }),
    });
    if (!res.ok) throw new Error(`Voyage ${res.status}: ${(await res.text()).slice(0, 200)}`);

    const data = await res.json();
    const rows = data?.data;
    // A short or malformed payload would silently misalign vectors with inputs.
    if (!Array.isArray(rows) || rows.length !== texts.length) {
      const got = Array.isArray(rows) ? rows.length : 'none';
      throw new Error(`Voyage: expected ${texts.length} embeddings, got ${got}`);
    }

    // Sort a copy by `index`; rows without one keep their received order
    // because Array.prototype.sort is stable.
    return [...rows]
      .sort((a, b) => (a.index ?? 0) - (b.index ?? 0))
      .map((row) => new Float32Array(row.embedding));
  };
}
58
+
59
/**
 * OpenAI text-embedding-3-small provider.
 *
 * The returned embedder sends all texts in one request, checks that the
 * response carries exactly one embedding per input, and orders the results by
 * the response's `index` field so output[i] always corresponds to texts[i].
 *
 * @param {object} [opts]
 * @param {string} [opts.apiKey] — defaults to env OPENAI_API_KEY
 * @param {string} [opts.model] — text-embedding-3-small | text-embedding-3-large
 * @returns {(texts: string[]) => Promise<Float32Array[]>} embedder, or null if no key.
 *   The embedder rejects on a non-2xx status or a malformed response.
 */
export function openai({ apiKey, model = 'text-embedding-3-small' } = {}) {
  const key = apiKey || getEnv('OPENAI_API_KEY');
  if (!key) return null;

  return async function embed(texts) {
    if (!Array.isArray(texts) || texts.length === 0) return [];

    const res = await fetch(OPENAI_URL, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'authorization': `Bearer ${key}`,
      },
      body: JSON.stringify({ input: texts, model }),
    });
    if (!res.ok) throw new Error(`OpenAI ${res.status}: ${(await res.text()).slice(0, 200)}`);

    const data = await res.json();
    const rows = data?.data;
    // A short or malformed payload would silently misalign vectors with inputs.
    if (!Array.isArray(rows) || rows.length !== texts.length) {
      const got = Array.isArray(rows) ? rows.length : 'none';
      throw new Error(`OpenAI: expected ${texts.length} embeddings, got ${got}`);
    }

    // Sort a copy by `index`; rows without one keep their received order
    // because Array.prototype.sort is stable.
    return [...rows]
      .sort((a, b) => (a.index ?? 0) - (b.index ?? 0))
      .map((row) => new Float32Array(row.embedding));
  };
}
86
+
87
/**
 * Pick the first embedding provider that can be constructed from the
 * environment: Voyage is tried first, then OpenAI.
 *
 * @returns {{ name: string, model: string, embed: Function }|null}
 *   descriptor for the chosen provider, or null when neither has a key
 */
export function detectProvider() {
  const candidates = [
    { name: 'voyage', model: 'voyage-3-lite', create: voyage },
    { name: 'openai', model: 'text-embedding-3-small', create: openai },
  ];

  for (const { name, model, create } of candidates) {
    // Later factories are only invoked when every earlier one returned nothing.
    const embed = create();
    if (embed) return { name, model, embed };
  }
  return null;
}
96
+
97
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either vector is missing,
 *   the lengths differ, or either vector has zero magnitude
 */
export function cosine(a, b) {
  if (!a || !b) return 0;

  const length = a.length;
  if (length !== b.length) return 0;

  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  let i = 0;
  while (i < length) {
    const x = a[i];
    const y = b[i];
    dot += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
    i += 1;
  }

  // A zero-magnitude vector has no direction; avoid dividing by zero.
  if (sumSqA === 0 || sumSqB === 0) return 0;
  return dot / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB));
}
@@ -0,0 +1,120 @@
1
+ /**
2
+ * Embedding retriever — loads the build-time pattern embedding index and
3
+ * scores a query against every pattern via cosine similarity.
4
+ *
5
+ * Index: packages/a2ui/corpus/pattern-embeddings.json (built by
6
+ * scripts/build-embeddings.mjs). When missing or empty, the retriever is
7
+ * effectively a no-op — callers see a scoreFor(name) of 0 and should fall
8
+ * back to keyword-only ranking.
9
+ *
10
+ * Query embedding uses the same provider as the index (baked in at build),
11
+ * so the query path also requires the provider's API key at runtime. If
12
+ * the key is absent, the retriever exposes a `available()` probe returning
13
+ * false and callers degrade gracefully.
14
+ */
15
+
16
+ import { detectProvider, cosine, voyage, openai } from './embedding-provider.js';
17
+
18
// True only inside a Node process (one that reports `process.versions.node`).
const IS_NODE = typeof process !== 'undefined' && Boolean(process.versions?.node);

// Module-level cache; everything starts empty and is filled on first use.
let _index = null;       // parsed pattern-embeddings.json, or null
let _indexByName = null; // Map<string, Float32Array>
let _loadPromise = null; // in-flight / completed index load
let _embedFn = null;     // embedder chosen by available()
let _available = null;   // lazy, once the first probe runs
25
+
26
/**
 * Load the pattern embedding index once and cache it.
 *
 * In Node the JSON file is read from `../corpus/pattern-embeddings.json`
 * relative to this module; elsewhere the same relative URL is fetched. Any
 * failure leaves the index null. When the index has patterns, `_indexByName`
 * is filled with one Float32Array per entry that has a name and an array
 * `vector`. Concurrent callers share the same in-flight promise.
 *
 * @returns {Promise<object|null>} the parsed index, or null when unavailable
 */
async function _loadIndex() {
  if (_index) return _index;
  if (_loadPromise) return _loadPromise;

  // Fetch/read the raw index for the current runtime; may reject.
  const readIndex = async () => {
    if (!IS_NODE) {
      const location = new URL('../corpus/pattern-embeddings.json', import.meta.url);
      const response = await fetch(location).catch(() => null);
      if (!response?.ok) return null;
      return response.json().catch(() => null);
    }
    const fs = await import(/* @vite-ignore */ 'node:fs/promises');
    const path = await import(/* @vite-ignore */ 'node:path');
    const url = await import(/* @vite-ignore */ 'node:url');
    const moduleDir = path.dirname(url.fileURLToPath(import.meta.url));
    const indexFile = path.resolve(moduleDir, '../corpus/pattern-embeddings.json');
    return JSON.parse(await fs.readFile(indexFile, 'utf8'));
  };

  _loadPromise = (async () => {
    try {
      _index = await readIndex();
    } catch {
      _index = null;
    }

    const entries = _index?.patterns;
    if (entries?.length) {
      _indexByName = new Map();
      for (const entry of entries) {
        const usable = entry?.name && Array.isArray(entry.vector);
        if (usable) _indexByName.set(entry.name, Float32Array.from(entry.vector));
      }
    }
    return _index;
  })();

  return _loadPromise;
}
59
+
60
/**
 * Pick the embed function for the provider recorded in the index.
 *
 * @param {string} providerName — 'voyage', 'openai', or anything else
 * @param {string} model — model id forwarded to the provider factory
 * @returns {Function|null} the embedder, or null when no key is configured
 */
function _resolveEmbed(providerName, model) {
  switch (providerName) {
    case 'voyage':
      return voyage({ model });
    case 'openai':
      return openai({ model });
    default:
      // Unknown or null — fall through to whatever's available.
      return detectProvider()?.embed || null;
  }
}
68
+
69
/**
 * Probe whether embedding scores can be produced: the index must contain
 * patterns AND an embedder for its provider must be constructible. The answer
 * is computed once and cached; callers use it to decide whether to blend in
 * embedding scores.
 *
 * @returns {Promise<boolean>}
 */
export async function available() {
  if (_available === null) {
    const idx = await _loadIndex();
    if (idx?.patterns?.length) {
      _embedFn = _resolveEmbed(idx.provider, idx.model);
      _available = Boolean(_embedFn);
    } else {
      _available = false;
    }
  }
  return _available;
}
81
+
82
/**
 * Embed a query and score it against every indexed pattern.
 *
 * An empty Map is returned when the query is not a non-empty string, when the
 * retriever is unavailable (no index or no API key), or when embedding the
 * query fails (the failure is logged as a warning, not thrown).
 *
 * @param {string} query — the user's intent (steelman or raw)
 * @returns {Promise<Map<string, number>>} pattern name → cosine score
 */
export async function scoreAll(query) {
  const scores = new Map();

  if (typeof query !== 'string' || query === '') return scores;
  if (!(await available())) return scores;

  let queryVector;
  try {
    const [firstVector] = await _embedFn([query]);
    queryVector = firstVector;
  } catch (e) {
    // Don't let a runtime embedding failure nuke retrieval — fall back cleanly.
    if (typeof console !== 'undefined') console.warn('[embedding-retriever]', e.message);
    return scores;
  }

  _indexByName.forEach((patternVector, patternName) => {
    scores.set(patternName, cosine(queryVector, patternVector));
  });
  return scores;
}
109
+
110
/**
 * Count the patterns in the loaded index (for logging/diagnostics).
 *
 * @returns {Promise<number>} pattern count, or 0 when there is no index
 */
export async function size() {
  const loaded = await _loadIndex();
  const count = loaded?.patterns?.length;
  return count ? count : 0;
}
115
+
116
/**
 * Diagnostics: report the provider, model and dimensionality recorded in the
 * loaded index.
 *
 * @returns {Promise<{ provider: *, model: *, dims: * }|null>} null when no index is loaded
 */
export async function providerInfo() {
  const loaded = await _loadIndex();
  if (!loaded) return null;

  const { provider, model, dims } = loaded;
  return { provider, model, dims };
}