@adia-ai/a2ui-retrieval 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/authoring/index.js +3 -7
- package/component-catalog.js +111 -0
- package/context-assembler.js +20 -14
- package/embedding/index.js +5 -2
- package/index.js +1 -1
- package/intent/clarity.js +4 -4
- package/intent/prompt-analyzer.js +16 -8
- package/package.json +1 -1
- package/authoring/pattern-promotion.js +0 -135
- package/authoring/synthetic-data.js +0 -446
- package/concept-mapper.js +0 -127
- package/embedding/embedding-retriever.js +0 -152
- package/pattern-library.js +0 -659
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Embedding retriever — loads the build-time pattern embedding index and
|
|
3
|
-
* scores a query against every pattern via cosine similarity.
|
|
4
|
-
*
|
|
5
|
-
* Index: packages/a2ui/corpus/pattern-embeddings.json (built by
|
|
6
|
-
* scripts/build-embeddings.mjs). When missing or empty, the retriever is
|
|
7
|
-
* effectively a no-op — callers see a scoreFor(name) of 0 and should fall
|
|
8
|
-
* back to keyword-only ranking.
|
|
9
|
-
*
|
|
10
|
-
* Query embedding uses the same provider as the index (baked in at build),
|
|
11
|
-
* so the query path also requires the provider's API key at runtime. If
|
|
12
|
-
* the key is absent, the retriever exposes an `available()` probe returning
|
|
13
|
-
* false and callers degrade gracefully.
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
import { detectProvider, cosine, voyage, openai } from './embedding-provider.js';
|
|
17
|
-
|
|
18
|
-
// True when a `process` global with `versions.node` exists; _loadIndex() uses
// it to choose between reading the index from disk and fetching it by URL.
const IS_NODE = typeof process !== 'undefined' && !!process.versions?.node;
|
|
19
|
-
|
|
20
|
-
let _index = null; // parsed index JSON; stays null until loaded, and when loading failed
|
|
21
|
-
let _indexByName = null; // Map<string, Float32Array>
|
|
22
|
-
let _loadPromise = null; // promise of the one index load, reused by later _loadIndex() calls
|
|
23
|
-
let _embedFn = null; // query embed function, assigned by available()
|
|
24
|
-
let _available = null; // lazy, once the first probe runs
|
|
25
|
-
|
|
26
|
-
/**
 * Load the pattern embedding index once and memoize it.
 *
 * The first call starts the load and stores its promise in `_loadPromise`, so
 * later callers reuse the same load instead of reading the index again. Any
 * failure (unresolvable path, unreadable file, invalid JSON, failed fetch)
 * leaves `_index` as null instead of throwing.
 *
 * Side effects: assigns `_index` and `_loadPromise`, and, when the index has
 * at least one pattern, rebuilds `_indexByName` from the entries that carry
 * both a name and an array vector.
 *
 * @returns {Promise<object|null>} the parsed index, or null when unavailable
 */
async function _loadIndex() {
  if (_index) return _index;
  if (_loadPromise) return _loadPromise;

  // Node: read the JSON index from disk.
  const readFromDisk = async () => {
    const fs = await import(/* @vite-ignore */ 'node:fs/promises');
    const path = await import(/* @vite-ignore */ 'node:path');
    const url = await import(/* @vite-ignore */ 'node:url');

    // Prefer the package export (node_modules install layout, where
    // `@adia-ai/a2ui-corpus` exports `./pattern-embeddings`). If that cannot
    // be resolved, fall back to the relative source-tree path so a monorepo
    // checkout without symlinked deps still works.
    let indexPath = null;
    try {
      const { createRequire } = await import(/* @vite-ignore */ 'node:module');
      indexPath = createRequire(import.meta.url).resolve('@adia-ai/a2ui-corpus/pattern-embeddings');
    } catch {
      const moduleDir = path.dirname(url.fileURLToPath(import.meta.url));
      indexPath = path.resolve(moduleDir, '../../corpus/pattern-embeddings.json');
    }

    const text = await fs.readFile(indexPath, 'utf8');
    return JSON.parse(text);
  };

  // Non-Node: fetch the JSON relative to this module's URL. Network and
  // parse failures both collapse to null.
  const fetchFromUrl = async () => {
    const indexUrl = new URL('../../corpus/pattern-embeddings.json', import.meta.url);
    const response = await fetch(indexUrl).catch(() => null);
    if (!response?.ok) return null;
    return response.json().catch(() => null);
  };

  _loadPromise = (async () => {
    try {
      _index = await (IS_NODE ? readFromDisk() : fetchFromUrl());
    } catch {
      _index = null;
    }

    const patterns = _index?.patterns;
    if (patterns?.length) {
      _indexByName = new Map();
      for (const entry of patterns) {
        // Skip malformed entries (no name, or vector not an array).
        const usable = entry?.name && Array.isArray(entry.vector);
        if (usable) _indexByName.set(entry.name, Float32Array.from(entry.vector));
      }
    }
    return _index;
  })();

  return _loadPromise;
}
|
|
71
|
-
|
|
72
|
-
/**
 * Resolve the embed function matching the index's recorded provider.
 *
 * The index's recorded (provider, model) is the source of truth: query
 * embeddings MUST be generated by the same model the corpus was indexed with.
 * Cross-provider mixing produces meaningless cosine scores, so this function
 * never substitutes a different provider for one that was recorded. It warns
 * and returns null instead, and callers fall back to keyword-only retrieval.
 * Auto-detection is used only for a legacy index that records no provider.
 *
 * @param {string|null|undefined} providerName — provider recorded in the index header
 * @param {string|null|undefined} model — model recorded in the index header
 * @returns {Function|null} the embed function, or null when embeddings are unavailable
 */
function _resolveEmbed(providerName, model) {
  // Legacy index with no provider in its header: there is nothing to match
  // against, so use whichever provider the environment has configured.
  if (!providerName) {
    const auto = detectProvider();
    return auto?.embed || null;
  }

  const canWarn = typeof console !== 'undefined';

  let fn = null;
  if (providerName === 'voyage') {
    fn = voyage({ model });
  } else if (providerName === 'openai') {
    fn = openai({ model });
  } else {
    // A provider WAS recorded but this module cannot drive it. Auto-picking
    // another provider here would silently corrupt similarity rankings.
    if (canWarn) {
      console.warn(
        `[embedding-retriever] index was built with unsupported provider=${providerName} model=${model}. ` +
        `Embeddings will be unavailable; falling back to keyword-only retrieval. Rebuild the index ` +
        `with a supported provider via \`npm run build:embeddings\`.`
      );
    }
    return null;
  }

  // A falsy factory result is treated as "API key not set" (see the message).
  if (!fn && canWarn) {
    console.warn(
      `[embedding-retriever] index was built with provider=${providerName} model=${model}, ` +
      `but the corresponding API key is not set. Embeddings will be unavailable; falling back ` +
      `to keyword-only retrieval. Set the matching API key, or rebuild the index with the ` +
      `available provider via \`npm run build:embeddings\`.`
    );
  }
  return fn;
}
|
|
100
|
-
|
|
101
|
-
/**
 * Probe whether embedding scores can be produced: the index must contain at
 * least one pattern AND an embed function must resolve for the provider/model
 * recorded in it. Callers use the answer to decide whether to blend embedding
 * scores into ranking.
 *
 * The result is computed on the first call and cached in `_available`; the
 * resolved embed function is stored in `_embedFn`.
 *
 * @returns {Promise<boolean>}
 */
export async function available() {
  if (_available === null) {
    const index = await _loadIndex();
    if (index?.patterns?.length) {
      _embedFn = _resolveEmbed(index.provider, index.model);
      _available = Boolean(_embedFn);
    } else {
      _available = false;
    }
  }
  return _available;
}
|
|
113
|
-
|
|
114
|
-
/**
 * Embed a query string and return a { patternName → cosineScore } map.
 *
 * Returns an empty Map (never throws) when the query is not a non-empty
 * string, when embeddings are unavailable (no index or no embed function),
 * when the embed call fails, or when it yields no vector for the query.
 *
 * @param {string} query — the user's intent (steelman or raw)
 * @returns {Promise<Map<string, number>>}
 */
export async function scoreAll(query) {
  if (!query || typeof query !== 'string') return new Map();
  if (!(await available())) return new Map();

  let qVec;
  try {
    const [v] = await _embedFn([query]);
    qVec = v;
  } catch (e) {
    // Don't let a runtime embedding failure nuke retrieval — fall back cleanly.
    // `e` may be any thrown value (including null), so read `.message` safely.
    if (typeof console !== 'undefined') console.warn('[embedding-retriever]', e?.message ?? e);
    return new Map();
  }

  // The embed call resolved but produced no usable vector (e.g. an empty
  // batch). Scoring against it would be meaningless, so report no scores.
  if (!qVec?.length) return new Map();

  const out = new Map();
  for (const [name, vec] of _indexByName) {
    out.set(name, cosine(qVec, vec));
  }
  return out;
}
|
|
141
|
-
|
|
142
|
-
/**
 * Count the patterns in the loaded index (for logging/diagnostics).
 *
 * @returns {Promise<number>} pattern count, or 0 when no index is loaded
 */
export async function size() {
  const { patterns } = (await _loadIndex()) ?? {};
  return patterns?.length || 0;
}
|
|
147
|
-
|
|
148
|
-
/**
 * Diagnostics: report the provider, model and dims recorded in the index.
 *
 * @returns {Promise<{provider: *, model: *, dims: *}|null>} null when no index is loaded
 */
export async function providerInfo() {
  const index = await _loadIndex();
  if (!index) return null;
  const { provider, model, dims } = index;
  return { provider, model, dims };
}
|