@adia-ai/a2ui-retrieval 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,18 @@ Follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and
7
7
 
8
8
  _No pending changes._
9
9
 
10
+ ## [0.4.7] - 2026-05-12
11
+
12
+ ### Removed — `embedding/embedding-retriever.js` + `concept-mapper.js` retired (§72, the §65 carry-over)
13
+
14
+ The pattern-embeddings retrieval surface — `embedding-retriever.js` and its sole consumer `concept-mapper.js` (dead since v0.4.6, when §64 retired `pattern-library.js`) — has been deleted from `packages/a2ui/retrieval/`. The companion artifacts in `@adia-ai/a2ui-corpus` (`pattern-embeddings.json` data file + `./pattern-embeddings` subpath export) and on the repo side (`scripts/build/embeddings.mjs` + `build:embeddings` / `build:embeddings:all` npm scripts) were retired in the same arc.
15
+
16
+ `embedding/index.js` no longer re-exports from `embedding-retriever.js`. The remaining surface: `embedding-provider.js` (provider abstraction over Voyage / OpenAI) + `chunk-embedding-retriever.js` (the canonical embedding-based retriever, scoped to chunks).
17
+
18
+ ### Changed — `intent/prompt-analyzer.js` `getVocab()` reads canonical catalog (§72)
19
+
20
+ Migrated from `@adia-ai/a2ui-corpus/patterns/_components.json` (retired) to `@adia-ai/a2ui-corpus/catalog-a2ui_0_9.json` (the canonical v0.9 catalog). Vocabulary is now built from `components[name]['x-adiaui'].synonyms.tags`. Same `displayList` + `allowedSet` output shape; no behavior change for downstream callers.
21
+
10
22
  ## [0.4.6] - 2026-05-12
11
23
 
12
24
  ### Changed — `pattern-library.js` retired, consumers migrate to `composition-library` (§64 multi-step, 2026-05-12)
@@ -1,7 +1,10 @@
1
1
  /**
2
- * @adia-ai/a2ui-retrieval/embedding — embedding-provider + retrievers surface.
2
+ * @adia-ai/a2ui-retrieval/embedding — embedding-provider + chunk retriever surface.
3
+ *
4
+ * Since §65 (v0.4.7), the legacy pattern-embedding retriever was retired
5
+ * along with its only consumer (concept-mapper.js, dead post-§64). The
6
+ * canonical retrieval surface post-§40 is chunk-embedding-retriever.
3
7
  */
4
8
 
5
9
  export * from './embedding-provider.js';
6
- export * from './embedding-retriever.js';
7
10
  export * from './chunk-embedding-retriever.js';
@@ -38,28 +38,36 @@
38
38
  let _vocab = null;
39
39
  async function getVocab() {
40
40
  if (_vocab) return _vocab;
41
- let catalog = {};
41
+ let catalogJson = {};
42
42
  try {
43
43
  const IS_NODE = typeof process !== 'undefined' && process.versions?.node;
44
44
  if (IS_NODE) {
45
45
  const fs = await import(/* @vite-ignore */ 'node:fs/promises');
46
46
  const path = await import(/* @vite-ignore */ 'node:path');
47
47
  const url = await import(/* @vite-ignore */ 'node:url');
48
- // packages/a2ui/retrieval/intent up 2 packages/a2ui corpus/patterns/_components.json
48
+ // Since §65 (v0.4.7), reads from canonical v0.9 catalog at
49
+ // corpus/catalog-a2ui_0_9.json (assembled from yamls). Previously read
50
+ // the hand-maintained corpus/patterns/_components.json retired in §65.
51
+ // packages/a2ui/retrieval/intent → up 2 → packages/a2ui → corpus/catalog-a2ui_0_9.json
49
52
  const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
50
- const raw = await fs.readFile(path.join(__dirname, '../../corpus/patterns/_components.json'), 'utf8');
51
- catalog = JSON.parse(raw);
53
+ const raw = await fs.readFile(path.join(__dirname, '../../corpus/catalog-a2ui_0_9.json'), 'utf8');
54
+ catalogJson = JSON.parse(raw);
52
55
  } else {
53
- const resp = await fetch(new URL('../../corpus/patterns/_components.json', import.meta.url));
54
- if (resp.ok) catalog = await resp.json();
56
+ const resp = await fetch(new URL('../../corpus/catalog-a2ui_0_9.json', import.meta.url));
57
+ if (resp.ok) catalogJson = await resp.json();
55
58
  }
56
59
  } catch { /* empty vocab — analyzer will still work, just unconstrained */ }
57
60
 
61
+ // Adapt v0.9 catalog shape: `components: {Name: {x-adiaui: {synonyms: {tags: [...]}}}}`
62
+ // → legacy `{Name: {aliases}}` shape this vocab builder expects.
63
+ const comps = catalogJson?.components || {};
58
64
  const displayList = [];
59
65
  const allowedSet = new Set();
60
- for (const [name, data] of Object.entries(catalog)) {
66
+ for (const [name, def] of Object.entries(comps)) {
61
67
  allowedSet.add(name);
62
- const aliases = Array.isArray(data?.aliases) ? data.aliases : [];
68
+ const ext = def?.['x-adiaui'] || {};
69
+ const syns = (ext.synonyms && typeof ext.synonyms === 'object') ? ext.synonyms : null;
70
+ const aliases = Array.isArray(syns?.tags) ? syns.tags : [];
63
71
  for (const a of aliases) allowedSet.add(a);
64
72
  displayList.push(aliases.length ? `${name} (also: ${aliases.join(', ')})` : name);
65
73
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adia-ai/a2ui-retrieval",
3
- "version": "0.4.6",
3
+ "version": "0.4.7",
4
4
  "description": "AdiaUI A2UI retrieval layer — catalog lookup, intent classification, domain routing, pattern + anti-pattern matching, clarity + context assembly. Consumed by the compose engine and any A2UI-protocol tooling that needs to reason about user intent against the catalog.",
5
5
  "type": "module",
6
6
  "main": "./index.js",
package/concept-mapper.js DELETED
@@ -1,127 +0,0 @@
1
- /**
2
- * Concept Mapper — re-rank corpus patterns using analyzer signals.
3
- *
4
- * The existing `searchBlocks(query)` does pure keyword matching against
5
- * pattern.keywords + pattern.description. That's necessary but not sufficient
6
- * — a prompt about a "user dashboard" might lexically prefer a pattern named
7
- * "user-profile" over "admin-dashboard" even though the latter is a better
8
- * conceptual match.
9
- *
10
- * This module takes the structured analysis produced by prompt-analyzer.js
11
- * and combines THREE signals into a final pattern score:
12
- *
13
- * lexical: the original keyword score from searchBlocks (steelman → keywords)
14
- * conceptual: overlap between analysis.concepts and pattern.tags.purpose
15
- * structural: overlap between analysis.impliedComponents and the set of
16
- * component types referenced in the pattern's template
17
- *
18
- * Patterns missing tags fall back to lexical-only scoring (graceful
19
- * degradation; the older patterns in the corpus haven't all been retagged).
20
- */
21
-
22
- import { searchBlocks } from '../engine/reference.js';
23
- import { scoreAll as embeddingScoreAll, available as embeddingAvailable } from './embedding/embedding-retriever.js';
24
-
25
- /** Weights for the combined score. Tuned to keep lexical authoritative
26
- * but let strong conceptual+structural+semantic signals override marginal
27
- * lexical differences. Embedding semantic similarity scores are in [-1, 1]
28
- * so their weight is high (30) — cosine of 0.8 → 24 points, comparable to
29
- * 3 concept-tag hits. Tuned to fix the "product card → ticket form"
30
- * keyword-collision class of failure. */
31
- const WEIGHTS = {
32
- lexical: 1.0, // baseline
33
- conceptual: 8, // each matching concept-tag adds 8 points
34
- structural: 1.5, // each matching component-signature element adds 1.5 points
35
- semantic: 30, // cosine(query_embedding, pattern_embedding) × 30
36
- };
37
-
38
- /**
39
- * Re-rank pattern matches using the analyzer's structured signals.
40
- *
41
- * @param {object} opts
42
- * @param {object} opts.analysis — Output of analyzePrompt() from prompt-analyzer.js
43
- * @param {string} [opts.domain] — Domain hint (passed through to searchBlocks)
44
- * @param {number} [opts.limit=10] — Cap on returned results
45
- * @returns {Array<{ pattern: object, score: number, breakdown: object }>}
46
- * Patterns ranked by combined score, descending.
47
- */
48
- export async function rankByConceptAndSignature({ analysis, domain, limit = 10 }) {
49
- if (!analysis) return [];
50
-
51
- // Use the steelmanned brief for lexical search — it's denser than the raw
52
- // intent and surfaces keywords the user implied but didn't say.
53
- const query = analysis.steelman || analysis.raw;
54
- const lexicalHits = searchBlocks(query, { domain });
55
- if (!Array.isArray(lexicalHits) || lexicalHits.length === 0) return [];
56
-
57
- const conceptSet = new Set((analysis.concepts || []).map(c => c.toLowerCase()));
58
- const componentSet = new Set(analysis.impliedComponents || []);
59
-
60
- // Semantic channel — only run when the index + provider are both available.
61
- // Returns a Map<patternName, cosineSim>. Silent no-op otherwise.
62
- const semanticMap = (await embeddingAvailable())
63
- ? await embeddingScoreAll(query)
64
- : new Map();
65
-
66
- const ranked = lexicalHits.map(hit => {
67
- // Different shapes of hit objects in the corpus — be permissive.
68
- const pattern = hit.pattern || hit;
69
- const lexicalScore = hit.score ?? hit.confidence ?? 1;
70
-
71
- const conceptScore = scoreConceptOverlap(pattern, conceptSet);
72
- const structuralScore = scoreSignatureOverlap(pattern, componentSet);
73
- // Clamp negative cosines to 0 — no retrieval value in "most anti-similar".
74
- const semanticScore = Math.max(0, semanticMap.get(pattern.name) || 0);
75
-
76
- const combined =
77
- WEIGHTS.lexical * lexicalScore +
78
- WEIGHTS.conceptual * conceptScore +
79
- WEIGHTS.structural * structuralScore +
80
- WEIGHTS.semantic * semanticScore;
81
-
82
- return {
83
- pattern,
84
- score: combined,
85
- breakdown: {
86
- lexical: +(WEIGHTS.lexical * lexicalScore).toFixed(2),
87
- conceptual: +(WEIGHTS.conceptual * conceptScore).toFixed(2),
88
- structural: +(WEIGHTS.structural * structuralScore).toFixed(2),
89
- semantic: +(WEIGHTS.semantic * semanticScore).toFixed(2),
90
- },
91
- };
92
- });
93
-
94
- ranked.sort((a, b) => b.score - a.score);
95
- return ranked.slice(0, limit);
96
- }
97
-
98
- /** Count how many of the analysis concepts appear in the pattern's tag system. */
99
- function scoreConceptOverlap(pattern, conceptSet) {
100
- if (!pattern || conceptSet.size === 0) return 0;
101
- const tags = pattern.tags || {};
102
- const flat = []
103
- .concat(tags.purpose || [])
104
- .concat(tags.layout || [])
105
- .concat(tags.interaction || [])
106
- .concat(pattern.keywords || [])
107
- .map(t => String(t).toLowerCase());
108
- let hits = 0;
109
- for (const c of conceptSet) if (flat.includes(c)) hits++;
110
- return hits;
111
- }
112
-
113
- /** Count how many of the implied components appear in the pattern's template. */
114
- function scoreSignatureOverlap(pattern, componentSet) {
115
- if (!pattern || componentSet.size === 0) return 0;
116
- const template = pattern.template;
117
- if (!Array.isArray(template)) return 0;
118
- // Build the pattern's component signature once per call; small templates
119
- // mean the cost is negligible. Cache later if hot.
120
- const sig = new Set();
121
- for (const node of template) {
122
- if (node && typeof node.component === 'string') sig.add(node.component);
123
- }
124
- let hits = 0;
125
- for (const c of componentSet) if (sig.has(c)) hits++;
126
- return hits;
127
- }
@@ -1,152 +0,0 @@
1
- /**
2
- * Embedding retriever — loads the build-time pattern embedding index and
3
- * scores a query against every pattern via cosine similarity.
4
- *
5
- * Index: packages/a2ui/corpus/pattern-embeddings.json (built by
6
- * scripts/build-embeddings.mjs). When missing or empty, the retriever is
7
- * effectively a no-op — callers see a scoreFor(name) of 0 and should fall
8
- * back to keyword-only ranking.
9
- *
10
- * Query embedding uses the same provider as the index (baked in at build),
11
- * so the query path also requires the provider's API key at runtime. If
12
- * the key is absent, the retriever exposes a `available()` probe returning
13
- * false and callers degrade gracefully.
14
- */
15
-
16
- import { detectProvider, cosine, voyage, openai } from './embedding-provider.js';
17
-
18
- const IS_NODE = typeof process !== 'undefined' && !!process.versions?.node;
19
-
20
- let _index = null;
21
- let _indexByName = null; // Map<string, Float32Array>
22
- let _loadPromise = null;
23
- let _embedFn = null;
24
- let _available = null; // lazy, once the first probe runs
25
-
26
- async function _loadIndex() {
27
- if (_index) return _index;
28
- if (_loadPromise) return _loadPromise;
29
- _loadPromise = (async () => {
30
- try {
31
- if (IS_NODE) {
32
- const fs = await import(/* @vite-ignore */ 'node:fs/promises');
33
- const path = await import(/* @vite-ignore */ 'node:path');
34
- const url = await import(/* @vite-ignore */ 'node:url');
35
- // Try the package-import path first (works under node_modules
36
- // install layout — `@adia-ai/a2ui-corpus` exports
37
- // `./pattern-embeddings`). Fall back to the relative source-tree
38
- // path so a source-checkout monorepo without symlinked deps
39
- // still resolves.
40
- let p = null;
41
- try {
42
- const { createRequire } = await import(/* @vite-ignore */ 'node:module');
43
- const require = createRequire(import.meta.url);
44
- p = require.resolve('@adia-ai/a2ui-corpus/pattern-embeddings');
45
- } catch {
46
- const here = path.dirname(url.fileURLToPath(import.meta.url));
47
- p = path.resolve(here, '../../corpus/pattern-embeddings.json');
48
- }
49
- const raw = await fs.readFile(p, 'utf8');
50
- _index = JSON.parse(raw);
51
- } else {
52
- const url = new URL('../../corpus/pattern-embeddings.json', import.meta.url);
53
- const res = await fetch(url).catch(() => null);
54
- _index = res?.ok ? await res.json().catch(() => null) : null;
55
- }
56
- } catch {
57
- _index = null;
58
- }
59
- if (_index?.patterns?.length) {
60
- _indexByName = new Map();
61
- for (const p of _index.patterns) {
62
- if (p?.name && Array.isArray(p.vector)) {
63
- _indexByName.set(p.name, Float32Array.from(p.vector));
64
- }
65
- }
66
- }
67
- return _index;
68
- })();
69
- return _loadPromise;
70
- }
71
-
72
- /** Resolve the embed function matching the index's provider. */
73
- function _resolveEmbed(providerName, model) {
74
- // The index's recorded (provider, model) is the source of truth — query
75
- // embeddings MUST be generated by the same model the corpus was indexed
76
- // with. Cross-provider mixing produces meaningless cosine scores; even
77
- // same-provider/different-model emits different-dim vectors which cosine()
78
- // short-circuits to 0 — silent retrieval failure. Fail loud (warn + null)
79
- // when the recorded provider's key is unset, instead of auto-picking a
80
- // different provider that would silently corrupt similarity rankings.
81
- // (See chunk-embedding-retriever.js for the parallel implementation.)
82
- let fn = null;
83
- if (providerName === 'voyage') fn = voyage({ model });
84
- else if (providerName === 'openai') fn = openai({ model });
85
- else {
86
- // No provider recorded in index header (legacy index) — fall through.
87
- const auto = detectProvider();
88
- return auto?.embed || null;
89
- }
90
- if (!fn && typeof console !== 'undefined') {
91
- console.warn(
92
- `[embedding-retriever] index was built with provider=${providerName} model=${model}, ` +
93
- `but the corresponding API key is not set. Embeddings will be unavailable; falling back ` +
94
- `to keyword-only retrieval. Set the matching API key, or rebuild the index with the ` +
95
- `available provider via \`npm run build:embeddings\`.`
96
- );
97
- }
98
- return fn;
99
- }
100
-
101
- /**
102
- * True when both the index AND the matching provider's API key are available.
103
- * Callers use this to decide whether to include embedding scores in the blend.
104
- */
105
- export async function available() {
106
- if (_available !== null) return _available;
107
- const idx = await _loadIndex();
108
- if (!idx || !idx.patterns?.length) { _available = false; return false; }
109
- _embedFn = _resolveEmbed(idx.provider, idx.model);
110
- _available = !!_embedFn;
111
- return _available;
112
- }
113
-
114
- /**
115
- * Embed a query string and return a { patternName → cosineScore } map.
116
- * Returns an empty Map when unavailable (no index or no API key).
117
- *
118
- * @param {string} query — the user's intent (steelman or raw)
119
- * @returns {Promise<Map<string, number>>}
120
- */
121
- export async function scoreAll(query) {
122
- if (!query || typeof query !== 'string') return new Map();
123
- if (!(await available())) return new Map();
124
-
125
- let qVec;
126
- try {
127
- const [v] = await _embedFn([query]);
128
- qVec = v;
129
- } catch (e) {
130
- // Don't let a runtime embedding failure nuke retrieval — fall back cleanly.
131
- if (typeof console !== 'undefined') console.warn('[embedding-retriever]', e.message);
132
- return new Map();
133
- }
134
-
135
- const out = new Map();
136
- for (const [name, vec] of _indexByName) {
137
- out.set(name, cosine(qVec, vec));
138
- }
139
- return out;
140
- }
141
-
142
- /** Number of patterns in the index. Useful for logging/diagnostics. */
143
- export async function size() {
144
- const idx = await _loadIndex();
145
- return idx?.patterns?.length || 0;
146
- }
147
-
148
- /** Diagnostics: the provider/model the index was built with. */
149
- export async function providerInfo() {
150
- const idx = await _loadIndex();
151
- return idx ? { provider: idx.provider, model: idx.model, dims: idx.dims } : null;
152
- }