@adia-ai/a2ui-retrieval 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,446 +0,0 @@
1
- /**
2
- * SyntheticDataGenerator — Fills coverage gaps in training data by
3
- * generating A2UI JSON examples for uncovered patterns.
4
- *
5
- * Uses the LLM adapter to generate schemas, validates them via the
6
- * generative validator, scores quality via anti-pattern checks,
7
- * and stores results as training pairs (prompt -> schema).
8
- *
9
- * Spec: A003 section 6 — Synthetic Data Generation
10
- */
11
-
12
- import { getCatalog } from '../catalog.js';
13
- import { getAntiPatterns } from '../anti-patterns.js';
14
- import { getAllPatterns } from '../pattern-library.js';
15
- import { serializeEntry } from '../component-entry.js';
16
-
17
- // ── Coverage targets (spec section 6.2) ──
18
-
19
/**
 * UI patterns the training corpus is expected to cover.
 *
 * Each entry has a stable `id` (compared against pattern names when measuring
 * coverage), a natural-language `description` (used as the generation prompt),
 * and a coarse `complexity` label.
 *
 * The array itself is frozen so the table cannot be reordered, extended, or
 * truncated at runtime. The entries are intentionally left unfrozen because
 * they are shared by reference with callers of this module.
 */
const COVERAGE_TARGETS = Object.freeze([
  { id: 'sidebar-main', description: 'Sidebar navigation with main content area', complexity: 'medium' },
  { id: 'modal-form', description: 'Modal dialog containing a form with validation', complexity: 'medium' },
  { id: 'drawer-nav', description: 'Drawer panel with navigation links', complexity: 'medium' },
  { id: 'toast-sequence', description: 'Sequence of toast notifications (success, error, info)', complexity: 'low' },
  { id: 'tabs-content', description: 'Tabbed interface with different content per tab', complexity: 'medium' },
  { id: 'accordion-faq', description: 'Accordion with FAQ-style question/answer pairs', complexity: 'low' },
  { id: 'wizard-steps', description: 'Multi-step wizard with progress indicator and form fields', complexity: 'high' },
  { id: 'data-grid', description: 'Data table with sort, filter, and pagination', complexity: 'high' },
  { id: 'card-grid', description: 'Grid of cards with different content types', complexity: 'medium' },
  { id: 'auth-form', description: 'Login form with email, password, and social buttons', complexity: 'medium' },
  { id: 'profile-card', description: 'User profile card with avatar, name, stats, and actions', complexity: 'medium' },
  { id: 'empty-state', description: 'Empty state with illustration, heading, and CTA button', complexity: 'low' },
]);
33
-
34
- // ── Default generation options ──
35
-
36
/**
 * Defaults merged underneath caller-supplied generation options.
 *
 * Frozen because the object is shared by every call; consumers copy it with
 * spread rather than mutating it.
 */
const DEFAULT_OPTIONS = Object.freeze({
  temperature: 0.7, // sampling temperature
  maxRetries: 2, // extra attempts after the first one
  batchSize: 4, // number of patterns generated concurrently
});
41
-
42
/**
 * SyntheticDataGenerator — generates A2UI training examples for coverage gaps.
 *
 * For each uncovered pattern it prompts the LLM adapter, parses the reply into
 * A2UI messages, scores the result, and keeps it as a (prompt -> schema) pair
 * when it contains components and clears the quality threshold.
 */
export class SyntheticDataGenerator {
  #llmAdapter;
  #catalog;
  #patternLibrary;
  #validator;
  #antiPatterns;

  /**
   * @param {object} deps
   * @param {object} deps.llmAdapter — LLM adapter with an async complete() method
   * @param {object} [deps.catalog] — Component catalog exposing `entries` (defaults to getCatalog())
   * @param {object|object[]} [deps.patternLibrary] — Either an array of patterns or an
   *   object exposing getAllPatterns(); anything else falls back to the built-in library
   * @param {Function|object} [deps.validator] — Either a function `(schema) => ({ checks })`
   *   or an object with a validateSchema(schema) method returning the same shape
   * @param {Function} [deps.antiPatterns] — Function `(html) => checks[]`
   */
  constructor({ llmAdapter, catalog, patternLibrary, validator, antiPatterns }) {
    this.#llmAdapter = llmAdapter;
    this.#catalog = catalog || null;
    this.#patternLibrary = patternLibrary || null;
    this.#validator = validator || null;
    this.#antiPatterns = antiPatterns || null;
  }

  /**
   * Analyze coverage gaps — which target patterns are missing from existing examples.
   *
   * A target counts as covered when some example's `name` equals the target id.
   *
   * @param {object[]} [existingExamples] — Array of { name, template } pattern objects;
   *   when omitted, the injected pattern library (or the built-in one) is used
   * @returns {{ covered: string[], missing: string[], coverage: number }}
   */
  analyzeCoverage(existingExamples) {
    const examples = existingExamples || this.#resolvePatterns();
    // `p?.name` tolerates null entries instead of aborting the whole analysis.
    const existingNames = new Set(examples.map(p => p?.name));

    const covered = [];
    const missing = [];

    for (const target of COVERAGE_TARGETS) {
      if (existingNames.has(target.id)) {
        covered.push(target.id);
      } else {
        missing.push(target.id);
      }
    }

    const total = COVERAGE_TARGETS.length;
    const coverage = total === 0 ? 1 : covered.length / total;

    return { covered, missing, coverage };
  }

  /**
   * Generate synthetic examples for missing patterns.
   *
   * For each gap: builds a prompt, calls the LLM, scores the result, and keeps
   * it when it passes. Gaps are processed in batches of `batchSize`; the
   * members of one batch run concurrently.
   *
   * Ids that are not known coverage targets are reported in `failed` and
   * contribute no attempts to the stats (no LLM call is made for them).
   * `totalTokens` only counts the accepted attempt of each pattern.
   *
   * @param {string[]} gaps — Pattern IDs to generate (from analyzeCoverage().missing)
   * @param {object} [options]
   * @param {string} [options.model] — Model override forwarded to the LLM adapter
   * @param {number} [options.temperature] — Sampling temperature (default 0.7)
   * @param {number} [options.maxRetries] — Max retries per pattern (default 2)
   * @param {number} [options.batchSize] — Concurrent generation batch size (default 4)
   * @returns {Promise<{ generated: object[], failed: string[], stats: object }>}
   */
  async generateExamples(gaps, options = {}) {
    // Explicit `undefined` values must not shadow the defaults.
    const opts = { ...DEFAULT_OPTIONS };
    for (const [key, value] of Object.entries(options ?? {})) {
      if (value !== undefined) opts[key] = value;
    }
    // A batch size below 1 would never advance the loop; a negative retry
    // count would skip every attempt. Fall back to the defaults for both.
    opts.batchSize = opts.batchSize >= 1 ? Math.floor(opts.batchSize) : DEFAULT_OPTIONS.batchSize;
    opts.maxRetries = opts.maxRetries >= 0 ? Math.floor(opts.maxRetries) : DEFAULT_OPTIONS.maxRetries;

    const gapIds = Array.isArray(gaps) ? gaps : [];
    const generated = [];
    const failed = [];
    let totalTokens = 0;
    let totalAttempts = 0;

    // Process in batches
    for (let i = 0; i < gapIds.length; i += opts.batchSize) {
      const batch = gapIds.slice(i, i + opts.batchSize);

      const results = await Promise.allSettled(
        batch.map(gapId => this.#generateWithRetry(gapId, opts))
      );

      for (let j = 0; j < results.length; j++) {
        const result = results[j];
        const gapId = batch[j];

        if (result.status === 'fulfilled' && result.value) {
          generated.push(result.value);
          totalTokens += result.value.tokenUsage || 0;
          totalAttempts += result.value.attempts || 1;
        } else {
          failed.push(gapId);
          // A rejection means every attempt was used up; a fulfilled null
          // means the id was unknown and nothing was attempted.
          if (result.status === 'rejected') {
            totalAttempts += opts.maxRetries + 1;
          }
        }
      }
    }

    return {
      generated,
      failed,
      stats: {
        total: gapIds.length,
        succeeded: generated.length,
        failed: failed.length,
        totalTokens,
        totalAttempts,
        averageQuality: generated.length > 0
          ? generated.reduce((sum, g) => sum + g.quality.overall, 0) / generated.length
          : 0,
      },
    };
  }

  /**
   * Generate a single training pair for a pattern description.
   *
   * `model` and `temperature` are added to the adapter request only when
   * provided, so a plain `generateOne(description)` sends the same request as
   * before.
   * NOTE(review): assumes the adapter reads `model` / `temperature` from the
   * request object passed to complete() — confirm against the adapter.
   *
   * @param {string} patternDescription — Natural language description of the pattern
   * @param {object} [options]
   * @param {string} [options.model] — Model override
   * @param {number} [options.temperature] — Sampling temperature
   * @returns {Promise<{ prompt: string, schema: object[], quality: object, tokenUsage: number }>}
   */
  async generateOne(patternDescription, options = {}) {
    const systemPrompt = await this.#buildSystemPrompt();
    const userPrompt = this.#buildUserPrompt(patternDescription);

    const request = {
      messages: [{ role: 'user', content: userPrompt }],
      systemPrompt,
    };
    const { model, temperature } = options ?? {};
    if (model !== undefined) request.model = model;
    if (temperature !== undefined) request.temperature = temperature;

    const response = await this.#llmAdapter.complete(request);

    const schema = this.#parseResponse(response?.content);
    const quality = this.scoreExample(schema);

    return {
      prompt: patternDescription,
      schema,
      quality,
      tokenUsage: (response?.usage?.inputTokens || 0) + (response?.usage?.outputTokens || 0),
    };
  }

  /**
   * Score a generated example against quality criteria.
   *
   * Failed validator checks feed the structural and completeness scores;
   * anti-pattern results (computed on a minimal HTML rendering of the
   * components) feed the idiomatic and minimal scores.
   *
   * Note that a schema with no components produces no findings and therefore
   * scores high; callers that need non-empty output must check that separately.
   *
   * @param {object[]} schema — A2UI messages array
   * @returns {{ structural: number, completeness: number, idiomatic: number, minimal: number, overall: number }}
   */
  scoreExample(schema) {
    const messages = Array.isArray(schema) ? schema : [];
    const allComponents = this.#collectComponents(messages);

    // Run validation if available
    const validation = this.#runValidator(messages);
    const checks = Array.isArray(validation?.checks) ? validation.checks : [];
    const validationIssues = checks.filter(c => c && !c.passed);

    // Run anti-pattern checks (HTML-based)
    // Serialize components to a minimal HTML representation for pattern matching
    const html = this.#componentsToHtml(allComponents);
    const antiPatternResult = typeof this.#antiPatterns === 'function'
      ? this.#antiPatterns(html)
      : [];
    // NOTE(review): entries explicitly marked `passed: true` are treated as
    // non-violations, mirroring the validator's check shape — confirm the
    // checker's return format.
    const antiPatternChecks = (Array.isArray(antiPatternResult) ? antiPatternResult : [])
      .filter(ap => ap && ap.passed !== true);

    // Structural: no orphaned children, valid message format, root exists
    const structuralIssues = validationIssues.filter(c =>
      ['hasRootComponent', 'noOrphanedChildren', 'validMessageFormat', 'flatAdjacency'].includes(c.name)
    );
    const structural = structuralIssues.length === 0 ? 1 : 0.5;

    // Completeness: text content set, all types registered
    const completenessIssues = validationIssues.filter(c =>
      ['textContentSet', 'allTypesRegistered'].includes(c.name)
    );
    const completeness = Math.max(0, 1 - (completenessIssues.length * 0.1));

    // Idiomatic: no anti-patterns (bare divs, inline styles, wrong nesting)
    const idiomaticViolations = antiPatternChecks.filter(ap =>
      ['noBareDivs', 'noBareInputs', 'cardStructure', 'noInventedComponents'].includes(ap.name)
    );
    const idiomatic = idiomaticViolations.length === 0 ? 1 : 0.5;

    // Minimal: no unnecessary wrappers or inline layout/colors
    const minimalViolations = antiPatternChecks.filter(ap =>
      ['noHardcodedColors', 'noInlineLayout'].includes(ap.name)
    );
    const minimal = minimalViolations.length === 0 ? 1 : 0.5;

    // Overall weighted average
    const overall = (structural * 0.3) + (completeness * 0.25) + (idiomatic * 0.25) + (minimal * 0.2);

    return { structural, completeness, idiomatic, minimal, overall };
  }

  // ── Private helpers ──

  /**
   * Resolve the pattern list used when analyzeCoverage() gets no examples.
   * Prefers the injected pattern library and falls back to getAllPatterns().
   * @returns {object[]}
   */
  #resolvePatterns() {
    const library = this.#patternLibrary;
    if (Array.isArray(library)) return library;
    if (typeof library?.getAllPatterns === 'function') return library.getAllPatterns();
    return getAllPatterns();
  }

  /**
   * Invoke the injected validator, whether it is a bare function or an object
   * with a validateSchema() method.
   * @param {object[]} schema
   * @returns {object|null} Validator result, or null when no validator is usable
   */
  #runValidator(schema) {
    const validator = this.#validator;
    if (typeof validator === 'function') return validator(schema);
    if (typeof validator?.validateSchema === 'function') return validator.validateSchema(schema);
    return null;
  }

  /**
   * Gather the component objects of every `updateComponents` message.
   * Non-object messages and non-object components are skipped.
   * @param {object[]} messages
   * @returns {object[]}
   */
  #collectComponents(messages) {
    const collected = [];
    for (const msg of messages) {
      if (msg?.type === 'updateComponents' && Array.isArray(msg.components)) {
        for (const comp of msg.components) {
          if (comp && typeof comp === 'object') collected.push(comp);
        }
      }
    }
    return collected;
  }

  /**
   * Generate with retry logic.
   *
   * An attempt is rejected when it yields no components (for example an
   * unparseable reply) or when its overall quality is below 0.5.
   *
   * @param {string} gapId — Target pattern ID
   * @param {object} opts — Generation options (maxRetries, model, temperature)
   * @returns {Promise<object|null>} The accepted example, or null for an unknown id
   * @throws {Error} The last failure once every attempt has been used
   */
  async #generateWithRetry(gapId, opts) {
    const target = COVERAGE_TARGETS.find(t => t.id === gapId);
    if (!target) return null;

    // Pre-seeded so that `throw lastError` never throws `undefined`.
    let lastError = new Error(`Generation failed for pattern: ${gapId}`);
    for (let attempt = 0; attempt <= opts.maxRetries; attempt++) {
      try {
        const result = await this.generateOne(target.description, opts);

        // An empty schema produces no findings and would otherwise sail
        // through the quality threshold below.
        const schema = Array.isArray(result.schema) ? result.schema : [];
        if (this.#collectComponents(schema).length === 0) {
          lastError = new Error('No components generated');
          continue;
        }

        // Require minimum quality threshold
        if (result.quality.overall >= 0.5) {
          return {
            id: gapId,
            ...result,
            attempts: attempt + 1,
          };
        }

        lastError = new Error(`Quality too low: ${result.quality.overall}`);
      } catch (err) {
        lastError = err;
      }
    }

    throw lastError;
  }

  /**
   * Build the system prompt with catalog, anti-patterns, and format rules.
   * @returns {Promise<string>}
   */
  async #buildSystemPrompt() {
    const parts = [];

    // Role
    parts.push('You are an A2UI training data generator for the AdiaUI design system. Output ONLY a JSON array of A2UI messages.');

    // Output format
    parts.push('Output format: [{ "type": "updateComponents", "surfaceId": "default", "components": [...] }]');
    parts.push('Components use flat adjacency: each has { id, component, children?: [string ids], ...props }.');
    parts.push('The root component must have id "root".');

    // Anti-patterns (rules)
    const antiPatterns = getAntiPatterns();
    if (antiPatterns.length > 0) {
      const rules = antiPatterns.map(ap => `- ${ap.description}`).join('\n');
      parts.push(`Rules:\n${rules}`);
    }

    // Available components (summary level)
    const catalog = this.#catalog || await getCatalog();
    const entries = catalog?.entries || new Map();
    if (entries.size > 0) {
      const lines = [];
      for (const entry of entries.values()) {
        const serialized = serializeEntry(entry, 'index');
        lines.push(`- ${serialized.type} (${serialized.tag}): ${serialized.description || ''}`);
      }
      parts.push(`Available components:\n${lines.join('\n')}`);
    }

    // Quality criteria
    parts.push([
      'Quality criteria:',
      '- Structural: valid root, all children resolve, flat adjacency list',
      '- Completeness: all Text components have textContent, all types are registered',
      '- Idiomatic: use Card > Header + Section + Footer, no bare divs, no invented components',
      '- Minimal: no inline styles, no hardcoded colors, use semantic props and variants',
    ].join('\n'));

    return parts.join('\n\n');
  }

  /**
   * Build the user prompt for a specific pattern.
   * @param {string} patternDescription
   * @returns {string}
   */
  #buildUserPrompt(patternDescription) {
    return [
      `Generate an A2UI component tree for this UI pattern:`,
      ``,
      `Pattern: ${patternDescription}`,
      ``,
      `Requirements:`,
      `- Use realistic content (names, values, labels)`,
      `- Follow Card > Header + Section + Footer anatomy where appropriate`,
      `- Use layout components (Row, Column, Grid) for composition`,
      `- Include all necessary props (text, variant, label, placeholder, etc.)`,
      `- Output valid JSON — no markdown, no explanation`,
    ].join('\n');
  }

  /**
   * Parse an LLM response into A2UI messages.
   * Handles raw JSON, markdown code fences, bare component arrays.
   * Anything that cannot be parsed yields an empty array (best effort: the
   * caller treats an empty result as a failed attempt).
   *
   * @param {string} content — Raw LLM response
   * @returns {object[]}
   */
  #parseResponse(content) {
    if (!content || typeof content !== 'string') {
      return [];
    }

    let json = content.trim();

    // Strip markdown code fences
    const fenceMatch = json.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
    if (fenceMatch) {
      json = fenceMatch[1].trim();
    }

    try {
      const parsed = JSON.parse(json);

      // Array of messages
      if (Array.isArray(parsed)) {
        const first = parsed[0];
        if (first?.type === 'updateComponents') {
          return parsed;
        }
        // Bare components array — wrap
        if (first?.id && first?.component) {
          return [{ type: 'updateComponents', surfaceId: 'default', components: parsed }];
        }
        // Unknown shape: keep only object entries so consumers can read
        // properties safely.
        return parsed.filter(entry => entry && typeof entry === 'object');
      }

      // Single message object
      if (parsed && typeof parsed === 'object' && parsed.type === 'updateComponents') {
        return [parsed];
      }

      // Single component — wrap
      if (parsed && typeof parsed === 'object' && parsed.id && parsed.component) {
        return [{ type: 'updateComponents', surfaceId: 'default', components: [parsed] }];
      }

      return [];
    } catch {
      return [];
    }
  }

  /**
   * Derive the tag name used in the HTML rendering from an A2UI component
   * type: camelCase boundaries become hyphens, the result is lowercased, and
   * a `-ui` suffix is appended when missing (e.g. `TextField` → `text-field-ui`).
   *
   * The hyphenation must run before lowercasing, otherwise there are no
   * uppercase letters left for the boundary regex to find.
   *
   * @param {string} type
   * @returns {string}
   */
  #toTagName(type) {
    const kebab = type.replace(/([a-z])([A-Z])/g, '$1-$2').toLowerCase();
    return kebab.endsWith('-ui') ? kebab : `${kebab}-ui`;
  }

  /**
   * Convert a flat component list to a minimal HTML-like string for anti-pattern checking.
   *
   * Emits one line per component: its tag, an optional `style` attribute, and
   * an opening tag for each child id that resolves to a typed component.
   *
   * @param {object[]} components
   * @returns {string}
   */
  #componentsToHtml(components) {
    const byId = new Map(components.map(c => [c.id, c]));
    const lines = [];

    for (const comp of components) {
      const type = comp.component;
      if (typeof type !== 'string' || !type) continue;

      const tagName = this.#toTagName(type);

      // Build a minimal HTML representation
      const attrs = [];
      if (comp.style) {
        attrs.push(`style="${typeof comp.style === 'object' ? Object.entries(comp.style).map(([k, v]) => `${k}:${v}`).join(';') : comp.style}"`);
      }
      const attrStr = attrs.length > 0 ? ' ' + attrs.join(' ') : '';

      // Represent nesting for structural checks
      if (Array.isArray(comp.children)) {
        const childTags = comp.children
          .map(id => byId.get(id))
          .filter(child => typeof child?.component === 'string' && child.component)
          .map(child => this.#toTagName(child.component));
        lines.push(`<${tagName}${attrStr}>${childTags.map(ct => `<${ct}>`).join('')}</${tagName}>`);
      } else {
        lines.push(`<${tagName}${attrStr}></${tagName}>`);
      }
    }

    return lines.join('\n');
  }
}
439
-
440
/**
 * Get the list of coverage targets.
 *
 * Returns a fresh array of fresh objects, so callers may sort, filter, or
 * annotate the result without altering the module's own table.
 *
 * @returns {Array<{ id: string, description: string, complexity: string }>}
 */
export function getCoverageTargets() {
  return COVERAGE_TARGETS.map(target => ({ ...target }));
}
package/concept-mapper.js DELETED
@@ -1,127 +0,0 @@
1
- /**
2
- * Concept Mapper — re-rank corpus patterns using analyzer signals.
3
- *
4
- * The existing `searchBlocks(query)` does pure keyword matching against
5
- * pattern.keywords + pattern.description. That's necessary but not sufficient
6
- * — a prompt about a "user dashboard" might lexically prefer a pattern named
7
- * "user-profile" over "admin-dashboard" even though the latter is a better
8
- * conceptual match.
9
- *
10
- * This module takes the structured analysis produced by prompt-analyzer.js
11
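- * and combines FOUR signals into a final pattern score:
12
- *
13
- * lexical: the original keyword score from searchBlocks (steelman → keywords)
14
- * conceptual: overlap between analysis.concepts and the pattern's purpose,
- * layout, and interaction tags plus its keywords
15
- * structural: overlap between analysis.impliedComponents and the set of
16
- * component types referenced in the pattern's template
- * semantic: cosine similarity between the query embedding and the pattern
- * embedding, used only when the embedding index is available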
17
- *
18
- * Patterns missing tags fall back to lexical-only scoring (graceful
19
- * degradation; the older patterns in the corpus haven't all been retagged).
20
- */
21
-
22
- import { searchBlocks } from '../engine/reference.js';
23
- import { scoreAll as embeddingScoreAll, available as embeddingAvailable } from './embedding/embedding-retriever.js';
24
-
25
/**
 * Weights for the combined score.
 *
 * Tuned to keep lexical authoritative but let strong conceptual, structural,
 * and semantic signals override marginal lexical differences. Embedding
 * similarity is a cosine in [-1, 1], so its weight is high (30): a cosine of
 * 0.8 contributes 24 points, comparable to 3 concept-tag hits. Tuned to fix
 * the "product card → ticket form" keyword-collision class of failure.
 *
 * Frozen because the table is shared by every ranking call.
 */
const WEIGHTS = Object.freeze({
  lexical: 1.0, // baseline
  conceptual: 8, // each matching concept-tag adds 8 points
  structural: 1.5, // each matching component-signature element adds 1.5 points
  semantic: 30, // cosine(query_embedding, pattern_embedding) × 30
});
37
-
38
/**
 * Re-rank pattern matches using the analyzer's structured signals.
 *
 * Starts from the lexical hits returned by searchBlocks() and adds weighted
 * conceptual, structural, and (when available) semantic scores to each.
 * Malformed hits are skipped, and a lexical score that is not a finite number
 * counts as 0 so it cannot poison the sort.
 *
 * @param {object} opts
 * @param {object} opts.analysis — Output of analyzePrompt() from prompt-analyzer.js
 * @param {string} [opts.domain] — Domain hint (passed through to searchBlocks)
 * @param {number} [opts.limit=10] — Cap on returned results; negative values return none
 * @returns {Promise<Array<{ pattern: object, score: number, breakdown: object }>>}
 *   Patterns ranked by combined score, descending.
 */
export async function rankByConceptAndSignature({ analysis, domain, limit = 10 }) {
  if (!analysis) return [];

  // Use the steelmanned brief for lexical search — it's denser than the raw
  // intent and surfaces keywords the user implied but didn't say.
  const query = analysis.steelman || analysis.raw;
  const lexicalHits = searchBlocks(query, { domain });
  if (!Array.isArray(lexicalHits) || lexicalHits.length === 0) return [];

  // String() keeps a stray non-string concept from aborting the ranking.
  const concepts = Array.isArray(analysis.concepts) ? analysis.concepts : [];
  const conceptSet = new Set(concepts.map(c => String(c).toLowerCase()));
  const componentSet = new Set(
    Array.isArray(analysis.impliedComponents) ? analysis.impliedComponents : []
  );

  // Semantic channel — only run when the index + provider are both available.
  // Anything other than a Map<patternName, cosineSim> is treated as "no scores".
  const semanticResult = (await embeddingAvailable())
    ? await embeddingScoreAll(query)
    : null;
  const semanticMap = semanticResult instanceof Map ? semanticResult : new Map();

  const ranked = [];
  for (const hit of lexicalHits) {
    // Different shapes of hit objects in the corpus — be permissive.
    const pattern = hit?.pattern || hit;
    if (!pattern) continue;

    const rawLexical = Number(hit.score ?? hit.confidence ?? 1);
    const lexicalScore = Number.isFinite(rawLexical) ? rawLexical : 0;

    const conceptScore = scoreConceptOverlap(pattern, conceptSet);
    const structuralScore = scoreSignatureOverlap(pattern, componentSet);
    // Clamp negative cosines to 0 — no retrieval value in "most anti-similar".
    const cosine = Number(semanticMap.get(pattern.name));
    const semanticScore = Number.isFinite(cosine) ? Math.max(0, cosine) : 0;

    const lexical = WEIGHTS.lexical * lexicalScore;
    const conceptual = WEIGHTS.conceptual * conceptScore;
    const structural = WEIGHTS.structural * structuralScore;
    const semantic = WEIGHTS.semantic * semanticScore;

    ranked.push({
      pattern,
      score: lexical + conceptual + structural + semantic,
      breakdown: {
        lexical: +lexical.toFixed(2),
        conceptual: +conceptual.toFixed(2),
        structural: +structural.toFixed(2),
        semantic: +semantic.toFixed(2),
      },
    });
  }

  ranked.sort((a, b) => b.score - a.score);
  // Math.max keeps a negative limit from slicing entries off the tail.
  return ranked.slice(0, Math.max(0, limit));
}
97
-
98
/**
 * Count how many of the analysis concepts appear in the pattern's tag system.
 *
 * The pattern's vocabulary is the union of its purpose, layout, and
 * interaction tags plus its keywords, compared case-insensitively. Each of
 * those may be an array or a single value. The concepts are expected to be
 * lowercased already.
 *
 * @param {object} pattern — Pattern with optional `tags` and `keywords`
 * @param {Set<string>} conceptSet — Lowercased concepts from the analysis
 * @returns {number} Number of concepts found in the pattern's vocabulary
 */
function scoreConceptOverlap(pattern, conceptSet) {
  if (!pattern || conceptSet.size === 0) return 0;
  const tags = pattern.tags || {};
  // A Set makes each membership test constant-time instead of rescanning the
  // whole tag list for every concept.
  const vocabulary = new Set(
    [tags.purpose, tags.layout, tags.interaction, pattern.keywords]
      .flatMap(group => group || [])
      // Skip null/undefined entries; they would stringify to "null" /
      // "undefined" and could match a concept with that literal name.
      .filter(term => term != null)
      .map(term => String(term).toLowerCase())
  );
  let hits = 0;
  for (const concept of conceptSet) {
    if (vocabulary.has(concept)) hits++;
  }
  return hits;
}
112
-
113
/**
 * Count how many of the implied components appear in the pattern's template.
 *
 * The template's signature is the set of distinct string `component` values
 * found on its nodes; the score is the number of implied components that are
 * members of that signature (exact, case-sensitive match).
 *
 * @param {object} pattern — Pattern whose `template` is an array of nodes
 * @param {Set<string>} componentSet — Component types implied by the analysis
 * @returns {number} Number of implied components present in the template
 */
function scoreSignatureOverlap(pattern, componentSet) {
  if (!pattern || componentSet.size === 0) return 0;

  const nodes = pattern.template;
  if (!Array.isArray(nodes)) return 0;

  // The signature is rebuilt on every call; templates are small, so this is
  // cheap. Cache it if this ever shows up in a profile.
  const signature = new Set(
    nodes
      .filter(node => typeof node?.component === 'string')
      .map(node => node.component)
  );

  return [...componentSet].filter(type => signature.has(type)).length;
}