npm - @adia-ai/a2ui-retrieval - Versions diffs - 0.4.5 → 0.4.7 - Mend

@adia-ai/a2ui-retrieval 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/CHANGELOG.md +26 -0
package/authoring/index.js +3 -7
package/component-catalog.js +111 -0
package/context-assembler.js +20 -14
package/embedding/index.js +5 -2
package/index.js +1 -1
package/intent/clarity.js +4 -4
package/intent/prompt-analyzer.js +16 -8
package/package.json +1 -1
package/authoring/pattern-promotion.js +0 -135
package/authoring/synthetic-data.js +0 -446
package/concept-mapper.js +0 -127
package/embedding/embedding-retriever.js +0 -152
package/pattern-library.js +0 -659

package/authoring/synthetic-data.js DELETED Viewed

@@ -1,446 +0,0 @@
-/**
- * SyntheticDataGenerator — Fills coverage gaps in training data by
- * generating A2UI JSON examples for uncovered patterns.
- *
- * Uses the LLM adapter to generate schemas, validates them via the
- * generative validator, scores quality via anti-pattern checks,
- * and stores results as training pairs (prompt -> schema).
- *
- * Spec: A003 section 6 — Synthetic Data Generation
- */
-import { getCatalog } from '../catalog.js';
-import { getAntiPatterns } from '../anti-patterns.js';
-import { getAllPatterns } from '../pattern-library.js';
-import { serializeEntry } from '../component-entry.js';
-// ── Coverage targets (spec section 6.2): analyzeCoverage() matches each `id` against existing pattern names; `description` is the text handed to generateOne() ──
-const COVERAGE_TARGETS = [
-  { id: 'sidebar-main', description: 'Sidebar navigation with main content area', complexity: 'medium' },
-  { id: 'modal-form', description: 'Modal dialog containing a form with validation', complexity: 'medium' },
-  { id: 'drawer-nav', description: 'Drawer panel with navigation links', complexity: 'medium' },
-  { id: 'toast-sequence', description: 'Sequence of toast notifications (success, error, info)', complexity: 'low' },
-  { id: 'tabs-content', description: 'Tabbed interface with different content per tab', complexity: 'medium' },
-  { id: 'accordion-faq', description: 'Accordion with FAQ-style question/answer pairs', complexity: 'low' },
-  { id: 'wizard-steps', description: 'Multi-step wizard with progress indicator and form fields', complexity: 'high' },
-  { id: 'data-grid', description: 'Data table with sort, filter, and pagination', complexity: 'high' },
-  { id: 'card-grid', description: 'Grid of cards with different content types', complexity: 'medium' },
-  { id: 'auth-form', description: 'Login form with email, password, and social buttons', complexity: 'medium' },
-  { id: 'profile-card', description: 'User profile card with avatar, name, stats, and actions', complexity: 'medium' },
-  { id: 'empty-state', description: 'Empty state with illustration, heading, and CTA button', complexity: 'low' },
-];
-// ── Default generation options; generateExamples() spreads caller options over these ──
-const DEFAULT_OPTIONS = {
-  temperature: 0.7, // NOTE(review): merged into opts but never read by the code visible here — confirm whether the LLM adapter should receive it
-  maxRetries: 2, // retries after the first attempt, i.e. up to maxRetries + 1 generateOne() calls per pattern
-  batchSize: 4, // number of gap ids generated concurrently per Promise.allSettled() batch
-};
-/**
- * SyntheticDataGenerator — generates A2UI training examples for coverage gaps.
- */
-export class SyntheticDataGenerator {
-  #llmAdapter;
-  #catalog;
-  #patternLibrary;
-  #validator;
-  #antiPatterns;
-  /**
-   * @param {object} deps
-   * @param {object} deps.llmAdapter — LLM adapter with complete() method
-   * @param {object} [deps.catalog] — Component catalog (defaults to built-in)
-   * @param {object} [deps.patternLibrary] — Pattern library (defaults to built-in)
-   * @param {object} [deps.validator] — Schema validator with validateSchema()
-   * @param {object} [deps.antiPatterns] — Anti-patterns checker
-   */
-  constructor({ llmAdapter, catalog, patternLibrary, validator, antiPatterns }) {
-    this.#llmAdapter = llmAdapter;
-    this.#catalog = catalog || null;
-    this.#patternLibrary = patternLibrary || null;
-    this.#validator = validator || null;
-    this.#antiPatterns = antiPatterns || null;
-  }
-  /**
-   * Analyze coverage gaps — which target patterns are missing from existing examples.
-   *
-   * @param {object[]} existingExamples — Array of { name, template } pattern objects
-   * @returns {{ covered: string[], missing: string[], coverage: number }}
-   */
-  analyzeCoverage(existingExamples) {
-    const existingNames = new Set(
-      (existingExamples || getAllPatterns()).map(p => p.name)
-    );
-    const covered = [];
-    const missing = [];
-    for (const target of COVERAGE_TARGETS) {
-      if (existingNames.has(target.id)) {
-        covered.push(target.id);
-      } else {
-        missing.push(target.id);
-      }
-    }
-    const total = COVERAGE_TARGETS.length;
-    const coverage = total === 0 ? 1 : covered.length / total;
-    return { covered, missing, coverage };
-  }
-  /**
-   * Generate synthetic examples for missing patterns.
-   *
-   * For each gap: builds a prompt, calls the LLM, validates, scores, and stores.
-   *
-   * @param {string[]} gaps — Pattern IDs to generate (from analyzeCoverage().missing)
-   * @param {object} [options]
-   * @param {string} [options.model] — Model override for the LLM adapter
-   * @param {number} [options.temperature] — Sampling temperature (default 0.7)
-   * @param {number} [options.maxRetries] — Max retries per pattern (default 2)
-   * @param {number} [options.batchSize] — Concurrent generation batch size (default 4)
-   * @returns {Promise<{ generated: object[], failed: string[], stats: object }>}
-   */
-  async generateExamples(gaps, options = {}) {
-    const opts = { ...DEFAULT_OPTIONS, ...options };
-    const generated = [];
-    const failed = [];
-    let totalTokens = 0;
-    let totalAttempts = 0;
-    // Process in batches
-    for (let i = 0; i < gaps.length; i += opts.batchSize) {
-      const batch = gaps.slice(i, i + opts.batchSize);
-      const results = await Promise.allSettled(
-        batch.map(gapId => this.#generateWithRetry(gapId, opts))
-      );
-      for (let j = 0; j < results.length; j++) {
-        const result = results[j];
-        const gapId = batch[j];
-        if (result.status === 'fulfilled' && result.value) {
-          generated.push(result.value);
-          totalTokens += result.value.tokenUsage || 0;
-          totalAttempts += result.value.attempts || 1;
-        } else {
-          failed.push(gapId);
-          totalAttempts += opts.maxRetries + 1;
-        }
-      }
-    }
-    return {
-      generated,
-      failed,
-      stats: {
-        total: gaps.length,
-        succeeded: generated.length,
-        failed: failed.length,
-        totalTokens,
-        totalAttempts,
-        averageQuality: generated.length > 0
-          ? generated.reduce((sum, g) => sum + g.quality.overall, 0) / generated.length
-          : 0,
-      },
-    };
-  }
-  /**
-   * Generate a single training pair for a pattern description.
-   *
-   * @param {string} patternDescription — Natural language description of the pattern
-   * @returns {Promise<{ prompt: string, schema: object[], quality: object }>}
-   */
-  async generateOne(patternDescription) {
-    const systemPrompt = await this.#buildSystemPrompt();
-    const userPrompt = this.#buildUserPrompt(patternDescription);
-    const response = await this.#llmAdapter.complete({
-      messages: [{ role: 'user', content: userPrompt }],
-      systemPrompt,
-    });
-    const schema = this.#parseResponse(response.content);
-    const quality = this.scoreExample(schema);
-    return {
-      prompt: patternDescription,
-      schema,
-      quality,
-      tokenUsage: (response.usage?.inputTokens || 0) + (response.usage?.outputTokens || 0),
-    };
-  }
-  /**
-   * Score a generated example against quality criteria.
-   *
-   * Uses the anti-patterns checker from the intelligence system to detect
-   * structural issues, missing props, anti-patterns, and unnecessary wrappers.
-   *
-   * @param {object[]} schema — A2UI messages array
-   * @returns {{ structural: number, completeness: number, idiomatic: number, minimal: number, overall: number }}
-   */
-  scoreExample(schema) {
-    // Collect all components across messages
-    const allComponents = [];
-    for (const msg of schema) {
-      if (msg.type === 'updateComponents' && Array.isArray(msg.components)) {
-        allComponents.push(...msg.components);
-      }
-    }
-    // Run validation if available
-    let validationIssues = [];
-    if (this.#validator) {
-      const validation = this.#validator(schema);
-      validationIssues = (validation.checks || []).filter(c => !c.passed);
-    }
-    // Run anti-pattern checks (HTML-based)
-    // Serialize components to a minimal HTML representation for pattern matching
-    const html = this.#componentsToHtml(allComponents);
-    const antiPatternChecks = this.#antiPatterns
-      ? this.#antiPatterns(html)
-      : [];
-    // Structural: no orphaned children, valid message format, root exists
-    const structuralIssues = validationIssues.filter(c =>
-      ['hasRootComponent', 'noOrphanedChildren', 'validMessageFormat', 'flatAdjacency'].includes(c.name)
-    );
-    const structural = structuralIssues.length === 0 ? 1 : 0.5;
-    // Completeness: text content set, all types registered
-    const completenessIssues = validationIssues.filter(c =>
-      ['textContentSet', 'allTypesRegistered'].includes(c.name)
-    );
-    const completeness = Math.max(0, 1 - (completenessIssues.length * 0.1));
-    // Idiomatic: no anti-patterns (bare divs, inline styles, wrong nesting)
-    const idiomaticViolations = antiPatternChecks.filter(ap =>
-      ['noBareDivs', 'noBareInputs', 'cardStructure', 'noInventedComponents'].includes(ap.name)
-    );
-    const idiomatic = idiomaticViolations.length === 0 ? 1 : 0.5;
-    // Minimal: no unnecessary wrappers or inline layout/colors
-    const minimalViolations = antiPatternChecks.filter(ap =>
-      ['noHardcodedColors', 'noInlineLayout'].includes(ap.name)
-    );
-    const minimal = minimalViolations.length === 0 ? 1 : 0.5;
-    // Overall weighted average
-    const overall = (structural * 0.3) + (completeness * 0.25) + (idiomatic * 0.25) + (minimal * 0.2);
-    return { structural, completeness, idiomatic, minimal, overall };
-  }
-  // ── Private helpers ──
-  /**
-   * Generate with retry logic.
-   * @param {string} gapId — Target pattern ID
-   * @param {object} opts — Generation options
-   * @returns {Promise<object|null>}
-   */
-  async #generateWithRetry(gapId, opts) {
-    const target = COVERAGE_TARGETS.find(t => t.id === gapId);
-    if (!target) return null;
-    let lastError;
-    for (let attempt = 0; attempt <= opts.maxRetries; attempt++) {
-      try {
-        const result = await this.generateOne(target.description);
-        // Require minimum quality threshold
-        if (result.quality.overall >= 0.5) {
-          return {
-            id: gapId,
-            ...result,
-            attempts: attempt + 1,
-          };
-        }
-        lastError = new Error(`Quality too low: ${result.quality.overall}`);
-      } catch (err) {
-        lastError = err;
-      }
-    }
-    throw lastError;
-  }
-  /**
-   * Build the system prompt with catalog, anti-patterns, and format rules.
-   * @returns {string}
-   */
-  async #buildSystemPrompt() {
-    const parts = [];
-    // Role
-    parts.push('You are an A2UI training data generator for the AdiaUI design system. Output ONLY a JSON array of A2UI messages.');
-    // Output format
-    parts.push('Output format: [{ "type": "updateComponents", "surfaceId": "default", "components": [...] }]');
-    parts.push('Components use flat adjacency: each has { id, component, children?: [string ids], ...props }.');
-    parts.push('The root component must have id "root".');
-    // Anti-patterns (rules)
-    const antiPatterns = getAntiPatterns();
-    if (antiPatterns.length > 0) {
-      const rules = antiPatterns.map(ap => `- ${ap.description}`).join('\n');
-      parts.push(`Rules:\n${rules}`);
-    }
-    // Available components (summary level)
-    const catalog = this.#catalog || await getCatalog();
-    const entries = catalog.entries || new Map();
-    if (entries.size > 0) {
-      const lines = [];
-      for (const entry of entries.values()) {
-        const serialized = serializeEntry(entry, 'index');
-        lines.push(`- ${serialized.type} (${serialized.tag}): ${serialized.description || ''}`);
-      }
-      parts.push(`Available components:\n${lines.join('\n')}`);
-    }
-    // Quality criteria
-    parts.push([
-      'Quality criteria:',
-      '- Structural: valid root, all children resolve, flat adjacency list',
-      '- Completeness: all Text components have textContent, all types are registered',
-      '- Idiomatic: use Card > Header + Section + Footer, no bare divs, no invented components',
-      '- Minimal: no inline styles, no hardcoded colors, use semantic props and variants',
-    ].join('\n'));
-    return parts.join('\n\n');
-  }
-  /**
-   * Build the user prompt for a specific pattern.
-   * @param {string} patternDescription
-   * @returns {string}
-   */
-  #buildUserPrompt(patternDescription) {
-    return [
-      `Generate an A2UI component tree for this UI pattern:`,
-      ``,
-      `Pattern: ${patternDescription}`,
-      ``,
-      `Requirements:`,
-      `- Use realistic content (names, values, labels)`,
-      `- Follow Card > Header + Section + Footer anatomy where appropriate`,
-      `- Use layout components (Row, Column, Grid) for composition`,
-      `- Include all necessary props (text, variant, label, placeholder, etc.)`,
-      `- Output valid JSON — no markdown, no explanation`,
-    ].join('\n');
-  }
-  /**
-   * Parse an LLM response into A2UI messages.
-   * Handles raw JSON, markdown code fences, bare component arrays.
-   *
-   * @param {string} content — Raw LLM response
-   * @returns {object[]}
-   */
-  #parseResponse(content) {
-    if (!content || typeof content !== 'string') {
-      return [];
-    }
-    let json = content.trim();
-    // Strip markdown code fences
-    const fenceMatch = json.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
-    if (fenceMatch) {
-      json = fenceMatch[1].trim();
-    }
-    try {
-      const parsed = JSON.parse(json);
-      // Array of messages
-      if (Array.isArray(parsed)) {
-        if (parsed.length > 0 && parsed[0].type === 'updateComponents') {
-          return parsed;
-        }
-        // Bare components array — wrap
-        if (parsed.length > 0 && parsed[0].id && parsed[0].component) {
-          return [{ type: 'updateComponents', surfaceId: 'default', components: parsed }];
-        }
-        return parsed;
-      }
-      // Single message object
-      if (parsed && typeof parsed === 'object' && parsed.type === 'updateComponents') {
-        return [parsed];
-      }
-      // Single component — wrap
-      if (parsed && typeof parsed === 'object' && parsed.id && parsed.component) {
-        return [{ type: 'updateComponents', surfaceId: 'default', components: [parsed] }];
-      }
-      return [];
-    } catch {
-      return [];
-    }
-  }
-  /**
-   * Convert a flat component list to a minimal HTML-like string for anti-pattern checking.
-   * The anti-patterns module uses regex/function checks on HTML strings.
-   *
-   * @param {object[]} components
-   * @returns {string}
-   */
-  #componentsToHtml(components) {
-    const byId = new Map(components.map(c => [c.id, c]));
-    const lines = [];
-    for (const comp of components) {
-      const type = comp.component;
-      if (!type) continue;
-      // Map A2UI types to their AdiaUI tag names for anti-pattern checking
-      const tag = type.toLowerCase().replace(/([a-z])([A-Z])/g, '$1-$2').toLowerCase();
-      const tagName = tag.endsWith('-ui') ? tag : `${tag}-ui`;
-      // Build a minimal HTML representation
-      const attrs = [];
-      if (comp.style) {
-        attrs.push(`style="${typeof comp.style === 'object' ? Object.entries(comp.style).map(([k, v]) => `${k}:${v}`).join(';') : comp.style}"`);
-      }
-      const attrStr = attrs.length > 0 ? ' ' + attrs.join(' ') : '';
-      // Represent nesting for structural checks
-      if (Array.isArray(comp.children)) {
-        const childTypes = comp.children
-          .map(id => byId.get(id))
-          .filter(Boolean)
-          .map(c => {
-            const ct = c.component?.toLowerCase().replace(/([a-z])([A-Z])/g, '$1-$2').toLowerCase() || '';
-            return ct.endsWith('-ui') ? ct : ct;
-          });
-        lines.push(`<${tagName}${attrStr}>${childTypes.map(ct => `<${ct}>`).join('')}</${tagName}>`);
-      } else {
-        lines.push(`<${tagName}${attrStr}></${tagName}>`);
-      }
-    }
-    return lines.join('\n');
-  }
-}
-/**
- * Get the list of coverage targets.
- * @returns {Array<{ id: string, description: string, complexity: string }>}
- */
-export function getCoverageTargets() {
-  return [...COVERAGE_TARGETS];
-}

package/concept-mapper.js DELETED Viewed

@@ -1,127 +0,0 @@
-/**
- * Concept Mapper — re-rank corpus patterns using analyzer signals.
- *
- * The existing `searchBlocks(query)` does pure keyword matching against
- * pattern.keywords + pattern.description. That's necessary but not sufficient
- * — a prompt about a "user dashboard" might lexically prefer a pattern named
- * "user-profile" over "admin-dashboard" even though the latter is a better
- * conceptual match.
- *
- * This module takes the structured analysis produced by prompt-analyzer.js
- * and combines THREE signals into a final pattern score:
- *
- *   lexical:   the original keyword score from searchBlocks (steelman → keywords)
- *   conceptual: overlap between analysis.concepts and the pattern's tags (purpose, layout, interaction) plus its keywords
- *   structural: overlap between analysis.impliedComponents and the set of
- *               component types referenced in the pattern's template
- *
- * Patterns missing tags simply earn no tag-based concept hits and are ranked on
- * their remaining signals (graceful degradation; the older patterns in the
- * corpus haven't all been retagged).
- */
-import { searchBlocks } from '../engine/reference.js';
-import { scoreAll as embeddingScoreAll, available as embeddingAvailable } from './embedding/embedding-retriever.js';
-/** Weights for the combined score. Tuned to keep lexical authoritative
- *  but let strong conceptual+structural+semantic signals override marginal
- *  lexical differences. Embedding semantic similarity scores are in [-1, 1]
- *  (negative values are clamped to 0 before weighting), so their weight is
- *  high (30) — cosine of 0.8 → 24 points, comparable to 3 concept-tag hits.
- *  Tuned to fix the "product card → ticket form" keyword-collision class
- *  of failure. */
-const WEIGHTS = {
-  lexical:    1.0,   // baseline: multiplies the hit's own score/confidence (1 when the hit carries neither)
-  conceptual: 8,     // each matching concept-tag adds 8 points
-  structural: 1.5,   // each matching component-signature element adds 1.5 points
-  semantic:   30,    // max(0, cosine(query_embedding, pattern_embedding)) × 30
-};
-/**
- * Re-rank pattern matches using the analyzer's structured signals.
- *
- * @param {object} opts
- * @param {object} opts.analysis — Output of analyzePrompt() from prompt-analyzer.js
- * @param {string} [opts.domain] — Domain hint (passed through to searchBlocks)
- * @param {number} [opts.limit=10] — Cap on returned results
- * @returns {Array<{ pattern: object, score: number, breakdown: object }>}
- *          Patterns ranked by combined score, descending.
- */
-export async function rankByConceptAndSignature({ analysis, domain, limit = 10 }) {
-  if (!analysis) return [];
-  // Use the steelmanned brief for lexical search — it's denser than the raw
-  // intent and surfaces keywords the user implied but didn't say.
-  const query = analysis.steelman || analysis.raw;
-  const lexicalHits = searchBlocks(query, { domain });
-  if (!Array.isArray(lexicalHits) || lexicalHits.length === 0) return [];
-  const conceptSet = new Set((analysis.concepts || []).map(c => c.toLowerCase()));
-  const componentSet = new Set(analysis.impliedComponents || []);
-  // Semantic channel — only run when the index + provider are both available.
-  // Returns a Map<patternName, cosineSim>. Silent no-op otherwise.
-  const semanticMap = (await embeddingAvailable())
-    ? await embeddingScoreAll(query)
-    : new Map();
-  const ranked = lexicalHits.map(hit => {
-    // Different shapes of hit objects in the corpus — be permissive.
-    const pattern = hit.pattern || hit;
-    const lexicalScore = hit.score ?? hit.confidence ?? 1;
-    const conceptScore = scoreConceptOverlap(pattern, conceptSet);
-    const structuralScore = scoreSignatureOverlap(pattern, componentSet);
-    // Clamp negative cosines to 0 — no retrieval value in "most anti-similar".
-    const semanticScore = Math.max(0, semanticMap.get(pattern.name) || 0);
-    const combined =
-      WEIGHTS.lexical * lexicalScore +
-      WEIGHTS.conceptual * conceptScore +
-      WEIGHTS.structural * structuralScore +
-      WEIGHTS.semantic * semanticScore;
-    return {
-      pattern,
-      score: combined,
-      breakdown: {
-        lexical:    +(WEIGHTS.lexical * lexicalScore).toFixed(2),
-        conceptual: +(WEIGHTS.conceptual * conceptScore).toFixed(2),
-        structural: +(WEIGHTS.structural * structuralScore).toFixed(2),
-        semantic:   +(WEIGHTS.semantic * semanticScore).toFixed(2),
-      },
-    };
-  });
-  ranked.sort((a, b) => b.score - a.score);
-  return ranked.slice(0, limit);
-}
-/** Count how many of the analysis concepts appear in the pattern's tag system. */
-function scoreConceptOverlap(pattern, conceptSet) {
-  if (!pattern || conceptSet.size === 0) return 0;
-  const tags = pattern.tags || {};
-  const flat = []
-    .concat(tags.purpose || [])
-    .concat(tags.layout || [])
-    .concat(tags.interaction || [])
-    .concat(pattern.keywords || [])
-    .map(t => String(t).toLowerCase());
-  let hits = 0;
-  for (const c of conceptSet) if (flat.includes(c)) hits++;
-  return hits;
-}
-/** Count how many of the implied components appear in the pattern's template. */
-function scoreSignatureOverlap(pattern, componentSet) {
-  if (!pattern || componentSet.size === 0) return 0;
-  const template = pattern.template;
-  if (!Array.isArray(template)) return 0;
-  // Build the pattern's component signature once per call; small templates
-  // mean the cost is negligible. Cache later if hot.
-  const sig = new Set();
-  for (const node of template) {
-    if (node && typeof node.component === 'string') sig.add(node.component);
-  }
-  let hits = 0;
-  for (const c of componentSet) if (sig.has(c)) hits++;
-  return hits;
-}