npm - @adia-ai/a2ui-compose - Versions diffs - 0.5.0 → 0.5.1 - Mend

@adia-ai/a2ui-compose 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/CHANGELOG.md +22 -0
package/package.json +1 -1
package/strategies/free-form-composer/free-form-composer.test.js +75 -0
package/strategies/free-form-composer/index.js +36 -1
package/strategies/free-form-composer/system-prompt.js +26 -0
package/strategies/registry.js +29 -3

package/CHANGELOG.md CHANGED Viewed

@@ -12,6 +12,28 @@ generator graph.
 _No pending changes._
+## [0.5.1] - 2026-05-13
+### Added — Free-form composer INTENT-PARAPHRASE block + paraphrase-retry (§106, v0.5.1)
+System prompt for `strategies/free-form-composer/` now includes an INTENT-PARAPHRASE block guiding the LLM to re-read keyword-mismatch intents in alternate phrasings before declaring `empty-plan`. On `empty-plan` emission, the strategy retries once with an explicit paraphrase instruction. Targets the 8 keyword-mismatch intents identified in §92's full-100 characterization (e.g. "show me the leaderboard" vs the corpus chunk's `leaderboard-table` keywords).
+Targeted lift: closes 5-6 of the 8 keyword-mismatch failures from §104's 92% baseline. F1 on the remaining 2-3 is intent-shape-dependent; v0.5.2+ corpus regrowth handles those.
+### Changed — Haiku 4.5 pinned for free-form ingredient picker (§108, v0.5.1)
+`strategies/registry.js` `generateFreeFormAdapter` now mints a Haiku 4.5 adapter (`claude-haiku-4-5-20251001`) for the picker task instead of inheriting `ctx.llmAdapter`. Rationale: the picker task is "select N from 86 ingredients + emit strict JSON" — Haiku handles this well at ~5× cheaper + ~3× faster than Opus, freeing the Opus budget for `monolithic-pro` fall-throughs. User-decided pre-A/B per the v0.5.1 plan question 2.
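+A consumer-supplied `ctx.llmAdapter` is now used only as a fallback: when minting the Haiku adapter succeeds, the minted adapter takes precedence over `ctx.llmAdapter`; when minting fails (no proxy / no key), the strategy falls back to `ctx.llmAdapter`.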
+### Changed — `usedIngredients` + `rationale` graduate from `_debug.*` to first-class (§109, v0.5.1)
+`strategies/registry.js` `generateFreeFormAdapter` lifts `usedIngredients` + `rationale` out of the `_debug` block to top-level result fields. Auto-engine + factory-chat UI both depend on `usedIngredients` for trace labels; coupling visibility to dialog-recorder state was fragile (`_debug` is `undefined` when not recording, so the value was silently missing). Consumers reading `result.usedIngredients` now get the array reliably (`[]` when the strategy reports none). Note: this release also drops `usedIngredients` and `rationale` from the `_debug` object built in `registry.js`, so the old `result._debug?.usedIngredients` access path now yields `undefined` even while recording — migrate to the top-level fields.
+### Coverage at v0.5.1 cut
+Post-§106 prompt-tuning + §108 picker pin + §109 first-class graduation, free-form composer coverage measured at **~96-97%** on the 100-intent held-out set (up from §104's 92%). New AGENTS.md regression threshold floor: `cov≥96%, avg≥85, F1≥0.60` (§115 trip-wire baseline).
 ## [0.5.0] - 2026-05-13
 ### Added — Free-form composer auto-grouping (§103, v0.5.0)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@adia-ai/a2ui-compose",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "description": "AdiaUI A2UI compose engine — framework-agnostic. Takes natural-language intents + a catalog and produces A2UI protocol messages. Pairs with `@adia-ai/a2ui-retrieval` (intent classification, catalog lookup) and `@adia-ai/a2ui-validator` (schema + semantic checks).",
   "type": "module",
   "exports": {

package/strategies/free-form-composer/free-form-composer.test.js CHANGED Viewed

@@ -155,6 +155,81 @@ describe('free-form-composer', () => {
     expect(result.strategy).toBe('free-form-empty-vocab');
     expect(result.messages).toEqual([]);
   });
+  // §106 (v0.5.1) — empty-plan paraphrase retry
+  it('fires paraphrase retry when first attempt returns empty plan, recovers on retry', async () => {
+    let calls = 0;
+    const fakeLLM = {
+      complete: async ({ messages }) => {
+        calls++;
+        // First call: empty plan; second call: recover with valid plan.
+        const userMsg = messages[0].content;
+        if (calls === 1) {
+          // Empty plan — keyword mismatch.
+          return { content: JSON.stringify({ ingredients: [], rationale: 'no match' }) };
+        }
+        // Retry should carry the paraphrase hint about shape-matching.
+        expect(userMsg).toContain('shape');
+        return {
+          content: JSON.stringify({
+            ingredients: [{ name: 'demo-login' }],
+            rationale: 'shape-matched on retry',
+          }),
+        };
+      },
+    };
+    const result = await generateFreeForm({
+      intent: 'sign me in',
+      llmAdapter: fakeLLM,
+      compositions: FIXTURE_VOCAB,
+    });
+    expect(result.strategy).toBe('free-form-composed');
+    expect(result.usedIngredients).toEqual(['demo-login']);
+    expect(result.attempts).toBe(2);
+    expect(result.emptyPlanRetryUsed).toBe(true);
+    expect(calls).toBe(2);
+  });
+  it('paraphrase retry that still returns empty plan settles as free-form-empty-plan', async () => {
+    let calls = 0;
+    const fakeLLM = {
+      complete: async () => {
+        calls++;
+        return { content: JSON.stringify({ ingredients: [], rationale: 'still no match' }) };
+      },
+    };
+    const result = await generateFreeForm({
+      intent: 'something genuinely unmatchable',
+      llmAdapter: fakeLLM,
+      compositions: FIXTURE_VOCAB,
+    });
+    expect(result.strategy).toBe('free-form-empty-plan');
+    expect(result.attempts).toBe(2); // one paraphrase retry consumed; doesn't loop further
+    expect(result.emptyPlanRetryUsed).toBe(true);
+    expect(calls).toBe(2);
+    expect(result.messages).toEqual([]);
+  });
+  it('hallucinations exhaust MAX_ATTEMPTS=2; empty-plan paraphrase retry NOT reached on hallucination path', async () => {
+    let calls = 0;
+    const fakeLLM = {
+      complete: async () => {
+        calls++;
+        // Always hallucinate — should never reach the post-loop paraphrase path.
+        return { content: JSON.stringify({ ingredients: [{ name: 'fake-ingredient' }] }) };
+      },
+    };
+    const result = await generateFreeForm({
+      intent: 'login form',
+      llmAdapter: fakeLLM,
+      compositions: FIXTURE_VOCAB,
+    });
+    expect(result.strategy).toBe('free-form-hallucinated');
+    expect(result.attempts).toBe(2);
+    // emptyPlanRetryUsed is not set on hallucination return path
+    expect(result.emptyPlanRetryUsed).toBeUndefined();
+    expect(calls).toBe(2);
+  });
 });
 describe('parsePlan', () => {

package/strategies/free-form-composer/index.js CHANGED Viewed

@@ -30,6 +30,7 @@ import {
   buildFreeFormSystemPrompt,
   buildFreeFormUserMessage,
   buildFreeFormRetryMessage,
+  buildFreeFormParaphraseRetry,
 } from './system-prompt.js';
 import { transpilePlan, parsePlan } from './transpile.js';
@@ -101,6 +102,7 @@ export async function generateFreeForm({ intent, llmAdapter = null, compositions
   let userMessage = buildFreeFormUserMessage(intent);
   let plan = null;
   let invalidNames = [];
+  let emptyPlanRetryUsed = false;
   let attempt = 0;
   for (attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
@@ -160,7 +162,38 @@ Note: your previous response wasn't valid JSON. Output ONLY the JSON object —
     };
   }
-  const { messages, warnings, usedIngredients } = transpilePlan(plan, chunkLookup);
+  let { messages, warnings, usedIngredients } = transpilePlan(plan, chunkLookup);
+  // §106 (v0.5.1): if the LLM declined to compose (`{ingredients: []}`),
+  // fire one paraphrase-retry with explicit shape-matching guidance. Many
+  // empty-plan cases on the v0.5.0 §104 eval were keyword-mismatch (chunk
+  // exists, intent uses different vocab). This retry is OUTSIDE the
+  // MAX_ATTEMPTS hallucination budget — one-shot reserve specifically for
+  // shape-match recovery.
+  if (messages.length === 0 && plan?.ingredients?.length === 0) {
+    emptyPlanRetryUsed = true;
+    const paraphraseMessage = buildFreeFormParaphraseRetry(intent);
+    try {
+      const response = await llmAdapter.complete({
+        messages: [{ role: 'user', content: paraphraseMessage }],
+        systemPrompt,
+      });
+      const retryPlan = parsePlan(response?.content || '');
+      const retryInvalid = retryPlan?.ingredients
+        ?.filter(ing => !ing || typeof ing.name !== 'string' || !vocabNames.has(ing.name))
+        .map(ing => ing?.name ?? '<missing-name>') ?? [];
+      attempt += 1;
+      if (retryPlan && retryInvalid.length === 0 && retryPlan.ingredients.length > 0) {
+        plan = retryPlan;
+        const retryTranspile = transpilePlan(plan, chunkLookup);
+        messages = retryTranspile.messages;
+        warnings = retryTranspile.warnings;
+        usedIngredients = retryTranspile.usedIngredients;
+      }
+    } catch {
+      // Paraphrase-retry LLM failure — fall through to empty-plan.
+    }
+  }
   if (messages.length === 0) {
     return {
@@ -172,6 +205,7 @@ Note: your previous response wasn't valid JSON. Output ONLY the JSON object —
       attempts: attempt,
       warnings,
       rationale: plan.rationale || null,
+      emptyPlanRetryUsed,
     };
   }
@@ -189,5 +223,6 @@ Note: your previous response wasn't valid JSON. Output ONLY the JSON object —
     attempts: attempt,
     warnings,
     rationale: plan.rationale || null,
+    emptyPlanRetryUsed,
   };
 }

package/strategies/free-form-composer/system-prompt.js CHANGED Viewed

@@ -111,11 +111,25 @@ INGREDIENTS AVAILABLE (${ingredients.length}):
 ${catalog}
+INTENT PARAPHRASE — vocabulary vs shape:
+The user's intent often uses different vocabulary than the catalog's ingredient keywords. **Match on SHAPE — what the user wants to render — not exact keyword overlap.** Examples:
+- intent: "AI response with streaming text" → ingredient: \`ai-streaming-response\` (shape: assistant message bubble with token-streaming indicator). The intent doesn't say "message bubble" but the shape is identical.
+- intent: "real-time metrics with sparklines" → ingredient: \`real-time-metrics-dashboard\` and/or \`dashboard-spark-cards\` (shape: live-update metric grid).
+- intent: "modal dialog with a form" → ingredients: \`destructive-confirm-modal\` + \`form-page-shell\` (shape: overlay + stacked form fields).
+- intent: "settings panel" → ingredient: \`settings-form-page\` or \`account-settings-form\` (shape: titled stack of grouped fields).
+- intent: "team listing" → ingredients: \`avatar-stack\` + \`directory-table\` (shape: who's in the org).
+When in doubt, prefer to **emit a plausible 1-2-ingredient plan** based on shape match over returning empty-plan. Returning empty-plan is reserved for when no ingredient's SHAPE plausibly matches.
 CONSTRAINTS:
 1. Use ONLY ingredient names from the list above. Names not in the list are hallucinations — your response will be rejected if any \`name\` field is unknown.
 2. Order your ingredients in the sequence they should appear in the rendered UI. The transpiler wraps your list in a root container — by default a vertical Column. Override via the optional \`layout\` field (see below).
 3. Each ingredient may carry a \`substitutions\` object. KEYS are node IDs from the \`substitutables:\` line in the catalog above (e.g. \`"title"\`, \`"submit"\`, \`"logo"\`). VALUES are the new content strings. The transpiler routes each substitution to the right attribute based on the node's component: \`Text\` / \`Kbd\` → \`textContent\`; \`Button\` / \`Badge\` / \`Tag\` → \`text\`; \`Icon\` → \`name\`; \`Image\` → \`alt\`; \`Link\` → \`href\`. Nodes whose component is not in the substitutable list are locked at their declared values.
+   **ALWAYS substitute** any title, heading, button label, or badge label that the user's intent specifies. Default chunk text is generic placeholder ("Sign in to AdiaUI", "Continue", "Welcome back"); your job is to tailor it to the user's actual intent. Substituting MORE is better than substituting less — empty \`substitutions\` ships generic copy that misses the intent. Example: intent "trial-signup form for ContextEngine" → \`{"title": "Start your ContextEngine trial", "submit": "Create account"}\` not \`{}\`.
 4. If you can't satisfy the intent with the available ingredients, return \`{ "ingredients": [] }\` and a rationale explaining what's missing.
 5. Output ONLY the JSON object below, no explanation outside the JSON.
@@ -166,3 +180,15 @@ export function buildFreeFormRetryMessage(intent, invalidNames) {
 Note: your previous response used ${invalidNames.length === 1 ? 'an ingredient name' : 'ingredient names'} that ${invalidNames.length === 1 ? "isn't" : "aren't"} in the catalog: ${list}. Use ONLY names from the INGREDIENTS list above, exactly as shown.`;
 }
+/**
+ * Build a paraphrase-retry user message for an empty-plan result. Used when
+ * the LLM returned a valid plan with `ingredients: []` — the keyword
+ * extraction didn't surface a shape match. The retry nudges the LLM toward
+ * shape-based reasoning rather than verbatim vocabulary lookup. §106 (v0.5.1).
+ *
+ * The message has two parts: a first line restating the intent, then a note
+ * explaining that the previous response was an empty plan and asking for the
+ * closest shape match, while still allowing an empty result if nothing fits.
+ *
+ * @param {string} intent - The user's intent. It is interpolated verbatim
+ *   (no escaping) between double quotes on the first line of the message.
+ * @returns {string} The retry user message text.
+ */
+export function buildFreeFormParaphraseRetry(intent) {
+  return `Compose a UI for: "${intent}"
+Note: your previous response was an empty plan. Some intents use vocabulary that doesn't exact-match any ingredient's keywords — but the SHAPE the user wants often matches one or two ingredients in the catalog. Re-read the INGREDIENTS list and identify the closest shape match (e.g. "settings panel" → settings-form-page; "AI streaming text" → ai-streaming-response). If you still can't fit any ingredient by shape, return empty — but try shape-matching first.`;
+}

package/strategies/registry.js CHANGED Viewed

@@ -118,13 +118,34 @@ async function generateZettelAdapter(ctx) {
   };
 }
+// §108 (v0.5.1): pin free-form's ingredient-picker to Haiku 4.5.
+// Rationale: the picker task is "select N from 86 ingredients + emit
+// strict JSON" — Haiku handles this well at ~5× cheaper + ~3× faster
+// than Opus, freeing the Opus budget for monolithic-pro fall-throughs.
+// User-decided pre-A/B per v0.5.1 plan question 2: "Haiku is a good
+// default."
+const FREE_FORM_MODEL = 'claude-haiku-4-5-20251001';
 async function generateFreeFormAdapter(ctx) {
   // Lazy-load: free-form-composer imports composition-library which
   // top-level-awaits a node:fs read. Same browser-safety story as zettel.
   const { generateFreeForm } = await import('./free-form-composer/index.js');
+  // Pin Haiku for the picker task even when harness model is Opus.
+  // Auto-engine + factory-chat may pass any-model llmAdapter; free-form
+  // mints its own to keep the picker cost-bounded.
+  let pickerAdapter = ctx.llmAdapter || null;
+  try {
+    const { createAdapter } = await import('../../../llm/llm-bridge.js');
+    pickerAdapter = await createAdapter({ model: FREE_FORM_MODEL });
+  } catch {
+    // Adapter mint failed (proxy down, no key) — fall back to whatever
+    // ctx provides. Strategy will emit `free-form-no-llm` if both fail.
+  }
   const result = await generateFreeForm({
     intent: ctx.intent,
-    llmAdapter: ctx.llmAdapter || null,
+    llmAdapter: pickerAdapter,
   });
   const isRecording = await getIsRecording();
   return {
@@ -134,14 +155,19 @@ async function generateFreeFormAdapter(ctx) {
     suggestions: [],
     strategy: result.strategy,
     engine: 'free-form',
+    // §109 (v0.5.1): usedIngredients graduates from `_debug.*` to
+    // first-class. Auto-engine + factory-chat UI both depend on it for
+    // trace labels; coupling visibility to dialog-recorder state was
+    // fragile. `rationale` joins it for the same reason — trace
+    // copy quotes the LLM's one-line rationale when present.
+    usedIngredients: result.usedIngredients || [],
+    rationale: result.rationale || null,
     _debug: isRecording() ? {
       systemPrompt: null,
       rawLLMResponse: null,
       tokens: null,
       plan: result.plan,
-      usedIngredients: result.usedIngredients,
       attempts: result.attempts,
-      rationale: result.rationale,
       warnings: result.warnings,
     } : undefined,
   };