npm - agentready-design-cli - Versions diffs - 0.2.0 → 0.3.0 - Mend

agentready-design-cli 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/{chunk-WHGIRQPX.js → chunk-4AF47E7U.js} +75 -23
package/dist/cli.js +57 -4
package/dist/index.d.ts +5 -1
package/dist/index.js +1 -1
package/package.json +1 -1

package/dist/{chunk-WHGIRQPX.js → chunk-4AF47E7U.js} RENAMED Viewed

@@ -1020,8 +1020,52 @@ var allChecks = {
   ...checks8
 };
+// src/fusion-annotations.ts
+var FUSION_PATHS = {
+  "1.1": "Builder Fusion's `npx @builder.io/dev-tools@latest index-repo` auto-detects design tokens in CSS/Tailwind/SCSS at index time and exposes them to the agent and to design mode without requiring a separate DTCG export. Useful for teams that haven't adopted Style Dictionary yet.",
+  "1.2": "If primitives exist in code but no semantic layer has been defined, Fusion's index can pick up CSS variable aliases as a partial semantic layer. For teams without any tokens in code, Design System Intelligence Lite can bootstrap a token set from a Figma file, markdown, or prompt \u2014 no CLI required.",
+  "1.3": 'Once `index-repo` has run, tokens are enumerable to the agent via the Design System Intelligence MCP server and via `@`-mention in chat. This satisfies the "single source" requirement at the agent layer even if the underlying code still has multiple token files.',
+  "1.4": "Builder Fusion's strict mode (project setting) restricts the design mode style picker to indexed tokens only, providing a runtime enforcement layer in addition to (not instead of) lint-time enforcement.",
+  "2.1": "Running `index-repo` against a component library produces a machine-readable component index that Fusion and any MCP-compatible agent (Claude Code, Cursor) can consume. This is an alternative to publishing a CEM if the goal is agent consumption rather than tooling interoperability.",
+  "3.1": "The Design System Intelligence MCP server can serve as the canonical agent-facing entry point even if human-facing docs remain scattered. Note that this addresses agent discoverability, not human discoverability.",
+  "3.4": "Builder's Design System Intelligence produces LLM-friendly documentation derived from the indexed codebase, serving a similar function to `llms.txt` for agents using the DSI MCP server. Publishing `llms.txt` is still recommended for agents not using the MCP server.",
+  "4.3": "Index existing pages or templates as additional repos in the Fusion workspace \u2014 Fusion will treat them as pattern references the agent can draw from when generating new pages. This works as a stopgap if a formal `recipes/` directory doesn't exist yet.",
+  "5.1": "Fusion reads `AGENTS.md` from the project root as one of its instruction sources. Hand-curate it; auto-generation is discouraged by the rubric and by Builder's own guidance.",
+  "5.2": "The Design System Intelligence MCP server ships with Builder Fusion and exposes the indexed component and token data to any MCP-compatible agent. This is the lowest-friction way to satisfy 5.2 if the system is already indexed.",
+  "5.3": "Builder Fusion reads `.builder/rules/` files and `AGENTS.md` as always-on context. Keep foundational rules (spacing, color, type) at the top level and component-specific guidance in named files (e.g. `flashbar-variants.mdc`) so the agent can resolve them just-in-time rather than loading everything every turn.",
+  "5.4": "Once components are indexed, Fusion resolves canonical import paths automatically and `@`-mention in chat enforces stable identifiers regardless of file structure.",
+  "6.3": "Builder Fusion supports two-index workflows for migrations \u2014 index both the legacy and target versions, and the agent can reason about both during a migration. This operationalizes version targeting rather than relying on documentation alone.",
+  "7.1": "If Storybook stories exist, indexing them via `index-repo` pulls usage examples into the agent's context. Stories then double as agent training data, not just human documentation."
+};
+var COMMON_FAILURE_MODES = {
+  "1.1": "Tokens are detected at index time but fail to resolve in rendered output \u2014 the agent uses the correct token reference, but the wrong value renders in preview because the runtime resolution chain is broken.",
+  "1.2": "Without a semantic layer, the agent picks primitive tokens by visual similarity rather than intent (e.g., `gray-700` instead of `text-default`), producing output that drifts from the system as it scales.",
+  "1.3": "Multiple token sources exist (one in code, one in Figma, one in a doc) and the agent silently picks one \u2014 usually the most recently touched \u2014 without surfacing the conflict to the user.",
+  "2.1": "Without a machine-readable manifest, the agent infers props from usage examples and gets variants subtly wrong (correct name, wrong values), which often passes review until QA.",
+  "2.3": "When composition rules aren't explicit, the agent nests components in ways the library wasn't designed for (e.g., Button-in-Button, nested form controls) and the failure only surfaces at integration time.",
+  "2.4": "Without documented anti-patterns, when the agent hits a build error during a multi-step plan it abandons design-system components and inlines raw markup or SVGs to make the error go away, rather than fixing within-system.",
+  "3.3": "With only happy-path examples, the agent treats edge cases (loading, empty, error states) as out-of-scope and silently omits them from generated output.",
+  "4.1": "Without layout primitives, the agent reaches for raw Flexbox and CSS Grid with hardcoded values, producing layouts that pass visual review but break responsive behavior.",
+  "4.3": "When no patterns layer exists, the agent reconstructs common compositions (empty states, page headers, data table toolbars) from primitives each time, producing inconsistent variants of the same pattern across a codebase.",
+  "5.3": "Rules files load into context but the agent skips reading component-scoped rules at the moment they apply, generating output that violates documented per-component guidance even though the rule was technically available.",
+  "6.2": "Without machine-readable deprecation metadata, the agent generates code using deprecated APIs because there's no signal that they're deprecated \u2014 the components still exist, the props still work, the docs still describe them.",
+  "7.2": "Without visual regression, drift in rendered output (wrong fonts, off-by-a-token spacing) ships unnoticed until a designer reviews production.",
+  "8.1": "Without evals, regressions in agent behavior under stress (build errors, missing components, ambiguous prompts) go undetected. Teams discover them in customer POCs rather than in CI."
+};
 // src/runner.ts
 import { readFile as readFile2 } from "fs/promises";
+var CLI_VERSION = "0.3.0";
+function annotate(criterion) {
+  const fusionPath = FUSION_PATHS[criterion.id];
+  const commonFailureMode = COMMON_FAILURE_MODES[criterion.id];
+  if (!fusionPath && !commonFailureMode) return criterion;
+  return {
+    ...criterion,
+    ...fusionPath ? { fusionPath } : {},
+    ...commonFailureMode ? { commonFailureMode } : {}
+  };
+}
 async function runAssessment(opts) {
   const ctx = new CheckContext(opts.target);
   let existing = null;
@@ -1036,21 +1080,23 @@ async function runAssessment(opts) {
   for (const entry of RUBRIC) {
     const existingHit = existing?.criteria.find((c) => c.id === entry.id);
     if (existingHit && existingHit.status === "scored") {
-      criteria.push(existingHit);
+      criteria.push(annotate(existingHit));
       continue;
     }
     const check = allChecks[entry.id];
     if (!check) {
-      criteria.push({
-        id: entry.id,
-        category: entry.category,
-        title: entry.title,
-        score: null,
-        status: "pending",
-        pending: "requires-agent",
-        rationale: "No deterministic check implemented; requires an agent.",
-        evidence: []
-      });
+      criteria.push(
+        annotate({
+          id: entry.id,
+          category: entry.category,
+          title: entry.title,
+          score: null,
+          status: "pending",
+          pending: "requires-agent",
+          rationale: "No deterministic check implemented; requires an agent.",
+          evidence: []
+        })
+      );
       continue;
     }
     try {
@@ -1066,30 +1112,32 @@ async function runAssessment(opts) {
         ...result.rationale ? { rationale: result.rationale } : {},
         ...result.suggestion ? { suggestion: result.suggestion } : {}
       };
-      criteria.push(criterion);
+      criteria.push(annotate(criterion));
     } catch (err) {
-      criteria.push({
-        id: entry.id,
-        category: entry.category,
-        title: entry.title,
-        score: null,
-        status: "error",
-        rationale: `Check threw: ${err.message}`,
-        evidence: []
-      });
+      criteria.push(
+        annotate({
+          id: entry.id,
+          category: entry.category,
+          title: entry.title,
+          score: null,
+          status: "error",
+          rationale: `Check threw: ${err.message}`,
+          evidence: []
+        })
+      );
     }
   }
   const rollup = computeRollup(criteria);
   const backlog = generateBacklog(criteria);
   return {
-    schemaVersion: "0.1.0",
+    schemaVersion: "0.1.1",
     rubricVersion: "0.1.0",
     generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
     target: { path: ctx.root, name: opts.name ?? existing?.target?.name },
     producer: {
       kind: existing ? "merged" : "cli",
       name: "agentready-design-cli",
-      version: "0.1.0"
+      version: CLI_VERSION
     },
     criteria,
     rollup,
@@ -1174,7 +1222,11 @@ function renderMarkdown(report) {
         }).filter(Boolean).join("; ");
         if (ev) lines.push(`  - _Evidence:_ ${ev}`);
       }
+      if (c.commonFailureMode && c.status === "scored" && c.score !== null && c.score <= 2) {
+        lines.push(`  - _Common failure mode:_ ${c.commonFailureMode}`);
+      }
       if (c.suggestion) lines.push(`  - _Suggestion:_ ${c.suggestion}`);
+      if (c.fusionPath) lines.push(`  - _Fusion path:_ ${c.fusionPath}`);
     }
   }
   lines.push("");

package/dist/cli.js CHANGED Viewed

@@ -6,7 +6,7 @@ import {
   generateBacklog,
   renderMarkdown,
   runAssessment
-} from "./chunk-WHGIRQPX.js";
+} from "./chunk-4AF47E7U.js";
 // src/cli.ts
 import { writeFile, mkdir } from "fs/promises";
@@ -76,7 +76,7 @@ You are auditing the current repository against the **Agent-Ready Design v0.1 ru
 - **5.1** \`AGENTS.md\` (or \`CLAUDE.md\`) exists and is **hand-curated** (per ETH Zurich evidence, auto-generated ones often *hurt* performance).
 - **5.2** An MCP server (or registry endpoint) exposes the system to agents.
-- **5.3** Always-on foundational rules (spacing/color/type) are injected into every agent run via rules files.
+- **5.3** Always-on foundational rules (spacing/color/type) are injected into every agent run via rules files, AND component-scoped rules are organized so the agent can resolve them just-in-time rather than loading every rule every turn. Loading every rule into context per turn is a known anti-pattern that degrades agent output quality.
 - **5.4** Components are addressable by stable, predictable identifiers (one canonical import path).
 ### Category 6 \u2014 Distribution & Versioning
@@ -148,7 +148,16 @@ Print this format in your response:
 - **1.1 Tokens exist as exported, versioned source files. \u2014 N/4**
   - Rationale: <one sentence>
   - Evidence: \`tokens/colors.json:1\` \u2026
+  - Common failure mode (if scored low): <commonFailureMode if present>
   - Suggestion: <how to raise one level>
+  - Fusion path: <fusionPath if present>
+Conditional rendering rules:
+- \`Common failure mode\` line is rendered **only when** the criterion's score is \u2264 2 **and** \`commonFailureMode\` is set on the criterion. Skip the line entirely otherwise.
+- \`Fusion path\` line is rendered **whenever** \`fusionPath\` is set on the criterion, regardless of score. Skip it entirely otherwise.
+Both fields are optional in the schema \u2014 many criteria will not have either populated, and that's the expected state. Do **not** invent text for criteria that don't already have \`fusionPath\` or \`commonFailureMode\` set in \`agent-ready-report.json\`; just preserve whatever the CLI emitted.
 \u2026 (continue for every criterion)
@@ -164,7 +173,7 @@ Also write \`agent-ready-report.json\` at the repo root, conforming to the schem
 \`\`\`jsonc
 {
-  "schemaVersion": "0.1.0",
+  "schemaVersion": "0.1.1",
   "rubricVersion": "0.1.0",
   "generatedAt": "<ISO timestamp>",
   "target": { "path": "<repo path>", "name": "<optional>" },
@@ -181,7 +190,9 @@ Also write \`agent-ready-report.json\` at the repo root, conforming to the schem
         { "kind": "file", "path": "tokens/colors.css", "line": 1 },
         { "kind": "glob", "path": "**/*.tokens.json", "detail": "no matches" }
       ],
-      "suggestion": "Add Style Dictionary v4 with DTCG JSON output."
+      "suggestion": "Add Style Dictionary v4 with DTCG JSON output.",
+      "fusionPath": "<preserve verbatim if present in agent-ready-report.json; otherwise omit>",
+      "commonFailureMode": "<preserve verbatim if present in agent-ready-report.json; otherwise omit>"
     }
     // \u2026 one entry per criterion (31 total) \u2026
   ],
@@ -288,7 +299,49 @@ async function gatherSample(ctx) {
   }
   return sample;
 }
+var PENDING_PER_CHUNK = 6;
 async function callRemote(opts) {
+  const pending = opts.report.criteria.filter((c) => c.status === "pending");
+  if (pending.length === 0) return { deltas: [] };
+  const scored = opts.report.criteria.filter((c) => c.status === "scored");
+  const chunks = [];
+  for (let i = 0; i < pending.length; i += PENDING_PER_CHUNK) {
+    chunks.push(pending.slice(i, i + PENDING_PER_CHUNK));
+  }
+  const results = await Promise.all(
+    chunks.map(
+      (chunk) => callRemoteOnce({
+        apiUrl: opts.apiUrl,
+        sample: opts.sample,
+        signal: opts.signal,
+        // Each chunk gets its own pending subset plus the full scored context.
+        report: {
+          ...opts.report,
+          criteria: [...chunk, ...scored]
+        }
+      })
+    )
+  );
+  const deltas = results.flatMap((r) => r.deltas ?? []);
+  const totalInput = results.reduce(
+    (n, r) => n + (r.meta?.inputTokens ?? 0),
+    0
+  );
+  const totalOutput = results.reduce(
+    (n, r) => n + (r.meta?.outputTokens ?? 0),
+    0
+  );
+  return {
+    deltas,
+    meta: {
+      model: results[0]?.meta?.model,
+      pendingScored: deltas.length,
+      inputTokens: totalInput,
+      outputTokens: totalOutput
+    }
+  };
+}
+async function callRemoteOnce(opts) {
   const url = opts.apiUrl ?? process.env.AGENT_READY_API_URL ?? DEFAULT_API_URL;
   const res = await fetch(url, {
     method: "POST",

package/dist/index.d.ts CHANGED Viewed

@@ -39,6 +39,10 @@ interface Criterion {
   rationale?: string;
   evidence: Evidence[];
   suggestion?: string;
+  /** Builder Fusion-specific path to address this criterion, if one exists. */
+  fusionPath?: string;
+  /** Short, anonymized description of how this criterion's absence breaks in customer POCs. */
+  commonFailureMode?: string;
 }
 interface BacklogItem {
@@ -59,7 +63,7 @@ interface Rollup {
 }
 interface Report {
-  schemaVersion: "0.1.0";
+  schemaVersion: "0.1.1";
   rubricVersion: "0.1.0";
   generatedAt: string;
   target: { path: string; name?: string; repository?: string };

package/dist/index.js CHANGED Viewed

@@ -7,7 +7,7 @@ import {
   generateBacklog,
   renderMarkdown,
   runAssessment
-} from "./chunk-WHGIRQPX.js";
+} from "./chunk-4AF47E7U.js";
 export {
   RUBRIC,
   TIER_LABELS,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentready-design-cli",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "Deterministic checks for the Agent-Ready Design rubric. Run against a design-system repo to score what can be scored without an LLM.",
   "license": "MIT",
   "author": "Hunter Gillispie <hunter@builder.io>",