npm - @cubis/foundry - Versions diffs - 0.3.69 → 0.3.71 - Mend

@cubis/foundry 0.3.69 → 0.3.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

package/mcp/src/upstream/passthrough.ts CHANGED Viewed

@@ -10,11 +10,12 @@ import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 import {
   parsePostmanState,
   parseStitchState,
+  parsePlaywrightState,
   readEffectiveConfig,
 } from "../cbxConfig/index.js";
 import type { CbxConfig, ConfigScope } from "../cbxConfig/types.js";
-type ServiceId = "postman" | "stitch";
+type ServiceId = "postman" | "stitch" | "playwright";
 export interface UpstreamToolInfo {
   name: string;
@@ -126,7 +127,10 @@ async function loadCachedCatalogTools({
   }
 }
-function getServiceAuth(config: CbxConfig, service: ServiceId): {
+function getServiceAuth(
+  config: CbxConfig,
+  service: ServiceId,
+): {
   mcpUrl: string | null;
   activeProfileName: string | null;
   envVar: string | null;
@@ -134,6 +138,17 @@ function getServiceAuth(config: CbxConfig, service: ServiceId): {
   configured: boolean;
   error?: string;
 } {
+  if (service === "playwright") {
+    const state = parsePlaywrightState(config);
+    return {
+      mcpUrl: state.mcpUrl,
+      activeProfileName: null,
+      envVar: null,
+      headers: {},
+      configured: Boolean(state.mcpUrl),
+    };
+  }
   if (service === "postman") {
     const state = parsePostmanState(config);
     const activeProfile = state.activeProfile;
@@ -214,9 +229,11 @@ function isCallToolResult(
   result: Awaited<ReturnType<Client["callTool"]>>,
 ): result is CallToolResult {
   return Array.isArray(
-    (result as {
-      content?: unknown;
-    }).content,
+    (
+      result as {
+        content?: unknown;
+      }
+    ).content,
   );
 }
@@ -275,6 +292,7 @@ export async function discoverUpstreamCatalogs(
 ): Promise<{
   postman: UpstreamCatalog;
   stitch: UpstreamCatalog;
+  playwright: UpstreamCatalog;
 }> {
   const effective = readEffectiveConfig(scope);
   if (!effective) {
@@ -291,9 +309,14 @@ export async function discoverUpstreamCatalogs(
       discoveryError: "cbx_config.json not found",
     };
     const missingStitch: UpstreamCatalog = { ...missing, service: "stitch" };
+    const missingPlaywright: UpstreamCatalog = {
+      ...missing,
+      service: "playwright",
+    };
     return {
       postman: missing,
       stitch: missingStitch,
+      playwright: missingPlaywright,
     };
   }
@@ -366,6 +389,7 @@ export async function discoverUpstreamCatalogs(
   return {
     postman: await discoverOne("postman"),
     stitch: await discoverOne("stitch"),
+    playwright: await discoverOne("playwright"),
   };
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cubis/foundry",
-  "version": "0.3.69",
+  "version": "0.3.71",
   "description": "Cubis Foundry CLI for workflow-first AI agent environments",
   "type": "module",
   "bin": {

package/src/cli/core.ts CHANGED Viewed

@@ -217,6 +217,10 @@ const STITCH_SKILL_ID = "stitch";
 const STITCH_MCP_SERVER_ID = "StitchMCP";
 const STITCH_API_KEY_ENV_VAR = "STITCH_API_KEY_DEFAULT";
 const STITCH_MCP_URL = "https://stitch.googleapis.com/mcp";
+const PLAYWRIGHT_SKILL_ID = "playwright";
+const PLAYWRIGHT_MCP_SERVER_ID = "PlaywrightMCP";
+const PLAYWRIGHT_DEFAULT_PORT = 8931;
+const PLAYWRIGHT_MCP_URL = `http://localhost:${PLAYWRIGHT_DEFAULT_PORT}/mcp`;
 const POSTMAN_WORKSPACE_MANUAL_CHOICE = "__postman_workspace_manual__";
 const CBX_CONFIG_FILENAME = "cbx_config.json";
 const CBX_CREDENTIALS_ENV_FILENAME = "credentials.env";
@@ -4719,6 +4723,30 @@ function buildGeminiStitchServer({
   };
 }
+function buildVsCodePlaywrightServer({ mcpUrl = PLAYWRIGHT_MCP_URL } = {}) {
+  return {
+    type: "http",
+    url: mcpUrl,
+    headers: {},
+  };
+}
+function buildCopilotCliPlaywrightServer({ mcpUrl = PLAYWRIGHT_MCP_URL } = {}) {
+  return {
+    type: "http",
+    url: mcpUrl,
+    headers: {},
+    tools: ["*"],
+  };
+}
+function buildGeminiPlaywrightServer({ mcpUrl = PLAYWRIGHT_MCP_URL } = {}) {
+  return {
+    httpUrl: mcpUrl,
+    headers: {},
+  };
+}
 function getPostmanApiKeySource({ apiKey, envApiKey }) {
   if (apiKey) return "inline";
   if (envApiKey) return "env";
@@ -5210,6 +5238,7 @@ async function applyPostmanMcpForPlatform({
   stitchMcpUrl,
   includeStitchMcp = false,
   includeFoundryMcp = true,
+  includePlaywrightMcp = false,
   foundryRuntime = "local",
   dryRun = false,
   cwd = process.cwd(),
@@ -5278,6 +5307,9 @@ async function applyPostmanMcpForPlatform({
             mcpUrl: stitchMcpUrl,
           });
         }
+        if (includePlaywrightMcp) {
+          mcpServers[PLAYWRIGHT_MCP_SERVER_ID] = buildGeminiPlaywrightServer();
+        }
         next.mcpServers = mcpServers;
         return next;
       },
@@ -5324,6 +5356,10 @@ async function applyPostmanMcpForPlatform({
           } else {
             delete mcpServers[FOUNDRY_MCP_SERVER_ID];
           }
+          if (includePlaywrightMcp) {
+            mcpServers[PLAYWRIGHT_MCP_SERVER_ID] =
+              buildCopilotCliPlaywrightServer();
+          }
           next.mcpServers = mcpServers;
           return next;
         }
@@ -5350,6 +5386,9 @@ async function applyPostmanMcpForPlatform({
         } else {
           delete servers[FOUNDRY_MCP_SERVER_ID];
         }
+        if (includePlaywrightMcp) {
+          servers[PLAYWRIGHT_MCP_SERVER_ID] = buildVsCodePlaywrightServer();
+        }
         next.servers = servers;
         return next;
       },
@@ -5401,6 +5440,9 @@ async function applyPostmanMcpForPlatform({
           } else {
             delete servers[FOUNDRY_MCP_SERVER_ID];
           }
+          if (includePlaywrightMcp) {
+            servers[PLAYWRIGHT_MCP_SERVER_ID] = buildVsCodePlaywrightServer();
+          }
           next.servers = servers;
           return next;
         },
@@ -5529,6 +5571,57 @@ async function applyPostmanMcpForPlatform({
     };
   }
+  if (platform === "claude") {
+    const claudeConfigPath =
+      mcpScope === "global"
+        ? path.join(os.homedir(), ".claude", "mcp.json")
+        : path.join(workspaceRoot, ".mcp.json");
+    const result = await upsertJsonObjectFile({
+      targetPath: claudeConfigPath,
+      updater: (existing) => {
+        const next = { ...existing };
+        const mcpServers =
+          next.mcpServers &&
+          typeof next.mcpServers === "object" &&
+          !Array.isArray(next.mcpServers)
+            ? { ...next.mcpServers }
+            : {};
+        if (includeFoundryMcp) {
+          if (normalizedFoundryRuntime === "docker") {
+            mcpServers[FOUNDRY_MCP_SERVER_ID] = {
+              type: "url",
+              url: buildFoundryDockerUrl({ port: foundryDockerPort }),
+            };
+          } else {
+            mcpServers[FOUNDRY_MCP_SERVER_ID] = {
+              type: "stdio",
+              command: FOUNDRY_MCP_COMMAND,
+              args: buildFoundryServeArgs({ scope: foundryScope }),
+            };
+          }
+        } else {
+          delete mcpServers[FOUNDRY_MCP_SERVER_ID];
+        }
+        if (includePlaywrightMcp) {
+          mcpServers[PLAYWRIGHT_MCP_SERVER_ID] = {
+            type: "url",
+            url: PLAYWRIGHT_MCP_URL,
+          };
+        }
+        next.mcpServers = mcpServers;
+        return next;
+      },
+      dryRun,
+    });
+    return {
+      kind: "claude-mcp",
+      scope: mcpScope,
+      path: claudeConfigPath,
+      action: result.action,
+      warnings: [...warnings, ...result.warnings],
+    };
+  }
   return {
     kind: "unknown",
     scope: mcpScope,
@@ -5606,10 +5699,7 @@ async function resolvePostmanInstallSelection({
     : null;
   let workspaceSelectionSource = hasWorkspaceOption ? "option" : "none";
   const requestedMcpScope = options.mcpScope
-    ? coerceWorkspaceOnlyMcpScope(
-        options.mcpScope,
-        "--mcp-scope",
-      )
+    ? coerceWorkspaceOnlyMcpScope(options.mcpScope, "--mcp-scope")
     : null;
   let mcpScope = requestedMcpScope?.scope || "project";
   const warnings = [];
@@ -6057,6 +6147,7 @@ async function configurePostmanInstallArtifacts({
         stitchMcpUrl: effectiveStitchMcpUrl,
         includeStitchMcp: shouldInstallStitch,
         includeFoundryMcp: postmanSelection.foundryMcpEnabled,
+        includePlaywrightMcp: postmanSelection.playwrightEnabled ?? false,
         foundryRuntime: postmanSelection.effectiveMcpRuntime || "local",
         dryRun,
         cwd,
@@ -6167,6 +6258,7 @@ async function applyPostmanConfigArtifacts({
     POSTMAN_API_KEY_ENV_VAR;
   const postmanMcpUrl = postmanState.mcpUrl || POSTMAN_MCP_URL;
   const stitchEnabled = Boolean(stitchState);
+  const playwrightEnabled = Boolean(configValue?.playwright);
   const stitchApiKeyEnvVar =
     normalizePostmanApiKey(stitchState?.apiKeyEnvVar) || STITCH_API_KEY_ENV_VAR;
   const stitchMcpUrl = stitchState?.mcpUrl || STITCH_MCP_URL;
@@ -6239,6 +6331,7 @@ async function applyPostmanConfigArtifacts({
       stitchMcpUrl,
       includeStitchMcp: stitchEnabled,
       includeFoundryMcp: true,
+      includePlaywrightMcp: playwrightEnabled ?? false,
       foundryRuntime,
       dryRun,
       cwd,
@@ -8239,7 +8332,9 @@ async function performWorkflowInstall(
     cancelled: false,
     cwd,
     scope,
-    warnings: requestedInstallScope.warning ? [requestedInstallScope.warning] : [],
+    warnings: requestedInstallScope.warning
+      ? [requestedInstallScope.warning]
+      : [],
     ruleScope,
     dryRun,
     platform,

package/src/cli/init/execute.ts CHANGED Viewed

@@ -23,12 +23,20 @@ export function buildInitExecutionPlan({
   const planItems: InitExecutionPlanItem[] = [];
   const wantsPostman = hasMcpSelection(selections.selectedMcps, "postman");
   const wantsStitch = hasMcpSelection(selections.selectedMcps, "stitch");
-  const wantsFoundry = hasMcpSelection(selections.selectedMcps, "cubis-foundry");
+  const wantsFoundry = hasMcpSelection(
+    selections.selectedMcps,
+    "cubis-foundry",
+  );
+  const wantsPlaywright = hasMcpSelection(
+    selections.selectedMcps,
+    "playwright",
+  );
   for (const platform of selections.platforms) {
     const stitchSupported = platform === "antigravity";
     const stitchEnabled = wantsStitch && stitchSupported;
-    const hasAnyMcp = wantsPostman || stitchEnabled || wantsFoundry;
+    const hasAnyMcp =
+      wantsPostman || stitchEnabled || wantsFoundry || wantsPlaywright;
     const warnings: string[] = [];
     if (wantsStitch && !stitchSupported) {
       warnings.push(
@@ -48,10 +56,11 @@ export function buildInitExecutionPlan({
       target,
       postman: wantsPostman,
       stitch: stitchEnabled,
+      playwright: wantsPlaywright,
       stitchDefaultForAntigravity: false,
       mcpScope: selections.mcpScope,
       foundryMcp: wantsFoundry,
-      mcpToolSync: wantsPostman || stitchEnabled,
+      mcpToolSync: wantsPostman || stitchEnabled || wantsPlaywright,
       mcpRuntime: hasAnyMcp ? selections.mcpRuntime : "local",
       mcpFallback: "local",
       mcpBuildLocal: hasAnyMcp ? selections.mcpBuildLocal : false,
@@ -83,9 +92,9 @@ export function formatInitSummary(selections: InitWizardSelections) {
     `- Skill profile: ${selections.skillProfile}`,
     `- Skills scope: ${selections.skillsScope}`,
     `- MCP scope: ${selections.mcpScope}`,
-    `- MCP runtime: ${selections.mcpRuntime}${selections.mcpRuntime === "docker" ? selections.mcpBuildLocal ? " (build local image)" : " (pull image)" : ""}`,
+    `- MCP runtime: ${selections.mcpRuntime}${selections.mcpRuntime === "docker" ? (selections.mcpBuildLocal ? " (build local image)" : " (pull image)") : ""}`,
     `- MCP selections: ${selections.selectedMcps.length > 0 ? selections.selectedMcps.join(", ") : "(none)"}`,
     `- Postman mode: ${postmanSelected ? selections.postmanMode : "(not selected)"}`,
-    `- Postman workspace: ${postmanSelected ? selections.postmanWorkspaceId === null ? "null" : selections.postmanWorkspaceId : "(not selected)"}`,
+    `- Postman workspace: ${postmanSelected ? (selections.postmanWorkspaceId === null ? "null" : selections.postmanWorkspaceId) : "(not selected)"}`,
   ].join("\n");
 }

package/src/cli/init/prompts.ts CHANGED Viewed

@@ -90,6 +90,11 @@ export async function promptInitMcpSelection(defaultMcps: InitMcpId[]) {
         value: "stitch",
         checked: defaultMcps.includes("stitch"),
       },
+      {
+        name: "Playwright",
+        value: "playwright",
+        checked: defaultMcps.includes("playwright"),
+      },
     ],
   });
 }

package/src/cli/init/types.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 export type InitScope = "project" | "global";
 export type InitSkillProfile = "core" | "web-backend" | "full";
-export type InitMcpId = "cubis-foundry" | "postman" | "stitch";
+export type InitMcpId = "cubis-foundry" | "postman" | "stitch" | "playwright";
 export type InitPostmanMode = "full" | "minimal";
 export type InitPlatformId = "codex" | "antigravity" | "copilot" | "claude";
 export type InitMcpRuntime = "local" | "docker";

package/workflows/powers/ask-questions-if-underspecified/SKILL.md CHANGED Viewed

@@ -1,17 +1,27 @@
 ---
 name: ask-questions-if-underspecified
-description: Clarify requirements before implementing. Use when serious doubts arise.
+description: Clarify requirements before implementing. Use when serious doubts arise about objective, scope, constraints, environment, or safety — or when the task is substantial enough that being wrong wastes significant effort.
 ---
 # Ask Questions If Underspecified
 ## When to Use
-Use this skill when a request has multiple plausible interpretations or key details (objective, scope, constraints, environment, or safety) are unclear.
+Use this skill when a request has multiple plausible interpretations or key details (objective, scope, constraints, environment, or safety) are unclear — **and** when the cost of implementing the wrong interpretation is significant.
+Three situations require clarification:
+1. **High branching** — Multiple plausible interpretations produce significantly different implementations
+2. **Substantial deliverable** — The task is large enough that wrong assumptions waste real time
+3. **Safety-critical** — The action is hard to reverse (data migrations, deployments, file deletions)
 ## When NOT to Use
-Do not use this skill when the request is already clear, or when a quick, low-risk discovery read can answer the missing details.
+Do not use this skill when:
+- The request is already clear and one interpretation is obviously correct
+- A quick discovery read (config files, existing patterns, repo structure) can answer the missing details faster than asking
+- The task is small enough that being slightly wrong is cheap and correctable
 ## Goal
@@ -22,6 +32,7 @@ Ask the minimum set of clarifying questions needed to avoid wrong work; do not s
 ### 1) Decide whether the request is underspecified
 Treat a request as underspecified if after exploring how to perform the work, some or all of the following are not clear:
 - Define the objective (what should change vs stay the same)
 - Define "done" (acceptance criteria, examples, edge cases)
 - Define scope (which files/components/users are in/out)
@@ -36,6 +47,7 @@ If multiple plausible interpretations exist, assume it is underspecified.
 Ask 1-5 questions in the first pass. Prefer questions that eliminate whole branches of work.
 Make questions easy to answer:
 - Optimize for scannability (short, numbered questions; avoid paragraphs)
 - Offer multiple-choice options when possible
 - Suggest reasonable defaults when appropriate (mark them clearly as the default/recommended choice; bold the recommended choice in the list, or if you present options in a code block, put a bold "Recommended" line immediately above the block and also tag defaults inside the block)
@@ -47,10 +59,12 @@ Make questions easy to answer:
 ### 3) Pause before acting
 Until must-have answers arrive:
 - Do not run commands, edit files, or produce a detailed plan that depends on unknowns
 - Do perform a clearly labeled, low-risk discovery step only if it does not commit you to a direction (e.g., inspect repo structure, read relevant config files)
 If the user explicitly asks you to proceed without answers:
 - State your assumptions as a short numbered list
 - Ask for confirmation; proceed only after they confirm or correct them
@@ -83,3 +97,37 @@ Reply with: defaults (or 1a 2a)
 - Don't ask questions you can answer with a quick, low-risk discovery read (e.g., configs, existing patterns, docs).
 - Don't ask open-ended questions if a tight multiple-choice or yes/no would eliminate ambiguity faster.
+- Don't ask more than 5 questions in the first pass — rank by impact and ask the top ones.
+- Don't skip the fast path — every clarification block needs a `defaults` shortcut.
+- Don't forget to restate your interpretation before proceeding — it confirms you heard correctly.
+- Don't ask about reversible decisions — pick one, proceed, let them correct if wrong.
+## Three-Stage Pattern (for complex or substantial tasks)
+For tasks where wrong assumptions would waste significant effort — documents, architecture decisions, multi-file features — use a three-stage approach:
+### Stage 1: Meta-context questions (3-5 questions)
+Ask about the big picture before touching content:
+- What _type_ of deliverable is this? (spec, code, doc, design, plan)
+- Who's the audience/consumer?
+- What does "done" look like?
+- Existing template, format, or precedent to follow?
+- Hard constraints (framework, performance, compatibility)?
+### Stage 2: Info dump + targeted follow-up
+After Stage 1 answers: invite the user to brain-dump everything relevant.
+> "Dump everything you know — background, prior decisions, constraints, opinions, blockers. Don't organize it. Just get it all out."
+Then ask 5-10 targeted follow-up questions based on gaps. Users can answer in shorthand (`1: yes, 2: see above, 3: no`).
+**Exit Stage 2 when:** You understand objective, constraints, and at least one clear definition of success.
+### Stage 3: Confirm interpretation, then proceed
+Restate in 1-3 sentences before starting:
+> "Here's what I understand: [objective]. [Key constraint]. [What done looks like]. Starting now — correct me if anything's off."

package/workflows/powers/behavioral-modes/SKILL.md CHANGED Viewed

@@ -7,6 +7,7 @@ allowed-tools: Read, Glob, Grep
 # Behavioral Modes - Adaptive AI Operating Modes
 ## Purpose
 This skill defines distinct behavioral modes that optimize AI performance for specific tasks. Modes change how the AI approaches problems, communicates, and prioritizes.
 ---
@@ -18,6 +19,7 @@ This skill defines distinct behavioral modes that optimize AI performance for sp
 **When to use:** Early project planning, feature ideation, architecture decisions
 **Behavior:**
 - Ask clarifying questions before assumptions
 - Offer multiple alternatives (at least 3)
 - Think divergently - explore unconventional solutions
@@ -25,6 +27,7 @@ This skill defines distinct behavioral modes that optimize AI performance for sp
 - Use visual diagrams (mermaid) to explain concepts
 **Output style:**
 ```
 "Let's explore this together. Here are some approaches:
@@ -46,6 +49,7 @@ What resonates with you? Or should we explore a different direction?"
 **When to use:** Writing code, building features, executing plans
 **Behavior:**
 - **CRITICAL: Use `clean-code` skill standards** - concise, direct, no verbose explanations
 - Fast execution - minimize questions
 - Use established patterns and best practices
@@ -57,6 +61,7 @@ What resonates with you? Or should we explore a different direction?"
 - **NO RUSHING** - Quality > Speed. Read ALL references before coding.
 **Output style:**
 ```
 [Code block]
@@ -64,6 +69,7 @@ What resonates with you? Or should we explore a different direction?"
 ```
 **NOT:**
 ```
 "Building [feature]...
@@ -83,6 +89,7 @@ Run `npm run dev` to test."
 **When to use:** Fixing bugs, troubleshooting errors, investigating issues
 **Behavior:**
 - Ask for error messages and reproduction steps
 - Think systematically - check logs, trace data flow
 - Form hypothesis → test → verify
@@ -90,6 +97,7 @@ Run `npm run dev` to test."
 - Prevent future occurrences
 **Output style:**
 ```
 "Investigating...
@@ -106,6 +114,7 @@ Run `npm run dev` to test."
 **When to use:** Code review, architecture review, security audit
 **Behavior:**
 - Be thorough but constructive
 - Categorize by severity (Critical/High/Medium/Low)
 - Explain the "why" behind suggestions
@@ -113,6 +122,7 @@ Run `npm run dev` to test."
 - Acknowledge what's done well
 **Output style:**
 ```
 ## Code Review: [file/feature]
@@ -133,6 +143,7 @@ Run `npm run dev` to test."
 **When to use:** Explaining concepts, documentation, onboarding
 **Behavior:**
 - Explain from fundamentals
 - Use analogies and examples
 - Progress from simple to complex
@@ -140,6 +151,7 @@ Run `npm run dev` to test."
 - Check understanding
 **Output style:**
 ```
 ## Understanding [Concept]
@@ -163,6 +175,7 @@ Run `npm run dev` to test."
 **When to use:** Production deployment, final polish, release preparation
 **Behavior:**
 - Focus on stability over features
 - Check for missing error handling
 - Verify environment configs
@@ -170,6 +183,7 @@ Run `npm run dev` to test."
 - Create deployment checklist
 **Output style:**
 ```
 ## Pre-Ship Checklist
@@ -195,35 +209,111 @@ Run `npm run dev` to test."
 The AI should automatically detect the appropriate mode based on:
-| Trigger | Mode |
-|---------|------|
-| "what if", "ideas", "options" | BRAINSTORM |
-| "build", "create", "add" | IMPLEMENT |
-| "not working", "error", "bug" | DEBUG |
-| "review", "check", "audit" | REVIEW |
-| "explain", "how does", "learn" | TEACH |
-| "deploy", "release", "production" | SHIP |
+| Trigger                                        | Mode                |
+| ---------------------------------------------- | ------------------- |
+| "what if", "ideas", "options"                  | BRAINSTORM          |
+| "build", "create", "add"                       | IMPLEMENT           |
+| "not working", "error", "bug"                  | DEBUG               |
+| "review", "check", "audit"                     | REVIEW              |
+| "explain", "how does", "learn"                 | TEACH               |
+| "deploy", "release", "production"              | SHIP                |
+| "iterate", "refine quality", "not good enough" | EVALUATOR-OPTIMIZER |
+---
+## Workflow Patterns
+Three patterns govern how modes combine across multiple agents or steps. Use the simplest pattern that solves the problem — add complexity only when it measurably improves results.
+### 1. Sequential (default)
+Use when tasks have dependencies — each step needs the previous step's output.
+```
+[BRAINSTORM] → [IMPLEMENT] → [REVIEW] → [SHIP]
+```
+Best for: multi-stage features, draft-review-polish cycles, data pipelines.
+### 2. Parallel
+Use when tasks are independent and doing them one at a time is too slow.
+```
+[security REVIEW + performance REVIEW + quality REVIEW] → synthesize
+```
+Best for: code review across multiple dimensions, parallel analysis. Requires a clear aggregation strategy before starting.
+### 3. Evaluator-Optimizer (new)
+Use when first-draft quality consistently falls short and quality is measurable.
+```
+[IMPLEMENT] → [REVIEW with criteria] → pass? → done
+                      ↓ fail
+               feedback → [IMPLEMENT again]
+```
+**When to use:**
+- Technical docs, customer communications, SQL queries against specific standards
+- Any output where the gap between first attempt and required quality is significant
+- When you have clear, checkable criteria (not just "make it better")
+**When NOT to use:**
+- First-attempt quality is already acceptable
+- Criteria are too subjective for consistent AI evaluation
+- Real-time use cases needing immediate responses
+- Deterministic validators exist (linters, schema validators) — use those instead
+**Implementation:**
+```
+## Generator
+Task: [what to create]
+Constraints: [specific, measurable requirements — these become eval criteria]
+## Evaluator
+Criteria:
+1. [Criterion A] — Pass/Fail + specific failure note
+2. [Criterion B] — Pass/Fail + specific failure note
+Output JSON: { "pass": bool, "failures": ["..."], "revision_note": "..." }
+Max iterations: 3  ← always set a ceiling
+Stop when: all criteria pass OR max iterations reached
+```
 ---
-## Multi-Agent Collaboration Patterns (2025)
+## Multi-Agent Collaboration Patterns
 Modern architectures optimized for agent-to-agent collaboration:
 ### 1. 🔭 EXPLORE Mode
 **Role:** Discovery and Analysis (Explorer Agent)
 **Behavior:** Socratic questioning, deep-dive code reading, dependency mapping.
 **Output:** `discovery-report.json`, architectural visualization.
 ### 2. 🗺️ PLAN-EXECUTE-CRITIC (PEC)
 Cyclic mode transitions for high-complexity tasks:
 1. **Planner:** Decomposes the task into atomic steps (`task.md`).
 2. **Executor:** Performs the actual coding (`IMPLEMENT`).
 3. **Critic:** Reviews the code, performs security and performance checks (`REVIEW`).
 ### 3. 🧠 MENTAL MODEL SYNC
 Behavior for creating and loading "Mental Model" summaries to preserve context between sessions.
+### 4. 🔄 EVALUATOR-OPTIMIZER
+Paired agents in an iterative quality loop: the Generator produces, the Evaluator scores against criteria, and the Generator refines. Set a maximum iteration ceiling before starting.
 ---
 ## Combining Modes
@@ -239,4 +329,5 @@ Users can explicitly request a mode:
 /implement the user profile page
 /debug why login fails
 /review this pull request
+/iterate [target quality bar]    ← triggers evaluator-optimizer
 ```