npm - forge-orkes - Versions diffs - 0.1.0 → 0.3.0 - Mend

forge-orkes 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/bin/create-forge.js +277 -6
package/package.json +1 -1
package/template/.claude/settings.json +27 -2
package/template/.claude/skills/architecting/SKILL.md +12 -0
package/template/.claude/skills/auditing/SKILL.md +12 -0
package/template/.claude/skills/discussing/SKILL.md +144 -38
package/template/.claude/skills/executing/SKILL.md +14 -0
package/template/.claude/skills/forge/SKILL.md +61 -4
package/template/.claude/skills/planning/SKILL.md +12 -0
package/template/.claude/skills/researching/SKILL.md +12 -0
package/template/.claude/skills/upgrading/SKILL.md +90 -0
package/template/.claude/skills/verifying/SKILL.md +28 -0
package/template/CLAUDE.md +4 -0

package/bin/create-forge.js CHANGED Viewed

@@ -6,6 +6,23 @@ const readline = require('readline');
 const templateDir = path.join(__dirname, '..', 'template');
 const targetDir = process.cwd();
+const pkgVersion = require('../package.json').version; // version of the package being run; stamped into settings.json and shown in console output
+// --- File classification for upgrades (paths are relative to both the template root and the project root) ---
+// Framework-owned: Forge controls these entirely; upgradeDir() overwrites any file that differs from the template
+const FRAMEWORK_OWNED_DIRS = ['.claude/agents', '.claude/skills'];
+// Template-only: reference templates Forge controls; processed by the same upgradeDir() routine
+const TEMPLATE_ONLY_DIRS = ['.forge/templates'];
+// Merge-owned: never auto-overwrite; a differing template copy is staged under .forge/upgrade/<basename>.new for review
+const MERGE_OWNED_FILES = ['CLAUDE.md'];
+// Settings file gets smart-merge (overwrite forge.* keys, preserve user hooks and every other top-level key)
+const SETTINGS_FILE = '.claude/settings.json';
+// --- Helpers ---
 function copyDirRecursive(src, dest) {
   let count = 0;
@@ -38,7 +55,132 @@ function prompt(question) {
   });
 }
-async function main() {
+function stampVersion(settingsPath) { // Write pkgVersion into `forge.version` of the JSON file at settingsPath (throws if the file is not valid JSON).
+  const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf-8'));
+  if (settings.forge) { // without a `forge` section nothing is stamped, but the file is still rewritten below
+    settings.forge.version = pkgVersion;
+  }
+  fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + '\n'); // re-serialized with 2-space indent and a trailing newline
+}
+/**
+ * Recursively collect all relative file paths under a directory.
+ *
+ * @param {string} dir - Directory to walk. A path that does not exist yields an empty array.
+ * @param {string} base - Prefix joined (path.join) onto each entry name; upgradeDir() passes ''
+ *   so the returned paths are relative to `dir`.
+ * @returns {string[]} One path per non-directory entry. A subdirectory's files appear at the
+ *   position where that subdirectory is encountered in fs.readdirSync order.
+ */
+function collectFiles(dir, base) {
+  const results = [];
+  if (!fs.existsSync(dir)) return results;
+  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+    const rel = path.join(base, entry.name);
+    if (entry.isDirectory()) {
+      results.push(...collectFiles(path.join(dir, entry.name), rel)); // descend, carrying the relative prefix
+    } else {
+      results.push(rel); // anything that is not a directory is recorded as a file
+    }
+  }
+  return results;
+}
+/**
+ * Compare and overwrite framework-owned or template-only files.
+ * Returns { updated, added, unchanged, removed } arrays of relative paths.
+ *
+ * @param {string} relDir - Directory path relative to both templateDir (source) and
+ *   targetDir (destination).
+ * @returns {{updated: string[], added: string[], unchanged: string[], removed: string[]}}
+ *   Every path is prefixed with relDir. All four arrays are empty when the template has no
+ *   such directory. `removed` is report-only: files that exist in the project but not in the
+ *   template are listed, never deleted.
+ */
+function upgradeDir(relDir) {
+  const srcDir = path.join(templateDir, relDir);
+  const destDir = path.join(targetDir, relDir);
+  const result = { updated: [], added: [], unchanged: [], removed: [] };
+  if (!fs.existsSync(srcDir)) return result;
+  const srcFiles = collectFiles(srcDir, '');
+  const destFiles = new Set(collectFiles(destDir, '')); // snapshot of the project taken before anything is copied
+  for (const rel of srcFiles) {
+    const srcPath = path.join(srcDir, rel);
+    const destPath = path.join(destDir, rel);
+    const displayPath = path.join(relDir, rel);
+    if (!fs.existsSync(destPath)) {
+      fs.mkdirSync(path.dirname(destPath), { recursive: true }); // create missing parent directories first
+      fs.copyFileSync(srcPath, destPath);
+      result.added.push(displayPath);
+    } else {
+      const srcContent = fs.readFileSync(srcPath);
+      const destContent = fs.readFileSync(destPath);
+      if (Buffer.compare(srcContent, destContent) !== 0) { // byte-for-byte comparison; any difference triggers an overwrite
+        fs.copyFileSync(srcPath, destPath);
+        result.updated.push(displayPath);
+      } else {
+        result.unchanged.push(displayPath);
+      }
+    }
+  }
+  // Detect files in dest that are no longer in template (reported only; nothing is deleted)
+  for (const rel of destFiles) {
+    const srcPath = path.join(srcDir, rel);
+    if (!fs.existsSync(srcPath)) {
+      result.removed.push(path.join(relDir, rel));
+    }
+  }
+  return result;
+}
+/**
+ * Handle merge-owned files: stage new version for manual review if different.
+ */
+function handleMergeFile(relFile) {
+  const srcPath = path.join(templateDir, relFile);
+  const destPath = path.join(targetDir, relFile);
+  if (!fs.existsSync(srcPath)) return null;
+  if (!fs.existsSync(destPath)) return null;
+  const srcContent = fs.readFileSync(srcPath, 'utf-8');
+  const destContent = fs.readFileSync(destPath, 'utf-8');
+  if (srcContent === destContent) return 'unchanged';
+  // Stage the new version for manual review
+  const upgradeDir = path.join(targetDir, '.forge', 'upgrade');
+  fs.mkdirSync(upgradeDir, { recursive: true });
+  const basename = path.basename(relFile);
+  const newPath = path.join(upgradeDir, `${basename}.new`);
+  fs.writeFileSync(newPath, srcContent);
+  return 'staged';
+}
+/**
+ * Smart-merge settings.json: overwrite forge.* keys from template, preserve user hooks.
+ *
+ * Only the top-level `forge` object is touched. The merge is shallow: each key present in
+ * the template's `forge` replaces the installed value wholesale (nested objects are not
+ * merged key-by-key), while installed `forge` keys absent from the template are kept.
+ * `hooks` and every other top-level key are left exactly as installed, so hook entries that
+ * exist only in the template are NOT added by this function.
+ *
+ * @returns {'missing'|'unchanged'|'updated'} 'missing' if either the installed or the
+ *   template settings file does not exist; 'unchanged' if the merged object serializes
+ *   identically to the installed one (nothing is written); 'updated' after rewriting the
+ *   installed file.
+ * @throws {SyntaxError} If either file is not valid JSON (JSON.parse is not guarded here).
+ */
+function upgradeSettings() {
+  const srcPath = path.join(templateDir, SETTINGS_FILE);
+  const destPath = path.join(targetDir, SETTINGS_FILE);
+  if (!fs.existsSync(destPath)) return 'missing';
+  if (!fs.existsSync(srcPath)) return 'missing';
+  const srcSettings = JSON.parse(fs.readFileSync(srcPath, 'utf-8'));
+  const destSettings = JSON.parse(fs.readFileSync(destPath, 'utf-8'));
+  const before = JSON.stringify(destSettings); // serialized snapshot used for change detection
+  // Overwrite forge.* keys from template (installed keys come first, so their order is kept)
+  destSettings.forge = { ...destSettings.forge, ...srcSettings.forge };
+  // Always stamp current package version (wins over the version literal in the template file)
+  destSettings.forge.version = pkgVersion;
+  const after = JSON.stringify(destSettings);
+  if (before === after) return 'unchanged';
+  fs.writeFileSync(destPath, JSON.stringify(destSettings, null, 2) + '\n');
+  return 'updated';
+}
+// --- Commands ---
+async function install() {
   console.log('\n  Forge - Meta-prompting framework for Claude Code\n');
   // Handle CLAUDE.md
@@ -94,10 +236,139 @@ async function main() {
   const forgeCount = copyDirRecursive(srcForge, destForge);
   console.log(`  Installed .forge/templates/ (${forgeCount} files)`);
-  console.log('\n  Forge is ready. Start with: /forge\n');
+  // Stamp version from package.json into settings.json
+  const settingsPath = path.join(targetDir, SETTINGS_FILE);
+  if (fs.existsSync(settingsPath)) {
+    stampVersion(settingsPath);
+  }
+  console.log(`\n  Forge v${pkgVersion} is ready. Start with: /forge\n`);
 }
-main().catch((err) => {
-  console.error('Error:', err.message);
-  process.exit(1);
-});
+async function upgrade() { // `upgrade` subcommand: sync Forge-managed files in the current directory with the bundled template, then print a summary
+  console.log('\n  Forge Upgrade\n');
+  // Verify Forge is installed (the settings file doubles as the install marker; exit code 1 if absent)
+  const settingsPath = path.join(targetDir, SETTINGS_FILE);
+  if (!fs.existsSync(settingsPath)) {
+    console.error(
+      '  Forge is not installed in this directory.\n  Run `npx forge-orkes` first to install.\n'
+    );
+    process.exit(1);
+  }
+  // Read installed version (display only; a parse failure is tolerated here, but upgradeSettings() in step 4 parses the same file unguarded)
+  let installedVersion = 'unknown';
+  try {
+    const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf-8'));
+    installedVersion = settings.forge?.version || 'unknown';
+  } catch {
+    // proceed with unknown version
+  }
+  console.log(`  Installed: v${installedVersion}`);
+  console.log(`  Available: v${pkgVersion}\n`);
+  const results = {
+    updated: [],
+    added: [],
+    unchanged: [],
+    removed: [],
+    needsReview: [],
+  };
+  // 1. Process framework-owned directories (differing files are overwritten on disk at this point)
+  for (const dir of FRAMEWORK_OWNED_DIRS) {
+    const dirResult = upgradeDir(dir);
+    results.updated.push(...dirResult.updated);
+    results.added.push(...dirResult.added);
+    results.unchanged.push(...dirResult.unchanged);
+    results.removed.push(...dirResult.removed);
+  }
+  // 2. Process template-only directories (same routine and bookkeeping as step 1)
+  for (const dir of TEMPLATE_ONLY_DIRS) {
+    const dirResult = upgradeDir(dir);
+    results.updated.push(...dirResult.updated);
+    results.added.push(...dirResult.added);
+    results.unchanged.push(...dirResult.unchanged);
+    results.removed.push(...dirResult.removed);
+  }
+  // 3. Process merge-owned files (a null status, i.e. a missing copy, is not recorded anywhere)
+  for (const file of MERGE_OWNED_FILES) {
+    const status = handleMergeFile(file);
+    if (status === 'staged') {
+      results.needsReview.push(file);
+    } else if (status === 'unchanged') {
+      results.unchanged.push(file);
+    }
+  }
+  // 4. Smart-merge settings.json (a 'missing' status is not recorded anywhere)
+  const settingsStatus = upgradeSettings();
+  if (settingsStatus === 'updated') {
+    results.updated.push(SETTINGS_FILE);
+  } else if (settingsStatus === 'unchanged') {
+    results.unchanged.push(SETTINGS_FILE);
+  }
+  // Report results ("Already up to date" is printed only when nothing changed AND no stale files were found)
+  const totalChanges =
+    results.updated.length + results.added.length + results.needsReview.length;
+  if (totalChanges === 0 && results.removed.length === 0) {
+    console.log('  Already up to date.\n');
+    return;
+  }
+  if (results.updated.length > 0) {
+    console.log(`  Updated (${results.updated.length}):`);
+    for (const f of results.updated) {
+      console.log(`    ${f}`);
+    }
+    console.log();
+  }
+  if (results.added.length > 0) {
+    console.log(`  Added (${results.added.length}):`);
+    for (const f of results.added) {
+      console.log(`    ${f}`);
+    }
+    console.log();
+  }
+  if (results.needsReview.length > 0) {
+    console.log(`  Needs manual review (${results.needsReview.length}):`);
+    for (const f of results.needsReview) {
+      console.log(`    ${f} → .forge/upgrade/${path.basename(f)}.new`);
+    }
+    console.log();
+  }
+  if (results.removed.length > 0) {
+    console.log(`  Removed from template (${results.removed.length}):`);
+    for (const f of results.removed) {
+      console.log(`    ${f} (still in your project — delete manually if unused)`);
+    }
+    console.log();
+  }
+  console.log(`  Upgraded to v${pkgVersion}\n`);
+}
+// --- Entry point: `upgrade` as the first CLI argument runs upgrade(); anything else (including no argument) runs install() ---
+const subcommand = process.argv[2];
+if (subcommand === 'upgrade') {
+  upgrade().catch((err) => {
+    console.error('Error:', err.message); // only the message is printed, then exit code 1
+    process.exit(1);
+  });
+} else {
+  install().catch((err) => {
+    console.error('Error:', err.message);
+    process.exit(1);
+  });
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "forge-orkes",
-  "version": "0.1.0",
+  "version": "0.3.0",
   "description": "Set up the Forge meta-prompting framework for Claude Code in your project",
   "bin": {
     "create-forge": "./bin/create-forge.js"

package/template/.claude/settings.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "forge": {
-    "version": "0.1.0",
+    "version": "0.3.0",
     "default_tier": "standard",
     "beads_integration": false,
     "context_gates": {
@@ -10,7 +10,14 @@
       "constitution_md_max_kb": 10
     },
     "commit_format": "{type}({scope}): {description}",
-    "commit_types": ["feat", "fix", "test", "refactor", "chore", "docs"]
+    "commit_types": [
+      "feat",
+      "fix",
+      "test",
+      "refactor",
+      "chore",
+      "docs"
+    ]
   },
   "hooks": {
     "PostToolUse": [
@@ -23,9 +30,27 @@
             "async": true
           }
         ]
+      },
+      {
+        "matcher": "Skill",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "mkdir -p .forge && echo \"$TOOL_INPUT\" > .forge/.active-skill"
+          }
+        ]
       }
     ],
     "PreToolUse": [
+      {
+        "matcher": "Write|Edit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "if [ ! -f .forge/.active-skill ]; then echo \"[Forge] No active skill. Invoke /forge or /quick-tasking before editing code. To bypass: touch .forge/.active-skill\" >&2; exit 2; fi"
+          }
+        ]
+      },
       {
         "matcher": "Bash(git commit)",
         "hooks": [

package/template/.claude/skills/architecting/SKILL.md CHANGED Viewed

@@ -119,3 +119,15 @@ After completing architectural work:
 3. API contracts defined (if applicable)
 4. Constitutional gates verified
 5. User has approved significant decisions
+## Phase Handoff
+After architectural decisions are documented:
+1. **Verify persistence** — Confirm ADRs are written to `.forge/decisions/`, data models and API contracts to `.forge/phases/{N}-{name}/`
+2. **Update state** — Set `current.status` to `planning` in `.forge/state/milestone-{id}.yml`
+3. **Recommend context clear:**
+*"Architecting phase complete. Decisions are documented in `.forge/decisions/` and phase artifacts. I recommend clearing context (`/clear`) before starting the planning phase — the planner will load ADRs, contracts, and state from disk.*
+*Ready to continue? Clear context and invoke `/forge` to resume."*

package/template/.claude/skills/auditing/SKILL.md CHANGED Viewed

@@ -300,3 +300,15 @@ This is a soft gate — critical issues strongly recommend fixing before complet
 - The user always has final authority over ship decisions
 The report documents the decision either way, creating an audit trail.
+## Phase Handoff
+After auditing routes to refactoring (all three paths: HEALTHY, accepted risk, accepted warnings):
+1. **Verify persistence** — Confirm health report is written to `.forge/audits/milestone-{id}-health-report.md`
+2. **Update state** — Set `current.status` to `refactoring` in `.forge/state/milestone-{id}.yml`
+3. **Recommend context clear:**
+*"Health audit complete. Report written to `.forge/audits/`. I recommend clearing context (`/clear`) before the refactoring review — the refactoring scanner spawns a fresh agent with the git diff and health report, so a clean context ensures accurate scanning.*
+*Ready to continue? Clear context and invoke `/forge` to resume."*

package/template/.claude/skills/discussing/SKILL.md CHANGED Viewed

@@ -29,46 +29,94 @@ The only output is the conversation itself and, at the end, a summary of decisio
 ## Pre-Planning Discussion
-When entering from `researching` with findings in hand:
+When entering from `researching` (potentially after a context clear):
-### Step 1: Present the Landscape
+### Step 0: Load Context
+If entering with a fresh context (after `/clear`):
+```
+Read: .forge/state/milestone-{id}.yml → current position, progress
+Read: .forge/project.yml → tech stack, project description
+Read: .forge/context.md → any existing locked decisions (if exists)
+Read: .forge/constitution.md → active gates (if exists)
+```
+Check if research findings were written to files (`.forge/phases/` or similar). If so, read them. If research was inline-only (conversation context), the user may need to re-summarize the findings — ask briefly: *"We're picking up after the research phase. Can you summarize the key findings, or should I re-scan the relevant areas?"*
+### Step 1: Present Decisions with AskUserQuestion
 Summarize what research found, structured around decisions the user needs to make — not a data dump.
-*"Based on what I found, here are the key decisions before we plan:"*
+**Use the `AskUserQuestion` tool for every decision point.** This gives users a clean, scannable interface instead of walls of text. You can batch up to 4 questions per `AskUserQuestion` call.
+For each decision:
+1. Write a **brief prose intro** (2-3 sentences max) setting context for the decision — what's the problem, why does it matter.
+2. Then immediately call `AskUserQuestion` with:
+   - `question`: The decision stated plainly, ending with `?`
+   - `header`: Short label (e.g., "Strategy", "Approach", "Scope")
+   - `options`: 2-4 realistic approaches. Each option gets:
+     - `label`: Concise name (1-5 words). Put your recommendation first with "(Recommended)" suffix.
+     - `description`: Trade-offs — what you gain and what you lose. Be honest about costs.
+   - `multiSelect`: false for mutually exclusive choices, true when combinations are valid.
+**Batch related decisions.** If you have 3-5 decisions, group them into 1-2 `AskUserQuestion` calls (max 4 questions each) rather than asking one at a time. This lets the user see the full landscape and make coherent choices.
+**When NOT to use AskUserQuestion:** For open-ended exploration questions where the answer isn't one of a few discrete options (e.g., "Walk me through the ideal user flow"), use regular prose. The tool is for decisions with concrete choices, not brainstorming.
+Example structure for a discussion with 3 decisions:
+```
+Brief context paragraph explaining the landscape from research.
-For each decision point:
-- **What needs deciding** — the question, stated plainly
-- **Options** — 2-3 realistic approaches (not exhaustive lists)
-- **Trade-offs** — what you gain and what you lose with each
-- **Recommendation** — if you have one, say so and say why. If you don't, say that too.
+→ AskUserQuestion with questions:
+  1. "Which recovery strategy should we use?" (header: "Recovery")
+     - "Sweep timer (Recommended)" / "Sweep timer + queue refactor" / ...
+  2. "Where should observability live?" (header: "Observability")
+     - "Server-side logs only" / "PostHog events" / "Both" / ...
+  3. "How should we handle the 704 contradictory records?" (header: "Data cleanup")
+     - "Migration + constraints" / "Let sweep handle it" / "Migration only" / ...
+```
-Keep it conversational. Don't present a 20-item matrix. Surface the 3-5 decisions that actually matter for this work.
+Surface the 3-5 decisions that actually matter. Don't present a 20-item matrix.
 ### Step 2: Facilitate, Don't Dictate
-Your role is to help the user think, not to push them toward your preference.
+After the user responds to decisions, your role is to help them think deeper — not to push your preference.
+**Use `AskUserQuestion` for follow-up decisions** that emerge from their answers. Use prose for open-ended exploration.
 Good facilitation patterns:
-- *"The main tension here is between X and Y. Which matters more for your project?"*
-- *"Option A is simpler now but harder to change later. Option B is more work upfront but more flexible. What's your timeline pressure like?"*
-- *"I'd lean toward X because [reason], but Y makes sense if [condition]. What's your read?"*
-- *"You mentioned [earlier decision] — that makes Option B a more natural fit. Does that match your thinking?"*
+- *"The main tension here is between X and Y."* → then `AskUserQuestion` with the concrete options
+- Referencing earlier decisions: *"You chose Option A for recovery — that makes X a more natural fit for observability."* → then `AskUserQuestion` with refined options
+- When trade-offs need explicit weighing → `AskUserQuestion` with `description` fields that name the costs
 Bad facilitation patterns:
-- Presenting options without trade-offs (just a list)
-- Asking "what do you think?" without giving the user something to react to
+- Presenting options as prose paragraphs when they could be `AskUserQuestion` choices
+- Asking "what do you think?" without giving the user something concrete to react to
 - Overwhelming with edge cases before the main path is clear
 - Treating every decision as equally important
 ### Step 3: Probe for Hidden Constraints
-Research often misses things the user knows but hasn't mentioned. Ask about:
-- **Timeline pressure** — does this need to ship by a date?
-- **Audience/users** — who actually uses this? (affects complexity trade-offs)
-- **Future direction** — is this a throwaway or the foundation for more?
-- **Past experience** — have they tried something similar before? What went wrong?
-- **Strong preferences** — anything they definitely want or definitely don't want?
+Research often misses things the user knows but hasn't mentioned. Use `AskUserQuestion` for structured probes where the answer shapes the plan:
+```
+AskUserQuestion:
+  question: "What's the timeline pressure for this work?"
+  header: "Timeline"
+  options:
+    - label: "Ship this week"
+      description: "Minimal scope, skip nice-to-haves"
+    - label: "Ship this month"
+      description: "Room for polish and edge cases"
+    - label: "No hard deadline"
+      description: "Do it right, scope is flexible"
+```
+For open-ended probes where you need the user to explain (not choose), use prose:
+- *"Have you tried something similar before? What went wrong?"*
+- *"Anything you definitely want or definitely don't want?"*
 One or two questions at a time. Don't interrogate.
@@ -123,6 +171,21 @@ Don't mechanically walk through all 5 layers for every requirement — that woul
 Ask 2-3 questions at a time, let the user respond, then go deeper where their answers reveal uncertainty. The conversation should feel like a collaborative design session, not an interrogation.
+**Use `AskUserQuestion` for behavior decisions within distillation.** When a question has discrete answers (retry vs. fail vs. alert, real-time vs. polling, roles A/B/C), use the tool. When you need the user to describe or explain something open-ended, use prose.
+Example — Layer 3 question as `AskUserQuestion`:
+```
+question: "When the external enrichment API is down, what should the system do?"
+header: "Failure mode"
+options:
+  - label: "Retry with backoff (Recommended)"
+    description: "Queue retries at 1m/5m/30m intervals. Adds complexity but self-heals."
+  - label: "Fail and alert"
+    description: "Mark as failed, send alert. Simple but requires manual re-trigger."
+  - label: "Skip and continue"
+    description: "Process remaining items, revisit failures in next sweep."
+```
 **What you're listening for:**
 - **Contradictions** — "It should be simple" but also "it needs to handle 12 different states." Surface these gently.
@@ -132,14 +195,25 @@ Ask 2-3 questions at a time, let the user respond, then go deeper where their an
 ### Step 5: Converge on Decisions
-When the conversation has covered the key points, summarize what's been decided:
+When the conversation has covered the key points, summarize what's been decided as a brief prose list, then use `AskUserQuestion` for final confirmation:
-*"Here's where I think we've landed:*
+*"Here's where I think we've landed:"*
 - *[Decision 1]: [what was decided and why]*
 - *[Decision 2]: [what was decided and why]*
 - *[Open question]: [what's still unresolved and how to handle it]*
-*Does this match your understanding? If so, I'll carry these into planning."*
+Then confirm with `AskUserQuestion`:
+```
+question: "Does this match your understanding? Ready to move to planning?"
+header: "Confirm"
+options:
+  - label: "Looks good, proceed"
+    description: "Lock these decisions and move to planning phase."
+  - label: "I want to adjust something"
+    description: "Revisit one or more decisions before locking."
+  - label: "More to discuss"
+    description: "There are topics we haven't covered yet."
+```
 These decisions flow into `context.md` as **Locked Decisions** when the `planning` skill runs next.
@@ -169,12 +243,12 @@ Don't just recite the plan back. Translate it into what it means:
 ### Step 3: Surface What's Worth Discussing
-Don't wait for the user to spot issues. Proactively surface:
+Don't wait for the user to spot issues. Proactively surface concerns, then **use `AskUserQuestion` for any that have discrete choices:**
-- **Assumptions you're not confident about** — "Plan 01 assumes the API returns paginated results. I didn't verify this."
-- **Decisions that could go either way** — "I split this into 3 plans for parallelism, but you could also do it as 2 larger plans if you prefer fewer context switches."
-- **Risks the plan doesn't address** — "There's no fallback if the external API is slow. Worth adding, or accept the risk?"
-- **Scope questions** — "Plan 03 includes admin-only features. Ship those in v1, or defer?"
+- **Decisions that could go either way** → `AskUserQuestion` with the options and trade-offs
+- **Scope questions** → `AskUserQuestion` (e.g., "Ship admin features in v1?" with "Yes, include" / "Defer to v2" options)
+- **Risks the plan doesn't address** → `AskUserQuestion` (e.g., "Worth adding a fallback?" with "Add fallback" / "Accept risk" options)
+- **Assumptions you're not confident about** → Prose, since these need the user to confirm or correct rather than choose
 ### Step 4: Drill into Functionality
@@ -188,23 +262,43 @@ This is where post-planning discussion earns its keep — the plan makes the fea
 ### Step 5: Discuss and Revise Direction
-The user may want to:
-- **Change approach** — "Let's use WebSockets instead of polling." → Note this. Planning skill will rebuild the affected plans.
-- **Adjust scope** — "Defer the admin features." → Note this for deferred items.
-- **Reorder priorities** — "Do the dashboard before the settings page." → Note the new wave order.
-- **Ask questions** — "What happens if we skip the caching layer?" → Discuss implications honestly.
-- **Approve as-is** — "Looks good, proceed." → Move to executing.
+The user may want to change approach, adjust scope, reorder priorities, ask questions, or approve as-is. Use `AskUserQuestion` to give them a clear way to signal their intent:
+```
+question: "How would you like to proceed with this plan?"
+header: "Direction"
+options:
+  - label: "Approve as-is"
+    description: "Lock decisions and move to execution."
+  - label: "Adjust scope"
+    description: "Defer or add features before building."
+  - label: "Change approach"
+    description: "Revisit a technical decision in the plan."
+  - label: "More questions"
+    description: "I want to discuss specific parts further."
+```
+Based on their response, either drill deeper with follow-up `AskUserQuestion` calls or move to summarizing.
 ### Step 6: Summarize Changes
-If the discussion produced changes to the plan direction:
+If the discussion produced changes to the plan direction, summarize as prose:
 *"Based on our discussion:"*
 - *[Change 1]: [what changed and why]*
 - *[Change 2]: [what changed and why]*
 - *[Unchanged]: [what stays the same]*
-*Next step: I'll update the plans to reflect this. Want me to proceed with re-planning, or is there more to discuss?"*
+Then confirm next steps with `AskUserQuestion`:
+```
+question: "Ready to update the plans, or more to discuss?"
+header: "Next step"
+options:
+  - label: "Update plans"
+    description: "Re-plan affected areas with the revised decisions."
+  - label: "More to discuss"
+    description: "There are topics we haven't covered yet."
+```
 If re-planning is needed, route back to the `planning` skill with the discussion summary as input. The planning skill will update plans, requirements, and context.md accordingly.
@@ -227,3 +321,15 @@ If re-planning is needed, route back to the `planning` skill with the discussion
 - **Premature convergence** — locking decisions before the user has had a chance to think. Don't rush the summary.
 - **Scope creep via discussion** — "While we're at it, should we also..." Keep discussion focused on the work at hand.
 - **Discussion as procrastination** — if the user keeps wanting to discuss but never approves a plan, gently surface the pattern.
+## Phase Handoff
+After discussion converges on decisions:
+1. **Persist decisions** — The decision summary from Step 5 (pre-planning) or Step 6 (post-planning) will flow into `context.md` when the planning skill runs. For post-planning revisions, note the changes clearly so planning can pick them up.
+2. **Update state** — Set `current.status` to `planning` (or `architecting` for Full tier) in `.forge/state/milestone-{id}.yml`
+3. **Recommend context clear:**
+*"Discussion phase complete. Decisions are captured and will be written to context.md during planning. I recommend clearing context (`/clear`) before starting the {planning/architecting} phase — the planner will load everything it needs from `.forge/` state files.*
+*Ready to continue? Clear context and invoke `/forge` to resume."*

package/template/.claude/skills/executing/SKILL.md CHANGED Viewed

@@ -152,3 +152,17 @@ While executing, watch for and log these patterns in `.forge/state/index.yml →
 - **Agent struggles**: If you need multiple attempts to get something right, or the user has to guide you through it, log the task type as an `agent_struggle`.
 This takes seconds per signal. Don't skip it — this data drives framework evolution.
+## Phase Handoff
+After all plans in the phase are executed:
+1. **Verify persistence** — Confirm execution summary is documented, all commits are made, milestone state is updated with progress and deviations, and desire path signals are logged
+2. **Update state** — Set `current.status` to `verifying` in `.forge/state/milestone-{id}.yml`
+3. **Recommend context clear:**
+*"Execution phase complete. All tasks committed, state updated, deviations logged. I recommend clearing context (`/clear`) before starting verification — the verifier needs a fresh window to objectively assess the work against must_haves, without carrying the executor's assumptions.*
+*Ready to continue? Clear context and invoke `/forge` to resume."*
+This handoff is especially important after execution — the verifier should approach the code with fresh eyes, not the executor's memory of what it intended to build.

package/template/.claude/skills/forge/SKILL.md CHANGED Viewed

@@ -481,6 +481,12 @@ Match ANY:
 → Add `designing` if UI work involved
 → Add `securing` if auth/data/API touched
+### Direct Utility Skills
+Match ANY:
+- User says "upgrade", "update forge", "sync forge"
+→ Route to `upgrading` skill (bypasses tier detection)
 ### User Override
 If user explicitly says "Use Quick/Standard/Full tier" — honor it. No arguments.
@@ -512,13 +518,64 @@ While working at any tier, if you encounter:
 When uncertain → Rule 4 (ask). Never silently make architectural decisions.
+## Context Handoff Protocol
+Phase transitions are natural context-clearing boundaries. After each phase completes and writes its state to disk, **recommend the user clear context** before the next phase begins. This prevents context rot — the #1 cause of quality degradation in long sessions.
+### Why Clear Between Phases
+Each phase produces persistent artifacts (state files, plans, reports, backlogs) that the next phase reads from disk. The next phase does NOT need the previous phase's working memory — it needs the artifacts. Carrying forward stale context wastes tokens and degrades output quality.
+### When to Recommend
+Recommend clearing context at every phase boundary in Standard and Full tiers:
+```
+researching → [clear] → discussing → [clear] → architecting → [clear] → planning → [clear] → executing → [clear] → verifying → [clear] → auditing → [clear] → refactoring
+```
+**Skip the recommendation when:**
+- Quick tier (single phase, no boundary to clear)
+- The phase was very short (under 5 minutes of work) and context is well under 40%
+- The user has explicitly said they don't want context-clearing prompts
+### The Handoff Prompt
+Each skill ends with a standard handoff message. The pattern is:
+1. **Confirm state is written** — skill verifies its outputs are persisted to `.forge/`
+2. **Summarize what was produced** — brief list of artifacts the next phase will need
+3. **Recommend clearing context** — present the prompt to the user:
+*"Phase complete. All state has been written to disk. I recommend clearing context (`/clear`) before starting {next phase} — this gives the next phase a fresh context window to work with. The {next phase} skill will load everything it needs from `.forge/` state files.*
+*Ready to continue? Clear context and invoke `/forge` to resume."*
+4. **If user declines** — proceed normally. The recommendation is advisory, not blocking.
+### What Each Phase Writes (Handoff Artifacts)
+| Phase | Writes to Disk | Next Phase Reads |
+|-------|---------------|------------------|
+| researching | Research summary (markdown in conversation or `.forge/` files) | discussing reads research findings |
+| discussing | Decision summary → carried into planning via context.md | planning reads context.md |
+| architecting | ADRs in `.forge/decisions/`, data models, API contracts | planning reads decisions |
+| planning | Plans in `.forge/phases/`, requirements.yml, roadmap.yml, context.md | executing reads plans |
+| executing | Committed code, execution summary, milestone state updated | verifying reads must_haves from plans |
+| verifying | Verification report, desire paths updated | auditing reads project.yml + source files |
+| auditing | Health report in `.forge/audits/` | refactoring reads health report + git diff |
+### Context Loading on Resume
+When a skill starts after a context clear, it must load its required state from disk. Each skill's "Read Context" or "Pre-Execution Checklist" step handles this. The `forge` orchestrator reads `milestone-{id}.yml` to determine which skill to route to, then that skill loads its own dependencies.
 ## State Transitions
 ```
-not_started → [init if new] → researching → discussing → planning → executing → verifying → auditing → refactoring → complete
-                                                                   ↗ debugging (if stuck)
-                                                         ↗ designing (if UI)
-                                                         ↗ securing (if auth/data)
+not_started → [init if new] → researching → [clear] → discussing → [clear] → planning → [clear] → executing → [clear] → verifying → [clear] → auditing → [clear] → refactoring → complete
+                                                                                                    ↗ debugging (if stuck)
+                                                                                          ↗ designing (if UI)
+                                                                                          ↗ securing (if auth/data)
 ```
 Update `.forge/state/milestone-{id}.yml` at each transition. Update `.forge/state/index.yml` milestone `last_updated` timestamp.

package/template/.claude/skills/planning/SKILL.md CHANGED Viewed

@@ -223,3 +223,15 @@ Show the user:
 4. Ask: "Does this plan match your expectations? Any changes?"
 Planning is complete when user approves.
+## Phase Handoff
+After the user approves the plan:
+1. **Verify persistence** — Confirm all plans are written to `.forge/phases/{N}-{name}/plan-{NN}.md`, requirements to `.forge/requirements.yml`, roadmap to `.forge/roadmap.yml`, and context to `.forge/context.md`
+2. **Update state** — Set `current.status` to `executing` in `.forge/state/milestone-{id}.yml`
+3. **Recommend context clear:**
+*"Planning phase complete. Plans, requirements, and context are all written to `.forge/`. I recommend clearing context (`/clear`) before starting execution — the executor will load the plan files and context.md fresh, giving it maximum context window for building.*
+*Ready to continue? Clear context and invoke `/forge` to resume."*

package/template/.claude/skills/researching/SKILL.md CHANGED Viewed

@@ -115,3 +115,15 @@ Research output should be under 500 lines. If larger, split into focused documen
 - `research-codebase.md` for codebase findings
 - `research-tech.md` for technology evaluation
 - `research-requirements.md` for requirements analysis
+## Phase Handoff
+After research is complete:
+1. **Persist findings** — Write research summary to `.forge/phases/` or present inline (for Standard tier, inline is fine; for Full tier with multiple research topics, write to files)
+2. **Update state** — Set `current.status` to `discussing` in `.forge/state/milestone-{id}.yml`
+3. **Recommend context clear:**
+*"Research phase complete. Findings are summarized above [or written to .forge/phases/]. I recommend clearing context (`/clear`) before starting the discussion phase — this gives discussing a fresh window to work with. The discussing skill will reference the research findings.*
+*Ready to continue? Clear context and invoke `/forge` to resume."*

package/template/.claude/skills/upgrading/SKILL.md ADDED Viewed

@@ -0,0 +1,90 @@
+---
+name: upgrading
+description: "Sync Forge framework files from a local dev repo or NPM. Use when developing Forge itself or applying updates to an installed project."
+---
+# Upgrading: Local Dev Sync
+Sync framework files from a local Forge source repo into the current project. Use this during Forge development to test changes without publishing to NPM.
+For published upgrades, use `npx forge-orkes upgrade` instead.
+## Step 1: Resolve Source Path
+Check if `.forge/dev-source` exists in the project root.
+- **If it exists:** read the path from the file (first line, trimmed). Verify the path exists and contains `packages/create-forge/template/`.
+- **If it doesn't exist:** ask the user: *"Where is your local Forge repo? (e.g., ~/Dev/forge)"*
+  - Validate the path has `packages/create-forge/template/`
+  - Save the path to `.forge/dev-source` for next time
+The template directory is `{source}/packages/create-forge/template/`.
+## Step 2: File Classification
+Files are classified into three categories:
+| Category | Paths | Behavior |
+|----------|-------|----------|
+| **Framework-owned** | `.claude/agents/*.md`, `.claude/skills/*/SKILL.md` | Overwrite — these are Forge's |
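+| **Merge-owned** | `CLAUDE.md`, `.claude/settings.json` | Never overwritten wholesale — `CLAUDE.md` changes are summarized for manual merge; `settings.json` has only its `forge.*` keys updated (see Step 5) |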
+| **Template-only** | `.forge/templates/**` | Overwrite — reference templates |
+**Never touch** user-generated files: `.forge/project.yml`, `.forge/state/`, `.forge/constitution.md`, `.forge/context.md`, `.forge/requirements.yml`, `.forge/roadmap.yml`, `.forge/design-system.md`, `.forge/refactor-backlog.yml`.
+## Step 3: Sync Framework-Owned Files
+Compare the framework-owned files in the source template against those in the local project. For each file found on either side:
+1. Read the source file content
+2. Read the local file content (if it exists)
+3. If different → overwrite local with source, report as **updated**
+4. If same → report as **unchanged**
+5. If source has a new file not in local → copy it, report as **added**
+6. If local has a file not in source → report as **removed from template** (don't delete — let user decide)
+## Step 4: Sync Template-Only Files
+Same process as Step 3, but for `.forge/templates/**`.
+## Step 5: Handle Merge-Owned Files
+For `CLAUDE.md`:
+1. Read source and local versions
+2. If different → **do not overwrite**. Instead, summarize what changed in prose (new sections, removed sections, modified text)
+3. Present the summary to the user and let them decide how to merge
+For `.claude/settings.json`:
+1. Read source and local versions
+2. Compare the `forge.*` keys only
+3. If forge keys differ → update only `forge.*` keys in local, preserve user's `hooks` and any other custom keys
+4. Update `forge.version` to match the source package version
+## Step 6: Report
+Present a summary:
+```
+Forge Local Sync Complete
+─────────────────────────
+Source: {path}
+Version: {old} → {new}
+Updated: {N} files
+  - .claude/skills/executing/SKILL.md
+  - ...
+Added: {N} files
+  - .claude/skills/new-skill/SKILL.md
+  - ...
+Removed from template: {N} files
+  - .claude/agents/old-agent.md (still in your project)
+  - ...
+Needs manual review: {N} files
+  - CLAUDE.md (new sections: "Upgrade Mechanism", modified: "Skill Routing")
+  - ...
+Unchanged: {N} files
+```

package/template/.claude/skills/verifying/SKILL.md CHANGED Viewed

@@ -12,6 +12,20 @@ Prove completed work actually delivers what was promised. Task completion ≠ go
 Don't ask: "Did we complete all the tasks?"
 Ask: "Does the user get what they were promised?"
+## Load Context
+When entering with a fresh context (after `/clear`):
+```
+Read: .forge/state/milestone-{id}.yml → current phase, plans completed
+Read: .forge/project.yml → tech stack (for running tests)
+Read: .forge/phases/{N}-{name}/plan-{NN}.md → must_haves (truths, artifacts, key_links)
+Read: .forge/context.md → locked decisions (to understand intent)
+Read: .forge/requirements.yml → requirement IDs for coverage check
+```
+This is critical — the verifier should assess the code with fresh eyes, not carry the executor's assumptions. Load must_haves from the plan files and verify against the actual codebase.
 ## 3-Level Goal-Backward Verification
 ### Level 1: Observable Truths
@@ -199,3 +213,17 @@ When any pattern reaches **3+ occurrences**, surface it to the user:
 - *"Should I add a new constitutional article: 'Error Boundaries Required'?"*
 Only suggest changes when there's clear evidence (3+ occurrences). One-off issues are noise, not signal.
+## Phase Handoff
+After verification completes with a PASSED verdict:
+1. **Verify persistence** — Confirm that verification results are documented and that the desire-paths retrospective is logged to `.forge/state/index.yml`
+2. **Update state** — Set `current.status` to `auditing` in `.forge/state/milestone-{id}.yml`
+3. **Recommend context clear:**
+*"Verification phase complete — all truths verified, artifacts substantive and wired. I recommend clearing context (`/clear`) before the health audit — the auditing skill spawns fresh subagents anyway, and a clean orchestrator context ensures nothing is missed.*
+*Ready to continue? Clear context and invoke `/forge` to resume."*
+Note: If verification found GAPS, route back to planning in gap-closure mode instead. The context-clear recommendation applies only once re-verification returns a PASSED verdict.

package/template/CLAUDE.md CHANGED Viewed

@@ -46,6 +46,7 @@ Forge auto-detects complexity. Override with: "Use Quick/Standard/Full tier."
 | Build UI with design system consistency | `designing` | When UI involved |
 | Review security before shipping | `securing` | When auth/data/API involved |
 | Debug systematically with hypotheses | `debugging` | When stuck |
+| Upgrade Forge framework files | `upgrading` | On-demand |
 | Use Beads for cross-session memory | `beads-integration` | When Beads installed |
 ## Context Engineering
@@ -61,6 +62,9 @@ Forge auto-detects complexity. Override with: "Use Quick/Standard/Full tier."
 ### Fresh Agent Pattern
 When a task touches 20+ files or a complex subsystem, spawn a fresh executor agent with isolated context. This prevents context rot — the #1 cause of quality degradation in long sessions.
+### Context Handoff Between Phases
+Each phase writes its outputs to `.forge/` before completing. At every phase boundary (researching → discussing → planning → executing → verifying → auditing → refactoring), the completing skill recommends clearing context (`/clear`) before the next phase begins. The next phase loads what it needs from disk. This is advisory — skip it for short phases where context-window usage is under 40%. See the `forge` skill's "Context Handoff Protocol" for full details.
 ### Lazy Loading
 Skills load only when invoked. CLAUDE.md stays in context; skill details load on demand. This keeps base context lean (~300 lines) while making full framework available.