npm - @aion0/forge - Versions diffs - 0.9.1 → 0.9.2 - Mend

@aion0/forge 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/RELEASE_NOTES.md +60 -5
package/app/api/agents/[id]/test/route.ts +150 -0
package/app/api/connectors/[id]/sync-cli/route.ts +73 -0
package/app/api/connectors/tool-test/route.ts +70 -0
package/app/api/jobs/[id]/cancel/route.ts +50 -0
package/app/api/jobs/[id]/dispatched-pipelines/route.ts +24 -0
package/app/api/jobs/[id]/run/route.ts +22 -2
package/app/api/jobs/route.ts +11 -1
package/app/api/pipelines/[id]/schema/route.ts +53 -0
package/app/api/pipelines/bulk-delete/route.ts +39 -0
package/app/api/pipelines/gc/route.ts +27 -0
package/app/api/schedules/[id]/cancel/route.ts +27 -0
package/app/api/schedules/[id]/route.ts +173 -0
package/app/api/schedules/[id]/run/route.ts +45 -0
package/app/api/schedules/[id]/runs/route.ts +22 -0
package/app/api/schedules/[id]/stop/route.ts +33 -0
package/app/api/schedules/route.ts +175 -0
package/app/api/tasks/bulk-delete/route.ts +47 -0
package/bin/forge-server.mjs +22 -1
package/cli/mw.mjs +186 -7657
package/cli/mw.ts +26 -0
package/components/ConnectorsPanel.tsx +46 -0
package/components/Dashboard.tsx +23 -10
package/components/JobsView.tsx +245 -6
package/components/PipelineEditor.tsx +38 -1
package/components/PipelineView.tsx +325 -4
package/components/ScheduleCreateModal.tsx +1507 -0
package/components/SchedulesView.tsx +605 -0
package/components/SettingsModal.tsx +106 -0
package/docs/Team-Workflow-Integration.md +487 -0
package/docs/UI-Design-Brief-SidePanel.md +278 -0
package/lib/__tests__/foreach-batch-yaml.test.ts +33 -0
package/lib/__tests__/foreach-before.test.ts +201 -0
package/lib/__tests__/foreach-parse.test.ts +114 -0
package/lib/__tests__/foreach-snapshot.test.ts +112 -0
package/lib/__tests__/foreach-source.test.ts +105 -0
package/lib/__tests__/foreach-template.test.ts +112 -0
package/lib/chat/agent-loop.ts +3 -3
package/lib/chat-standalone.ts +26 -1
package/lib/claude-process.ts +8 -5
package/lib/connectors/sync.ts +8 -2
package/lib/crypto.ts +1 -1
package/lib/dirs.ts +22 -7
package/lib/help-docs/05-pipelines.md +171 -0
package/lib/help-docs/13-schedules.md +165 -0
package/lib/help-docs/23-automation-states.md +148 -0
package/lib/help-docs/CLAUDE.md +6 -6
package/lib/init.ts +25 -6
package/lib/jobs/recipes.ts +3 -2
package/lib/jobs/scheduler.ts +215 -11
package/lib/jobs/store.ts +79 -3
package/lib/jobs/types.ts +31 -0
package/lib/logger.ts +1 -1
package/lib/notify.ts +13 -6
package/lib/pipeline-gc.ts +105 -0
package/lib/pipeline-scheduler.ts +29 -0
package/lib/pipeline.ts +811 -330
package/lib/schedules/action-runner.ts +257 -0
package/lib/schedules/scheduler.ts +422 -0
package/lib/schedules/state.ts +41 -0
package/lib/schedules/store.ts +618 -0
package/lib/schedules/types.ts +117 -0
package/lib/settings.ts +35 -0
package/lib/task-manager.ts +56 -13
package/lib/workflow-marketplace.ts +7 -1
package/lib/workspace/skill-installer.ts +7 -6
package/package.json +3 -1
package/lib/help-docs/19-jobs.md +0 -145
package/lib/help-docs/20-mantis-bug-fix.md +0 -115
package/lib/help-docs/22-recipes.md +0 -124

package/lib/pipeline.ts CHANGED Viewed

@@ -6,10 +6,11 @@
  */
 import { randomUUID } from 'node:crypto';
-import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, statSync } from 'node:fs';
+import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, statSync, unlinkSync, rmSync } from 'node:fs';
+import { execSync } from 'node:child_process';
 import { join } from 'node:path';
 import YAML from 'yaml';
-import { createTask, getTask, onTaskEvent, taskModelOverrides, taskAppendSystemPromptOverrides } from './task-manager';
+import { createTask, getTask, onTaskEvent, taskModelOverrides, taskAppendSystemPromptOverrides, cancelTask } from './task-manager';
 import { getProjectInfo } from './projects';
 import { loadSettings } from './settings';
 import { getAgent, listAgents } from './agents';
@@ -47,6 +48,16 @@ export interface WorkflowNode {
   outputs: { name: string; extract: 'result' | 'git_diff' | 'stdout' | 'plugin' }[];
   routes: { condition: string; next: string }[];
   maxIterations: number;
+  /** Auto-retry the node on transient failure. Default 0 (fail-fast,
+   *  matches old behavior). E.g. retries: 2 → 1 initial + 2 retries =
+   *  3 total attempts before the node is marked failed. Retries spawn
+   *  a fresh task each time; output state is wiped between attempts.
+   *  Use for nodes prone to transient errors: rate-limited API calls,
+   *  flaky network, race-condition-prone shell ops. */
+  retries?: number;
+  /** Milliseconds to wait before each retry. Default 0 (immediate).
+   *  Use 5000+ for downstream rate-limit recovery. */
+  retryDelayMs?: number;
 }
 // ─── Conversation Mode Types ──────────────────────────────
@@ -80,23 +91,131 @@ export interface ConversationConfig {
 // ─── Workflow ─────────────────────────────────────────────
+/** Type tag for an extended pipeline input field. */
+export type WorkflowInputType = 'string' | 'integer' | 'number' | 'boolean' | 'enum';
+/** Extended input field spec. Used when the yaml's `input:` block
+ *  declares an object instead of a plain description string.
+ *  Every property is optional; `normalizeInputField` supplies the
+ *  defaults noted on each field below. */
+export interface WorkflowInputFieldSpec {
+  /** Free-text explanation of the field. Normalized to '' when absent. */
+  description?: string;
+  /** Short display label. Normalized to '' when absent. */
+  label?: string;
+  /** Value type tag. Normalized to 'string' when absent. */
+  type?: WorkflowInputType;
+  /** Allowed values; entries are stringified during normalization. */
+  enum?: string[];           // for type: 'enum'
+  /** Normalized to `true` when absent on an object spec. */
+  required?: boolean;        // overrides the description-heuristic
+  /** Pre-filled value. Normalized to `null` when absent. */
+  default?: string | number | boolean;
+  /** Normalized to `false` when absent on an object spec. */
+  multiline?: boolean;       // forces textarea regardless of description heuristic
+}
+/** A workflow input value is either a legacy description string
+ *  (`input: { bug_id: "Mantis bug id" }`) or a full field spec
+ *  (`input: { bug_id: { description: "…", type: integer, required: true } }`).
+ *  Schedule UI / pipeline-schema endpoint normalize this into a uniform
+ *  field record before rendering. */
+export type WorkflowInputSpec = string | WorkflowInputFieldSpec;
+/**
+ * Workflow-level loop spec. Declaring this on a `dag` workflow turns the
+ * pipeline into "run the whole DAG N times, once per item in `source`".
+ * Each iteration's per-node state is fully reset between rounds, but the
+ * pipeline_run id stays the same — i.e. it's ONE run, M iterations, not
+ * M sibling runs. Designed for batch use cases (list of bug ids, list
+ * of MR ids, …) where each item flows through the same node chain.
+ *
+ * Resolved + validated at startPipeline time:
+ *   - `source` is templated against `input` / `vars` and parsed to an array
+ *   - empty array → pipeline immediately settles to done (0 iterations)
+ *
+ * Inside the run, node prompts reference the current item via
+ * `{{<asName>}}` (defaults to `{{item}}`) and current position via
+ * `{{loop.index}}` / `{{loop.total}}`.
+ *
+ * Not supported (deliberate non-goals — see forge-pipeline-foreach-design.md):
+ *   - nested for_each
+ *   - parallel iterations (always sequential in v1)
+ *   - dynamic source from an upstream node's output
+ *   - cross-iteration output access
+ */
+export interface ForEachSpec {
+  /** Templated string ("{{input.bug_ids}}") or literal array; resolved at
+   *  startPipeline time (no `before:`) or after setup phase finishes (with
+   *  `before:` — then source may reference `{{nodes.<id>.outputs.<name>}}`). */
+  source: string | unknown[];
+  /** Separator when `source` resolves to a string. Default ",". */
+  split?: string;
+  /** Variable name exposed inside nodes via `{{<asName>}}`. Default "item". */
+  as?: string;
+  /** Iteration-failure policy. Default "continue".
+   *    - "continue": the next iteration runs anyway; the pipeline ends
+   *      `failed` if any iteration failed
+   *    - "stop": the first failure halts the loop */
+  on_failure?: 'continue' | 'stop';
+  /** Node ids that run ONCE before the loop body — for resolving items
+   *  dynamically from upstream (e.g. a list-iids shell node). These nodes:
+   *    - are scheduled first in isolation (loop-body nodes wait)
+   *    - keep `done` status across all iterations
+   *    - are excluded from per-iter snapshots + per-iter reset
+   *  The for_each.source template can reference their outputs. */
+  before?: string[];
+}
 export interface Workflow {
   name: string;
   type?: 'dag' | 'conversation';  // default: 'dag'
   description?: string;
   vars: Record<string, string>;
-  input: Record<string, string>;  // required input fields
+  input: Record<string, WorkflowInputSpec>;
   nodes: Record<string, WorkflowNode>;
+  /** Loop over a list — each iteration runs the full DAG once. See ForEachSpec. */
+  for_each?: ForEachSpec;
   // Conversation mode fields (only when type === 'conversation')
   conversation?: ConversationConfig;
 }
-export type PipelineNodeStatus = 'pending' | 'running' | 'done' | 'failed' | 'skipped';
+/**
+ * Return the human-readable description of a workflow input, whichever
+ * shape it was declared in: a legacy plain string is returned as-is, an
+ * object spec yields its `description`. Missing values give ''.
+ */
+export function inputDescription(spec: WorkflowInputSpec | undefined): string {
+  const text = typeof spec === 'string' ? spec : spec?.description;
+  return text || '';
+}
+/** Type tags `normalizeInputField` accepts; anything else is treated as 'string'. */
+const VALID_INPUT_TYPES: readonly WorkflowInputType[] = ['string', 'integer', 'number', 'boolean', 'enum'];
+/**
+ * Normalize any input spec into a uniform field record.
+ *
+ * @param name  Key of the field inside the workflow's `input:` block.
+ * @param spec  Legacy description string, full field spec, or undefined.
+ * @returns A record with every property populated:
+ *   - legacy / missing spec: `type` is 'string'; `required` and
+ *     `multiline` are guessed from the description text
+ *   - object spec: absent properties default to `type: 'string'`,
+ *     `required: true`, `multiline: false`, `default: null`, `enum: null`
+ *
+ * The spec comes from user-authored YAML, so `type` is validated here:
+ * an unrecognized tag (e.g. `int`) falls back to 'string' instead of
+ * leaking through the declared `WorkflowInputType` return type.
+ */
+export function normalizeInputField(name: string, spec: WorkflowInputSpec | undefined): {
+  name: string;
+  description: string;
+  label: string;
+  type: WorkflowInputType;
+  enum: string[] | null;
+  required: boolean;
+  default: string | number | boolean | null;
+  multiline: boolean;
+} {
+  if (!spec || typeof spec === 'string') {
+    const description = typeof spec === 'string' ? spec : '';
+    // Legacy heuristic: a field is required unless its description is
+    // empty or says "optional" / "leave blank" / "leave empty".
+    const required = !!description && !/optional|leave blank|leave empty/i.test(description);
+    // Legacy heuristic: descriptions hinting at long text get a textarea.
+    const multiline = /multi-line|multiline|prompt|description|body|template/i.test(description);
+    return { name, description, label: '', type: 'string', enum: null, required, default: null, multiline };
+  }
+  // Only accept a known tag; `find` also rejects non-string values.
+  const t: WorkflowInputType = VALID_INPUT_TYPES.find(k => k === spec.type) ?? 'string';
+  return {
+    name,
+    description: spec.description || '',
+    label: spec.label || '',
+    type: t,
+    // YAML may yield numbers/booleans in the list; the record promises strings.
+    enum: Array.isArray(spec.enum) ? spec.enum.map(String) : null,
+    required: spec.required ?? true,
+    default: spec.default ?? null,
+    multiline: spec.multiline ?? false,
+  };
+}
+export type PipelineNodeStatus = 'pending' | 'running' | 'done' | 'failed' | 'skipped' | 'cancelled';
 export interface PipelineNodeState {
   status: PipelineNodeStatus;
   taskId?: string;
   outputs: Record<string, string>;
+  /** Number of times THIS node's task has been launched. 1 = first
+   *  attempt; 2,3,... = retries. Bumped each time we (re)create the
+   *  task. Capped by WorkflowNode.retries (default 0 = no retry). */
+  attempts?: number;
   iterations: number;
   startedAt?: string;
   completedAt?: string;
@@ -121,6 +240,60 @@ export interface Pipeline {
    * recovery use the same set as the original run.
    */
   skills?: string[];
+  /**
+   * Absolute path to this run's scratch dir, served to YAML nodes as
+   * `{{run.tmp_dir}}`. Layout: `<project_dir>/.forge/worktrees/pipeline-<id>/`.
+   * Resolved from `input.project`; empty / undefined when project lookup
+   * failed (the template then renders as empty string).
+   * `done` runs wipe this immediately; `failed`/`cancelled` keep it for
+   * GC (see lib/pipeline-gc.ts).
+   */
+  tmpDir?: string;
+  /**
+   * Loop state when this pipeline run was started against a workflow that
+   * declared `for_each:`. Absent on plain DAG runs (the orchestrator
+   * branches on `pipeline.forEach !== undefined`).
+   *
+   * Lifecycle: `currentIndex` starts at 0; one iteration = one full DAG
+   * pass. When all nodes settle, `iterations` gets a snapshot of the
+   * round's node states, then everything resets and `currentIndex` ticks
+   * up. The pipeline stays `running` until `currentIndex >= total`.
+   */
+  forEach?: {
+    /** The resolved item list. Populated at startPipeline (no `before:`) or
+     *  at the setup→loop transition (with `before:`). Empty during setup phase. */
+    items: unknown[];
+    /** 0-based index of the currently-executing iteration. */
+    currentIndex: number;
+    /** Cached items.length; redundant but cheap and makes UI math easy. */
+    total: number;
+    /** Variable name nodes use via `{{<asName>}}`. */
+    asName: string;
+    /** Set by startPipeline from workflow.for_each.on_failure. Default "continue". */
+    onFailure: 'continue' | 'stop';
+    /** Mirror of spec.before — frozen at start. Loop-setup nodes that run
+     *  ONCE, never reset, never snapshot. Undefined or empty = no setup phase. */
+    before?: string[];
+    /** Phase flag: false during setup (before nodes still running or pending);
+     *  true once items[] is final + loop can start. For backward compat,
+     *  pipelines without `before:` are created with itemsResolved=true. */
+    itemsResolved: boolean;
+    /** Per-iteration history snapshots (oldest first). Appended at iteration boundary in checkPipelineCompletion. */
+    iterations: Array<{
+      index: number;
+      status: 'done' | 'failed' | 'cancelled';
+      startedAt: string;
+      completedAt: string;
+      /** Compact snapshot. `taskId` is retained so the UI can re-open
+       *  the original task drawer for any past iteration's node. */
+      nodes: Record<string, {
+        status: PipelineNodeStatus;
+        outputs: Record<string, string>;
+        error?: string;
+        taskId?: string;
+      }>;
+    }>;
+  };
   // Conversation mode state
   conversation?: {
     config: ConversationConfig;
@@ -132,273 +305,15 @@ export interface Pipeline {
 // ─── Workflow Loading ─────────────────────────────────────
-// ─── Built-in workflows ──────────────────────────────────
-export const BUILTIN_WORKFLOWS: Record<string, string> = {
-  'issue-fix-and-review': `
-name: issue-fix-and-review
-description: "Fetch GitHub issue → fix code → create PR → review PR → notify"
-input:
-  issue_id: "GitHub issue number"
-  project: "Project name"
-  base_branch: "Base branch (default: auto-detect)"
-  extra_context: "Additional instructions for the fix (optional)"
-nodes:
-  setup:
-    mode: shell
-    project: "{{input.project}}"
-    prompt: |
-      cd "$(git rev-parse --show-toplevel)" && \
-      if [ -n "$(git status --porcelain)" ]; then echo "ERROR: Working directory has uncommitted changes. Please commit or stash first." && exit 1; fi && \
-      ORIG_BRANCH=$(git branch --show-current || git rev-parse --short HEAD) && \
-      REPO=$(gh repo view --json nameWithOwner -q .nameWithOwner 2>/dev/null || git remote get-url origin | sed 's/.*github.com[:/]//;s/.git$//') && \
-      BASE="{{input.base_branch}}" && \
-      if [ -z "$BASE" ] || [ "$BASE" = "auto-detect" ]; then BASE=$(git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's@^refs/remotes/origin/@@' || echo main); fi && \
-      git checkout "$BASE" 2>/dev/null || true && \
-      git pull origin "$BASE" 2>/dev/null || true && \
-      OLD_BRANCH=$(git branch --list "fix/{{input.issue_id}}-*" | head -1 | tr -d ' *') && \
-      if [ -n "$OLD_BRANCH" ]; then git branch -D "$OLD_BRANCH" 2>/dev/null || true; fi && \
-      echo "REPO=$REPO" && echo "BASE=$BASE" && echo "ORIG_BRANCH=$ORIG_BRANCH"
-    outputs:
-      - name: info
-        extract: stdout
-  fetch-issue:
-    mode: shell
-    project: "{{input.project}}"
-    depends_on: [setup]
-    prompt: |
-      ISSUE_ID="{{input.issue_id}}" && \
-      if [ -z "$ISSUE_ID" ]; then echo "__SKIP__ No issue_id provided" && exit 0; fi && \
-      SETUP_INFO=$'{{nodes.setup.outputs.info}}' && \
-      REPO=$(echo "$SETUP_INFO" | grep REPO= | cut -d= -f2) && \
-      gh issue view "$ISSUE_ID" --json title,body,labels,number -R "$REPO"
-    outputs:
-      - name: issue_json
-        extract: stdout
-  fix-code:
-    project: "{{input.project}}"
-    depends_on: [fetch-issue]
-    prompt: |
-      A GitHub issue needs to be fixed. Here is the issue data:
-      {{nodes.fetch-issue.outputs.issue_json}}
-      Steps:
-      1. Create a new branch from the current branch (which is already on the base). Name format: fix/{{input.issue_id}}-<short-description> (e.g. fix/3-add-validation, fix/15-null-pointer). Any old branch for this issue has been cleaned up.
-      2. Analyze the issue and fix the code.
-      3. Stage and commit with a message referencing #{{input.issue_id}}.
-      Base branch info: {{nodes.setup.outputs.info}}
-      Additional context from user: {{input.extra_context}}
-    outputs:
-      - name: summary
-        extract: result
-      - name: diff
-        extract: git_diff
-  push-and-pr:
-    mode: shell
-    project: "{{input.project}}"
-    depends_on: [fix-code]
-    prompt: |
-      SETUP_INFO=$'{{nodes.setup.outputs.info}}' && \
-      REPO=$(echo "$SETUP_INFO" | grep REPO= | cut -d= -f2) && \
-      BRANCH=$(git branch --show-current) && \
-      git push -u origin "$BRANCH" --force-with-lease 2>&1 && \
-      PR_URL=$(gh pr create --title "Fix #{{input.issue_id}}" \
-        --body "Auto-fix by Forge Pipeline for issue #{{input.issue_id}}." -R "$REPO" 2>/dev/null || \
-        gh pr view "$BRANCH" --json url -q .url -R "$REPO" 2>/dev/null) && \
-      echo "$PR_URL"
-    outputs:
-      - name: pr_url
-        extract: stdout
-  review:
-    project: "{{input.project}}"
-    depends_on: [push-and-pr]
-    prompt: |
-      Review the code changes for issue #{{input.issue_id}}.
-      Fix summary: {{nodes.fix-code.outputs.summary}}
-      Git diff:
-      {{nodes.fix-code.outputs.diff}}
-      Check for:
-      - Bugs and logic errors
-      - Security vulnerabilities
-      - Performance issues
-      - Whether the fix actually addresses the issue
-      Respond with:
-      1. APPROVED or CHANGES_REQUESTED
-      2. Specific issues found with file paths and line numbers
-    outputs:
-      - name: review_result
-        extract: result
-  cleanup:
-    mode: shell
-    project: "{{input.project}}"
-    depends_on: [review]
-    prompt: |
-      SETUP_INFO=$'{{nodes.setup.outputs.info}}' && \
-      ORIG=$(echo "$SETUP_INFO" | grep ORIG_BRANCH= | cut -d= -f2) && \
-      PR_URL=$'{{nodes.push-and-pr.outputs.pr_url}}' && \
-      if [ -n "$(git status --porcelain)" ]; then
-        echo "Issue #{{input.issue_id}} — PR: $PR_URL (staying on $(git branch --show-current))"
-      else
-        git checkout "$ORIG" 2>/dev/null || true
-        echo "Issue #{{input.issue_id}} — PR: $PR_URL (switched back to $ORIG)"
-      fi
-    outputs:
-      - name: result
-        extract: stdout
-`,
-  'multi-agent-collaboration': `
-name: multi-agent-collaboration
-type: conversation
-description: "Two agents collaborate: one designs, one implements"
-input:
-  project: "Project name"
-  task: "What to build or fix"
-agents:
-  - id: architect
-    agent: claude
-    role: "You are a software architect. Round 1: design the solution with clear steps. Later rounds: review the implementation and say DONE if satisfied."
-  - id: implementer
-    agent: claude
-    role: "You are a developer. Implement what the architect designs. After implementing, say DONE."
-max_rounds: 3
-stop_condition: "both agents say DONE"
-initial_prompt: "Task: {{input.task}}"
-`,
-  'review-mr': `
-name: review-mr
-description: "Review PR — AI code review with GitHub comment"
-input:
-  project: "Project name"
-  branch: "Branch name or PR number (empty = auto-detect latest open PR)"
-  base_branch: "Target branch (default: main)"
-vars:
-  default_base: main
-nodes:
-  resolve-pr:
-    mode: shell
-    project: "{{input.project}}"
-    worktree: false
-    prompt: |
-      INPUT_BRANCH="{{input.branch}}" && \\
-      BASE="{{input.base_branch}}" && \\
-      if [ -z "$BASE" ] || echo "$BASE" | grep -q '{{'; then BASE="main"; fi && \\
-      if [ -z "$INPUT_BRANCH" ] || echo "$INPUT_BRANCH" | grep -q '{{'; then \\
-        INPUT_BRANCH=$(gh pr list --state open --base "$BASE" --json number -q '.[0].number' 2>/dev/null); \\
-        if [ -z "$INPUT_BRANCH" ]; then echo "ERROR: No open PR found targeting $BASE" && exit 1; fi; \\
-      fi && \\
-      if echo "$INPUT_BRANCH" | grep -qE '^[0-9]+$'; then \\
-        PR_NUM="$INPUT_BRANCH"; \\
-      else \\
-        PR_NUM=$(gh pr list --state open --head "$INPUT_BRANCH" --json number -q '.[0].number' 2>/dev/null); \\
-        if [ -z "$PR_NUM" ]; then echo "ERROR: No open PR for branch $INPUT_BRANCH" && exit 1; fi; \\
-      fi && \\
-      echo "$PR_NUM"
-    outputs:
-      - name: pr_number
-        extract: stdout
-  fetch-diff:
-    mode: shell
-    project: "{{input.project}}"
-    worktree: false
-    depends_on: [resolve-pr]
-    prompt: "gh pr diff {{nodes.resolve-pr.outputs.pr_number}}"
-    outputs:
-      - name: diff
-        extract: stdout
-  fetch-files:
-    mode: shell
-    project: "{{input.project}}"
-    worktree: false
-    depends_on: [resolve-pr]
-    prompt: |
-      PR_NUM="{{nodes.resolve-pr.outputs.pr_number}}" && \\
-      echo "=== PR #$PR_NUM ===" && \\
-      gh pr view "$PR_NUM" --json title,author,additions,deletions,changedFiles,commits,body --jq '"Title: " + .title + "\\nAuthor: " + .author.login + "\\nFiles: " + (.changedFiles|tostring) + " changed, +" + (.additions|tostring) + "/-" + (.deletions|tostring) + "\\nCommits: " + (.commits|length|tostring) + "\\n\\n=== PR Description ===\\n" + (.body // "(no description)")' && \\
-      echo "" && \\
-      echo "=== Changed Files ===" && \\
-      gh pr diff "$PR_NUM" --name-only
-    outputs:
-      - name: stats
-        extract: stdout
-  review:
-    project: "{{input.project}}"
-    worktree: false
-    depends_on: [fetch-diff, fetch-files, resolve-pr]
-    prompt: |
-      You are a senior code reviewer. Perform a thorough code review of this PR.
-      ## PR Info & Description
-      {{nodes.fetch-files.outputs.stats}}
-      ## Diff
-      {{nodes.fetch-diff.outputs.diff}}
-      ## Review Requirements
-      **First**: Verify the PR description against actual changes:
-      - Is every claimed change actually implemented?
-      - Any claimed changes that are NOT in the diff?
-      - Any changes in the diff NOT mentioned in the description?
-      **Then**: Review code quality:
-      1. Bug risk — logic errors, edge cases, null references
-      2. Security — injection, hardcoded secrets, sensitive data exposure
-      3. Performance — obvious bottlenecks
-      4. Code quality — readability, naming, DRY
-      ## Output
-      Write the full review report to /tmp/forge-review-pr{{nodes.resolve-pr.outputs.pr_number}}.md in this format:
-      ## 🤖 Forge AI Code Review — PR #{{nodes.resolve-pr.outputs.pr_number}}
-      ### 📋 Summary
-      - Verdict: ✅ Approve / ⚠️ Request Changes / ❌ Reject
-      - One-line summary
-      ### ✅ PR Description Verification
-      List each change claimed in the PR description, mark ✓ implemented / ✗ not implemented / ⚠️ partial
-      ### 🔴 Blockers (must fix)
-      (write "None" if none)
-      ### 🟡 Suggestions
-      ### 🟢 Nice-to-have
-      ### 💡 Highlights
-      ---
-      _Generated by [Forge](https://github.com/aiwatching/forge) Pipeline_
-      **You MUST write the complete report to /tmp/forge-review-pr{{nodes.resolve-pr.outputs.pr_number}}.md. This is the most important step.**
-    outputs:
-      - name: report
-        extract: result
-  post-comment:
-    mode: shell
-    project: "{{input.project}}"
-    worktree: false
-    depends_on: [review, resolve-pr]
-    prompt: |
-      PR_NUM="{{nodes.resolve-pr.outputs.pr_number}}" && \\
-      REPORT="/tmp/forge-review-pr\${PR_NUM}.md" && \\
-      if [ ! -f "$REPORT" ]; then echo "ERROR: Review report not found at $REPORT" && exit 1; fi && \\
-      gh pr comment "$PR_NUM" --body-file "$REPORT" && \\
-      rm -f "$REPORT" && \\
-      echo "Comment posted to PR #$PR_NUM"
-    outputs:
-      - name: result
-        extract: stdout
-`,
-};
+/**
+ * All pipelines now live in the marketplace (`forge-workflow` repo).
+ * The empty BUILTIN_WORKFLOWS map preserves the constant export so any
+ * external caller doesn't break, but no pipelines ship baked-in anymore.
+ * To use the previously built-in ones, install them from the marketplace:
+ *   - issue-fix-and-review
+ *   - multi-agent-collaboration
+ *   - review-mr
+ */
+export const BUILTIN_WORKFLOWS: Record<string, string> = {};
 export interface WorkflowWithMeta extends Workflow {
   builtin?: boolean;
@@ -409,22 +324,32 @@ export function listWorkflows(): WorkflowWithMeta[] {
   const userWorkflows: WorkflowWithMeta[] = [];
   if (existsSync(WORKFLOWS_DIR)) {
     for (const f of readdirSync(WORKFLOWS_DIR).filter(f => f.endsWith('.yaml') || f.endsWith('.yml'))) {
+      const fullPath = join(WORKFLOWS_DIR, f);
       try {
-        userWorkflows.push({ ...parseWorkflow(readFileSync(join(WORKFLOWS_DIR, f), 'utf-8')), builtin: false });
-      } catch {}
+        userWorkflows.push({ ...parseWorkflow(readFileSync(fullPath, 'utf-8')), builtin: false });
+      } catch (e) {
+        // A broken yaml shouldn't crash the listing, but the user needs
+        // to know WHY their pipeline isn't showing up. Common offender:
+        // `outputs:` written as a map (`{name: "$STDOUT"}`) when the
+        // parser expects an array (`- name: x; extract: stdout`) and
+        // therefore .map() throws.
+        console.warn(`[listWorkflows] skip ${fullPath}: ${(e as Error).message}`);
+      }
     }
   }
   // Built-in workflows (don't override user ones with same name)
   const userNames = new Set(userWorkflows.map(w => w.name));
   const builtins: WorkflowWithMeta[] = [];
-  for (const [, yaml] of Object.entries(BUILTIN_WORKFLOWS)) {
+  for (const [key, yaml] of Object.entries(BUILTIN_WORKFLOWS)) {
     try {
       const w = parseWorkflow(yaml);
       if (!userNames.has(w.name)) {
         builtins.push({ ...w, builtin: true });
       }
-    } catch {}
+    } catch (e) {
+      console.warn(`[listWorkflows] builtin ${key} failed to parse: ${(e as Error).message}`);
+    }
   }
   return [...builtins, ...userWorkflows];
@@ -434,7 +359,67 @@ export function getWorkflow(name: string): WorkflowWithMeta | null {
   return listWorkflows().find(w => w.name === name) || null;
 }
-function parseWorkflow(raw: string): Workflow {
+/**
+ * Names that already carry meaning inside `{{...}}` templates:
+ * `input.x`, `vars.x`, `nodes.X.outputs.Y`, `run.tmp_dir`, the `raw:`
+ * prefix and `loop.index` / `loop.total`. Using one of them as the
+ * `for_each.as` variable would shadow the built-in and silently break
+ * templates, so they are rejected.
+ */
+const FOREACH_RESERVED_NAMES = new Set(['input', 'vars', 'nodes', 'run', 'raw', 'loop']);
+/**
+ * Validate a workflow's `for_each:` block and return it in canonical form.
+ *
+ * @param raw           The `for_each` value straight from the parsed yaml.
+ * @param workflowType  The workflow's type; only 'dag' may declare a loop.
+ * @returns The normalized spec, or undefined when no block was declared.
+ * @throws Error on any malformed field, so authors learn about it when
+ *         the workflow is loaded rather than when it first fires.
+ */
+function parseForEach(raw: any, workflowType: string): ForEachSpec | undefined {
+  if (raw == null) return undefined;
+  if (workflowType !== 'dag') {
+    throw new Error(`workflow.for_each is only supported on type='dag' workflows (got '${workflowType}')`);
+  }
+  const isMapping = typeof raw === 'object' && !Array.isArray(raw);
+  if (!isMapping) {
+    throw new Error("workflow.for_each must be an object with at least { source: ... }");
+  }
+
+  // source: mandatory, template string or literal array
+  const items = raw.source;
+  if (items == null || items === '') {
+    throw new Error("workflow.for_each.source is required (template like '{{input.bug_ids}}' or literal array)");
+  }
+  if (!(typeof items === 'string' || Array.isArray(items))) {
+    throw new Error("workflow.for_each.source must be a template string or array literal");
+  }
+
+  // as: identifier that must not shadow a template namespace
+  const loopVar = raw.as ?? 'item';
+  const isIdentifier = typeof loopVar === 'string' && /^[A-Za-z_][A-Za-z0-9_]*$/.test(loopVar);
+  if (!isIdentifier) {
+    throw new Error("workflow.for_each.as must be a valid identifier (letters / digits / underscore, starting with letter or _)");
+  }
+  if (FOREACH_RESERVED_NAMES.has(loopVar)) {
+    throw new Error(`workflow.for_each.as='${loopVar}' collides with reserved template namespace (input / vars / nodes / run / raw / loop)`);
+  }
+
+  // on_failure: snake_case wins over camelCase, default 'continue'
+  const policy = raw.on_failure ?? raw.onFailure ?? 'continue';
+  if (!(policy === 'continue' || policy === 'stop')) {
+    throw new Error(`workflow.for_each.on_failure must be 'continue' or 'stop' (got '${policy}')`);
+  }
+
+  // split: optional separator string
+  const separator = raw.split;
+  if (separator !== undefined && typeof separator !== 'string') {
+    throw new Error("workflow.for_each.split must be a string");
+  }
+
+  // before: optional list of non-empty node ids. Whether the ids exist
+  // is checked by parseWorkflow once the nodes have been built.
+  let setupIds: string[] | undefined;
+  const declaredBefore = raw.before;
+  if (declaredBefore != null) {
+    const allIds = Array.isArray(declaredBefore)
+      && declaredBefore.every((id: any) => typeof id === 'string' && !!id);
+    if (!allIds) {
+      throw new Error("workflow.for_each.before must be an array of node id strings");
+    }
+    setupIds = Array.from(declaredBefore);
+  }
+
+  return {
+    source: items,
+    split: separator,
+    as: loopVar,
+    on_failure: policy,
+    before: setupIds,
+  };
+}
+export function parseWorkflow(raw: string): Workflow {
   const parsed = YAML.parse(raw);
   const workflowType = parsed.type || 'dag';
   const nodes: Record<string, WorkflowNode> = {};
@@ -464,6 +449,10 @@ function parseWorkflow(raw: string): Workflow {
         next: r.next,
       })),
       maxIterations: n.max_iterations || n.maxIterations || 3,
+      retries: Number.isFinite(Number(n.retries)) ? Math.max(0, Math.trunc(Number(n.retries))) : 0,
+      retryDelayMs: Number.isFinite(Number(n.retry_delay_ms ?? n.retryDelayMs))
+        ? Math.max(0, Math.trunc(Number(n.retry_delay_ms ?? n.retryDelayMs)))
+        : 0,
     };
   }
@@ -486,6 +475,15 @@ function parseWorkflow(raw: string): Workflow {
     };
   }
+  const for_each = parseForEach(parsed.for_each, workflowType);
+  if (for_each?.before) {
+    for (const id of for_each.before) {
+      if (!nodes[id]) {
+        throw new Error(`workflow.for_each.before references unknown node id '${id}' — must match one of: ${Object.keys(nodes).join(', ') || '(no nodes)'}`);
+      }
+    }
+  }
   return {
     name: parsed.name || 'unnamed',
     type: workflowType,
@@ -493,6 +491,7 @@ function parseWorkflow(raw: string): Workflow {
     vars: parsed.vars || {},
     input: parsed.input || {},
     nodes,
+    for_each,
     conversation,
   };
 }
@@ -520,15 +519,41 @@ export function deletePipeline(id: string): boolean {
   const filePath = join(PIPELINES_DIR, `${id}.json`);
   try {
     if (existsSync(filePath)) {
-      const { unlinkSync } = require('node:fs');
       unlinkSync(filePath);
       __pipelineCache.delete(filePath);
       return true;
     }
-  } catch {}
+  } catch (e) {
+    console.warn(`[pipeline] deletePipeline ${id} failed: ${(e as Error).message}`);
+  }
   return false;
 }
+/** Selection criteria for `bulkDeletePipelines`. */
+export interface BulkDeletePipelinesFilter {
+  /** Cutoff timestamp; only pipelines created strictly before it are eligible. */
+  before: string;  // ISO 8601
+  /** Terminal statuses to delete. Omitted or empty → all three. */
+  statuses?: Array<'done' | 'failed' | 'cancelled'>;
+}
+/**
+ * Bulk-delete terminal pipelines created before `filter.before`.
+ *
+ * Only 'done' / 'failed' / 'cancelled' runs are ever removed. Requested
+ * statuses are intersected with that terminal set, so an untyped caller
+ * (e.g. a JSON request body) passing 'running' or 'pending' cannot blow
+ * away state from a live run.
+ *
+ * @returns Number of pipelines removed. 0 when `filter.before` is not a
+ *          parseable date or no requested status is terminal.
+ */
+export function bulkDeletePipelines(filter: BulkDeletePipelinesFilter): number {
+  const cutoff = new Date(filter.before).getTime();
+  if (!Number.isFinite(cutoff)) return 0;
+
+  const terminal: readonly string[] = ['done', 'failed', 'cancelled'];
+  const requested: readonly string[] = filter.statuses?.length ? filter.statuses : terminal;
+  const wanted: ReadonlySet<string> = new Set(requested.filter(s => terminal.includes(s)));
+  if (wanted.size === 0) return 0;
+
+  let removed = 0;
+  for (const p of listPipelines()) {
+    if (!wanted.has(p.status)) continue;
+    const createdMs = new Date(p.createdAt).getTime();
+    // Unparseable creation dates are skipped rather than treated as old.
+    if (!Number.isFinite(createdMs) || createdMs >= cutoff) continue;
+    if (deletePipeline(p.id)) removed++;
+  }
+  return removed;
+}
 // Parsed-pipeline cache keyed by absolute path → { mtime, pipeline }.
 // Pipeline files only change when a pipeline run advances (savePipeline) —
 // re-reading + JSON.parse on every list call was burning 1-3s with 200
@@ -638,19 +663,72 @@ function shellEscapeAnsiC(s: string): string {
     .replace(/\t/g, '\\t');
 }
-function resolveTemplate(template: string, ctx: {
+/**
+ * Stringify a for_each item for template substitution.
+ *
+ * - null / undefined → empty string
+ * - string → returned unchanged
+ * - number / boolean / bigint → `String(v)`
+ * - anything else → JSON text (callers usually pull out a sub-field
+ *   with `{{<asName>.foo}}` instead)
+ *
+ * Always returns a string: `JSON.stringify` yields `undefined` for
+ * functions and symbols and throws on circular structures, so both
+ * cases fall back to `String(v)`.
+ */
+function stringifyForeachItem(v: unknown): string {
+  if (v === null || v === undefined) return '';
+  if (typeof v === 'string') return v;
+  if (typeof v === 'number' || typeof v === 'boolean' || typeof v === 'bigint') return String(v);
+  try {
+    // `??` covers the non-throwing "not serializable" result (undefined).
+    return JSON.stringify(v) ?? String(v);
+  } catch {
+    return String(v);
+  }
+}
+export function resolveTemplate(template: string, ctx: {
   input: Record<string, string>;
   vars: Record<string, string>;
   nodes: Record<string, PipelineNodeState>;
+  tmpDir?: string;
+  forEach?: {
+    asName: string;
+    item: unknown;
+    index: number;
+    total: number;
+  };
 }, shellMode?: boolean): string {
   return template.replace(/\{\{(.*?)\}\}/g, (_, expr) => {
-    const path = expr.trim();
+    let path = expr.trim();
+    // `{{raw:…}}` opts out of shell-mode ANSI-C escaping so the value
+    // is substituted verbatim. Use this when the value lands in a
+    // quoted heredoc, a file written by `cat <<EOF`, or stdin to jq —
+    // anywhere the default `\n → \\n` escaping would surface as literal
+    // backslash-n in the final output (LLM-generated multi-line text
+    // posted as comments / chat seeds, etc.).
+    let raw = false;
+    if (path.startsWith('raw:')) {
+      raw = true;
+      path = path.slice(4).trim();
+    }
     let value = '';
     // {{input.xxx}}
     if (path.startsWith('input.')) value = ctx.input[path.slice(6)] || '';
     // {{vars.xxx}}
     else if (path.startsWith('vars.')) value = ctx.vars[path.slice(5)] || '';
+    // {{run.tmp_dir}} — absolute path to this pipeline run's scratch dir
+    // (`<project_dir>/.forge/worktrees/pipeline-<id>/`). Empty if project
+    // lookup failed at startPipeline time. Yaml is responsible for mkdir-ing
+    // any sub-paths like `{{run.tmp_dir}}/mr-${MR_IID}`.
+    else if (path === 'run.tmp_dir') value = ctx.tmpDir || '';
+    // {{loop.index}} — current iteration (0-based), only set when this
+    // pipeline run was started against a for_each workflow.
+    else if (ctx.forEach && path === 'loop.index') value = String(ctx.forEach.index);
+    // {{loop.total}} — total iterations the pipeline will run.
+    else if (ctx.forEach && path === 'loop.total') value = String(ctx.forEach.total);
+    // {{<asName>}} — current item itself (scalar → str, object → JSON).
+    else if (ctx.forEach && path === ctx.forEach.asName) {
+      value = stringifyForeachItem(ctx.forEach.item);
+    }
+    // {{<asName>.foo}} — sub-field of object item. Returns empty string
+    // (not literal placeholder) on missing field so node prompts can
+    // safely conditionalize on whether the field was set.
+    else if (ctx.forEach && path.startsWith(ctx.forEach.asName + '.')) {
+      const field = path.slice(ctx.forEach.asName.length + 1);
+      const item = ctx.forEach.item;
+      const v = (item !== null && typeof item === 'object') ? (item as any)[field] : undefined;
+      value = stringifyForeachItem(v);
+    }
     // {{nodes.xxx.outputs.yyy}}
     else {
       const nodeMatch = path.match(/^nodes\.([\w-]+)\.outputs\.([\w-]+)$/);
@@ -658,14 +736,96 @@ function resolveTemplate(template: string, ctx: {
         const [, nodeId, outputName] = nodeMatch;
         value = ctx.nodes[nodeId]?.outputs[outputName] || '';
       } else {
-        return `{{${path}}}`;
+        return `{{${raw ? 'raw:' : ''}${path}}}`;
       }
     }
-    return shellMode ? shellEscapeAnsiC(value) : value;
+    return shellMode && !raw ? shellEscapeAnsiC(value) : value;
   });
 }
+// ─── Per-run scratch dir (`{{run.tmp_dir}}`) ────────────────
+/**
+ * Derive the absolute scratch-dir path for a pipeline run:
+ * `<project path>/.forge/worktrees/pipeline-<pipeline id>`.
+ * Pure path computation — nothing is created on disk. Yields an empty
+ * string when `input.project` is missing or `getProjectInfo` has no
+ * entry for it.
+ */
+function computePipelineTmpDir(pipeline: Pipeline): string {
+  const projectName = pipeline.input?.project;
+  const project = projectName ? getProjectInfo(projectName) : undefined;
+  if (!project) return '';
+  // Lives under `.forge/worktrees/`, the same directory the auto-worktree
+  // logic in this file uses for node worktrees.
+  const scratchName = `pipeline-${pipeline.id}`;
+  return join(project.path, '.forge', 'worktrees', scratchName);
+}
+/**
+ * Make sure the run's scratch dir exists and record it on the pipeline.
+ * Safe to call repeatedly (recursive mkdir). A mkdir failure is only
+ * logged: `pipeline.tmpDir` is left untouched, so `{{run.tmp_dir}}`
+ * renders as an empty string.
+ */
+function ensurePipelineTmpDir(pipeline: Pipeline): void {
+  const scratchDir = computePipelineTmpDir(pipeline);
+  if (scratchDir) {
+    try {
+      mkdirSync(scratchDir, { recursive: true });
+      // Only publish the path once the directory really exists.
+      pipeline.tmpDir = scratchDir;
+    } catch (err) {
+      console.warn(`[pipeline] could not create tmp_dir ${scratchDir}: ${(err as Error).message}`);
+    }
+  }
+}
+/**
+ * Delete the run's scratch dir (recursively). Uses `pipeline.tmpDir`
+ * when set, otherwise recomputes the path. No-op when the path is
+ * empty or nothing exists there; removal errors are logged, not thrown.
+ * Used on `done` (when settings allow) and by the GC sweep for expired
+ * failed/cancelled runs.
+ */
+export function cleanupPipelineTmpDir(pipeline: Pipeline): void {
+  const scratchDir = pipeline.tmpDir || computePipelineTmpDir(pipeline);
+  const present = Boolean(scratchDir) && existsSync(scratchDir);
+  if (present) {
+    try {
+      rmSync(scratchDir, { recursive: true, force: true });
+    } catch (err) {
+      console.warn(`[pipeline] cleanupPipelineTmpDir(${scratchDir}) failed: ${(err as Error).message}`);
+    }
+  }
+}
+// ─── for_each: source resolution ──────────────────────────
+/**
+ * Turn a workflow's `for_each.source` into the concrete list of items
+ * to iterate over.
+ *
+ * - array source → returned as-is (same array instance)
+ * - string source → rendered through resolveTemplate (non-shell mode)
+ *   against input / vars / nodes, then split on `spec.split`
+ *   (default ","), each piece trimmed, empty pieces dropped
+ * - any other source → empty list
+ *
+ * @param spec  the workflow's for_each declaration
+ * @param input pipeline input values for `{{input.*}}`
+ * @param vars  workflow vars for `{{vars.*}}`
+ * @param nodes node states for `{{nodes.<id>.outputs.<name>}}`; only
+ *              needed when the source depends on `for_each.before:`
+ *              node outputs — defaults to no nodes
+ */
+export function resolveForEachSource(
+  spec: ForEachSpec,
+  input: Record<string, string>,
+  vars: Record<string, string>,
+  nodes?: Record<string, PipelineNodeState>,
+): unknown[] {
+  const source: unknown = spec.source;
+  if (Array.isArray(source)) return source;
+  if (typeof source !== 'string') return [];
+  const rendered = resolveTemplate(source, { input, vars, nodes: nodes || {} }, false);
+  const separator = spec.split ?? ',';
+  const items: string[] = [];
+  for (const piece of rendered.split(separator)) {
+    const trimmed = piece.trim();
+    if (trimmed.length > 0) items.push(trimmed);
+  }
+  return items;
+}
 // ─── Project-level pipeline lock ─────────────────────────
 const projectPipelineLocks = new Map<string, string>(); // projectPath → pipelineId
@@ -728,6 +888,39 @@ export function startPipeline(
     };
   }
+  // Resolve for_each (if declared). Two paths:
+  //   - No `before:` → items resolved immediately from input/vars (frozen).
+  //   - With `before:` → defer; setup-phase nodes run first, then
+  //     checkPipelineCompletion resolves items using their outputs and flips
+  //     itemsResolved=true to enter the loop body.
+  let forEachState: Pipeline['forEach'];
+  if (workflow.for_each) {
+    const beforeIds = workflow.for_each.before;
+    if (beforeIds && beforeIds.length > 0) {
+      forEachState = {
+        items: [],
+        currentIndex: 0,
+        total: 0,
+        asName: workflow.for_each.as || 'item',
+        onFailure: workflow.for_each.on_failure || 'continue',
+        before: [...beforeIds],
+        itemsResolved: false,
+        iterations: [],
+      };
+    } else {
+      const items = resolveForEachSource(workflow.for_each, input, workflow.vars);
+      forEachState = {
+        items,
+        currentIndex: 0,
+        total: items.length,
+        asName: workflow.for_each.as || 'item',
+        onFailure: workflow.for_each.on_failure || 'continue',
+        itemsResolved: true,
+        iterations: [],
+      };
+    }
+  }
   const pipeline: Pipeline = {
     id,
     workflowName,
@@ -738,8 +931,27 @@ export function startPipeline(
     nodeOrder,
     createdAt: new Date().toISOString(),
     skills: opts.skills && opts.skills.length ? [...opts.skills] : undefined,
+    forEach: forEachState,
   };
+  ensurePipelineTmpDir(pipeline);
+  // Empty for_each source → nothing to iterate; settle done immediately.
+  // (E.g. user submitted `bug_ids: ""` or an empty array; not an error.)
+  // Deferred (with `before:`) skips this — items get resolved post-setup.
+  if (forEachState && forEachState.itemsResolved && forEachState.total === 0) {
+    pipeline.status = 'done';
+    pipeline.completedAt = new Date().toISOString();
+    savePipeline(pipeline);
+    // Mirror checkPipelineCompletion's done-side cleanup so an empty
+    // for_each behaves consistently with a normal done pipeline.
+    if (loadSettings().pipelineTmpCleanDoneImmediate !== false) {
+      cleanupPipelineTmpDir(pipeline);
+    }
+    setupTaskListener(pipeline.id);
+    return pipeline;
+  }
   savePipeline(pipeline);
   // Start nodes that have no dependencies
@@ -1052,8 +1264,10 @@ function finishConversation(pipeline: Pipeline, status: 'done' | 'failed') {
   const conv = pipeline.conversation!;
   for (const msg of conv.messages) {
     if (msg.status === 'running' && msg.taskId) {
-      // Cancel the running task
-      try { const { cancelTask } = require('./task-manager'); cancelTask(msg.taskId); } catch {}
+      // Cancel the running task — may already be finished, ignore those
+      try { cancelTask(msg.taskId); } catch (e) {
+        console.warn(`[pipeline] cancelTask(${msg.taskId}) in finishConversation: ${(e as Error).message}`);
+      }
       msg.status = status === 'done' ? 'done' : 'failed';
       if (!msg.content) msg.content = status === 'done' ? '(conversation ended)' : '(conversation failed)';
     }
@@ -1075,7 +1289,6 @@ export function cancelConversation(pipelineId: string): boolean {
   // Cancel any running task
   for (const msg of pipeline.conversation.messages) {
     if (msg.status === 'running' && msg.taskId) {
-      const { cancelTask } = require('./task-manager');
       cancelTask(msg.taskId);
     }
     if (msg.status === 'pending') msg.status = 'failed';
@@ -1224,7 +1437,40 @@ function recoverStuckPipelines() {
           node.status = 'done';
           node.completedAt = new Date().toISOString();
           changed = true;
-        } else if (task.status === 'failed' || task.status === 'cancelled') {
+        } else if (task.status === 'failed') {
+          // Honor retries config — if the node has retry budget left,
+          // reset to pending so scheduleReadyNodes picks it up again.
+          // Without this branch, the reconciler races with the
+          // subscription-based failed handler (lib/pipeline.ts ~1567)
+          // and can settle the node as failed BEFORE retry logic runs,
+          // which is why yaml `retries: N` sometimes appears to have
+          // no effect (the failure was caught here, not there).
+          const nodeDef = workflow.nodes[nodeId];
+          const attempts = node.attempts ?? 1;
+          const maxAttempts = 1 + (nodeDef?.retries ?? 0);
+          if (attempts < maxAttempts) {
+            console.warn(`[pipeline:reconcile] node ${nodeId} failed (attempt ${attempts}/${maxAttempts}): ${task.error || 'Task failed'} — resetting to pending for retry`);
+            node.status = 'pending';
+            node.taskId = undefined;
+            node.outputs = {};
+            node.error = undefined;
+            node.completedAt = undefined;
+            changed = true;
+          } else {
+            node.status = 'failed';
+            node.error = task.error || 'Task failed';
+            if (attempts > 1) {
+              node.error = `[after ${attempts} attempts] ${node.error}`;
+            }
+            node.completedAt = new Date().toISOString();
+            changed = true;
+          }
+        } else if (task.status === 'cancelled') {
+          // Cancellation cascade is handled by the subscription path
+          // (lib/pipeline.ts ~1605) — keep the reconciler's original
+          // behavior here (mark node failed without cascade) so we
+          // don't end up with a half-cancelled pipeline whose other
+          // pending nodes never get skipped.
           node.status = 'failed';
           node.error = task.error || 'Task failed';
           node.completedAt = new Date().toISOString();
@@ -1270,7 +1516,6 @@ function reapOrphanedPipelineTasks() {
           const t = getTask(node.taskId);
           if (t && t.status === 'running') {
             try {
-              const { cancelTask } = require('./task-manager');
               cancelTask(node.taskId);
               reaped += 1;
             } catch (err) {
@@ -1329,7 +1574,6 @@ export async function retryNode(pipelineId: string, nodeId: string): Promise<{ o
   // so it doesn't keep occupying the project lock / project slot.
   if (nodeState.status === 'running' && nodeState.taskId) {
     try {
-      const { cancelTask } = require('./task-manager');
       cancelTask(nodeState.taskId);
     } catch (err) {
       console.warn(`[pipeline] retryNode: cancelTask(${nodeState.taskId}) threw:`, err);
@@ -1398,25 +1642,61 @@ export function cancelPipeline(id: string): boolean {
   pipeline.status = 'cancelled';
   pipeline.completedAt = new Date().toISOString();
-  // Cancel all running tasks
+  // Cancel all running tasks AND mark their node states. Without
+  // updating the node state, the orchestrator's task event listener
+  // (which doesn't currently fire on 'cancelled') would leave the
+  // node stuck at 'running' forever.
   for (const [, node] of Object.entries(pipeline.nodes)) {
     if (node.status === 'running' && node.taskId) {
-      const { cancelTask } = require('./task-manager');
-      cancelTask(node.taskId);
+      try { cancelTask(node.taskId); } catch (e) {
+        console.warn(`[pipeline] cancelTask(${node.taskId}) during cancelPipeline: ${(e as Error).message}`);
+      }
+      node.status = 'cancelled';
+      node.completedAt = new Date().toISOString();
     }
     if (node.status === 'pending') node.status = 'skipped';
   }
   savePipeline(pipeline);
+  // Propagate to pipeline_runs DB row immediately so Job
+  // countMyInflightPipelines / isJobBusy sees it without waiting for
+  // the periodic reconcile (30s grace window).
+  import('./pipeline-scheduler').then(({ syncRunStatus }) => {
+    try { syncRunStatus(id); } catch (e) {
+      console.warn(`[pipeline] syncRunStatus(${id}) after cancel: ${(e as Error).message}`);
+    }
+  }).catch(() => {});
   return true;
 }
 // ─── Node Scheduling ──────────────────────────────────────
 async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
-  const ctx = { input: pipeline.input, vars: pipeline.vars, nodes: pipeline.nodes };
+  // forEach ctx — exposes {{<asName>}} / {{loop.index}} / {{loop.total}}
+  // to every template substitution in this iteration. Undefined when this
+  // workflow isn't a for_each one (then templates see no forEach namespace,
+  // {{item}} etc. pass through as literal text).
+  const forEachCtx = pipeline.forEach
+    ? {
+        asName: pipeline.forEach.asName,
+        item: pipeline.forEach.items[pipeline.forEach.currentIndex],
+        index: pipeline.forEach.currentIndex,
+        total: pipeline.forEach.total,
+      }
+    : undefined;
+  const ctx = { input: pipeline.input, vars: pipeline.vars, nodes: pipeline.nodes, tmpDir: pipeline.tmpDir, forEach: forEachCtx };
+  // for_each setup phase: while items aren't resolved, only `before:` nodes
+  // are eligible — loop-body nodes wait until items are produced + we transition
+  // to itemsResolved=true (in checkPipelineCompletion).
+  const inSetupPhase = pipeline.forEach && !pipeline.forEach.itemsResolved;
+  const beforeSet = new Set(pipeline.forEach?.before || []);
   for (const nodeId of pipeline.nodeOrder) {
+    if (inSetupPhase && !beforeSet.has(nodeId)) continue;
     const nodeState = pipeline.nodes[nodeId];
     if (nodeState.status !== 'pending') continue;
@@ -1468,12 +1748,14 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
     const useWorktree = nodeDef.worktree !== false && !nodeDef.workdir;
     const branchName = nodeDef.branch ? resolveTemplate(nodeDef.branch, ctx) : `pipeline/${pipeline.id.slice(0, 8)}`;
     if (useWorktree) try {
-      const { execSync } = require('node:child_process');
       const worktreePath = `${projectInfo.path}/.forge/worktrees/${branchName.replace(/\//g, '-')}`;
-      const { mkdirSync } = require('node:fs');
       mkdirSync(`${projectInfo.path}/.forge/worktrees`, { recursive: true });
-      // Create branch if needed
+      // Create branch if needed.
+      // Silent catch: `git branch X` fails with "already exists" — the
+      // common case here, not an error worth logging. If the project
+      // dir isn't a git repo we fail later at `git worktree add` with
+      // a real message, so this stays quiet.
       try { execSync(`git branch ${branchName}`, { cwd: projectInfo.path, stdio: 'pipe' }); } catch {}
       // Create or reuse worktree
@@ -1481,10 +1763,11 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
         execSync(`git worktree add "${worktreePath}" ${branchName}`, { cwd: projectInfo.path, stdio: 'pipe' });
         console.log(`[pipeline] Created worktree: ${worktreePath} (branch: ${branchName})`);
       } catch {
-        const { existsSync } = require('node:fs');
         if (existsSync(worktreePath)) {
           console.log(`[pipeline] Reusing worktree: ${worktreePath}`);
         } else {
+          // Silent: preemptive cleanup before re-adding. Fails when
+          // path doesn't exist — exactly when we DON'T need cleanup.
           try { execSync(`git worktree remove "${worktreePath}" --force`, { cwd: projectInfo.path, stdio: 'pipe' }); } catch {}
           execSync(`git worktree add "${worktreePath}" ${branchName}`, { cwd: projectInfo.path, stdio: 'pipe' });
           console.log(`[pipeline] Recreated worktree: ${worktreePath}`);
@@ -1510,7 +1793,6 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
       // upstream output (e.g. accidentally including KEY=value lines).
       if (resolved && !resolved.includes('\n')) {
         try {
-          const { statSync } = require('node:fs');
           if (statSync(resolved).isDirectory()) {
             effectivePath = resolved;
           } else {
@@ -1611,6 +1893,7 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
     nodeState.status = 'running';
     nodeState.taskId = task.id;
     nodeState.iterations++;
+    nodeState.attempts = (nodeState.attempts ?? 0) + 1;
     nodeState.startedAt = new Date().toISOString();
     savePipeline(pipeline);
@@ -1622,39 +1905,178 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
 }
 function checkPipelineCompletion(pipeline: Pipeline) {
-  const states = Object.values(pipeline.nodes);
-  const allDone = states.every(s => s.status === 'done' || s.status === 'skipped' || s.status === 'failed');
+  // Decide whether a running pipeline (or its current for_each phase /
+  // iteration) has settled, and advance it accordingly. Mutates and
+  // persists `pipeline`; does nothing unless its status is 'running'.
+  //   1. for_each setup phase (items not yet resolved): wait for all
+  //      `before:` nodes, then fail, finish empty, or enter the loop body.
+  //   2. loop body / plain DAG: once every non-`before:` node has settled,
+  //      either start the next iteration or settle the pipeline.
+  // Every terminal transition below saves the pipeline and then calls
+  // finalizePipeline.
+  // NOTE(review): "settled" here means 'done' | 'skipped' | 'failed'; a
+  // node left in 'cancelled' is not counted as settled by these checks.
+  if (pipeline.status !== 'running') return;
+  // ── for_each setup-phase: gate before flipping into the loop body ──
+  // While items aren't resolved, "completion" only inspects `before:` nodes.
+  // Once they all settle: success → resolve source from their outputs, init
+  // iterations, transition to itemsResolved=true. Any failure → fail-fast
+  // (we never enter the loop without items).
+  if (pipeline.forEach && !pipeline.forEach.itemsResolved) {
+    const beforeIds = pipeline.forEach.before || [];
+    const beforeStates = beforeIds.map(id => pipeline.nodes[id]).filter(Boolean);
+    const setupDone = beforeStates.every(s => s.status === 'done' || s.status === 'skipped' || s.status === 'failed');
+    if (!setupDone) return;
+    // A skipped setup node counts as a setup failure too.
+    const setupFailed = beforeStates.some(s => s.status === 'failed' || s.status === 'skipped');
+    if (setupFailed) {
+      pipeline.status = 'failed';
+      pipeline.completedAt = new Date().toISOString();
+      savePipeline(pipeline);
+      finalizePipeline(pipeline);
+      return;
+    }
+    // All setup nodes done — resolve items using their outputs.
+    const workflow = getWorkflow(pipeline.workflowName);
+    // Workflow missing or no longer declaring for_each → cannot resolve items; fail.
+    if (!workflow || !workflow.for_each) {
+      pipeline.status = 'failed';
+      pipeline.completedAt = new Date().toISOString();
+      savePipeline(pipeline);
+      finalizePipeline(pipeline);
+      return;
+    }
+    let items: unknown[];
+    try {
+      items = resolveForEachSource(workflow.for_each, pipeline.input, pipeline.vars, pipeline.nodes);
+    } catch (e) {
+      console.warn(`[pipeline] for_each source resolution failed: ${(e as Error).message}`);
+      pipeline.status = 'failed';
+      pipeline.completedAt = new Date().toISOString();
+      savePipeline(pipeline);
+      finalizePipeline(pipeline);
+      return;
+    }
+    pipeline.forEach.items = items;
+    pipeline.forEach.total = items.length;
+    pipeline.forEach.itemsResolved = true;
+    // Empty resolved items → settle done (setup succeeded but nothing to iterate).
+    if (items.length === 0) {
+      pipeline.status = 'done';
+      pipeline.completedAt = new Date().toISOString();
+      savePipeline(pipeline);
+      finalizePipeline(pipeline);
+      return;
+    }
-  if (allDone && pipeline.status === 'running') {
-    const anyFailed = states.some(s => s.status === 'failed');
-    pipeline.status = anyFailed ? 'failed' : 'done';
-    pipeline.completedAt = new Date().toISOString();
     savePipeline(pipeline);
-    notifyPipelineComplete(pipeline);
+    // Items are resolved — kick off the first loop-body iteration (fire-and-forget).
+    void scheduleReadyNodes(pipeline, workflow);
+    return;
+  }
-    // Sync run status to project pipeline runs
-    try {
-      const { syncRunStatus } = require('./pipeline-scheduler');
-      syncRunStatus(pipeline.id);
-    } catch {}
-    // Log worktree info for user review
-    for (const [nodeId, state] of Object.entries(pipeline.nodes)) {
-      const wt = (state as any).worktreePath;
-      const branch = (state as any).worktreeBranch;
-      if (wt && branch) {
-        console.log(`[pipeline] Worktree preserved: ${wt} (branch: ${branch}) — review changes, then: git worktree remove "${wt}"`);
+  // ── loop-body / plain DAG: completion check excludes `before:` nodes ──
+  const beforeSet = new Set(pipeline.forEach?.before || []);
+  const bodyStates = Object.entries(pipeline.nodes)
+    .filter(([id]) => !beforeSet.has(id))
+    .map(([, s]) => s);
+  const allDone = bodyStates.every(s => s.status === 'done' || s.status === 'skipped' || s.status === 'failed');
+  if (!allDone) return;
+  const anyFailed = bodyStates.some(s => s.status === 'failed');
+  // ── for_each: this is iteration boundary, not pipeline boundary ──
+  if (pipeline.forEach) {
+    // Snapshot first: it records the iteration under the pre-increment currentIndex.
+    snapshotIteration(pipeline, anyFailed);
+    const stop = anyFailed && pipeline.forEach.onFailure === 'stop';
+    pipeline.forEach.currentIndex++;
+    const hasMore = !stop && pipeline.forEach.currentIndex < pipeline.forEach.total;
+    if (hasMore) {
+      // Reset loop-body node states for next iteration; `before:` nodes
+      // stay `done` so their outputs are reused. iterations[] history persists.
+      for (const nodeId of Object.keys(pipeline.nodes)) {
+        if (beforeSet.has(nodeId)) continue;
+        pipeline.nodes[nodeId] = { status: 'pending', outputs: {}, iterations: 0 };
+      }
+      savePipeline(pipeline);
+      const workflow = getWorkflow(pipeline.workflowName);
+      if (workflow) {
+        void scheduleReadyNodes(pipeline, workflow);
       }
+      return;
     }
-    // Release project lock
-    const workflow = getWorkflow(pipeline.workflowName);
-    if (workflow) {
-      const projectNames = new Set(Object.values(workflow.nodes).map(n => n.project));
-      for (const pName of projectNames) {
-        const pInfo = getProjectInfo(resolveTemplate(pName, { input: pipeline.input, vars: pipeline.vars, nodes: pipeline.nodes }));
-        if (pInfo) releaseProjectLock(pInfo.path, pipeline.id);
-      }
+    // Done iterating — overall failed iff any iteration failed.
+    const overallFailed = pipeline.forEach.iterations.some(i => i.status === 'failed');
+    pipeline.status = overallFailed ? 'failed' : 'done';
+  } else {
+    pipeline.status = anyFailed ? 'failed' : 'done';
+  }
+  pipeline.completedAt = new Date().toISOString();
+  savePipeline(pipeline);
+  finalizePipeline(pipeline);
+}
+/**
+ * Append a record of the iteration that just finished to
+ * `pipeline.forEach.iterations`. No-op for pipelines without for_each
+ * state.
+ *
+ * The record holds the current index, a done/failed status, a shallow
+ * copy of every loop-body node's outputs plus its status, error and
+ * taskId, and two timestamps. `before:` setup nodes are left out — they
+ * ran once and are not per-iteration history. `startedAt` is
+ * approximated as the previous record's `completedAt` (or
+ * `pipeline.createdAt` for the first record), so no separate
+ * per-iteration start time is tracked.
+ *
+ * @param pipeline  pipeline whose current node states are captured
+ * @param anyFailed whether the finished iteration had a failed node
+ */
+export function snapshotIteration(pipeline: Pipeline, anyFailed: boolean) {
+  const loop = pipeline.forEach;
+  if (!loop) return;
+  const last = loop.iterations[loop.iterations.length - 1];
+  const startedAt = last?.completedAt || pipeline.createdAt;
+  const setupIds = new Set(loop.before || []);
+  const captured: Record<string, { status: PipelineNodeStatus; outputs: Record<string, string>; error?: string; taskId?: string }> = {};
+  for (const id of Object.keys(pipeline.nodes)) {
+    if (setupIds.has(id)) continue;
+    const state = pipeline.nodes[id];
+    // Copy outputs so a later reset of the live node state can't alter history.
+    captured[id] = { status: state.status, outputs: { ...state.outputs }, error: state.error, taskId: state.taskId };
+  }
+  const status = anyFailed ? 'failed' : 'done';
+  loop.iterations.push({
+    index: loop.currentIndex,
+    status,
+    startedAt,
+    completedAt: new Date().toISOString(),
+    nodes: captured,
+  });
+}
+/**
+ * Housekeeping run once a pipeline has reached a terminal status.
+ * Shared by the plain-DAG path and the for_each "all iterations done"
+ * path. In order:
+ *   1. remove the scratch dir — only for 'done' runs and only unless
+ *      `pipelineTmpCleanDoneImmediate` is explicitly false
+ *   2. send the completion notification
+ *   3. sync the run status via pipeline-scheduler (asynchronously)
+ *   4. log any worktrees recorded on the node states
+ *   5. release the project locks held by this pipeline
+ */
+function finalizePipeline(pipeline: Pipeline) {
+  // Non-done runs keep their scratch dir for inspection; GC sweeps it later.
+  const succeeded = pipeline.status === 'done';
+  if (succeeded && loadSettings().pipelineTmpCleanDoneImmediate !== false) {
+    cleanupPipelineTmpDir(pipeline);
+  }
+  notifyPipelineComplete(pipeline);
+  // Loaded lazily: pipeline-scheduler imports from pipeline.ts at its
+  // top, so a static import here would be circular.
+  import('./pipeline-scheduler')
+    .then(({ syncRunStatus }) => {
+      try {
+        syncRunStatus(pipeline.id);
+      } catch (e) {
+        console.warn(`[pipeline] syncRunStatus(${pipeline.id}) failed: ${(e as Error).message}`);
+      }
+    })
+    .catch((e) => {
+      console.warn(`[pipeline] dynamic import of pipeline-scheduler failed: ${e?.message ?? e}`);
+    });
+  // For a for_each pipeline these are the last iteration's node states.
+  for (const state of Object.values(pipeline.nodes)) {
+    const wt = (state as any).worktreePath;
+    const branch = (state as any).worktreeBranch;
+    if (!wt || !branch) continue;
+    console.log(`[pipeline] Worktree preserved: ${wt} (branch: ${branch}) — review changes, then: git worktree remove "${wt}"`);
+  }
+  // Release the lock of every distinct project the workflow's nodes name.
+  const workflow = getWorkflow(pipeline.workflowName);
+  if (!workflow) return;
+  const templateCtx = { input: pipeline.input, vars: pipeline.vars, nodes: pipeline.nodes, tmpDir: pipeline.tmpDir };
+  const projectNames = new Set(Object.values(workflow.nodes).map(n => n.project));
+  for (const projectName of projectNames) {
+    const info = getProjectInfo(resolveTemplate(projectName, templateCtx));
+    if (info) releaseProjectLock(info.path, pipeline.id);
+  }
+}
@@ -1669,7 +2091,7 @@ function setupTaskListener(pipelineId: string) {
   const cleanup = onTaskEvent((taskId, event, data) => {
     if (event !== 'status') return;
-    if (data !== 'done' && data !== 'failed') return;
+    if (data !== 'done' && data !== 'failed' && data !== 'cancelled') return;
     const pipeline = getPipeline(pipelineId);
     if (!pipeline || pipeline.status !== 'running') {
@@ -1744,11 +2166,66 @@ function setupTaskListener(pipelineId: string) {
       savePipeline(pipeline);
       // No per-step done notification — only notify on start and failure
     } else if (data === 'failed') {
-      nodeState.status = 'failed';
-      nodeState.error = task?.error || 'Task failed';
+      // Retry budget: if the node has `retries` configured and we
+      // haven't exhausted it, re-queue the node for another attempt
+      // instead of marking failed.
+      const attempts = nodeState.attempts ?? 1;
+      const maxAttempts = 1 + (nodeDef.retries ?? 0);
+      if (attempts < maxAttempts) {
+        const errMsg = task?.error || 'Task failed';
+        const delayMs = nodeDef.retryDelayMs ?? 0;
+        console.warn(`[pipeline] node ${nodeId} failed (attempt ${attempts}/${maxAttempts}): ${errMsg} — retrying in ${delayMs}ms`);
+        notifyStep(pipeline, nodeId, 'failed', `attempt ${attempts}/${maxAttempts}: ${errMsg} — will retry`);
+        // Reset node to pending so scheduleReadyNodes picks it up
+        // again. Wipe transient state so the retry is a clean attempt.
+        nodeState.status = 'pending';
+        nodeState.taskId = undefined;
+        nodeState.outputs = {};
+        nodeState.error = undefined;
+        nodeState.completedAt = undefined;
+        savePipeline(pipeline);
+        if (delayMs > 0) {
+          setTimeout(() => {
+            const fresh = getPipeline(pipelineId);
+            if (fresh && fresh.status === 'running') {
+              scheduleReadyNodes(fresh, workflow);
+            }
+          }, delayMs);
+          return; // skip immediate scheduleReadyNodes below
+        }
+      } else {
+        nodeState.status = 'failed';
+        nodeState.error = task?.error || 'Task failed';
+        if (attempts > 1) {
+          nodeState.error = `[after ${attempts} attempts] ${nodeState.error}`;
+        }
+        nodeState.completedAt = new Date().toISOString();
+        savePipeline(pipeline);
+        notifyStep(pipeline, nodeId, 'failed', nodeState.error);
+      }
+    } else if (data === 'cancelled') {
+      // Cascade: a cancelled task means user pulled the plug (either
+      // via cancelPipeline above, or by killing the underlying task
+      // directly). Mark this node cancelled, then mark the whole
+      // pipeline cancelled so Job/UI see a clean terminal state.
+      nodeState.status = 'cancelled';
+      nodeState.error = task?.error || 'Task cancelled';
       nodeState.completedAt = new Date().toISOString();
+      // Sibling pending nodes should be skipped — nothing else will
+      // run on this cancelled pipeline.
+      for (const [, sib] of Object.entries(pipeline.nodes)) {
+        if (sib.status === 'pending') sib.status = 'skipped';
+      }
+      if (pipeline.status === 'running') {
+        pipeline.status = 'cancelled';
+        pipeline.completedAt = new Date().toISOString();
+      }
       savePipeline(pipeline);
-      notifyStep(pipeline, nodeId, 'failed', nodeState.error);
+      notifyStep(pipeline, nodeId, 'cancelled', nodeState.error);
+      // Propagate to DB so Job sees it without reconcile delay.
+      import('./pipeline-scheduler').then(({ syncRunStatus }) => {
+        try { syncRunStatus(pipelineId); } catch {}
+      }).catch(() => {});
     }
     // Schedule next ready nodes
@@ -1834,7 +2311,9 @@ async function notifyStep(pipeline: Pipeline, nodeId: string, status: string, er
         disable_web_page_preview: true,
       }),
     });
-  } catch {}
+  } catch (e) {
+    console.warn(`[pipeline] notifyStep telegram failed (${pipeline.id}/${nodeId}): ${(e as Error).message}`);
+  }
 }
 async function notifyPipelineComplete(pipeline: Pipeline) {
@@ -1858,5 +2337,7 @@ async function notifyPipelineComplete(pipeline: Pipeline) {
         disable_web_page_preview: true,
       }),
     });
-  } catch {}
+  } catch (e) {
+    console.warn(`[pipeline] notifyPipelineComplete telegram failed (${pipeline.id}): ${(e as Error).message}`);
+  }
 }