@aion0/forge 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/RELEASE_NOTES.md +60 -5
  2. package/app/api/agents/[id]/test/route.ts +150 -0
  3. package/app/api/connectors/[id]/sync-cli/route.ts +73 -0
  4. package/app/api/connectors/tool-test/route.ts +70 -0
  5. package/app/api/jobs/[id]/cancel/route.ts +50 -0
  6. package/app/api/jobs/[id]/dispatched-pipelines/route.ts +24 -0
  7. package/app/api/jobs/[id]/run/route.ts +22 -2
  8. package/app/api/jobs/route.ts +11 -1
  9. package/app/api/pipelines/[id]/schema/route.ts +53 -0
  10. package/app/api/pipelines/bulk-delete/route.ts +39 -0
  11. package/app/api/pipelines/gc/route.ts +27 -0
  12. package/app/api/schedules/[id]/cancel/route.ts +27 -0
  13. package/app/api/schedules/[id]/route.ts +173 -0
  14. package/app/api/schedules/[id]/run/route.ts +45 -0
  15. package/app/api/schedules/[id]/runs/route.ts +22 -0
  16. package/app/api/schedules/[id]/stop/route.ts +33 -0
  17. package/app/api/schedules/route.ts +175 -0
  18. package/app/api/tasks/bulk-delete/route.ts +47 -0
  19. package/bin/forge-server.mjs +22 -1
  20. package/cli/mw.mjs +186 -7657
  21. package/cli/mw.ts +26 -0
  22. package/components/ConnectorsPanel.tsx +46 -0
  23. package/components/Dashboard.tsx +23 -10
  24. package/components/JobsView.tsx +245 -6
  25. package/components/PipelineEditor.tsx +38 -1
  26. package/components/PipelineView.tsx +325 -4
  27. package/components/ScheduleCreateModal.tsx +1507 -0
  28. package/components/SchedulesView.tsx +605 -0
  29. package/components/SettingsModal.tsx +106 -0
  30. package/docs/Team-Workflow-Integration.md +487 -0
  31. package/docs/UI-Design-Brief-SidePanel.md +278 -0
  32. package/lib/__tests__/foreach-batch-yaml.test.ts +33 -0
  33. package/lib/__tests__/foreach-before.test.ts +201 -0
  34. package/lib/__tests__/foreach-parse.test.ts +114 -0
  35. package/lib/__tests__/foreach-snapshot.test.ts +112 -0
  36. package/lib/__tests__/foreach-source.test.ts +105 -0
  37. package/lib/__tests__/foreach-template.test.ts +112 -0
  38. package/lib/chat/agent-loop.ts +3 -3
  39. package/lib/chat-standalone.ts +26 -1
  40. package/lib/claude-process.ts +8 -5
  41. package/lib/connectors/sync.ts +8 -2
  42. package/lib/crypto.ts +1 -1
  43. package/lib/dirs.ts +22 -7
  44. package/lib/help-docs/05-pipelines.md +171 -0
  45. package/lib/help-docs/13-schedules.md +165 -0
  46. package/lib/help-docs/23-automation-states.md +148 -0
  47. package/lib/help-docs/CLAUDE.md +6 -6
  48. package/lib/init.ts +25 -6
  49. package/lib/jobs/recipes.ts +3 -2
  50. package/lib/jobs/scheduler.ts +215 -11
  51. package/lib/jobs/store.ts +79 -3
  52. package/lib/jobs/types.ts +31 -0
  53. package/lib/logger.ts +1 -1
  54. package/lib/notify.ts +13 -6
  55. package/lib/pipeline-gc.ts +105 -0
  56. package/lib/pipeline-scheduler.ts +29 -0
  57. package/lib/pipeline.ts +811 -330
  58. package/lib/schedules/action-runner.ts +257 -0
  59. package/lib/schedules/scheduler.ts +422 -0
  60. package/lib/schedules/state.ts +41 -0
  61. package/lib/schedules/store.ts +618 -0
  62. package/lib/schedules/types.ts +117 -0
  63. package/lib/settings.ts +35 -0
  64. package/lib/task-manager.ts +56 -13
  65. package/lib/workflow-marketplace.ts +7 -1
  66. package/lib/workspace/skill-installer.ts +7 -6
  67. package/package.json +3 -1
  68. package/lib/help-docs/19-jobs.md +0 -145
  69. package/lib/help-docs/20-mantis-bug-fix.md +0 -115
  70. package/lib/help-docs/22-recipes.md +0 -124
package/lib/pipeline.ts CHANGED
@@ -6,10 +6,11 @@
6
6
  */
7
7
 
8
8
  import { randomUUID } from 'node:crypto';
9
- import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, statSync } from 'node:fs';
9
+ import { existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, statSync, unlinkSync, rmSync } from 'node:fs';
10
+ import { execSync } from 'node:child_process';
10
11
  import { join } from 'node:path';
11
12
  import YAML from 'yaml';
12
- import { createTask, getTask, onTaskEvent, taskModelOverrides, taskAppendSystemPromptOverrides } from './task-manager';
13
+ import { createTask, getTask, onTaskEvent, taskModelOverrides, taskAppendSystemPromptOverrides, cancelTask } from './task-manager';
13
14
  import { getProjectInfo } from './projects';
14
15
  import { loadSettings } from './settings';
15
16
  import { getAgent, listAgents } from './agents';
@@ -47,6 +48,16 @@ export interface WorkflowNode {
47
48
  outputs: { name: string; extract: 'result' | 'git_diff' | 'stdout' | 'plugin' }[];
48
49
  routes: { condition: string; next: string }[];
49
50
  maxIterations: number;
51
+ /** Auto-retry the node on transient failure. Default 0 (fail-fast,
52
+ * matches old behavior). E.g. retries: 2 → 1 initial + 2 retries =
53
+ * 3 total attempts before the node is marked failed. Retries spawn
54
+ * a fresh task each time; output state is wiped between attempts.
55
+ * Use for nodes prone to transient errors: rate-limited API calls,
56
+ * flaky network, race-condition-prone shell ops. */
57
+ retries?: number;
58
+ /** Milliseconds to wait before each retry. Default 0 (immediate).
59
+ * Use 5000+ for downstream rate-limit recovery. */
60
+ retryDelayMs?: number;
50
61
  }
51
62
 
52
63
  // ─── Conversation Mode Types ──────────────────────────────
@@ -80,23 +91,131 @@ export interface ConversationConfig {
80
91
 
81
92
  // ─── Workflow ─────────────────────────────────────────────
82
93
 
94
+ /** Type tag for an extended pipeline input field. */
95
+ export type WorkflowInputType = 'string' | 'integer' | 'number' | 'boolean' | 'enum';
96
+
97
+ /** Extended input field spec. Used when the yaml's `input:` block
98
+ * declares an object instead of a plain description string. */
99
+ export interface WorkflowInputFieldSpec {
100
+ description?: string;
101
+ label?: string;
102
+ type?: WorkflowInputType;
103
+ enum?: string[]; // for type: 'enum'
104
+ required?: boolean; // overrides the description-heuristic
105
+ default?: string | number | boolean;
106
+ multiline?: boolean; // forces textarea regardless of description heuristic
107
+ }
108
+
109
+ /** A workflow input value is either a legacy description string
110
+ * (`input: { bug_id: "Mantis bug id" }`) or a full field spec
111
+ * (`input: { bug_id: { description: "…", type: integer, required: true } }`).
112
+ * Schedule UI / pipeline-schema endpoint normalize this into a uniform
113
+ * field record before rendering. */
114
+ export type WorkflowInputSpec = string | WorkflowInputFieldSpec;
115
+
116
+ /**
117
+ * Workflow-level loop spec. Declaring this on a `dag` workflow turns the
118
+ * pipeline into "run the whole DAG N times, once per item in `source`".
119
+ * Each iteration's per-node state is fully reset between rounds, but the
120
+ * pipeline_run id stays the same — i.e. it's ONE run, M iterations, not
121
+ * M sibling runs. Designed for batch use cases (list of bug ids, list
122
+ * of MR ids, …) where each item flows through the same node chain.
123
+ *
124
+ * Resolved + validated at startPipeline time (or, when `before:` is set, after the setup nodes finish):
125
+ * - `source` is templated against `input` / `vars` and parsed to an array
126
+ * - empty array → pipeline immediately settles to done (0 iterations)
127
+ *
128
+ * Inside the run, node prompts reference the current item via
129
+ * `{{<asName>}}` (defaults to `{{item}}`) and current position via
130
+ * `{{loop.index}}` / `{{loop.total}}`.
131
+ *
132
+ * Not supported (kept Non-Goals — see forge-pipeline-foreach-design.md):
133
+ * - nested for_each
134
+ * - parallel iterations (always sequential in v1)
135
+ * - dynamic source from a loop-body node's output (only `before:` setup nodes can feed `source`)
136
+ * - cross-iteration output access
137
+ */
138
+ export interface ForEachSpec {
139
+ /** Templated string ("{{input.bug_ids}}") or literal array; resolved at
140
+ * startPipeline time (no `before:`) or after setup phase finishes (with
141
+ * `before:` — then source may reference `{{nodes.<id>.outputs.<name>}}`). */
142
+ source: string | unknown[];
143
+ /** Separator when `source` resolves to a string. Default ",". */
144
+ split?: string;
145
+ /** Variable name exposed inside nodes via `{{<asName>}}`. Default "item". */
146
+ as?: string;
147
+ /** Iteration-failure policy: "continue" (next iteration runs anyway, pipeline ends `failed` if any iter failed) or "stop" (first failure halts). Default "continue". */
148
+ on_failure?: 'continue' | 'stop';
149
+ /** Node ids that run ONCE before the loop body — for resolving items
150
+ * dynamically from upstream (e.g. a list-iids shell node). These nodes:
151
+ * - are scheduled first in isolation (loop-body nodes wait)
152
+ * - keep `done` status across all iterations
153
+ * - are excluded from per-iter snapshots + per-iter reset
154
+ * The for_each.source template can reference their outputs. */
155
+ before?: string[];
156
+ }
157
+
83
158
  export interface Workflow {
84
159
  name: string;
85
160
  type?: 'dag' | 'conversation'; // default: 'dag'
86
161
  description?: string;
87
162
  vars: Record<string, string>;
88
- input: Record<string, string>; // required input fields
163
+ input: Record<string, WorkflowInputSpec>;
89
164
  nodes: Record<string, WorkflowNode>;
165
+ /** Loop over a list — each iteration runs the full DAG once. See ForEachSpec. */
166
+ for_each?: ForEachSpec;
90
167
  // Conversation mode fields (only when type === 'conversation')
91
168
  conversation?: ConversationConfig;
92
169
  }
93
170
 
94
- export type PipelineNodeStatus = 'pending' | 'running' | 'done' | 'failed' | 'skipped';
171
+ /** Extract a description string from either input shape. */
172
+ export function inputDescription(spec: WorkflowInputSpec | undefined): string {
173
+ if (!spec) return '';
174
+ if (typeof spec === 'string') return spec;
175
+ return spec.description || '';
176
+ }
177
+
178
+ /** Normalize any input spec into a uniform field record. */
179
+ export function normalizeInputField(name: string, spec: WorkflowInputSpec | undefined): {
180
+ name: string;
181
+ description: string;
182
+ label: string;
183
+ type: WorkflowInputType;
184
+ enum: string[] | null;
185
+ required: boolean;
186
+ default: string | number | boolean | null;
187
+ multiline: boolean;
188
+ } {
189
+ if (!spec || typeof spec === 'string') {
190
+ const description = typeof spec === 'string' ? spec : '';
191
+ // Legacy heuristic: an empty description, or one mentioning "optional" / "leave blank" / "leave empty", means non-required.
192
+ const required = !!description && !/optional|leave blank|leave empty/i.test(description);
193
+ const multiline = /multi-line|multiline|prompt|description|body|template/i.test(description);
194
+ return { name, description, label: '', type: 'string', enum: null, required, default: null, multiline };
195
+ }
196
+ const t: WorkflowInputType = (spec.type || 'string') as WorkflowInputType;
197
+ return {
198
+ name,
199
+ description: spec.description || '',
200
+ label: spec.label || '',
201
+ type: t,
202
+ enum: Array.isArray(spec.enum) ? spec.enum.map(String) : null,
203
+ required: spec.required ?? true,
204
+ default: spec.default ?? null,
205
+ multiline: spec.multiline ?? false,
206
+ };
207
+ }
208
+
209
+ export type PipelineNodeStatus = 'pending' | 'running' | 'done' | 'failed' | 'skipped' | 'cancelled';
95
210
 
96
211
  export interface PipelineNodeState {
97
212
  status: PipelineNodeStatus;
98
213
  taskId?: string;
99
214
  outputs: Record<string, string>;
215
+ /** Number of times THIS node's task has been launched. 1 = first
216
+ * attempt; 2,3,... = retries. Bumped each time we (re)create the
217
+ * task. Capped at WorkflowNode.retries + 1 (retries defaults to 0 = single attempt, no retry). */
218
+ attempts?: number;
100
219
  iterations: number;
101
220
  startedAt?: string;
102
221
  completedAt?: string;
@@ -121,6 +240,60 @@ export interface Pipeline {
121
240
  * recovery use the same set as the original run.
122
241
  */
123
242
  skills?: string[];
243
+ /**
244
+ * Absolute path to this run's scratch dir, served to YAML nodes as
245
+ * `{{run.tmp_dir}}`. Layout: `<project_dir>/.forge/worktrees/pipeline-<id>/`.
246
+ * Resolved from `input.project`; empty / undefined when project lookup
247
+ * failed (the template then renders as empty string).
248
+ * `done` runs wipe this immediately; `failed`/`cancelled` keep it for
249
+ * GC (see lib/pipeline-gc.ts).
250
+ */
251
+ tmpDir?: string;
252
+ /**
253
+ * Loop state when this pipeline run was started against a workflow that
254
+ * declared `for_each:`. Absent on plain DAG runs (the orchestrator
255
+ * branches on `pipeline.forEach !== undefined`).
256
+ *
257
+ * Lifecycle: `currentIndex` starts at 0; one iteration = one full DAG
258
+ * pass. When all nodes settle, `iterations` gets a snapshot of the
259
+ * round's node states, then everything resets and `currentIndex` ticks
260
+ * up. The pipeline stays `running` until `currentIndex >= total`.
261
+ */
262
+ forEach?: {
263
+ /** The resolved item list. Populated at startPipeline (no `before:`) or
264
+ * at the setup→loop transition (with `before:`). Empty during setup phase. */
265
+ items: unknown[];
266
+ /** 0-based index of the currently-executing iteration. */
267
+ currentIndex: number;
268
+ /** Cached items.length; redundant but cheap and makes UI math easy. */
269
+ total: number;
270
+ /** Variable name nodes use via `{{<asName>}}`. */
271
+ asName: string;
272
+ /** Set by startPipeline from workflow.for_each.on_failure. Default "continue". */
273
+ onFailure: 'continue' | 'stop';
274
+ /** Mirror of spec.before — frozen at start. Loop-setup nodes that run
275
+ * ONCE, never reset, never snapshot. Undefined or empty = no setup phase. */
276
+ before?: string[];
277
+ /** Phase flag: false during setup (before nodes still running or pending);
278
+ * true once items[] is final + loop can start. For backward compat,
279
+ * pipelines without `before:` are created with itemsResolved=true. */
280
+ itemsResolved: boolean;
281
+ /** Per-iteration history snapshots (oldest first). Appended at iteration boundary in checkPipelineCompletion. */
282
+ iterations: Array<{
283
+ index: number;
284
+ status: 'done' | 'failed' | 'cancelled';
285
+ startedAt: string;
286
+ completedAt: string;
287
+ /** Compact snapshot. `taskId` is retained so the UI can re-open
288
+ * the original task drawer for any past iteration's node. */
289
+ nodes: Record<string, {
290
+ status: PipelineNodeStatus;
291
+ outputs: Record<string, string>;
292
+ error?: string;
293
+ taskId?: string;
294
+ }>;
295
+ }>;
296
+ };
124
297
  // Conversation mode state
125
298
  conversation?: {
126
299
  config: ConversationConfig;
@@ -132,273 +305,15 @@ export interface Pipeline {
132
305
 
133
306
  // ─── Workflow Loading ─────────────────────────────────────
134
307
 
135
- // ─── Built-in workflows ──────────────────────────────────
136
-
137
- export const BUILTIN_WORKFLOWS: Record<string, string> = {
138
- 'issue-fix-and-review': `
139
- name: issue-fix-and-review
140
- description: "Fetch GitHub issue → fix code → create PR → review PR → notify"
141
- input:
142
- issue_id: "GitHub issue number"
143
- project: "Project name"
144
- base_branch: "Base branch (default: auto-detect)"
145
- extra_context: "Additional instructions for the fix (optional)"
146
- nodes:
147
- setup:
148
- mode: shell
149
- project: "{{input.project}}"
150
- prompt: |
151
- cd "$(git rev-parse --show-toplevel)" && \
152
- if [ -n "$(git status --porcelain)" ]; then echo "ERROR: Working directory has uncommitted changes. Please commit or stash first." && exit 1; fi && \
153
- ORIG_BRANCH=$(git branch --show-current || git rev-parse --short HEAD) && \
154
- REPO=$(gh repo view --json nameWithOwner -q .nameWithOwner 2>/dev/null || git remote get-url origin | sed 's/.*github.com[:/]//;s/.git$//') && \
155
- BASE="{{input.base_branch}}" && \
156
- if [ -z "$BASE" ] || [ "$BASE" = "auto-detect" ]; then BASE=$(git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's@^refs/remotes/origin/@@' || echo main); fi && \
157
- git checkout "$BASE" 2>/dev/null || true && \
158
- git pull origin "$BASE" 2>/dev/null || true && \
159
- OLD_BRANCH=$(git branch --list "fix/{{input.issue_id}}-*" | head -1 | tr -d ' *') && \
160
- if [ -n "$OLD_BRANCH" ]; then git branch -D "$OLD_BRANCH" 2>/dev/null || true; fi && \
161
- echo "REPO=$REPO" && echo "BASE=$BASE" && echo "ORIG_BRANCH=$ORIG_BRANCH"
162
- outputs:
163
- - name: info
164
- extract: stdout
165
- fetch-issue:
166
- mode: shell
167
- project: "{{input.project}}"
168
- depends_on: [setup]
169
- prompt: |
170
- ISSUE_ID="{{input.issue_id}}" && \
171
- if [ -z "$ISSUE_ID" ]; then echo "__SKIP__ No issue_id provided" && exit 0; fi && \
172
- SETUP_INFO=$'{{nodes.setup.outputs.info}}' && \
173
- REPO=$(echo "$SETUP_INFO" | grep REPO= | cut -d= -f2) && \
174
- gh issue view "$ISSUE_ID" --json title,body,labels,number -R "$REPO"
175
- outputs:
176
- - name: issue_json
177
- extract: stdout
178
- fix-code:
179
- project: "{{input.project}}"
180
- depends_on: [fetch-issue]
181
- prompt: |
182
- A GitHub issue needs to be fixed. Here is the issue data:
183
-
184
- {{nodes.fetch-issue.outputs.issue_json}}
185
-
186
- Steps:
187
- 1. Create a new branch from the current branch (which is already on the base). Name format: fix/{{input.issue_id}}-<short-description> (e.g. fix/3-add-validation, fix/15-null-pointer). Any old branch for this issue has been cleaned up.
188
- 2. Analyze the issue and fix the code.
189
- 3. Stage and commit with a message referencing #{{input.issue_id}}.
190
-
191
- Base branch info: {{nodes.setup.outputs.info}}
192
-
193
- Additional context from user: {{input.extra_context}}
194
- outputs:
195
- - name: summary
196
- extract: result
197
- - name: diff
198
- extract: git_diff
199
- push-and-pr:
200
- mode: shell
201
- project: "{{input.project}}"
202
- depends_on: [fix-code]
203
- prompt: |
204
- SETUP_INFO=$'{{nodes.setup.outputs.info}}' && \
205
- REPO=$(echo "$SETUP_INFO" | grep REPO= | cut -d= -f2) && \
206
- BRANCH=$(git branch --show-current) && \
207
- git push -u origin "$BRANCH" --force-with-lease 2>&1 && \
208
- PR_URL=$(gh pr create --title "Fix #{{input.issue_id}}" \
209
- --body "Auto-fix by Forge Pipeline for issue #{{input.issue_id}}." -R "$REPO" 2>/dev/null || \
210
- gh pr view "$BRANCH" --json url -q .url -R "$REPO" 2>/dev/null) && \
211
- echo "$PR_URL"
212
- outputs:
213
- - name: pr_url
214
- extract: stdout
215
- review:
216
- project: "{{input.project}}"
217
- depends_on: [push-and-pr]
218
- prompt: |
219
- Review the code changes for issue #{{input.issue_id}}.
220
-
221
- Fix summary: {{nodes.fix-code.outputs.summary}}
222
-
223
- Git diff:
224
- {{nodes.fix-code.outputs.diff}}
225
-
226
- Check for:
227
- - Bugs and logic errors
228
- - Security vulnerabilities
229
- - Performance issues
230
- - Whether the fix actually addresses the issue
231
-
232
- Respond with:
233
- 1. APPROVED or CHANGES_REQUESTED
234
- 2. Specific issues found with file paths and line numbers
235
- outputs:
236
- - name: review_result
237
- extract: result
238
- cleanup:
239
- mode: shell
240
- project: "{{input.project}}"
241
- depends_on: [review]
242
- prompt: |
243
- SETUP_INFO=$'{{nodes.setup.outputs.info}}' && \
244
- ORIG=$(echo "$SETUP_INFO" | grep ORIG_BRANCH= | cut -d= -f2) && \
245
- PR_URL=$'{{nodes.push-and-pr.outputs.pr_url}}' && \
246
- if [ -n "$(git status --porcelain)" ]; then
247
- echo "Issue #{{input.issue_id}} — PR: $PR_URL (staying on $(git branch --show-current))"
248
- else
249
- git checkout "$ORIG" 2>/dev/null || true
250
- echo "Issue #{{input.issue_id}} — PR: $PR_URL (switched back to $ORIG)"
251
- fi
252
- outputs:
253
- - name: result
254
- extract: stdout
255
- `,
256
- 'multi-agent-collaboration': `
257
- name: multi-agent-collaboration
258
- type: conversation
259
- description: "Two agents collaborate: one designs, one implements"
260
- input:
261
- project: "Project name"
262
- task: "What to build or fix"
263
- agents:
264
- - id: architect
265
- agent: claude
266
- role: "You are a software architect. Round 1: design the solution with clear steps. Later rounds: review the implementation and say DONE if satisfied."
267
- - id: implementer
268
- agent: claude
269
- role: "You are a developer. Implement what the architect designs. After implementing, say DONE."
270
- max_rounds: 3
271
- stop_condition: "both agents say DONE"
272
- initial_prompt: "Task: {{input.task}}"
273
- `,
274
- 'review-mr': `
275
- name: review-mr
276
- description: "Review PR — AI code review with GitHub comment"
277
- input:
278
- project: "Project name"
279
- branch: "Branch name or PR number (empty = auto-detect latest open PR)"
280
- base_branch: "Target branch (default: main)"
281
- vars:
282
- default_base: main
283
- nodes:
284
- resolve-pr:
285
- mode: shell
286
- project: "{{input.project}}"
287
- worktree: false
288
- prompt: |
289
- INPUT_BRANCH="{{input.branch}}" && \\
290
- BASE="{{input.base_branch}}" && \\
291
- if [ -z "$BASE" ] || echo "$BASE" | grep -q '{{'; then BASE="main"; fi && \\
292
- if [ -z "$INPUT_BRANCH" ] || echo "$INPUT_BRANCH" | grep -q '{{'; then \\
293
- INPUT_BRANCH=$(gh pr list --state open --base "$BASE" --json number -q '.[0].number' 2>/dev/null); \\
294
- if [ -z "$INPUT_BRANCH" ]; then echo "ERROR: No open PR found targeting $BASE" && exit 1; fi; \\
295
- fi && \\
296
- if echo "$INPUT_BRANCH" | grep -qE '^[0-9]+$'; then \\
297
- PR_NUM="$INPUT_BRANCH"; \\
298
- else \\
299
- PR_NUM=$(gh pr list --state open --head "$INPUT_BRANCH" --json number -q '.[0].number' 2>/dev/null); \\
300
- if [ -z "$PR_NUM" ]; then echo "ERROR: No open PR for branch $INPUT_BRANCH" && exit 1; fi; \\
301
- fi && \\
302
- echo "$PR_NUM"
303
- outputs:
304
- - name: pr_number
305
- extract: stdout
306
- fetch-diff:
307
- mode: shell
308
- project: "{{input.project}}"
309
- worktree: false
310
- depends_on: [resolve-pr]
311
- prompt: "gh pr diff {{nodes.resolve-pr.outputs.pr_number}}"
312
- outputs:
313
- - name: diff
314
- extract: stdout
315
- fetch-files:
316
- mode: shell
317
- project: "{{input.project}}"
318
- worktree: false
319
- depends_on: [resolve-pr]
320
- prompt: |
321
- PR_NUM="{{nodes.resolve-pr.outputs.pr_number}}" && \\
322
- echo "=== PR #$PR_NUM ===" && \\
323
- gh pr view "$PR_NUM" --json title,author,additions,deletions,changedFiles,commits,body --jq '"Title: " + .title + "\\nAuthor: " + .author.login + "\\nFiles: " + (.changedFiles|tostring) + " changed, +" + (.additions|tostring) + "/-" + (.deletions|tostring) + "\\nCommits: " + (.commits|length|tostring) + "\\n\\n=== PR Description ===\\n" + (.body // "(no description)")' && \\
324
- echo "" && \\
325
- echo "=== Changed Files ===" && \\
326
- gh pr diff "$PR_NUM" --name-only
327
- outputs:
328
- - name: stats
329
- extract: stdout
330
- review:
331
- project: "{{input.project}}"
332
- worktree: false
333
- depends_on: [fetch-diff, fetch-files, resolve-pr]
334
- prompt: |
335
- You are a senior code reviewer. Perform a thorough code review of this PR.
336
-
337
- ## PR Info & Description
338
- {{nodes.fetch-files.outputs.stats}}
339
-
340
- ## Diff
341
- {{nodes.fetch-diff.outputs.diff}}
342
-
343
- ## Review Requirements
344
-
345
- **First**: Verify the PR description against actual changes:
346
- - Is every claimed change actually implemented?
347
- - Any claimed changes that are NOT in the diff?
348
- - Any changes in the diff NOT mentioned in the description?
349
-
350
- **Then**: Review code quality:
351
- 1. Bug risk — logic errors, edge cases, null references
352
- 2. Security — injection, hardcoded secrets, sensitive data exposure
353
- 3. Performance — obvious bottlenecks
354
- 4. Code quality — readability, naming, DRY
355
-
356
- ## Output
357
-
358
- Write the full review report to /tmp/forge-review-pr{{nodes.resolve-pr.outputs.pr_number}}.md in this format:
359
-
360
- ## 🤖 Forge AI Code Review — PR #{{nodes.resolve-pr.outputs.pr_number}}
361
-
362
- ### 📋 Summary
363
- - Verdict: ✅ Approve / ⚠️ Request Changes / ❌ Reject
364
- - One-line summary
365
-
366
- ### ✅ PR Description Verification
367
- List each change claimed in the PR description, mark ✓ implemented / ✗ not implemented / ⚠️ partial
368
-
369
- ### 🔴 Blockers (must fix)
370
- (write "None" if none)
371
-
372
- ### 🟡 Suggestions
373
-
374
- ### 🟢 Nice-to-have
375
-
376
- ### 💡 Highlights
377
-
378
- ---
379
- _Generated by [Forge](https://github.com/aiwatching/forge) Pipeline_
380
-
381
- **You MUST write the complete report to /tmp/forge-review-pr{{nodes.resolve-pr.outputs.pr_number}}.md. This is the most important step.**
382
- outputs:
383
- - name: report
384
- extract: result
385
- post-comment:
386
- mode: shell
387
- project: "{{input.project}}"
388
- worktree: false
389
- depends_on: [review, resolve-pr]
390
- prompt: |
391
- PR_NUM="{{nodes.resolve-pr.outputs.pr_number}}" && \\
392
- REPORT="/tmp/forge-review-pr\${PR_NUM}.md" && \\
393
- if [ ! -f "$REPORT" ]; then echo "ERROR: Review report not found at $REPORT" && exit 1; fi && \\
394
- gh pr comment "$PR_NUM" --body-file "$REPORT" && \\
395
- rm -f "$REPORT" && \\
396
- echo "Comment posted to PR #$PR_NUM"
397
- outputs:
398
- - name: result
399
- extract: stdout
400
- `,
401
- };
308
+ // All pipelines now live in the marketplace (`forge-workflow` repo).
309
+ // Empty BUILTIN_WORKFLOWS preserves the constant export so any external
310
+ // caller doesn't break, but no pipelines are shipped baked-in any more.
311
+ // To use the previously-builtin ones, install from marketplace:
312
+ // - issue-fix-and-review
313
+ // - multi-agent-collaboration
314
+ // - review-mr
315
+ export const BUILTIN_WORKFLOWS: Record<string, string> = {};
316
+
402
317
 
403
318
  export interface WorkflowWithMeta extends Workflow {
404
319
  builtin?: boolean;
@@ -409,22 +324,32 @@ export function listWorkflows(): WorkflowWithMeta[] {
409
324
  const userWorkflows: WorkflowWithMeta[] = [];
410
325
  if (existsSync(WORKFLOWS_DIR)) {
411
326
  for (const f of readdirSync(WORKFLOWS_DIR).filter(f => f.endsWith('.yaml') || f.endsWith('.yml'))) {
327
+ const fullPath = join(WORKFLOWS_DIR, f);
412
328
  try {
413
- userWorkflows.push({ ...parseWorkflow(readFileSync(join(WORKFLOWS_DIR, f), 'utf-8')), builtin: false });
414
- } catch {}
329
+ userWorkflows.push({ ...parseWorkflow(readFileSync(fullPath, 'utf-8')), builtin: false });
330
+ } catch (e) {
331
+ // A broken yaml shouldn't crash the listing, but the user needs
332
+ // to know WHY their pipeline isn't showing up. Common offender:
333
+ // `outputs:` written as a map (`{name: "$STDOUT"}`) when the
334
+ // parser expects an array (`- name: x; extract: stdout`) and
335
+ // therefore .map() throws.
336
+ console.warn(`[listWorkflows] skip ${fullPath}: ${(e as Error).message}`);
337
+ }
415
338
  }
416
339
  }
417
340
 
418
341
  // Built-in workflows (don't override user ones with same name)
419
342
  const userNames = new Set(userWorkflows.map(w => w.name));
420
343
  const builtins: WorkflowWithMeta[] = [];
421
- for (const [, yaml] of Object.entries(BUILTIN_WORKFLOWS)) {
344
+ for (const [key, yaml] of Object.entries(BUILTIN_WORKFLOWS)) {
422
345
  try {
423
346
  const w = parseWorkflow(yaml);
424
347
  if (!userNames.has(w.name)) {
425
348
  builtins.push({ ...w, builtin: true });
426
349
  }
427
- } catch {}
350
+ } catch (e) {
351
+ console.warn(`[listWorkflows] builtin ${key} failed to parse: ${(e as Error).message}`);
352
+ }
428
353
  }
429
354
 
430
355
  return [...builtins, ...userWorkflows];
@@ -434,7 +359,67 @@ export function getWorkflow(name: string): WorkflowWithMeta | null {
434
359
  return listWorkflows().find(w => w.name === name) || null;
435
360
  }
436
361
 
437
- function parseWorkflow(raw: string): Workflow {
362
+ /**
363
+ * Reserved template namespaces — these names already mean something in
364
+ * `{{...}}` substitution (input.x / vars.x / nodes.X.outputs.Y /
365
+ * run.tmp_dir / raw: prefix / loop.index|total). Picking one of them as
366
+ * the `for_each.as` variable would shadow it and silently break templates.
367
+ */
368
+ const FOREACH_RESERVED_NAMES = new Set(['input', 'vars', 'nodes', 'run', 'raw', 'loop']);
369
+
370
+ /**
371
+ * Parse + validate a workflow's `for_each:` block. Returns undefined when
372
+ * the workflow doesn't declare it (the common case). Throws on malformed
373
+ * input so authors find out at workflow load, not at first fire.
374
+ */
375
+ function parseForEach(raw: any, workflowType: string): ForEachSpec | undefined {
376
+ if (raw === undefined || raw === null) return undefined;
377
+ if (workflowType !== 'dag') {
378
+ throw new Error(`workflow.for_each is only supported on type='dag' workflows (got '${workflowType}')`);
379
+ }
380
+ if (typeof raw !== 'object' || Array.isArray(raw)) {
381
+ throw new Error("workflow.for_each must be an object with at least { source: ... }");
382
+ }
383
+ const source = raw.source;
384
+ if (source === undefined || source === null || source === '') {
385
+ throw new Error("workflow.for_each.source is required (template like '{{input.bug_ids}}' or literal array)");
386
+ }
387
+ if (typeof source !== 'string' && !Array.isArray(source)) {
388
+ throw new Error("workflow.for_each.source must be a template string or array literal");
389
+ }
390
+ const asName = raw.as ?? 'item';
391
+ if (typeof asName !== 'string' || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(asName)) {
392
+ throw new Error("workflow.for_each.as must be a valid identifier (letters / digits / underscore, starting with letter or _)");
393
+ }
394
+ if (FOREACH_RESERVED_NAMES.has(asName)) {
395
+ throw new Error(`workflow.for_each.as='${asName}' collides with reserved template namespace (input / vars / nodes / run / raw / loop)`);
396
+ }
397
+ const onFailure = raw.on_failure ?? raw.onFailure ?? 'continue';
398
+ if (onFailure !== 'continue' && onFailure !== 'stop') {
399
+ throw new Error(`workflow.for_each.on_failure must be 'continue' or 'stop' (got '${onFailure}')`);
400
+ }
401
+ const split = raw.split;
402
+ if (split !== undefined && typeof split !== 'string') {
403
+ throw new Error("workflow.for_each.split must be a string");
404
+ }
405
+ let before: string[] | undefined;
406
+ if (raw.before !== undefined && raw.before !== null) {
407
+ if (!Array.isArray(raw.before) || raw.before.some((s: any) => typeof s !== 'string' || !s)) {
408
+ throw new Error("workflow.for_each.before must be an array of node id strings");
409
+ }
410
+ // node-id existence validated in parseWorkflow once nodes are built.
411
+ before = [...raw.before];
412
+ }
413
+ return {
414
+ source,
415
+ split,
416
+ as: asName,
417
+ on_failure: onFailure,
418
+ before,
419
+ };
420
+ }
421
+
422
+ export function parseWorkflow(raw: string): Workflow {
438
423
  const parsed = YAML.parse(raw);
439
424
  const workflowType = parsed.type || 'dag';
440
425
  const nodes: Record<string, WorkflowNode> = {};
@@ -464,6 +449,10 @@ function parseWorkflow(raw: string): Workflow {
464
449
  next: r.next,
465
450
  })),
466
451
  maxIterations: n.max_iterations || n.maxIterations || 3,
452
+ retries: Number.isFinite(Number(n.retries)) ? Math.max(0, Math.trunc(Number(n.retries))) : 0,
453
+ retryDelayMs: Number.isFinite(Number(n.retry_delay_ms ?? n.retryDelayMs))
454
+ ? Math.max(0, Math.trunc(Number(n.retry_delay_ms ?? n.retryDelayMs)))
455
+ : 0,
467
456
  };
468
457
  }
469
458
 
@@ -486,6 +475,15 @@ function parseWorkflow(raw: string): Workflow {
486
475
  };
487
476
  }
488
477
 
478
+ const for_each = parseForEach(parsed.for_each, workflowType);
479
+ if (for_each?.before) {
480
+ for (const id of for_each.before) {
481
+ if (!nodes[id]) {
482
+ throw new Error(`workflow.for_each.before references unknown node id '${id}' — must match one of: ${Object.keys(nodes).join(', ') || '(no nodes)'}`);
483
+ }
484
+ }
485
+ }
486
+
489
487
  return {
490
488
  name: parsed.name || 'unnamed',
491
489
  type: workflowType,
@@ -493,6 +491,7 @@ function parseWorkflow(raw: string): Workflow {
493
491
  vars: parsed.vars || {},
494
492
  input: parsed.input || {},
495
493
  nodes,
494
+ for_each,
496
495
  conversation,
497
496
  };
498
497
  }
@@ -520,15 +519,41 @@ export function deletePipeline(id: string): boolean {
520
519
  const filePath = join(PIPELINES_DIR, `${id}.json`);
521
520
  try {
522
521
  if (existsSync(filePath)) {
523
- const { unlinkSync } = require('node:fs');
524
522
  unlinkSync(filePath);
525
523
  __pipelineCache.delete(filePath);
526
524
  return true;
527
525
  }
528
- } catch {}
526
+ } catch (e) {
527
+ console.warn(`[pipeline] deletePipeline ${id} failed: ${(e as Error).message}`);
528
+ }
529
529
  return false;
530
530
  }
531
531
 
532
/** Selection criteria for `bulkDeletePipelines`. */
export interface BulkDeletePipelinesFilter {
  /** Cutoff timestamp (ISO 8601). Only pipelines created strictly before it match. */
  before: string;
  /** Terminal statuses to remove. Omitted or empty selects all terminal statuses. */
  statuses?: Array<'done' | 'failed' | 'cancelled'>;
}

/**
 * Bulk-delete terminal pipelines created before `filter.before`.
 *
 * Only pipelines in a terminal status ('done', 'failed', 'cancelled') are ever
 * removed, so state belonging to a live ('running' / 'pending') run is never
 * deleted. The requested statuses are checked against that allow-list at
 * runtime as well, because the filter may originate from an untyped caller
 * (e.g. a parsed request body) where the compile-time type offers no protection.
 *
 * @param filter cutoff timestamp plus optional status selection
 * @returns the number of pipelines actually removed; 0 when `filter.before`
 *          is not a parseable date or no valid status was requested
 */
export function bulkDeletePipelines(filter: BulkDeletePipelinesFilter): number {
  const terminal: readonly string[] = ['done', 'failed', 'cancelled'];
  const requested: readonly string[] = filter.statuses && filter.statuses.length
    ? filter.statuses
    : terminal;
  // Drop anything that is not a terminal status instead of trusting the caller.
  const wanted = new Set(requested.filter((s) => terminal.includes(s)));
  if (wanted.size === 0) return 0;

  const cutoff = new Date(filter.before).getTime();
  if (!Number.isFinite(cutoff)) return 0;

  let removed = 0;
  for (const p of listPipelines()) {
    if (!wanted.has(p.status)) continue;
    const createdMs = new Date(p.createdAt).getTime();
    // Unparseable creation dates are skipped rather than treated as "old".
    if (!Number.isFinite(createdMs) || createdMs >= cutoff) continue;
    if (deletePipeline(p.id)) removed++;
  }
  return removed;
}
556
+
532
557
  // Parsed-pipeline cache keyed by absolute path → { mtime, pipeline }.
533
558
  // Pipeline files only change when a pipeline run advances (savePipeline) —
534
559
  // re-reading + JSON.parse on every list call was burning 1-3s with 200
@@ -638,19 +663,72 @@ function shellEscapeAnsiC(s: string): string {
638
663
  .replace(/\t/g, '\\t');
639
664
  }
640
665
 
641
- function resolveTemplate(template: string, ctx: {
666
/**
 * Stringify a for_each item for template substitution.
 *
 * - `null` / `undefined` become the empty string.
 * - Strings are returned unchanged; numbers and booleans use `String()`.
 * - Everything else is JSON-stringified (callers usually pull out a
 *   sub-field with `{{<asName>.foo}}` instead).
 *
 * Always returns a string: values JSON cannot represent (functions, symbols,
 * bigint, circular structures) fall back to `String(v)`.
 */
function stringifyForeachItem(v: unknown): string {
  if (v === null || v === undefined) return '';
  if (typeof v === 'string') return v;
  if (typeof v === 'number' || typeof v === 'boolean') return String(v);
  try {
    // JSON.stringify yields `undefined` (not a string) for functions and
    // symbols; without the fallback that would leak out despite the
    // declared return type.
    return JSON.stringify(v) ?? String(v);
  } catch {
    // Thrown for bigint and circular references.
    return String(v);
  }
}
675
+
676
+ export function resolveTemplate(template: string, ctx: {
642
677
  input: Record<string, string>;
643
678
  vars: Record<string, string>;
644
679
  nodes: Record<string, PipelineNodeState>;
680
+ tmpDir?: string;
681
+ forEach?: {
682
+ asName: string;
683
+ item: unknown;
684
+ index: number;
685
+ total: number;
686
+ };
645
687
  }, shellMode?: boolean): string {
646
688
  return template.replace(/\{\{(.*?)\}\}/g, (_, expr) => {
647
- const path = expr.trim();
689
+ let path = expr.trim();
690
+
691
+ // `{{raw:…}}` opts out of shell-mode ANSI-C escaping so the value
692
+ // is substituted verbatim. Use this when the value lands in a
693
+ // quoted heredoc, a file written by `cat <<EOF`, or stdin to jq —
694
+ // anywhere the default `\n → \\n` escaping would surface as literal
695
+ // backslash-n in the final output (LLM-generated multi-line text
696
+ // posted as comments / chat seeds, etc.).
697
+ let raw = false;
698
+ if (path.startsWith('raw:')) {
699
+ raw = true;
700
+ path = path.slice(4).trim();
701
+ }
702
+
648
703
  let value = '';
649
704
 
650
705
  // {{input.xxx}}
651
706
  if (path.startsWith('input.')) value = ctx.input[path.slice(6)] || '';
652
707
  // {{vars.xxx}}
653
708
  else if (path.startsWith('vars.')) value = ctx.vars[path.slice(5)] || '';
709
+ // {{run.tmp_dir}} — absolute path to this pipeline run's scratch dir
710
+ // (`<project_dir>/.forge/worktrees/pipeline-<id>/`). Empty if project
711
+ // lookup failed at startPipeline time. Yaml is responsible for mkdir-ing
712
+ // any sub-paths like `{{run.tmp_dir}}/mr-${MR_IID}`.
713
+ else if (path === 'run.tmp_dir') value = ctx.tmpDir || '';
714
+ // {{loop.index}} — current iteration (0-based), only set when this
715
+ // pipeline run was started against a for_each workflow.
716
+ else if (ctx.forEach && path === 'loop.index') value = String(ctx.forEach.index);
717
+ // {{loop.total}} — total iterations the pipeline will run.
718
+ else if (ctx.forEach && path === 'loop.total') value = String(ctx.forEach.total);
719
+ // {{<asName>}} — current item itself (scalar → str, object → JSON).
720
+ else if (ctx.forEach && path === ctx.forEach.asName) {
721
+ value = stringifyForeachItem(ctx.forEach.item);
722
+ }
723
+ // {{<asName>.foo}} — sub-field of object item. Returns empty string
724
+ // (not literal placeholder) on missing field so node prompts can
725
+ // safely conditionalize on whether the field was set.
726
+ else if (ctx.forEach && path.startsWith(ctx.forEach.asName + '.')) {
727
+ const field = path.slice(ctx.forEach.asName.length + 1);
728
+ const item = ctx.forEach.item;
729
+ const v = (item !== null && typeof item === 'object') ? (item as any)[field] : undefined;
730
+ value = stringifyForeachItem(v);
731
+ }
654
732
  // {{nodes.xxx.outputs.yyy}}
655
733
  else {
656
734
  const nodeMatch = path.match(/^nodes\.([\w-]+)\.outputs\.([\w-]+)$/);
@@ -658,14 +736,96 @@ function resolveTemplate(template: string, ctx: {
658
736
  const [, nodeId, outputName] = nodeMatch;
659
737
  value = ctx.nodes[nodeId]?.outputs[outputName] || '';
660
738
  } else {
661
- return `{{${path}}}`;
739
+ return `{{${raw ? 'raw:' : ''}${path}}}`;
662
740
  }
663
741
  }
664
742
 
665
- return shellMode ? shellEscapeAnsiC(value) : value;
743
+ return shellMode && !raw ? shellEscapeAnsiC(value) : value;
666
744
  });
667
745
  }
668
746
 
747
+ // ─── Per-run scratch dir (`{{run.tmp_dir}}`) ────────────────
748
+
749
/**
 * Derive the absolute scratch-directory path for a pipeline run:
 * `<project path>/.forge/worktrees/pipeline-<pipeline id>`.
 *
 * The `.forge/worktrees/` parent is the same directory the auto-worktree
 * logic in scheduleReadyNodes creates its worktrees under.
 *
 * Pure path computation — nothing is created on disk.
 *
 * @returns the path, or '' when `input.project` is missing or does not
 *          resolve to a known project
 */
function computePipelineTmpDir(pipeline: Pipeline): string {
  const projectName = pipeline.input?.project;
  const project = projectName ? getProjectInfo(projectName) : undefined;
  if (!project) return '';
  return join(project.path, '.forge', 'worktrees', `pipeline-${pipeline.id}`);
}
762
+
763
/**
 * Make sure the run's scratch dir exists and record it on the pipeline.
 *
 * Safe to call repeatedly (recursive mkdir). A failure is only logged:
 * `pipeline.tmpDir` is then left untouched, so `{{run.tmp_dir}}` renders
 * as an empty string for this run.
 */
function ensurePipelineTmpDir(pipeline: Pipeline): void {
  const scratchDir = computePipelineTmpDir(pipeline);
  if (!scratchDir) return;

  try {
    mkdirSync(scratchDir, { recursive: true });
  } catch (e) {
    console.warn(`[pipeline] could not create tmp_dir ${scratchDir}: ${(e as Error).message}`);
    return;
  }
  // Only advertise the directory once it is known to exist.
  pipeline.tmpDir = scratchDir;
}
777
+
778
/**
 * Delete the run's scratch dir, if there is one.
 *
 * Uses the recorded `pipeline.tmpDir`, falling back to the computed path
 * when none was recorded. Does nothing when no path can be determined or
 * the directory is absent. Removal errors are logged, never thrown.
 */
export function cleanupPipelineTmpDir(pipeline: Pipeline): void {
  const target = pipeline.tmpDir || computePipelineTmpDir(pipeline);
  if (!target) return;
  if (!existsSync(target)) return;

  try {
    rmSync(target, { recursive: true, force: true });
  } catch (e) {
    console.warn(`[pipeline] cleanupPipelineTmpDir(${target}) failed: ${(e as Error).message}`);
  }
}
791
+
792
+ // ─── for_each: source resolution ──────────────────────────
793
+
794
/**
 * Resolve a workflow's `for_each.source` into a concrete array of items.
 *
 * Rules:
 *   - source is an array  → a shallow copy of it is returned, so the run's
 *     item list never aliases (and cannot mutate) the workflow definition
 *   - source is a string  → rendered with resolveTemplate against
 *     (input, vars, nodes), split by `spec.split` (default ","), each piece
 *     trimmed, empty pieces dropped
 *   - anything else       → []
 *
 * Note: `spec.split` is used as-is when it is a string, so an empty-string
 * separator splits the rendered text into single characters.
 *
 * @param spec  parsed `for_each` block of the workflow
 * @param input pipeline input values available to `{{input.*}}`
 * @param vars  workflow vars available to `{{vars.*}}`
 * @param nodes node states available to `{{nodes.<id>.outputs.<name>}}`;
 *              needed when `for_each.before:` nodes compute the items,
 *              optional when the template only references input/vars
 * @returns the item list for the run (possibly empty)
 */
export function resolveForEachSource(
  spec: ForEachSpec,
  input: Record<string, string>,
  vars: Record<string, string>,
  nodes?: Record<string, PipelineNodeState>,
): unknown[] {
  const source: unknown = spec.source;

  if (Array.isArray(source)) {
    // Copy: callers keep this list as per-run state.
    return [...source];
  }
  if (typeof source !== 'string') {
    return [];
  }

  const rendered = resolveTemplate(source, { input, vars, nodes: nodes || {} }, false);
  const sep = spec.split ?? ',';
  return rendered
    .split(sep)
    .map((s) => s.trim())
    .filter((s) => s.length > 0);
}
828
+
669
829
  // ─── Project-level pipeline lock ─────────────────────────
670
830
  const projectPipelineLocks = new Map<string, string>(); // projectPath → pipelineId
671
831
 
@@ -728,6 +888,39 @@ export function startPipeline(
728
888
  };
729
889
  }
730
890
 
891
+ // Resolve for_each (if declared). Two paths:
892
+ // - No `before:` → items resolved immediately from input/vars (frozen).
893
+ // - With `before:` → defer; setup-phase nodes run first, then
894
+ // checkPipelineCompletion resolves items using their outputs and flips
895
+ // itemsResolved=true to enter the loop body.
896
+ let forEachState: Pipeline['forEach'];
897
+ if (workflow.for_each) {
898
+ const beforeIds = workflow.for_each.before;
899
+ if (beforeIds && beforeIds.length > 0) {
900
+ forEachState = {
901
+ items: [],
902
+ currentIndex: 0,
903
+ total: 0,
904
+ asName: workflow.for_each.as || 'item',
905
+ onFailure: workflow.for_each.on_failure || 'continue',
906
+ before: [...beforeIds],
907
+ itemsResolved: false,
908
+ iterations: [],
909
+ };
910
+ } else {
911
+ const items = resolveForEachSource(workflow.for_each, input, workflow.vars);
912
+ forEachState = {
913
+ items,
914
+ currentIndex: 0,
915
+ total: items.length,
916
+ asName: workflow.for_each.as || 'item',
917
+ onFailure: workflow.for_each.on_failure || 'continue',
918
+ itemsResolved: true,
919
+ iterations: [],
920
+ };
921
+ }
922
+ }
923
+
731
924
  const pipeline: Pipeline = {
732
925
  id,
733
926
  workflowName,
@@ -738,8 +931,27 @@ export function startPipeline(
738
931
  nodeOrder,
739
932
  createdAt: new Date().toISOString(),
740
933
  skills: opts.skills && opts.skills.length ? [...opts.skills] : undefined,
934
+ forEach: forEachState,
741
935
  };
742
936
 
937
+ ensurePipelineTmpDir(pipeline);
938
+
939
+ // Empty for_each source → nothing to iterate; settle done immediately.
940
+ // (E.g. user submitted `bug_ids: ""` or an empty array; not an error.)
941
+ // Deferred (with `before:`) skips this — items get resolved post-setup.
942
+ if (forEachState && forEachState.itemsResolved && forEachState.total === 0) {
943
+ pipeline.status = 'done';
944
+ pipeline.completedAt = new Date().toISOString();
945
+ savePipeline(pipeline);
946
+ // Mirror checkPipelineCompletion's done-side cleanup so an empty
947
+ // for_each behaves consistently with a normal done pipeline.
948
+ if (loadSettings().pipelineTmpCleanDoneImmediate !== false) {
949
+ cleanupPipelineTmpDir(pipeline);
950
+ }
951
+ setupTaskListener(pipeline.id);
952
+ return pipeline;
953
+ }
954
+
743
955
  savePipeline(pipeline);
744
956
 
745
957
  // Start nodes that have no dependencies
@@ -1052,8 +1264,10 @@ function finishConversation(pipeline: Pipeline, status: 'done' | 'failed') {
1052
1264
  const conv = pipeline.conversation!;
1053
1265
  for (const msg of conv.messages) {
1054
1266
  if (msg.status === 'running' && msg.taskId) {
1055
- // Cancel the running task
1056
- try { const { cancelTask } = require('./task-manager'); cancelTask(msg.taskId); } catch {}
1267
+ // Cancel the running task — may already be finished, ignore those
1268
+ try { cancelTask(msg.taskId); } catch (e) {
1269
+ console.warn(`[pipeline] cancelTask(${msg.taskId}) in finishConversation: ${(e as Error).message}`);
1270
+ }
1057
1271
  msg.status = status === 'done' ? 'done' : 'failed';
1058
1272
  if (!msg.content) msg.content = status === 'done' ? '(conversation ended)' : '(conversation failed)';
1059
1273
  }
@@ -1075,7 +1289,6 @@ export function cancelConversation(pipelineId: string): boolean {
1075
1289
  // Cancel any running task
1076
1290
  for (const msg of pipeline.conversation.messages) {
1077
1291
  if (msg.status === 'running' && msg.taskId) {
1078
- const { cancelTask } = require('./task-manager');
1079
1292
  cancelTask(msg.taskId);
1080
1293
  }
1081
1294
  if (msg.status === 'pending') msg.status = 'failed';
@@ -1224,7 +1437,40 @@ function recoverStuckPipelines() {
1224
1437
  node.status = 'done';
1225
1438
  node.completedAt = new Date().toISOString();
1226
1439
  changed = true;
1227
- } else if (task.status === 'failed' || task.status === 'cancelled') {
1440
+ } else if (task.status === 'failed') {
1441
+ // Honor retries config — if the node has retry budget left,
1442
+ // reset to pending so scheduleReadyNodes picks it up again.
1443
+ // Without this branch, the reconciler races with the
1444
+ // subscription-based failed handler (lib/pipeline.ts ~1567)
1445
+ // and can settle the node as failed BEFORE retry logic runs,
1446
+ // which is why yaml `retries: N` sometimes appears to have
1447
+ // no effect (the failure was caught here, not there).
1448
+ const nodeDef = workflow.nodes[nodeId];
1449
+ const attempts = node.attempts ?? 1;
1450
+ const maxAttempts = 1 + (nodeDef?.retries ?? 0);
1451
+ if (attempts < maxAttempts) {
1452
+ console.warn(`[pipeline:reconcile] node ${nodeId} failed (attempt ${attempts}/${maxAttempts}): ${task.error || 'Task failed'} — resetting to pending for retry`);
1453
+ node.status = 'pending';
1454
+ node.taskId = undefined;
1455
+ node.outputs = {};
1456
+ node.error = undefined;
1457
+ node.completedAt = undefined;
1458
+ changed = true;
1459
+ } else {
1460
+ node.status = 'failed';
1461
+ node.error = task.error || 'Task failed';
1462
+ if (attempts > 1) {
1463
+ node.error = `[after ${attempts} attempts] ${node.error}`;
1464
+ }
1465
+ node.completedAt = new Date().toISOString();
1466
+ changed = true;
1467
+ }
1468
+ } else if (task.status === 'cancelled') {
1469
+ // Cancellation cascade is handled by the subscription path
1470
+ // (lib/pipeline.ts ~1605) — keep the reconciler's original
1471
+ // behavior here (mark node failed without cascade) so we
1472
+ // don't end up with a half-cancelled pipeline whose other
1473
+ // pending nodes never get skipped.
1228
1474
  node.status = 'failed';
1229
1475
  node.error = task.error || 'Task failed';
1230
1476
  node.completedAt = new Date().toISOString();
@@ -1270,7 +1516,6 @@ function reapOrphanedPipelineTasks() {
1270
1516
  const t = getTask(node.taskId);
1271
1517
  if (t && t.status === 'running') {
1272
1518
  try {
1273
- const { cancelTask } = require('./task-manager');
1274
1519
  cancelTask(node.taskId);
1275
1520
  reaped += 1;
1276
1521
  } catch (err) {
@@ -1329,7 +1574,6 @@ export async function retryNode(pipelineId: string, nodeId: string): Promise<{ o
1329
1574
  // so it doesn't keep occupying the project lock / project slot.
1330
1575
  if (nodeState.status === 'running' && nodeState.taskId) {
1331
1576
  try {
1332
- const { cancelTask } = require('./task-manager');
1333
1577
  cancelTask(nodeState.taskId);
1334
1578
  } catch (err) {
1335
1579
  console.warn(`[pipeline] retryNode: cancelTask(${nodeState.taskId}) threw:`, err);
@@ -1398,25 +1642,61 @@ export function cancelPipeline(id: string): boolean {
1398
1642
  pipeline.status = 'cancelled';
1399
1643
  pipeline.completedAt = new Date().toISOString();
1400
1644
 
1401
- // Cancel all running tasks
1645
+ // Cancel all running tasks AND mark their node states. Without
1646
+ // updating the node state, the orchestrator's task event listener
1647
+ // (which doesn't currently fire on 'cancelled') would leave the
1648
+ // node stuck at 'running' forever.
1402
1649
  for (const [, node] of Object.entries(pipeline.nodes)) {
1403
1650
  if (node.status === 'running' && node.taskId) {
1404
- const { cancelTask } = require('./task-manager');
1405
- cancelTask(node.taskId);
1651
+ try { cancelTask(node.taskId); } catch (e) {
1652
+ console.warn(`[pipeline] cancelTask(${node.taskId}) during cancelPipeline: ${(e as Error).message}`);
1653
+ }
1654
+ node.status = 'cancelled';
1655
+ node.completedAt = new Date().toISOString();
1406
1656
  }
1407
1657
  if (node.status === 'pending') node.status = 'skipped';
1408
1658
  }
1409
1659
 
1410
1660
  savePipeline(pipeline);
1661
+
1662
+ // Propagate to pipeline_runs DB row immediately so Job
1663
+ // countMyInflightPipelines / isJobBusy sees it without waiting for
1664
+ // the periodic reconcile (30s grace window).
1665
+ import('./pipeline-scheduler').then(({ syncRunStatus }) => {
1666
+ try { syncRunStatus(id); } catch (e) {
1667
+ console.warn(`[pipeline] syncRunStatus(${id}) after cancel: ${(e as Error).message}`);
1668
+ }
1669
+ }).catch(() => {});
1670
+
1411
1671
  return true;
1412
1672
  }
1413
1673
 
1414
1674
  // ─── Node Scheduling ──────────────────────────────────────
1415
1675
 
1416
1676
  async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
1417
- const ctx = { input: pipeline.input, vars: pipeline.vars, nodes: pipeline.nodes };
1677
+ // forEach ctx exposes {{<asName>}} / {{loop.index}} / {{loop.total}}
1678
+ // to every template substitution in this iteration. Undefined when this
1679
+ // workflow isn't a for_each one (then templates see no forEach namespace,
1680
+ // {{item}} etc. pass through as literal text).
1681
+ const forEachCtx = pipeline.forEach
1682
+ ? {
1683
+ asName: pipeline.forEach.asName,
1684
+ item: pipeline.forEach.items[pipeline.forEach.currentIndex],
1685
+ index: pipeline.forEach.currentIndex,
1686
+ total: pipeline.forEach.total,
1687
+ }
1688
+ : undefined;
1689
+ const ctx = { input: pipeline.input, vars: pipeline.vars, nodes: pipeline.nodes, tmpDir: pipeline.tmpDir, forEach: forEachCtx };
1690
+
1691
+ // for_each setup phase: while items aren't resolved, only `before:` nodes
1692
+ // are eligible — loop-body nodes wait until items are produced + we transition
1693
+ // to itemsResolved=true (in checkPipelineCompletion).
1694
+ const inSetupPhase = pipeline.forEach && !pipeline.forEach.itemsResolved;
1695
+ const beforeSet = new Set(pipeline.forEach?.before || []);
1418
1696
 
1419
1697
  for (const nodeId of pipeline.nodeOrder) {
1698
+ if (inSetupPhase && !beforeSet.has(nodeId)) continue;
1699
+
1420
1700
  const nodeState = pipeline.nodes[nodeId];
1421
1701
  if (nodeState.status !== 'pending') continue;
1422
1702
 
@@ -1468,12 +1748,14 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
1468
1748
  const useWorktree = nodeDef.worktree !== false && !nodeDef.workdir;
1469
1749
  const branchName = nodeDef.branch ? resolveTemplate(nodeDef.branch, ctx) : `pipeline/${pipeline.id.slice(0, 8)}`;
1470
1750
  if (useWorktree) try {
1471
- const { execSync } = require('node:child_process');
1472
1751
  const worktreePath = `${projectInfo.path}/.forge/worktrees/${branchName.replace(/\//g, '-')}`;
1473
- const { mkdirSync } = require('node:fs');
1474
1752
  mkdirSync(`${projectInfo.path}/.forge/worktrees`, { recursive: true });
1475
1753
 
1476
- // Create branch if needed
1754
+ // Create branch if needed.
1755
+ // Silent catch: `git branch X` fails with "already exists" — the
1756
+ // common case here, not an error worth logging. If the project
1757
+ // dir isn't a git repo we fail later at `git worktree add` with
1758
+ // a real message, so this stays quiet.
1477
1759
  try { execSync(`git branch ${branchName}`, { cwd: projectInfo.path, stdio: 'pipe' }); } catch {}
1478
1760
 
1479
1761
  // Create or reuse worktree
@@ -1481,10 +1763,11 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
1481
1763
  execSync(`git worktree add "${worktreePath}" ${branchName}`, { cwd: projectInfo.path, stdio: 'pipe' });
1482
1764
  console.log(`[pipeline] Created worktree: ${worktreePath} (branch: ${branchName})`);
1483
1765
  } catch {
1484
- const { existsSync } = require('node:fs');
1485
1766
  if (existsSync(worktreePath)) {
1486
1767
  console.log(`[pipeline] Reusing worktree: ${worktreePath}`);
1487
1768
  } else {
1769
+ // Silent: preemptive cleanup before re-adding. Fails when
1770
+ // path doesn't exist — exactly when we DON'T need cleanup.
1488
1771
  try { execSync(`git worktree remove "${worktreePath}" --force`, { cwd: projectInfo.path, stdio: 'pipe' }); } catch {}
1489
1772
  execSync(`git worktree add "${worktreePath}" ${branchName}`, { cwd: projectInfo.path, stdio: 'pipe' });
1490
1773
  console.log(`[pipeline] Recreated worktree: ${worktreePath}`);
@@ -1510,7 +1793,6 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
1510
1793
  // upstream output (e.g. accidentally including KEY=value lines).
1511
1794
  if (resolved && !resolved.includes('\n')) {
1512
1795
  try {
1513
- const { statSync } = require('node:fs');
1514
1796
  if (statSync(resolved).isDirectory()) {
1515
1797
  effectivePath = resolved;
1516
1798
  } else {
@@ -1611,6 +1893,7 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
1611
1893
  nodeState.status = 'running';
1612
1894
  nodeState.taskId = task.id;
1613
1895
  nodeState.iterations++;
1896
+ nodeState.attempts = (nodeState.attempts ?? 0) + 1;
1614
1897
  nodeState.startedAt = new Date().toISOString();
1615
1898
  savePipeline(pipeline);
1616
1899
 
@@ -1622,39 +1905,178 @@ async function scheduleReadyNodes(pipeline: Pipeline, workflow: Workflow) {
1622
1905
  }
1623
1906
 
1624
1907
  function checkPipelineCompletion(pipeline: Pipeline) {
1625
- const states = Object.values(pipeline.nodes);
1626
- const allDone = states.every(s => s.status === 'done' || s.status === 'skipped' || s.status === 'failed');
1908
+ if (pipeline.status !== 'running') return;
1909
+
1910
+ // ── for_each setup-phase: gate before flipping into the loop body ──
1911
+ // While items aren't resolved, "completion" only inspects `before:` nodes.
1912
+ // Once they all settle: success → resolve source from their outputs, init
1913
+ // iterations, transition to itemsResolved=true. Any failure → fail-fast
1914
+ // (we never enter the loop without items).
1915
+ if (pipeline.forEach && !pipeline.forEach.itemsResolved) {
1916
+ const beforeIds = pipeline.forEach.before || [];
1917
+ const beforeStates = beforeIds.map(id => pipeline.nodes[id]).filter(Boolean);
1918
+ const setupDone = beforeStates.every(s => s.status === 'done' || s.status === 'skipped' || s.status === 'failed');
1919
+ if (!setupDone) return;
1920
+
1921
+ const setupFailed = beforeStates.some(s => s.status === 'failed' || s.status === 'skipped');
1922
+ if (setupFailed) {
1923
+ pipeline.status = 'failed';
1924
+ pipeline.completedAt = new Date().toISOString();
1925
+ savePipeline(pipeline);
1926
+ finalizePipeline(pipeline);
1927
+ return;
1928
+ }
1929
+
1930
+ // All setup nodes done — resolve items using their outputs.
1931
+ const workflow = getWorkflow(pipeline.workflowName);
1932
+ if (!workflow || !workflow.for_each) {
1933
+ pipeline.status = 'failed';
1934
+ pipeline.completedAt = new Date().toISOString();
1935
+ savePipeline(pipeline);
1936
+ finalizePipeline(pipeline);
1937
+ return;
1938
+ }
1939
+ let items: unknown[];
1940
+ try {
1941
+ items = resolveForEachSource(workflow.for_each, pipeline.input, pipeline.vars, pipeline.nodes);
1942
+ } catch (e) {
1943
+ console.warn(`[pipeline] for_each source resolution failed: ${(e as Error).message}`);
1944
+ pipeline.status = 'failed';
1945
+ pipeline.completedAt = new Date().toISOString();
1946
+ savePipeline(pipeline);
1947
+ finalizePipeline(pipeline);
1948
+ return;
1949
+ }
1950
+ pipeline.forEach.items = items;
1951
+ pipeline.forEach.total = items.length;
1952
+ pipeline.forEach.itemsResolved = true;
1953
+
1954
+ // Empty resolved items → settle done (setup succeeded but nothing to iterate).
1955
+ if (items.length === 0) {
1956
+ pipeline.status = 'done';
1957
+ pipeline.completedAt = new Date().toISOString();
1958
+ savePipeline(pipeline);
1959
+ finalizePipeline(pipeline);
1960
+ return;
1961
+ }
1627
1962
 
1628
- if (allDone && pipeline.status === 'running') {
1629
- const anyFailed = states.some(s => s.status === 'failed');
1630
- pipeline.status = anyFailed ? 'failed' : 'done';
1631
- pipeline.completedAt = new Date().toISOString();
1632
1963
  savePipeline(pipeline);
1633
- notifyPipelineComplete(pipeline);
1964
+ void scheduleReadyNodes(pipeline, workflow);
1965
+ return;
1966
+ }
1634
1967
 
1635
- // Sync run status to project pipeline runs
1636
- try {
1637
- const { syncRunStatus } = require('./pipeline-scheduler');
1638
- syncRunStatus(pipeline.id);
1639
- } catch {}
1640
-
1641
- // Log worktree info for user review
1642
- for (const [nodeId, state] of Object.entries(pipeline.nodes)) {
1643
- const wt = (state as any).worktreePath;
1644
- const branch = (state as any).worktreeBranch;
1645
- if (wt && branch) {
1646
- console.log(`[pipeline] Worktree preserved: ${wt} (branch: ${branch}) — review changes, then: git worktree remove "${wt}"`);
1968
+ // ── loop-body / plain DAG: completion check excludes `before:` nodes ──
1969
+ const beforeSet = new Set(pipeline.forEach?.before || []);
1970
+ const bodyStates = Object.entries(pipeline.nodes)
1971
+ .filter(([id]) => !beforeSet.has(id))
1972
+ .map(([, s]) => s);
1973
+ const allDone = bodyStates.every(s => s.status === 'done' || s.status === 'skipped' || s.status === 'failed');
1974
+ if (!allDone) return;
1975
+
1976
+ const anyFailed = bodyStates.some(s => s.status === 'failed');
1977
+
1978
+ // ── for_each: this is iteration boundary, not pipeline boundary ──
1979
+ if (pipeline.forEach) {
1980
+ snapshotIteration(pipeline, anyFailed);
1981
+
1982
+ const stop = anyFailed && pipeline.forEach.onFailure === 'stop';
1983
+ pipeline.forEach.currentIndex++;
1984
+ const hasMore = !stop && pipeline.forEach.currentIndex < pipeline.forEach.total;
1985
+
1986
+ if (hasMore) {
1987
+ // Reset loop-body node states for next iteration; `before:` nodes
1988
+ // stay `done` so their outputs are reused. iterations[] history persists.
1989
+ for (const nodeId of Object.keys(pipeline.nodes)) {
1990
+ if (beforeSet.has(nodeId)) continue;
1991
+ pipeline.nodes[nodeId] = { status: 'pending', outputs: {}, iterations: 0 };
1992
+ }
1993
+ savePipeline(pipeline);
1994
+ const workflow = getWorkflow(pipeline.workflowName);
1995
+ if (workflow) {
1996
+ void scheduleReadyNodes(pipeline, workflow);
1647
1997
  }
1998
+ return;
1648
1999
  }
1649
2000
 
1650
- // Release project lock
1651
- const workflow = getWorkflow(pipeline.workflowName);
1652
- if (workflow) {
1653
- const projectNames = new Set(Object.values(workflow.nodes).map(n => n.project));
1654
- for (const pName of projectNames) {
1655
- const pInfo = getProjectInfo(resolveTemplate(pName, { input: pipeline.input, vars: pipeline.vars, nodes: pipeline.nodes }));
1656
- if (pInfo) releaseProjectLock(pInfo.path, pipeline.id);
1657
- }
2001
+ // Done iterating — overall failed iff any iteration failed.
2002
+ const overallFailed = pipeline.forEach.iterations.some(i => i.status === 'failed');
2003
+ pipeline.status = overallFailed ? 'failed' : 'done';
2004
+ } else {
2005
+ pipeline.status = anyFailed ? 'failed' : 'done';
2006
+ }
2007
+
2008
+ pipeline.completedAt = new Date().toISOString();
2009
+ savePipeline(pipeline);
2010
+ finalizePipeline(pipeline);
2011
+ }
2012
+
2013
/**
 * Append a record of the just-finished iteration to
 * `pipeline.forEach.iterations`. Does nothing for pipelines without
 * for_each state.
 *
 * The record holds the current index, a 'failed'/'done' status derived from
 * `anyFailed`, and a copy of each loop-body node's status, outputs, error and
 * task id. Nodes listed in `forEach.before` are left out, since they are not
 * part of per-iteration history.
 *
 * `startedAt` is taken from the previous record's `completedAt` (or
 * `pipeline.createdAt` when there is no previous record), so no separate
 * per-iteration start timestamp has to be tracked.
 */
export function snapshotIteration(pipeline: Pipeline, anyFailed: boolean) {
  const loop = pipeline.forEach;
  if (!loop) return;

  const history = loop.iterations;
  const previous = history[history.length - 1];
  const startedAt = previous?.completedAt || pipeline.createdAt;

  const setupIds = new Set(loop.before || []);
  const nodes: Record<string, { status: PipelineNodeStatus; outputs: Record<string, string>; error?: string; taskId?: string }> = {};
  Object.entries(pipeline.nodes).forEach(([nodeId, state]) => {
    if (setupIds.has(nodeId)) return;
    nodes[nodeId] = {
      status: state.status,
      outputs: { ...state.outputs },
      error: state.error,
      taskId: state.taskId,
    };
  });

  history.push({
    index: loop.currentIndex,
    status: anyFailed ? 'failed' : 'done',
    startedAt,
    completedAt: new Date().toISOString(),
    nodes,
  });
}
2038
+
2039
/**
 * Housekeeping that runs once a pipeline has settled. Shared by the plain
 * DAG path and the for_each paths of checkPipelineCompletion.
 *
 * In order: remove the scratch dir (successful runs only, unless the
 * `pipelineTmpCleanDoneImmediate` setting is `false`), send the completion
 * notification, push the status to pipeline_runs, log any preserved
 * worktrees, and release the project locks held by this pipeline.
 */
function finalizePipeline(pipeline: Pipeline) {
  // Scratch dir is wiped on success only; other outcomes keep it for inspection.
  if (pipeline.status === 'done' && loadSettings().pipelineTmpCleanDoneImmediate !== false) {
    cleanupPipelineTmpDir(pipeline);
  }

  notifyPipelineComplete(pipeline);

  // pipeline-scheduler imports this module at its top, so it is loaded lazily
  // here to sidestep the circular dependency.
  import('./pipeline-scheduler')
    .then(({ syncRunStatus }) => {
      try {
        syncRunStatus(pipeline.id);
      } catch (e) {
        console.warn(`[pipeline] syncRunStatus(${pipeline.id}) failed: ${(e as Error).message}`);
      }
    })
    .catch((e) => {
      console.warn(`[pipeline] dynamic import of pipeline-scheduler failed: ${e?.message ?? e}`);
    });

  // Tell the user which worktrees were left behind (for a for_each pipeline
  // these are the last iteration's nodes).
  for (const state of Object.values(pipeline.nodes)) {
    const wt = (state as any).worktreePath;
    const branch = (state as any).worktreeBranch;
    if (!wt || !branch) continue;
    console.log(`[pipeline] Worktree preserved: ${wt} (branch: ${branch}) — review changes, then: git worktree remove "${wt}"`);
  }

  // Release project lock
  const workflow = getWorkflow(pipeline.workflowName);
  if (!workflow) return;

  const templateCtx = { input: pipeline.input, vars: pipeline.vars, nodes: pipeline.nodes, tmpDir: pipeline.tmpDir };
  const projectNames = new Set(Object.values(workflow.nodes).map(n => n.project));
  for (const pName of projectNames) {
    const pInfo = getProjectInfo(resolveTemplate(pName, templateCtx));
    if (pInfo) releaseProjectLock(pInfo.path, pipeline.id);
  }
}
@@ -1669,7 +2091,7 @@ function setupTaskListener(pipelineId: string) {
1669
2091
 
1670
2092
  const cleanup = onTaskEvent((taskId, event, data) => {
1671
2093
  if (event !== 'status') return;
1672
- if (data !== 'done' && data !== 'failed') return;
2094
+ if (data !== 'done' && data !== 'failed' && data !== 'cancelled') return;
1673
2095
 
1674
2096
  const pipeline = getPipeline(pipelineId);
1675
2097
  if (!pipeline || pipeline.status !== 'running') {
@@ -1744,11 +2166,66 @@ function setupTaskListener(pipelineId: string) {
1744
2166
  savePipeline(pipeline);
1745
2167
  // No per-step done notification — only notify on start and failure
1746
2168
  } else if (data === 'failed') {
1747
- nodeState.status = 'failed';
1748
- nodeState.error = task?.error || 'Task failed';
2169
+ // Retry budget: if the node has `retries` configured and we
2170
+ // haven't exhausted it, re-queue the node for another attempt
2171
+ // instead of marking failed.
2172
+ const attempts = nodeState.attempts ?? 1;
2173
+ const maxAttempts = 1 + (nodeDef.retries ?? 0);
2174
+ if (attempts < maxAttempts) {
2175
+ const errMsg = task?.error || 'Task failed';
2176
+ const delayMs = nodeDef.retryDelayMs ?? 0;
2177
+ console.warn(`[pipeline] node ${nodeId} failed (attempt ${attempts}/${maxAttempts}): ${errMsg} — retrying in ${delayMs}ms`);
2178
+ notifyStep(pipeline, nodeId, 'failed', `attempt ${attempts}/${maxAttempts}: ${errMsg} — will retry`);
2179
+ // Reset node to pending so scheduleReadyNodes picks it up
2180
+ // again. Wipe transient state so the retry is a clean attempt.
2181
+ nodeState.status = 'pending';
2182
+ nodeState.taskId = undefined;
2183
+ nodeState.outputs = {};
2184
+ nodeState.error = undefined;
2185
+ nodeState.completedAt = undefined;
2186
+ savePipeline(pipeline);
2187
+ if (delayMs > 0) {
2188
+ setTimeout(() => {
2189
+ const fresh = getPipeline(pipelineId);
2190
+ if (fresh && fresh.status === 'running') {
2191
+ scheduleReadyNodes(fresh, workflow);
2192
+ }
2193
+ }, delayMs);
2194
+ return; // skip immediate scheduleReadyNodes below
2195
+ }
2196
+ } else {
2197
+ nodeState.status = 'failed';
2198
+ nodeState.error = task?.error || 'Task failed';
2199
+ if (attempts > 1) {
2200
+ nodeState.error = `[after ${attempts} attempts] ${nodeState.error}`;
2201
+ }
2202
+ nodeState.completedAt = new Date().toISOString();
2203
+ savePipeline(pipeline);
2204
+ notifyStep(pipeline, nodeId, 'failed', nodeState.error);
2205
+ }
2206
+ } else if (data === 'cancelled') {
2207
+ // Cascade: a cancelled task means user pulled the plug (either
2208
+ // via cancelPipeline above, or by killing the underlying task
2209
+ // directly). Mark this node cancelled, then mark the whole
2210
+ // pipeline cancelled so Job/UI see a clean terminal state.
2211
+ nodeState.status = 'cancelled';
2212
+ nodeState.error = task?.error || 'Task cancelled';
1749
2213
  nodeState.completedAt = new Date().toISOString();
2214
+ // Sibling pending nodes should be skipped — nothing else will
2215
+ // run on this cancelled pipeline.
2216
+ for (const [, sib] of Object.entries(pipeline.nodes)) {
2217
+ if (sib.status === 'pending') sib.status = 'skipped';
2218
+ }
2219
+ if (pipeline.status === 'running') {
2220
+ pipeline.status = 'cancelled';
2221
+ pipeline.completedAt = new Date().toISOString();
2222
+ }
1750
2223
  savePipeline(pipeline);
1751
- notifyStep(pipeline, nodeId, 'failed', nodeState.error);
2224
+ notifyStep(pipeline, nodeId, 'cancelled', nodeState.error);
2225
+ // Propagate to DB so Job sees it without reconcile delay.
2226
+ import('./pipeline-scheduler').then(({ syncRunStatus }) => {
2227
+ try { syncRunStatus(pipelineId); } catch {}
2228
+ }).catch(() => {});
1752
2229
  }
1753
2230
 
1754
2231
  // Schedule next ready nodes
@@ -1834,7 +2311,9 @@ async function notifyStep(pipeline: Pipeline, nodeId: string, status: string, er
1834
2311
  disable_web_page_preview: true,
1835
2312
  }),
1836
2313
  });
1837
- } catch {}
2314
+ } catch (e) {
2315
+ console.warn(`[pipeline] notifyStep telegram failed (${pipeline.id}/${nodeId}): ${(e as Error).message}`);
2316
+ }
1838
2317
  }
1839
2318
 
1840
2319
  async function notifyPipelineComplete(pipeline: Pipeline) {
@@ -1858,5 +2337,7 @@ async function notifyPipelineComplete(pipeline: Pipeline) {
1858
2337
  disable_web_page_preview: true,
1859
2338
  }),
1860
2339
  });
1861
- } catch {}
2340
+ } catch (e) {
2341
+ console.warn(`[pipeline] notifyPipelineComplete telegram failed (${pipeline.id}): ${(e as Error).message}`);
2342
+ }
1862
2343
  }