@zibby/workflow-templates 0.7.1 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,162 @@
1
+ /**
2
+ * notify — LLM + SKILLS.CHAT_NOTIFY. Posts ONE human-reviewable card
3
+ * summarizing the improvement proposals to the configured chat destination.
4
+ * This is the "NOTIFY" half of READ → PROPOSE → NOTIFY: the human reads the
5
+ * card and acts manually. The supervisor never applies a change itself in v1.
6
+ *
7
+ * Mirrors sentry-triage's dispatch node: chatNotifySkill.resolve() picks
8
+ * slack or lark from which env var is set (SLACK_CHANNEL vs LARK_RECEIVE_ID),
9
+ * so the LLM only ever sees one provider's tools.
10
+ *
11
+ * ENV tab config — required:
12
+ * SLACK_CHANNEL OR LARK_RECEIVE_ID — provider selector + destination
13
+ * ENV tab config — optional:
14
+ * SLACK_MENTIONS / LARK_MENTIONS — JSON array of mentions on the card
15
+ */
16
+
17
+ import { z, SKILLS } from '@zibby/core';
18
+
19
/**
 * One delivery record for the posted (or skipped / failed) card.
 *
 * Everything except `status` is nullish rather than optional on purpose:
 * the LLM emits explicit `null` instead of omitting keys, and `.optional()`
 * would reject `null` and fail the node.
 */
const DispatchedRecordSchema = z.object({
  status: z.enum(['sent', 'skipped', 'failed']),
  recipient: z
    .object({
      kind: z.enum(['channel', 'user_dm', 'usergroup']).nullish(),
      id: z.string().nullish(),
      label: z.string().nullish(),
    })
    .nullish(),
  proposalCount: z.number().nullish(),
  // Provider-specific message handles: Slack fills messageTs, Lark fills messageId.
  messageTs: z.string().nullish(),
  messageId: z.string().nullish(),
  detail: z.string().nullish(),
});
33
+
34
/**
 * Output envelope of the notify node: the per-message delivery records plus
 * numeric counters (total / sent / skipped / failed).
 */
const NotifyOutputSchema = z.object({
  dispatched: z.array(DispatchedRecordSchema),
  summary: z.object(
    Object.fromEntries(
      ['total', 'sent', 'skipped', 'failed'].map((counter) => [counter, z.number()])
    )
  ),
});
43
+
44
// Maps each proposal `changeKind` to the human-readable label shown on the card.
const CHANGE_KIND_LABEL = Object.fromEntries([
  ['add_test_gate', 'Add a test gate'],
  ['tweak_prompt', 'Tweak the prompt'],
  ['add_human_approval_gate', 'Add a human-approval gate'],
  ['drop_redundant_step', 'Drop a redundant step'],
  ['other', 'Other'],
]);
51
+
52
/**
 * Parse a `*_MENTIONS` env value (expected: a JSON array of mention strings).
 *
 * Best-effort: malformed JSON or a non-array value yields no mentions rather
 * than failing the node. Entries that are not non-empty strings are dropped
 * so they cannot leak into the prompt as "[object Object]" or blank tokens.
 *
 * @param {string} raw - Raw env value.
 * @returns {string[]} Mention strings to prepend to the card (possibly empty).
 */
function parseMentions(raw) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    console.warn('notify: mentions env var is not valid JSON — ignoring it.');
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  return parsed.filter((entry) => typeof entry === 'string' && entry.trim() !== '');
}

/**
 * Build the prompt for the notify node.
 *
 * Reads the proposals from `state.propose_improvements.proposals` and the
 * lookback window from `state.scan_pipelines.lookbackHours` (falling back to
 * `state.lookbackHours`, then 24). The destination comes from the environment:
 * SLACK_CHANNEL wins when set, otherwise LARK_RECEIVE_ID.
 *
 * @param {object} [state] - Workflow state.
 * @returns {string} Prompt text for the LLM.
 * @throws {Error} When there are proposals to post but neither SLACK_CHANNEL
 *   nor LARK_RECEIVE_ID is configured.
 */
const NOTIFY_PROMPT = (state = {}) => {
  const proposals = state?.propose_improvements?.proposals || [];
  const scan = state?.scan_pipelines || {};
  const lookbackHours = scan.lookbackHours || state?.lookbackHours || 24;

  const slackChannel = process.env.SLACK_CHANNEL || '';
  const larkReceiveId = process.env.LARK_RECEIVE_ID || '';

  // ── No-op short-circuit ─────────────────────────────────────────
  // Nothing flagged → keep the run green without forcing channel setup:
  // ask for the fixed "skipped" envelope verbatim, with no tool calls.
  // NOTE(review): the envelope reports total 0 alongside skipped 1, while the
  // "sent" example below uses total 1 for one record — confirm which
  // convention downstream consumers expect before changing it.
  if (proposals.length === 0) {
    return `pipeline-supervisor found no problem pipelines this run — nothing to propose.

Return this exact JSON envelope and call no tools:

\`\`\`json
{ "dispatched": [{ "status": "skipped", "proposalCount": 0, "detail": "no flagged pipelines" }], "summary": { "total": 0, "sent": 0, "skipped": 1, "failed": 0 } }
\`\`\`
`;
  }

  // ── Provider selection ──────────────────────────────────────────
  let target;
  if (slackChannel) {
    target = {
      provider: 'slack',
      postTool: 'slack_post_message',
      channelId: slackChannel,
      mentionsRaw: process.env.SLACK_MENTIONS || '[]',
    };
  } else if (larkReceiveId) {
    target = {
      provider: 'lark',
      postTool: 'lark_send_message',
      channelId: larkReceiveId,
      mentionsRaw: process.env.LARK_MENTIONS || '[]',
    };
  } else {
    throw new Error(
      'pipeline-supervisor has proposals to post but no destination configured. ' +
      'Go to Project Settings → ENV and set ONE of:\n' +
      ' - SLACK_CHANNEL=#your-channel (uses connected Slack integration)\n' +
      ' - LARK_RECEIVE_ID=oc_xxxxxxxx (uses connected Lark integration)'
    );
  }
  const { provider, postTool, channelId, mentionsRaw } = target;

  const mentions = parseMentions(mentionsRaw);

  // Under two days, speak in hours (singular for exactly 1); otherwise round to days.
  const hourUnit = Number(lookbackHours) === 1 ? 'hour' : 'hours';
  const windowLabel = lookbackHours < 48
    ? `the past ${lookbackHours} ${hourUnit}`
    : `the past ${Math.round(lookbackHours / 24)} days`;

  const writeGuide = provider === 'slack'
    ? `# How to post it — a Slack review card
Post ONCE with \`slack_post_message({ channel, text, blocks })\`. \`text\` = a one-line fallback. \`blocks\` = real Block Kit objects only:
1. \`header\` — { "type": "header", "text": { "type": "plain_text", "text": "🛠️ Pipeline Supervisor — ${windowLabel}", "emoji": true } }
2. \`context\` — one line: how many pipelines flagged, scanned over ${windowLabel}.
3. Per proposal — a \`section\` then a small \`context\`:
{ "type": "divider" }
{ "type": "section", "text": { "type": "mrkdwn", "text": "*<pipeline>* — <problem>\\n*Suggestion (${'`'}<changeKind label>${'`'}):* <suggestion>" } }
{ "type": "context", "elements": [{ "type": "mrkdwn", "text": "↳ <evidence — the concrete metric>" }] }
4. final \`context\` — make clear these are PROPOSALS for a human to review and apply; the supervisor did NOT change anything.
- header text is plain_text; section & context are mrkdwn.
- Real Block Kit types only (header / section / divider / context).`
    : `# How to write it — a Lark review note, talk like a teammate
Post ONCE with \`lark_send_message({ receive_id, msg_type:"text", content })\`. Open with one sentence about ${windowLabel} and how many pipelines you flagged. Then, per proposal: the pipeline name, the problem, your suggested change (say which of the four moves it is), and the evidence number. End by making clear these are PROPOSALS for a human to review and apply — the supervisor changed nothing. No form blocks; sound like a person.`;

  const mentionsLine = mentions.length > 0
    ? `Prepend these mentions: ${JSON.stringify(mentions.join(' '))}`
    : '';
  const labelLines = Object.entries(CHANGE_KIND_LABEL)
    .map(([kind, label]) => `- ${kind} → ${label}`)
    .join('\n');
  // The example record carries the provider-specific message handle.
  const messageRefField = provider === 'slack'
    ? ',\n "messageTs": "1716109330.555"'
    : ',\n "messageId": "om_xxxxx"';

  return `You are the notify node of pipeline-supervisor. Post ONE chat message with the **${postTool}** tool summarizing the improvement proposals for a human to review.

# Destination
Channel/receive_id: ${JSON.stringify(channelId)} (${provider}). Post with \`${postTool}\`.
${mentionsLine}

# Framing (important)
These are PROPOSALS. The supervisor read other pipelines' run history and is SUGGESTING changes a human will review and apply by hand. Do NOT imply anything was already changed. Each card line should read like "Pipeline X failed 4/5 runs on step Y — I'd suggest <change>. (review)".

# changeKind → human label
${labelLines}

${writeGuide}

# Output (outputSchema-enforced)
Return ONE record for the message you posted (status "sent"), or "failed" with a \`detail\`. \`proposalCount\` = number of proposals in the card. \`recipient\` records where it went.

\`\`\`json
{
"dispatched": [
{ "status": "sent", "recipient": { "kind": "channel", "id": ${JSON.stringify(channelId)} }, "proposalCount": ${proposals.length}${messageRefField} }
],
"summary": { "total": 1, "sent": 1, "skipped": 0, "failed": 0 }
}
\`\`\`

# Proposals to post

\`\`\`json
${JSON.stringify(proposals, null, 2)}
\`\`\`

# Rules
- ONE message → ONE \`sent\` record.
- Don't invent pipelines, metrics, or suggestions — only what's in the data above.
- Keep it tight. If there are 2 proposals, a short card is the right answer.
`;
};
156
+
157
/**
 * Node definition for `notify`: an LLM node equipped with the CHAT_NOTIFY
 * skill, prompted by NOTIFY_PROMPT and validated against NotifyOutputSchema.
 */
export const notifyNode = {
  name: 'notify',
  skills: [SKILLS.CHAT_NOTIFY],
  outputSchema: NotifyOutputSchema,
  prompt: NOTIFY_PROMPT,
};
@@ -0,0 +1,91 @@
1
+ /**
2
+ * propose_improvements — LLM. Reads the per-pipeline health summary from
3
+ * scan_pipelines and emits ONE concrete, reviewable improvement proposal
4
+ * per FLAGGED pipeline. No tools — everything it needs is inlined as JSON.
5
+ *
6
+ * This is the "propose" half of READ → PROPOSE → NOTIFY. It does NOT touch
7
+ * any other workflow's graph. It only describes a change a human can apply.
8
+ *
9
+ * ─────────────────────────────────────────────────────────────────────────
10
+ * TODO (future, DELIBERATELY NOT IMPLEMENTED in v1 — the safe L3 boundary):
11
+ * Auto-PATCH the target pipeline's graph. When we promote this template
12
+ * from "notify only" to "self-iterating", a new node AFTER human approval
13
+ * would call the workflow-update API to actually apply an accepted
14
+ * `changeKind` (e.g. insert a test-gate node, edit a prompt). That step
15
+ * must be gated behind explicit human approval + snapshot/dry-run/verify/
16
+ * rollback (see MEMORY: app-upgrade-strategy-agentic). v1 stops at the
17
+ * proposal so a human reviews and applies the change by hand.
18
+ * ─────────────────────────────────────────────────────────────────────────
19
+ */
20
+
21
+ import { z } from '@zibby/core';
22
+
23
/**
 * One improvement proposal for a single flagged pipeline.
 *
 * `changeKind` is restricted to the moves the notify card knows how to label.
 * `evidence` and `confidence` are nullish (not optional): this schema
 * validates LLM output, and the model tends to emit an explicit `null`
 * rather than omitting a key — `.optional()` would reject that `null` and
 * fail the node.
 */
const ProposalSchema = z.object({
  workflowType: z.string(),
  problem: z.string(),
  changeKind: z.enum([
    'add_test_gate',
    'tweak_prompt',
    'add_human_approval_gate',
    'drop_redundant_step',
    'other',
  ]),
  suggestion: z.string(),
  evidence: z.string().nullish(),
  // 0..1 — how clean the signal behind the proposal is.
  confidence: z.number().min(0).max(1).nullish(),
});
37
+
38
// Output envelope of propose_improvements: a (possibly empty) list of proposals.
const ProposeOutputSchema = z.object({ proposals: z.array(ProposalSchema) });
41
+
42
// Static instruction text for the propose_improvements LLM node. Assembled
// from its sections, separated by blank lines; PROPOSE_PROMPT appends the
// per-run context and pipeline data after it.
const GUIDE = [
  `You are the propose_improvements node of pipeline-supervisor — a workflow that watches a Zibby project's OTHER pipelines and proposes concrete fixes. This is "Zibby managing Zibby."`,

  `You are given a per-pipeline health summary as JSON below. Each entry is one pipeline (a workflow type) with its recent run stats: total / failed / succeeded / running, failRate, medianDurationMs, a worstRun example, and whether it's \`flagged\` (+ \`flagReason\`).`,

  `# Your job
For EACH pipeline where \`flagged === true\`, emit ONE proposal. Do NOT propose anything for un-flagged pipelines. If nothing is flagged, return an empty \`proposals\` array.`,

  `# Pick ONE concrete change per problem — \`changeKind\` must be one of:
- **add_test_gate** — the pipeline ships broken output / fails late. Propose inserting a validation/test step that catches the failure earlier (before the expensive/irreversible step).
- **tweak_prompt** — an LLM node is making the same mistake repeatedly (e.g. wrong format, hallucinated tool call). Propose a specific prompt change.
- **add_human_approval_gate** — the pipeline takes a risky/irreversible action and keeps getting it wrong. Propose a human-approval gate before that step.
- **drop_redundant_step** — a step adds latency or failure surface with no value (e.g. an LLM round-trip that adds no judgment). Propose dropping it. Use this for clear "slow outlier" flags.
- **other** — only when none of the above fit; explain in \`suggestion\`.`,

  `# Each proposal must be:
- **Specific**: name the pipeline, the symptom, and the exact change. Not "improve reliability" — instead "add a JSON-schema validation gate after the 'generate' node; 3 of the last 4 runs failed there with a malformed-output error."
- **Evidence-backed**: put the concrete number / worstRun detail in \`evidence\` ("failRate 75% over 4 runs; worst run exec_abc failed on step 'deploy'"). Pull it straight from the data — never invent a metric.
- **Reviewable, not auto-applied**: phrase \`suggestion\` as a recommendation a human will read and apply. You are NOT editing any graph.
- **confidence** reflects how clean the signal is. A pipeline failing 4/4 on the same step → 0.9. A borderline slow outlier → 0.5.`,

  `# Rules
- ONE proposal per flagged pipeline. No duplicates.
- Only use pipelines/numbers present in the data block. Don't invent pipelines, steps, or error messages.
- \`problem\` is one sentence (the symptom). \`suggestion\` is one-to-three sentences (the fix).
- Temperature 0. This is analysis, not creative writing.
- Call NO tools — you have everything you need below.`,
].join('\n\n');
68
+
69
/**
 * Build the prompt for propose_improvements: the static GUIDE followed by a
 * short run context and the full per-pipeline health summary as JSON.
 *
 * @param {object} [state] - Workflow state; reads `state.scan_pipelines`.
 * @returns {string} Prompt text for the LLM.
 */
const PROPOSE_PROMPT = (state = {}) => {
  const scan = state?.scan_pipelines;
  const pipelines = scan?.pipelines || [];
  const flaggedCount = pipelines.filter((pipeline) => pipeline.flagged).length;
  const projectLabel = scan?.projectId || '(unknown)';
  const lookbackLabel = scan?.lookbackHours || '?';

  // Line-by-line assembly; the trailing '' yields the final newline.
  return [
    GUIDE,
    '',
    '## Context for this run',
    `- Scanned project: ${projectLabel}`,
    `- Lookback: ${lookbackLabel}h`,
    `- Pipelines analyzed: ${pipelines.length}; flagged as problems: ${flaggedCount}`,
    '',
    '## Pipeline health summary (propose ONLY for flagged === true)',
    '',
    '```json',
    JSON.stringify(pipelines, null, 2),
    '```',
    '',
  ].join('\n');
};
+
87
/**
 * Node definition for `propose_improvements`: a tool-less LLM node prompted
 * by PROPOSE_PROMPT and validated against ProposeOutputSchema.
 */
export const proposeNode = {
  name: 'propose_improvements',
  outputSchema: ProposeOutputSchema,
  prompt: PROPOSE_PROMPT,
};
@@ -0,0 +1,316 @@
1
+ /**
2
+ * scan_pipelines — DETERMINISTIC. Reads the project's recent executions
3
+ * across ALL pipelines via the Zibby REST API, then rolls them up per
4
+ * pipeline (workflow type) into a health summary the proposer reasons over.
5
+ *
6
+ * ── How the supervisor reads OTHER pipelines' results (the chosen mechanism) ──
7
+ *
8
+ * Mechanism: a DIRECT, authed HTTPS call to the Zibby REST API
9
+ * GET {ZIBBY_ACCOUNT_API_URL|api-prod.zibby.app}/executions?projectId=<id>&limit=<n>
10
+ * (the same `listExecutions` route the dashboard + MCP server use), carrying
11
+ * Authorization: Bearer <ZIBBY_PAT>
12
+ *
13
+ * Auth — why a user PAT, and NOT the injected PROJECT_API_TOKEN:
14
+ * The executor injects PROJECT_API_TOKEN (a `zby_*` PROJECT token) into
15
+ * every Fargate task. That token authenticates as the PROJECT
16
+ * (authType:'project') and carries NO userId. But every cross-pipeline
17
+ * READ route — /executions, /jobs/:projectId, /all/:projectId — pulls
18
+ * `userId` out of the authorizer context and 401/403s when it's absent
19
+ * (executions.js listExecutions: `if (!userId) return 401`;
20
+ * workflow-logs.js: verifyProjectAccess(userId, …)). The remote MCP server
21
+ * (mcp-server.js) goes further and validates a `zby_pat_*` PAT specifically.
22
+ * So the project token literally cannot read these routes.
23
+ *
24
+ * The credential that works is a USER personal access token (zby_pat_…),
25
+ * supplied at deploy time as ZIBBY_PAT in the ENV tab. It resolves to a
26
+ * userId via the authorizer's PAT path, and verifyProjectAccess then
27
+ * confirms that user can see the supervised project. This is the same
28
+ * credential class the MCP server requires, so the auth model is identical
29
+ * whether you reach the data via REST (this node) or via the MCP tools
30
+ * (zibby_list_executions / zibby_get_all_workflow_logs).
31
+ *
32
+ * Why REST over the MCP tools:
33
+ * - No MCP client to stand up inside the workflow process; one fetch().
34
+ * - The MCP `zibby_list_executions` tool is a thin proxy to THIS SAME
35
+ * REST route, so we lose nothing by calling it directly.
36
+ * - Deterministic + free: no LLM round-trip to drive a tool call for a
37
+ * pure data pull.
38
+ *
39
+ * Per-pipeline rollup:
40
+ * - "pipeline" = one workflow type/slug in the project. We group the
41
+ * recent executions by `workflowType` and compute total / failed /
42
+ * succeeded / running, failRate, and a median completed-run duration.
43
+ * - A pipeline is `flagged` when failRate >= minFailRate (with >= 3 terminal runs
44
+ * so a single fluke doesn't trip it) OR it's a clear "slow" outlier.
45
+ * - worstRun cites the single worst recent run so the proposer has a
46
+ * concrete example to point at. failedStep/errorSummary are best-effort
47
+ * — populated from whatever the execution row carries; absent is fine.
48
+ */
49
+
50
+ import { z } from 'zod';
51
+
52
// The single run cited as a concrete example for a pipeline; every field is
// best-effort and may be absent.
const WorstRunSchema = z.object({
  executionId: z.string().optional(),
  status: z.string().optional(),
  durationMs: z.number().optional(),
  failedStep: z.string().optional(),
  errorSummary: z.string().optional(),
  startedAt: z.string().optional(),
});

/**
 * Health rollup for one pipeline (workflow type): run counts, fail rate,
 * median duration, an example worst run, and the flag verdict.
 */
const PipelineHealthSchema = z.object({
  workflowType: z.string(),
  workflowUuid: z.string().optional(),
  total: z.number(),
  failed: z.number(),
  succeeded: z.number(),
  running: z.number(),
  failRate: z.number(),
  medianDurationMs: z.number().optional(),
  worstRun: WorstRunSchema.optional(),
  flagged: z.boolean(),
  flagReason: z.string().optional(),
});
72
+
73
/**
 * Output of scan_pipelines: which project was scanned, over what window and
 * when, how many executions were considered, and the per-pipeline rollups.
 */
const ScanOutputSchema = z.object({
  projectId: z.string(),
  lookbackHours: z.number(),
  scannedAt: z.string(),
  totalExecutions: z.number(),
  pipelines: PipelineHealthSchema.array(),
});
80
+
81
// Execution statuses, bucketed for the rollup: FAILED_STATUSES count against
// a pipeline, SUCCEEDED_STATUSES are the clean path, RUNNING_STATUSES are
// still in flight.
const FAILED_STATUSES = new Set([
  'failed',
  'cancelled',
  'blocked',
  'insufficient_context',
]);
const SUCCEEDED_STATUSES = new Set(['completed']);
const RUNNING_STATUSES = new Set([
  'running',
  'queued',
  'starting',
  'uploading',
]);
86
+
87
/**
 * Resolve the base URL of the Zibby account API, without a trailing slash.
 *
 * Precedence: ZIBBY_ACCOUNT_API_URL (explicit override) → localhost when
 * ZIBBY_ENV is 'local' → ZIBBY_PROD_ACCOUNT_API_URL → the public prod host.
 * Every env-supplied value is normalized the same way so callers can append
 * `/path` without producing `//path`.
 *
 * @returns {string} Base URL with trailing slashes removed.
 */
function getAccountApiUrl() {
  const stripTrailingSlashes = (url) => url.replace(/\/+$/, '');

  const explicit = process.env.ZIBBY_ACCOUNT_API_URL;
  if (explicit) return stripTrailingSlashes(explicit);

  const env = process.env.ZIBBY_ENV || 'prod';
  if (env === 'local') return 'http://localhost:3001';

  return stripTrailingSlashes(
    process.env.ZIBBY_PROD_ACCOUNT_API_URL || 'https://api-prod.zibby.app'
  );
}
94
+
95
/**
 * Median of the finite numbers in `nums`; non-numbers, NaN and ±Infinity are
 * ignored. For an even count the two middle values are averaged and rounded
 * to the nearest integer.
 *
 * @param {Array<*>} nums - Candidate values.
 * @returns {number|undefined} The median, or undefined when no finite number exists.
 */
function median(nums) {
  const sorted = nums
    .filter((value) => Number.isFinite(value))
    .sort((left, right) => left - right);
  const count = sorted.length;
  if (count === 0) return undefined;

  const upper = Math.floor(count / 2);
  if (count % 2 === 1) return sorted[upper];
  return Math.round((sorted[upper - 1] + sorted[upper]) / 2);
}
101
+
102
/**
 * Best-effort run duration in milliseconds.
 *
 * Execution rows don't carry a uniform `durationMs`: use it when it is a
 * valid (finite, non-negative) number, otherwise derive the duration from the
 * start/end timestamps when both exist. The same validity rule applies to
 * both paths, so a bogus explicit value falls back to the timestamps instead
 * of skewing medians and slowest-run sorting.
 *
 * @param {object} exec - Execution row.
 * @returns {number|undefined} Duration in ms, or undefined when unknown.
 */
function durationMsOf(exec) {
  const isValidDuration = (ms) => Number.isFinite(ms) && ms >= 0;

  if (isValidDuration(exec.durationMs)) return exec.durationMs;

  const start = exec.startedAt || exec.createdAt;
  const end = exec.completedAt || exec.finishedAt || exec.updatedAt;
  if (!start || !end) return undefined;

  const elapsed = new Date(end).getTime() - new Date(start).getTime();
  return isValidDuration(elapsed) ? elapsed : undefined;
}
114
+
115
/**
 * Best-effort "what step failed / why" for an execution row. Rows vary, so
 * this surfaces whatever is present without inventing anything.
 *
 * The error may be a string or a structured object. Objects are rendered via
 * their `message` (or as JSON) rather than the useless "[object Object]".
 * The summary is capped at 280 characters.
 *
 * @param {object} exec - Execution row.
 * @returns {{failedStep: (*|undefined), errorSummary: (string|undefined)}}
 */
function failureDetail(exec) {
  const describeError = (raw) => {
    if (!raw) return '';
    if (typeof raw !== 'object') return String(raw);
    if (typeof raw.message === 'string' && raw.message !== '') return raw.message;
    try {
      return JSON.stringify(raw) ?? '';
    } catch {
      // Circular or otherwise unserializable — fall back to the default rendering.
      return String(raw);
    }
  };

  const rawError = exec.error || exec.errorMessage || exec.failureReason;
  return {
    failedStep: exec.failedStep || exec.currentStep || exec.lastNode || undefined,
    errorSummary: describeError(rawError).slice(0, 280) || undefined,
  };
}
124
+
125
/**
 * Epoch-ms timestamp used to order executions by recency. Uses the same
 * `createdAt || startedAt` precedence as the lookback window filter, and maps
 * missing/unparseable timestamps to 0 so sort comparators never see NaN.
 */
function recencyMs(exec) {
  const t = new Date(exec.createdAt || exec.startedAt || 0).getTime();
  return Number.isFinite(t) ? t : 0;
}

/**
 * Roll one pipeline's runs up into a health summary.
 *
 * @param {string} workflowType - Pipeline identifier.
 * @param {object[]} runs - Execution rows of that pipeline inside the window.
 * @param {number} minFailRate - Fail-rate threshold (0..1) for flagging.
 * @returns {object} Summary in the shape of one `pipelines` entry.
 */
function rollUpPipeline(workflowType, runs, minFailRate) {
  const failed = runs.filter((r) => FAILED_STATUSES.has(r.status));
  const succeeded = runs.filter((r) => SUCCEEDED_STATUSES.has(r.status));
  const running = runs.filter((r) => RUNNING_STATUSES.has(r.status));

  // Fail rate over TERMINAL runs only — in-flight runs aren't a verdict yet.
  const terminal = failed.length + succeeded.length;
  const failRate = terminal > 0 ? failed.length / terminal : 0;
  const medianDurationMs = median(
    succeeded.map(durationMsOf).filter((d) => typeof d === 'number')
  );

  // Worst run = the most recent failure if any, else the slowest succeeded run.
  let worstRun;
  const worstFail = [...failed].sort((a, b) => recencyMs(b) - recencyMs(a))[0];
  if (worstFail) {
    worstRun = {
      executionId: worstFail.executionId,
      status: worstFail.status,
      durationMs: durationMsOf(worstFail),
      startedAt: worstFail.createdAt || worstFail.startedAt,
      ...failureDetail(worstFail),
    };
  } else {
    const slow = [...succeeded]
      .sort((a, b) => (durationMsOf(b) || 0) - (durationMsOf(a) || 0))[0];
    if (slow) {
      worstRun = {
        executionId: slow.executionId,
        status: slow.status,
        durationMs: durationMsOf(slow),
        startedAt: slow.createdAt || slow.startedAt,
      };
    }
  }

  // Flag: enough terminal runs AND failRate over threshold. The >= 3 guard
  // keeps a single failed run from flagging a pipeline that's otherwise fine.
  let flagged = false;
  let flagReason;
  if (terminal >= 3 && failRate >= minFailRate) {
    flagged = true;
    flagReason = `failRate ${(failRate * 100).toFixed(0)}% over ${terminal} terminal run(s) (≥ ${(minFailRate * 100).toFixed(0)}% threshold)`;
  }

  return {
    workflowType,
    total: runs.length,
    failed: failed.length,
    succeeded: succeeded.length,
    running: running.length,
    failRate: Number(failRate.toFixed(3)),
    medianDurationMs,
    worstRun,
    flagged,
    flagReason,
  };
}

/**
 * Node definition for `scan_pipelines` (deterministic, no LLM).
 *
 * `execute` pulls the project's recent executions from the REST API with the
 * user PAT, keeps those inside the lookback window (and matching the optional
 * type filter), groups them by `workflowType`, and returns one health summary
 * per pipeline — flagged first, capped at `maxPipelines`.
 *
 * State inputs (all optional): `lookbackHours` (default 24), `minFailRate`
 * (default 0.4), `maxPipelines` (default 25), `targetWorkflowTypes`
 * (case-insensitive substring filters).
 *
 * Throws when no project id or PAT is configured, or when the API responds
 * with a non-2xx status.
 */
export const scanPipelinesNode = {
  name: 'scan_pipelines',
  outputSchema: ScanOutputSchema,
  // 2 min — a single paginated /executions pull is usually <2s; headroom
  // for a large project's history + transient API slowness.
  timeout: 2 * 60 * 1000,
  execute: async (context) => {
    // Accept either a context with a state store or a plain state object.
    const state = (context?.state && typeof context.state.getAll === 'function')
      ? context.state.getAll()
      : context;

    const lookbackHours = Number(state?.lookbackHours) || 24;
    const minFailRate = typeof state?.minFailRate === 'number' ? state.minFailRate : 0.4;
    const maxPipelines = Number(state?.maxPipelines) || 25;
    const filters = Array.isArray(state?.targetWorkflowTypes)
      ? state.targetWorkflowTypes.map((s) => String(s).toLowerCase())
      : null;

    // Supervised project: explicit override, else the running project.
    const projectId = process.env.SUPERVISOR_PROJECT_ID || process.env.PROJECT_ID;
    const pat = process.env.ZIBBY_PAT || process.env.ZIBBY_USER_TOKEN;

    if (!projectId) {
      throw new Error(
        'pipeline-supervisor: no project to supervise. PROJECT_ID is injected by the ' +
        'executor; set SUPERVISOR_PROJECT_ID in the ENV tab to point at a different project.'
      );
    }
    if (!pat) {
      throw new Error(
        'pipeline-supervisor: ZIBBY_PAT is not set. The supervisor reads OTHER pipelines\' ' +
        'executions via the Zibby REST API, which requires a USER personal access token ' +
        '(zby_pat_…). The Fargate-injected PROJECT_API_TOKEN is a project token and the ' +
        '/executions route rejects it (no userId). Create a PAT in the dashboard and set it ' +
        'as ZIBBY_PAT in Project Settings → ENV.'
      );
    }

    const base = getAccountApiUrl();
    // limit=200 is the API ceiling; one page covers lookback for any realistic
    // project. We post-filter by lookbackHours below rather than relying on a
    // server-side time filter the route doesn't expose.
    const pageLimit = 200;
    const url = `${base}/executions?projectId=${encodeURIComponent(projectId)}&limit=${pageLimit}`;
    console.log(`Scanning executions: ${url}`);
    console.log(`Lookback: ${lookbackHours}h · minFailRate: ${minFailRate} · maxPipelines: ${maxPipelines}`);

    const res = await fetch(url, {
      method: 'GET',
      headers: { Authorization: `Bearer ${pat}` },
    });

    if (!res.ok) {
      const body = await res.text().catch(() => '');
      if (res.status === 401 || res.status === 403) {
        throw new Error(
          `pipeline-supervisor: ${res.status} reading /executions. ZIBBY_PAT is invalid, ` +
          `expired, or its owner can't access project ${projectId}. ${body.slice(0, 200)}`
        );
      }
      throw new Error(`pipeline-supervisor: /executions returned ${res.status}: ${body.slice(0, 300)}`);
    }

    // Best-effort parse: an unexpected body still yields an empty scan, but it
    // is logged so an "all green" run is not mistaken for a healthy project.
    let all = [];
    try {
      const payload = await res.json();
      if (Array.isArray(payload?.executions)) {
        all = payload.executions;
      } else {
        console.warn('pipeline-supervisor: /executions response has no `executions` array; treating as no executions.');
      }
    } catch (err) {
      console.warn(`pipeline-supervisor: /executions returned a non-JSON body (${err?.message || err}); treating as no executions.`);
    }

    if (all.length >= pageLimit) {
      // A full page means older runs inside the window may have been cut off.
      console.warn(
        `Fetched a full page of ${all.length} execution(s); older runs inside the ` +
        `${lookbackHours}h window may be missing from this scan.`
      );
    }

    // Window + name filter.
    const cutoff = Date.now() - lookbackHours * 3600 * 1000;
    const inWindow = all.filter((e) => {
      const t = new Date(e.createdAt || e.startedAt || 0).getTime();
      return Number.isFinite(t) && t >= cutoff;
    });
    const considered = filters
      ? inWindow.filter((e) => {
        const wt = String(e.workflowType || '').toLowerCase();
        return filters.some((f) => wt.includes(f));
      })
      : inWindow;

    console.log(
      `Fetched ${all.length} execution(s); ${inWindow.length} in the last ${lookbackHours}h` +
      `${filters ? `, ${considered.length} after type filter` : ''}.`
    );

    // ── Group by pipeline (workflowType) ──────────────────────────────
    const byPipeline = new Map();
    for (const e of considered) {
      const wt = e.workflowType || '(unknown)';
      if (!byPipeline.has(wt)) byPipeline.set(wt, []);
      byPipeline.get(wt).push(e);
    }

    let pipelines = [];
    for (const [workflowType, runs] of byPipeline.entries()) {
      pipelines.push(rollUpPipeline(workflowType, runs, minFailRate));
    }

    // ── Slow-outlier flag (cross-pipeline) ────────────────────────────
    // A pipeline whose median run is > 3× the median-of-medians is "slow",
    // even if it's not failing. Only meaningful with a few pipelines that
    // actually have durations.
    const meds = pipelines.map((p) => p.medianDurationMs).filter((d) => typeof d === 'number');
    const globalMed = median(meds);
    if (globalMed && globalMed > 0) {
      for (const p of pipelines) {
        if (!p.flagged && typeof p.medianDurationMs === 'number' && p.medianDurationMs > globalMed * 3) {
          p.flagged = true;
          p.flagReason = `median run ${(p.medianDurationMs / 1000).toFixed(0)}s is >3× the project median (${(globalMed / 1000).toFixed(0)}s) — slow outlier`;
        }
      }
    }

    // Flagged first, then worst failRate, then most runs. Cap to maxPipelines.
    pipelines.sort((a, b) =>
      (Number(b.flagged) - Number(a.flagged)) ||
      (b.failRate - a.failRate) ||
      (b.total - a.total)
    );
    pipelines = pipelines.slice(0, maxPipelines);

    const flaggedPipelines = pipelines.filter((p) => p.flagged);
    console.log(`Rolled up ${pipelines.length} pipeline(s); ${flaggedPipelines.length} flagged.`);
    for (const p of flaggedPipelines) {
      console.log(` ⚠ ${p.workflowType}: ${p.flagReason}`);
    }

    return {
      projectId,
      lookbackHours,
      scannedAt: new Date().toISOString(),
      totalExecutions: considered.length,
      pipelines,
    };
  },
};
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "pipeline-supervisor",
3
+ "version": "1.0.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "description": "Zibby managing Zibby — a scheduled supervisor that scans the project's other pipelines, finds the failing/slow ones, and posts human-reviewable improvement proposals to Slack or Lark. Read + propose + notify only (v1 never edits other workflows).",
7
+ "main": "graph.mjs",
8
+ "scripts": {
9
+ "test": "vitest run"
10
+ },
11
+ "dependencies": {
12
+ "@zibby/core": "^0.5.1",
13
+ "@zibby/skills": "^0.1.26",
14
+ "zod": "^3.23.0"
15
+ },
16
+ "devDependencies": {
17
+ "vitest": "^2.1.5"
18
+ }
19
+ }