@zibby/workflow-templates 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/browser-test-automation/icon.png +0 -0
- package/code-analysis/icon.png +0 -0
- package/generate-test-cases/icon.png +0 -0
- package/index.js +351 -1
- package/notify-lark/icon.png +0 -0
- package/notify-notion/icon.png +0 -0
- package/notify-slack/icon.png +0 -0
- package/package.json +4 -1
- package/pipeline-supervisor/README.md +51 -0
- package/pipeline-supervisor/graph.mjs +75 -0
- package/pipeline-supervisor/icon.png +0 -0
- package/pipeline-supervisor/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +1 -0
- package/pipeline-supervisor/nodes/notify-node.js +162 -0
- package/pipeline-supervisor/nodes/propose-node.js +91 -0
- package/pipeline-supervisor/nodes/scan-pipelines-node.js +316 -0
- package/pipeline-supervisor/package.json +19 -0
- package/pipeline-supervisor/state.js +151 -0
- package/sentry-triage/icon.png +0 -0
|
Binary file
|
package/code-analysis/icon.png
CHANGED
|
Binary file
|
|
Binary file
|
package/index.js
CHANGED
|
@@ -387,7 +387,357 @@ export const TEMPLATES = {
|
|
|
387
387
|
'Post the digest to our #leadership Lark group + #eng Slack channel',
|
|
388
388
|
],
|
|
389
389
|
},
|
|
390
|
-
}
|
|
390
|
+
},
|
|
391
|
+
|
|
392
|
+
// ── pipeline-supervisor: Zibby managing Zibby (read + propose + notify) ─
|
|
393
|
+
'pipeline-supervisor': {
|
|
394
|
+
name: 'pipeline-supervisor',
|
|
395
|
+
displayName: 'Pipeline Supervisor',
|
|
396
|
+
description: 'Zibby managing Zibby — a scheduled supervisor that scans the project\'s other pipelines, flags the ones failing or running slow, and posts human-reviewable improvement proposals (add a test gate / tweak a prompt / add an approval gate / drop a redundant step) to Slack or Lark. Read + propose + notify only; it never edits another workflow.',
|
|
397
|
+
path: join(__dirname, 'pipeline-supervisor'),
|
|
398
|
+
defaultSlug: 'pipeline-supervisor',
|
|
399
|
+
deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
|
|
400
|
+
features: [
|
|
401
|
+
'3-node graph: scan_pipelines → propose_improvements → notify',
|
|
402
|
+
'Reads other pipelines\' executions via the Zibby REST API (user-PAT authed)',
|
|
403
|
+
'Per-pipeline health rollup: fail-rate, median duration, worst recent run',
|
|
404
|
+
'LLM proposes ONE concrete, evidence-backed change per flagged pipeline',
|
|
405
|
+
'Posts a review card to Slack OR Lark (chat_notify OR-group)',
|
|
406
|
+
'Read + propose + notify only — never edits another workflow\'s graph (safe L3)',
|
|
407
|
+
'Cron-friendly: e.g. daily, default lookbackHours=24 / minFailRate=0.4',
|
|
408
|
+
],
|
|
409
|
+
marketplace: {
|
|
410
|
+
slug: 'pipeline-supervisor',
|
|
411
|
+
tagline: 'Zibby watching Zibby — find your flaky pipelines and propose the fix.',
|
|
412
|
+
iconPrompt: [
|
|
413
|
+
'Hand-painted storybook illustration in a warm gouache style with soft brushwork and gentle painterly texture, featuring a friendly round robot-supervisor mascot character with two big kindly eyes and a rosy blush, wearing a tiny site-foreman hard hat in soft violet (#7553FF), standing on a little mint-green platform and holding a small glowing clipboard with a checkmark.',
|
|
414
|
+
'In front of the mascot float three small rounded pipeline-segment shapes connected left-to-right like a little assembly line: the first two glow calm green, the third glows a gentle warning amber with a tiny magnifier-free spotlight on it — reinforcing the "watch the pipelines, spot the broken one, suggest a fix" idea WITHOUT any literal magnifying glass.',
|
|
415
|
+
'Background is a soft sunrise gradient of pale peach at the top blending through buttercream into a wash of dusty lavender at the base, tying the warm scene to the violet hard hat; a few small fluffy pastel clouds float in for friendliness.',
|
|
416
|
+
'Centered composition with the robot-supervisor as the focal point, the three pipeline segments arrayed below-front; plenty of breathing room so the silhouette reads at 64×64.',
|
|
417
|
+
'Mood is warm, reassuring, helpful — a friendly watchful helper, NOT tactical or corporate or alarming.',
|
|
418
|
+
'Soft rounded square 1024×1024 canvas with a subtle paper-grain texture.',
|
|
419
|
+
'NO text, NO letters, NO photo-realism, NO sleek 3D render, NO magnifying glass, NO speech bubbles, NO dark navy or near-black backgrounds, NO literal Slack / Lark / Zibby wordmark.',
|
|
420
|
+
].join('\n'),
|
|
421
|
+
tags: ['On-call', 'Bug Triage', 'Notifications'],
|
|
422
|
+
capabilities: [
|
|
423
|
+
'Scheduled scan of every pipeline\'s recent run history',
|
|
424
|
+
'Flags pipelines by failure rate and slow-outlier duration',
|
|
425
|
+
'LLM proposes a specific, evidence-backed improvement per problem pipeline',
|
|
426
|
+
'Posts a review card to Slack or Lark — whichever your project has connected',
|
|
427
|
+
'Proposal-only: a human reviews and applies; the supervisor changes nothing',
|
|
428
|
+
],
|
|
429
|
+
conversationStarters: [
|
|
430
|
+
'Scan my pipelines from the last 24h and flag the ones failing 40%+ of runs',
|
|
431
|
+
'Which of my workflows is the flakiest, and what should I change?',
|
|
432
|
+
'Propose a fix for any pipeline that keeps failing on the same step',
|
|
433
|
+
'Only supervise my deploy pipelines and post proposals to #eng',
|
|
434
|
+
],
|
|
435
|
+
},
|
|
436
|
+
},
|
|
437
|
+
|
|
438
|
+
// ── ticket-triage: tracker-neutral triage building block ──────────
|
|
439
|
+
'ticket-triage': {
|
|
440
|
+
name: 'ticket-triage',
|
|
441
|
+
displayName: 'Ticket Triage',
|
|
442
|
+
description: 'Tracker-neutral triage building block — LLM-classifies one ticket (from any tracker) into a severity (CRITICAL…NOISE), a shouldAutofix decision, and a human summary. The first block of the bug-autofix pipeline; usable on its own.',
|
|
443
|
+
path: join(__dirname, 'ticket-triage'),
|
|
444
|
+
defaultSlug: 'ticket-triage',
|
|
445
|
+
deps: { zod: '^3.23.0' },
|
|
446
|
+
features: [
|
|
447
|
+
'Single LLM node: classify → { severity, shouldAutofix, summary }',
|
|
448
|
+
'Tracker-neutral input (Jira today; GitHub / Linear are extension points)',
|
|
449
|
+
'Severity rubric adapted from sentry-triage, with auditable reasoning',
|
|
450
|
+
'Tunable SEVERITY_THRESHOLD + free-form AUTOFIX_RULES per deploy',
|
|
451
|
+
'Designed to be dispatched as a sub-graph by the bug-autofix orchestrator',
|
|
452
|
+
],
|
|
453
|
+
marketplace: {
|
|
454
|
+
slug: 'ticket-triage',
|
|
455
|
+
tagline: 'Triage any ticket: how bad is it, and should a bot fix it?',
|
|
456
|
+
iconPrompt: [
|
|
457
|
+
'A clean, friendly flat-design app icon for "Ticket Triage" — a workflow that sorts incoming tickets by severity.',
|
|
458
|
+
'Subject: a single rounded paper ticket/tag shape (like a luggage tag) centered on the canvas, tilted slightly, with three small colored signal dots stacked along its right edge — one green, one amber, one red — reading top-to-bottom as a severity scale.',
|
|
459
|
+
'Background: a soft rounded square with a gentle teal-to-sky gradient (#2DD4BF → #38BDF8). Subtle long shadow under the ticket for depth.',
|
|
460
|
+
'Style: modern flat illustration with soft gradients, the family of Linear / Height product icons. Crisp at 64×64.',
|
|
461
|
+
'NO text, NO letters, NO photorealism, NO mascot, NO trademarked Jira/Linear logos.',
|
|
462
|
+
].join('\n'),
|
|
463
|
+
tags: ['Bug Triage', 'child-workflow'],
|
|
464
|
+
capabilities: [
|
|
465
|
+
'Classifies one ticket into a severity bucket with auditable reasoning',
|
|
466
|
+
'Decides shouldAutofix — is this concrete enough for an agent to fix?',
|
|
467
|
+
'Tracker-agnostic input; only Jira flows end-to-end in v1',
|
|
468
|
+
'Composable: the bug-autofix orchestrator calls it as a sub-graph',
|
|
469
|
+
],
|
|
470
|
+
conversationStarters: [
|
|
471
|
+
'Triage this bug ticket and tell me if a bot could fix it',
|
|
472
|
+
'How severe is PROJ-123, and should we auto-fix it?',
|
|
473
|
+
'Never mark auth tickets as auto-fixable',
|
|
474
|
+
],
|
|
475
|
+
},
|
|
476
|
+
},
|
|
477
|
+
|
|
478
|
+
// ── code-fix: clone → agent fix + inline test-gate → PR ───────────
|
|
479
|
+
'code-fix': {
|
|
480
|
+
name: 'code-fix',
|
|
481
|
+
displayName: 'Code Fix (clone → fix → PR)',
|
|
482
|
+
description: 'Clones a repo into an isolated workspace, has an agent fix one ticket with an inline test-gate (run the tests, feed failures back for one retry), and opens a PR. Output: { pr_url, branch }. The "do the work" block of the bug-autofix pipeline.',
|
|
483
|
+
path: join(__dirname, 'code-fix'),
|
|
484
|
+
defaultSlug: 'code-fix',
|
|
485
|
+
deps: { zod: '^3.23.0', axios: '^1.6.0' },
|
|
486
|
+
features: [
|
|
487
|
+
'3-node graph: setup (clone) → fix_code (agent + inline test-gate) → create_pr',
|
|
488
|
+
'Agent edits in an isolated Fargate workspace; SKILLS.GIT auto-auth clone',
|
|
489
|
+
'Inline test-gate: run npm test / pytest, feed failures back for ONE retry',
|
|
490
|
+
'Opens a GitHub PR off the pushed branch → { pr_url, branch }',
|
|
491
|
+
'Stops at the PR — a human reviews + merges (no auto-merge, no in-engine approval)',
|
|
492
|
+
'Auto-detects the test command (or override via TEST_COMMAND)',
|
|
493
|
+
],
|
|
494
|
+
marketplace: {
|
|
495
|
+
slug: 'code-fix',
|
|
496
|
+
tagline: 'Hand it a ticket + a repo; get back a tested fix PR.',
|
|
497
|
+
iconPrompt: [
|
|
498
|
+
'A bold, modern app icon for "Code Fix" — a workflow that fixes a bug and opens a pull request.',
|
|
499
|
+
'Subject: a rounded wrench crossed subtly behind a git-branch / pull-request glyph (two nodes joined by a curved line), with a small green checkmark badge in the lower-right indicating tests passed.',
|
|
500
|
+
'Background: a soft rounded square with a deep indigo-to-violet gradient (#4F46E5 → #7C3AED). Soft inner glow behind the glyph.',
|
|
501
|
+
'Style: clean flat-with-depth product illustration, glossy but not skeuomorphic; the family of GitHub / Linear product marks. Crisp at 64×64.',
|
|
502
|
+
'NO text, NO letters, NO photorealism, NO mascot, NO trademarked GitHub/GitLab logos.',
|
|
503
|
+
].join('\n'),
|
|
504
|
+
tags: ['Bug Triage', 'Testing', 'child-workflow'],
|
|
505
|
+
capabilities: [
|
|
506
|
+
'Clones the target repo into an isolated per-run workspace',
|
|
507
|
+
'Agent makes the smallest correct change to fix the ticket',
|
|
508
|
+
'Inline test-gate runs the suite and retries once on failure',
|
|
509
|
+
'Opens a GitHub PR with the diff, test status, and ticket link',
|
|
510
|
+
'Composable: the bug-autofix orchestrator calls it as a sub-graph',
|
|
511
|
+
],
|
|
512
|
+
conversationStarters: [
|
|
513
|
+
'Fix PROJ-123 in the web repo and open a PR',
|
|
514
|
+
'Run the tests after fixing and retry once if they fail',
|
|
515
|
+
'Use pytest -q as the test command',
|
|
516
|
+
],
|
|
517
|
+
},
|
|
518
|
+
},
|
|
519
|
+
|
|
520
|
+
// ── tracker-writeback: transition + comment + chat notify ─────────
|
|
521
|
+
'tracker-writeback': {
|
|
522
|
+
name: 'tracker-writeback',
|
|
523
|
+
displayName: 'Tracker Writeback',
|
|
524
|
+
description: 'Closes the loop after triage/fix — transitions the tracker issue (Jira → In Review when a PR opened), comments the PR link + verdict, and posts a Slack or Lark note. The writeback block of the bug-autofix pipeline (Jira in v1; GitHub / Linear are extension points).',
|
|
525
|
+
path: join(__dirname, 'tracker-writeback'),
|
|
526
|
+
defaultSlug: 'tracker-writeback',
|
|
527
|
+
deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
|
|
528
|
+
features: [
|
|
529
|
+
'Single LLM node with SKILLS.JIRA + SKILLS.CHAT_NOTIFY',
|
|
530
|
+
'Jira: transition (fuzzy status match, reused) + comment the PR link/verdict',
|
|
531
|
+
'PR link rides in a comment (no Jira remote-link tool) — documented v1 fallback',
|
|
532
|
+
'Posts a short Slack OR Lark note (chat_notify, like notify-slack)',
|
|
533
|
+
'Runs on both branches: autofixed (PR up for review) and notify-only',
|
|
534
|
+
'jiraFetch is now exported so the remote-link extension point can reuse it',
|
|
535
|
+
],
|
|
536
|
+
marketplace: {
|
|
537
|
+
slug: 'tracker-writeback',
|
|
538
|
+
tagline: 'Update the ticket, link the PR, ping the channel — done.',
|
|
539
|
+
iconPrompt: [
|
|
540
|
+
'A clean, friendly flat-design app icon for "Tracker Writeback" — a workflow that updates a ticket and notifies a chat channel.',
|
|
541
|
+
'Subject: a rounded ticket/card shape with a small curved "return" arrow looping back into it, and a tiny speech-bubble badge in the upper-right (the chat notification).',
|
|
542
|
+
'Background: a soft rounded square with a warm coral-to-amber gradient (#FB7185 → #FBBF24). Subtle drop shadow for depth.',
|
|
543
|
+
'Style: modern flat illustration with soft gradients, the family of Linear / Slack product icons. Crisp at 64×64.',
|
|
544
|
+
'NO text, NO letters, NO photorealism, NO mascot, NO trademarked Jira/Slack/Lark logos.',
|
|
545
|
+
].join('\n'),
|
|
546
|
+
tags: ['Notifications', 'child-workflow'],
|
|
547
|
+
capabilities: [
|
|
548
|
+
'Transitions the Jira issue to a review/triage status (fuzzy-matched)',
|
|
549
|
+
'Comments the triage summary + the PR link on the ticket',
|
|
550
|
+
'Posts a short Slack or Lark note — whichever the project has connected',
|
|
551
|
+
'Runs on both the autofixed and notify-only branches',
|
|
552
|
+
'Composable: the bug-autofix orchestrator calls it as a sub-graph',
|
|
553
|
+
],
|
|
554
|
+
conversationStarters: [
|
|
555
|
+
'Move PROJ-123 to In Review and comment the PR link',
|
|
556
|
+
'Post to #eng when a fix PR is opened',
|
|
557
|
+
'When a ticket can\'t be auto-fixed, comment why and ping a human',
|
|
558
|
+
],
|
|
559
|
+
},
|
|
560
|
+
},
|
|
561
|
+
|
|
562
|
+
// ── bug-autofix: the ⭐ orchestrator (poll → triage → fix → writeback) ─
|
|
563
|
+
'bug-autofix': {
|
|
564
|
+
name: 'bug-autofix',
|
|
565
|
+
displayName: 'Bug Autofix Pipeline',
|
|
566
|
+
description: 'The composable bug-autofix SDLC pipeline. Polls a tracker, then connects three reusable building blocks via sub-graph dispatch: ticket-triage → (autofixable?) → code-fix → tracker-writeback. High-severity autofixable bugs get a tested fix PR opened and the ticket moved to In Review; everything else is triaged and a human is notified. Stops at the PR — a human merges.',
|
|
567
|
+
path: join(__dirname, 'bug-autofix'),
|
|
568
|
+
defaultSlug: 'bug-autofix',
|
|
569
|
+
deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
|
|
570
|
+
features: [
|
|
571
|
+
'Orchestrator: poll → ticket-triage → code-fix → tracker-writeback',
|
|
572
|
+
'Composes 3 marketplace templates via Zibby sub-graph dispatch ({ workflow: slug })',
|
|
573
|
+
'Routes on triage: severity ≥ floor AND shouldAutofix → fix; else notify-only',
|
|
574
|
+
'Cron-poll of the tracker, or per-ticket webhook (ticketKey)',
|
|
575
|
+
'Each child is a separate linked execution (parentExecutionId tree-view)',
|
|
576
|
+
'Stops at PR opened + tracker written back — human merges (no in-engine approval)',
|
|
577
|
+
],
|
|
578
|
+
marketplace: {
|
|
579
|
+
slug: 'bug-autofix',
|
|
580
|
+
tagline: 'Poll bugs, triage, open a tested fix PR, write the ticket back — on autopilot.',
|
|
581
|
+
iconPrompt: [
|
|
582
|
+
'A premium, hi-fi app icon for "Bug Autofix" — an orchestrator that turns a bug ticket into a reviewed fix PR automatically.',
|
|
583
|
+
'Visual style: 3D-rendered hero object floating in space, in the style of Apple Vision Pro icons or a Stripe product render. Glossy, dimensional, soft rim-light.',
|
|
584
|
+
'Subject: a friendly 3D ladybug-style bug figure being lifted/healed by a glowing violet loop-arrow that circles it (the autofix cycle), with three small connected glossy nodes orbiting around — suggesting a pipeline of stages. A tiny green checkmark spark where the loop closes.',
|
|
585
|
+
'Background: a deep midnight-navy gradient (#0F172A → #1E1B4B) with a soft violet glow (#7C3AED) behind the subject and a few faint star specks. Square 1024×1024.',
|
|
586
|
+
'Composition: subject centered, subtle drop shadow. Mood: confident, premium, "it just handles it" — the flagship of the set.',
|
|
587
|
+
'NO text, NO letters, NO flat sticker style, NO trademarked logos. This one is DEEP and 3D-rendered, distinct from the three flat building-block icons.',
|
|
588
|
+
].join('\n'),
|
|
589
|
+
tags: ['Bug Triage', 'Incidents'],
|
|
590
|
+
capabilities: [
|
|
591
|
+
'Polls your tracker for candidate bugs (JQL) or runs per-ticket via webhook',
|
|
592
|
+
'Triages each ticket, then routes autofixable ones to an agent fix',
|
|
593
|
+
'Opens a tested fix PR and moves the ticket to In Review',
|
|
594
|
+
'Notifies a human for anything it shouldn\'t auto-fix',
|
|
595
|
+
'Composes three reusable templates — swap or reuse the blocks independently',
|
|
596
|
+
],
|
|
597
|
+
conversationStarters: [
|
|
598
|
+
'Poll my open bugs and auto-fix the autofixable ones',
|
|
599
|
+
'When a P1 bug comes in, open a fix PR and move it to In Review',
|
|
600
|
+
'Only auto-fix HIGH+ severity bugs; notify a human for the rest',
|
|
601
|
+
'Run the bug-autofix pipeline on PROJ-123',
|
|
602
|
+
],
|
|
603
|
+
},
|
|
604
|
+
},
|
|
605
|
+
|
|
606
|
+
// ── github-ai-scout: daily AI-project discovery → Slack shortlist ─
|
|
607
|
+
'github-ai-scout': {
|
|
608
|
+
name: 'github-ai-scout',
|
|
609
|
+
displayName: 'Daily GitHub AI Scout',
|
|
610
|
+
description: 'A daily scout that searches GitHub for new/trending AI projects, scores them against YOUR configurable rubric with an LLM, and posts a Slack shortlist for a human to review. General + config-driven — the query, recency/star thresholds, and rubric are all deploy-time inputs. Proposes a shortlist; never stars, forks, or auto-adds anything.',
|
|
611
|
+
path: join(__dirname, 'github-ai-scout'),
|
|
612
|
+
defaultSlug: 'github-ai-scout',
|
|
613
|
+
deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
|
|
614
|
+
features: [
|
|
615
|
+
'3-node graph: scan (GitHub search API) → score (LLM rubric) → digest (Slack)',
|
|
616
|
+
'Cron-friendly: daily schedule, default daysBack=30 / minStars=30',
|
|
617
|
+
'Configurable query + rubric — point it at any topic + taste, nothing is hard-coded',
|
|
618
|
+
'LLM ranks + filters candidates; only ever picks from the scanned repos (no invented repos)',
|
|
619
|
+
'excludeRepos allow-list dedups against projects you already track',
|
|
620
|
+
'Renders a native Slack Block-Kit card via the notify-slack sub-graph',
|
|
621
|
+
'Proposes a shortlist — a human reviews, the scout never auto-acts',
|
|
622
|
+
],
|
|
623
|
+
marketplace: {
|
|
624
|
+
slug: 'github-ai-scout',
|
|
625
|
+
tagline: 'Scout new AI projects on GitHub every morning — scored to your taste, shortlisted in Slack.',
|
|
626
|
+
iconPrompt: [
|
|
627
|
+
'A premium, hi-fi app icon for "Daily GitHub AI Scout" — a workflow that scans GitHub for new AI projects and shortlists the best ones.',
|
|
628
|
+
'Visual style: 3D-rendered hero object floating in space, in the style of Apple Vision Pro icons or a Stripe product render. Glossy, dimensional, with subtle reflections and a soft rim-light.',
|
|
629
|
+
'Subject: a sleek 3D-rendered pair of binoculars (or a small telescope) tilted up in three-quarter perspective, its glossy violet body catching a soft rim-light, sweeping a thin glowing scan-beam across a small cluster of glossy golden five-pointed STARS — two or three stars caught bright in the beam, a few fainter ones drifting past. A single tiny AI spark — a small four-point sparkle in soft cyan — glints near the lens to signal the smart-scoring step, and one small rounded code-bracket glyph "{ }" floats subtly in the lower corner to hint at "code repositories" WITHOUT any logo.',
|
|
630
|
+
'Background: a deep midnight-navy gradient (#0F172A at the top, #1E1B4B at the bottom) with a single soft violet glow (#7553FF) behind the binoculars and a scatter of faint star-like specks across the canvas. Square format, 1024×1024.',
|
|
631
|
+
'Composition: binoculars centered and slightly raised, scan-beam angled up-right toward the brightest star, subtle drop shadow on the canvas. Mood: curious, premium, "always watching the frontier".',
|
|
632
|
+
'NO text, NO letters, NO numbers, NO flat sticker style, NO mascot face, NO magnifying glass, NO trademarked GitHub octocat / Slack / OpenAI logos — this one is DEEP and 3D-rendered.',
|
|
633
|
+
].join('\n'),
|
|
634
|
+
tags: ['Reports', 'Notifications'],
|
|
635
|
+
capabilities: [
|
|
636
|
+
'Searches GitHub daily for newly-created, trending repositories matching your query',
|
|
637
|
+
'Filters by recency (created within N days) and a minimum star count',
|
|
638
|
+
'Scores every candidate 1-5 against your plain-English rubric with an LLM',
|
|
639
|
+
'Keeps a tight shortlist of the best finds — drops abandoned demos and trivial wrappers',
|
|
640
|
+
'Dedups against repos you already track via an excludeRepos allow-list',
|
|
641
|
+
'Posts a numbered Block-Kit shortlist to Slack — stars, language, license, one-line reason, link',
|
|
642
|
+
],
|
|
643
|
+
conversationStarters: [
|
|
644
|
+
'Scout new AI agent frameworks on GitHub every morning',
|
|
645
|
+
'Find trending RAG and LLM repos created this month and shortlist the best 8',
|
|
646
|
+
'Only surface repos with 100+ stars and a real README; skip the ones I already track',
|
|
647
|
+
'Post a daily GitHub AI radar to our #ai-radar Slack channel',
|
|
648
|
+
],
|
|
649
|
+
},
|
|
650
|
+
},
|
|
651
|
+
|
|
652
|
+
// ── github-code-review: review a GitHub PR → post the review back ─
|
|
653
|
+
'github-code-review': {
|
|
654
|
+
name: 'github-code-review',
|
|
655
|
+
displayName: 'GitHub Code Review',
|
|
656
|
+
description: 'Reviews a GitHub pull request with an LLM and posts the review back to the PR — a summary plus inline comments and an APPROVE / COMMENT / REQUEST_CHANGES verdict. If the PR is linked to a Jira or Linear ticket (and that integration is connected), it ALSO validates the change against the ticket\'s acceptance criteria and renders an objectives-met table, the way CodeRabbit does. GitHub required; Jira/Linear optional (ticket context only).',
|
|
657
|
+
path: join(__dirname, 'github-code-review'),
|
|
658
|
+
defaultSlug: 'github-code-review',
|
|
659
|
+
deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
|
|
660
|
+
features: [
|
|
661
|
+
'3-node graph: fetch_pr (github) → fetch_ticket (optional) → review (LLM)',
|
|
662
|
+
'Posts a real PR review via github_create_review — summary body + inline comments + verdict',
|
|
663
|
+
'DYNAMIC prompt: linked ticket → objectives-met table vs ticket acceptance criteria; no ticket → standalone diff review (never mentions a ticket)',
|
|
664
|
+
'Jira/Linear are OPTIONAL — fetched via direct tool calls so they don\'t gate deploy',
|
|
665
|
+
'Webhook-triggered on a PR: { owner, repo, prNumber }',
|
|
666
|
+
'Reviews + comments only — never merges, closes, or pushes',
|
|
667
|
+
],
|
|
668
|
+
marketplace: {
|
|
669
|
+
slug: 'github-code-review',
|
|
670
|
+
tagline: 'Auto-review every GitHub PR — inline comments, a verdict, and objectives checked against the linked ticket.',
|
|
671
|
+
iconPrompt: [
|
|
672
|
+
'A premium, hi-fi app icon for "GitHub Code Review" — a workflow that reviews a GitHub pull request and posts the review back inline.',
|
|
673
|
+
'Visual style: 3D-rendered hero object floating in space, in the style of Apple Vision Pro icons or a Stripe product render. Glossy, dimensional, with subtle reflections and a soft rim-light.',
|
|
674
|
+
'Subject: a sleek 3D-rendered DARK octocat-style cat-with-tentacles silhouette (an original friendly cephalopod-cat creature, NOT the literal trademarked GitHub Octocat logo) rendered in glossy obsidian-black with a soft violet rim-light, holding a small glowing magnifier over a floating diff card; on the diff card two short code lines glow — one soft red (a removed line), one soft green (an added line) — and a small cyan check-mark sparkle sits at the corner to signal "reviewed". The whole motif reads "dark code-review".',
|
|
675
|
+
'Background: a deep charcoal-to-midnight gradient (#0D1117 at the top — GitHub\'s dark canvas — to #161B22 at the bottom) with a single soft violet glow (#7C3AED) behind the creature and a scatter of faint specks. Square format, 1024×1024.',
|
|
676
|
+
'Composition: creature centered and slightly raised, magnifier angled toward the diff card, subtle drop shadow. Mood: focused, premium, "nothing ships unreviewed".',
|
|
677
|
+
'NO text, NO letters, NO numbers, NO flat sticker style, NO the real trademarked GitHub Octocat logo or GitHub wordmark — make it an ORIGINAL dark cephalopod-cat. This one is DEEP and 3D-rendered.',
|
|
678
|
+
].join('\n'),
|
|
679
|
+
tags: ['Code Review'],
|
|
680
|
+
capabilities: [
|
|
681
|
+
'Reads the PR diff + changed files and reviews correctness, bugs, security, tests, design, and style',
|
|
682
|
+
'Posts the review back to the PR: a summary, inline comments on specific lines, and a verdict',
|
|
683
|
+
'When a ticket is linked, validates the change against its acceptance criteria with an objectives-met table',
|
|
684
|
+
'Optionally pulls ticket context from Jira or Linear — but neither is required to run',
|
|
685
|
+
'Webhook-driven: point a PR-opened/synchronize hook at it',
|
|
686
|
+
'Proposes feedback only — a human still decides and merges',
|
|
687
|
+
],
|
|
688
|
+
conversationStarters: [
|
|
689
|
+
'Review pull request #412 in acme/web-app',
|
|
690
|
+
'Auto-review every new PR and post inline comments',
|
|
691
|
+
'Review this PR and check it against the linked Jira ticket\'s acceptance criteria',
|
|
692
|
+
'Block the merge if a PR introduces a security or missing-test issue',
|
|
693
|
+
],
|
|
694
|
+
},
|
|
695
|
+
},
|
|
696
|
+
|
|
697
|
+
// ── gitlab-code-review: review a GitLab MR → post the review back ─
|
|
698
|
+
'gitlab-code-review': {
|
|
699
|
+
name: 'gitlab-code-review',
|
|
700
|
+
displayName: 'GitLab Code Review',
|
|
701
|
+
description: 'Reviews a GitLab merge request with an LLM and posts the review back to the MR — a summary note plus inline discussion comments and a clear verdict. If the MR is linked to a Jira or Linear ticket (and that integration is connected), it ALSO validates the change against the ticket\'s acceptance criteria and renders an objectives-met table, the way CodeRabbit does. GitLab required; Jira/Linear optional (ticket context only).',
|
|
702
|
+
path: join(__dirname, 'gitlab-code-review'),
|
|
703
|
+
defaultSlug: 'gitlab-code-review',
|
|
704
|
+
deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
|
|
705
|
+
features: [
|
|
706
|
+
'3-node graph: fetch_mr (gitlab) → fetch_ticket (optional) → review (LLM)',
|
|
707
|
+
'Posts a real MR review via gitlab_create_mr_review — summary note + inline discussions',
|
|
708
|
+
'DYNAMIC prompt: linked ticket → objectives-met table vs ticket acceptance criteria; no ticket → standalone diff review (never mentions a ticket)',
|
|
709
|
+
'Jira/Linear are OPTIONAL — fetched via direct tool calls so they don\'t gate deploy',
|
|
710
|
+
'Webhook-triggered on an MR: { projectId, mrIid }',
|
|
711
|
+
'Works against gitlab.com and self-hosted instances',
|
|
712
|
+
],
|
|
713
|
+
marketplace: {
|
|
714
|
+
slug: 'gitlab-code-review',
|
|
715
|
+
tagline: 'Auto-review every GitLab MR — inline discussions, a verdict, and objectives checked against the linked ticket.',
|
|
716
|
+
iconPrompt: [
|
|
717
|
+
'A premium, hi-fi app icon for "GitLab Code Review" — a workflow that reviews a GitLab merge request and posts the review back inline.',
|
|
718
|
+
'Visual style: 3D-rendered hero object floating in space, in the style of Apple Vision Pro icons or a Stripe product render. Glossy, dimensional, with subtle reflections and a soft rim-light.',
|
|
719
|
+
'Subject: a sleek 3D-rendered ORANGE FOX head (an original friendly fox character evoking GitLab\'s tanuki-fox spirit WITHOUT copying the literal trademarked GitLab logo), rendered in glossy gradient orange (#FC6D26 → #E24329) with a soft rim-light, peering over a floating merge-request card; on the card two short code lines glow — one soft red (a removed line), one soft green (an added line) — and two small branch lines MERGE into one with a glowing node at the join to signal "merge request". A tiny cyan check-mark sparkle sits at the corner for "reviewed".',
|
|
720
|
+
'Background: a deep midnight-navy gradient (#1F1B3A at the top to #2E2150 at the bottom) that makes the orange fox POP, with a soft warm-orange glow behind the fox and a scatter of faint specks. Square format, 1024×1024.',
|
|
721
|
+
'Composition: fox centered and slightly raised, peering down at the merge-request card, subtle drop shadow. Mood: clever, warm, premium — clearly ORANGE-FOX so it is instantly distinguishable from the dark GitHub review icon.',
|
|
722
|
+
'NO text, NO letters, NO numbers, NO flat sticker style, NO the real trademarked GitLab tanuki logo or wordmark — make it an ORIGINAL orange fox. This one is DEEP and 3D-rendered.',
|
|
723
|
+
].join('\n'),
|
|
724
|
+
tags: ['Code Review'],
|
|
725
|
+
capabilities: [
|
|
726
|
+
'Reads the MR diff + changed files and reviews correctness, bugs, security, tests, design, and style',
|
|
727
|
+
'Posts the review back to the MR: a summary note, inline discussion comments on specific lines, and a verdict',
|
|
728
|
+
'When a ticket is linked, validates the change against its acceptance criteria with an objectives-met table',
|
|
729
|
+
'Optionally pulls ticket context from Jira or Linear — but neither is required to run',
|
|
730
|
+
'Works against gitlab.com and self-hosted GitLab instances',
|
|
731
|
+
'Proposes feedback only — a human still decides, approves, and merges',
|
|
732
|
+
],
|
|
733
|
+
conversationStarters: [
|
|
734
|
+
'Review merge request !73 in acme/web-app',
|
|
735
|
+
'Auto-review every new MR and post inline discussions',
|
|
736
|
+
'Review this MR and check it against the linked Jira ticket\'s acceptance criteria',
|
|
737
|
+
'Flag any MR that introduces a security or missing-test issue',
|
|
738
|
+
],
|
|
739
|
+
},
|
|
740
|
+
},
|
|
391
741
|
};
|
|
392
742
|
|
|
393
743
|
export class TemplateFactory {
|
package/notify-lark/icon.png
CHANGED
|
Binary file
|
package/notify-notion/icon.png
CHANGED
|
Binary file
|
package/notify-slack/icon.png
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zibby/workflow-templates",
|
|
3
|
-
"version": "0.9.
|
|
3
|
+
"version": "0.9.2",
|
|
4
4
|
"description": "Built-in workflow templates for Zibby — browser-test-automation, code-analysis, generate-test-cases, notify-slack, notify-lark, notify-notion, sentry-triage.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -25,6 +25,8 @@
|
|
|
25
25
|
"./sentry-triage/*": "./sentry-triage/*",
|
|
26
26
|
"./ai-spend-weekly-digest": "./ai-spend-weekly-digest/graph.mjs",
|
|
27
27
|
"./ai-spend-weekly-digest/*": "./ai-spend-weekly-digest/*",
|
|
28
|
+
"./pipeline-supervisor": "./pipeline-supervisor/graph.mjs",
|
|
29
|
+
"./pipeline-supervisor/*": "./pipeline-supervisor/*",
|
|
28
30
|
"./package.json": "./package.json"
|
|
29
31
|
},
|
|
30
32
|
"scripts": {
|
|
@@ -56,6 +58,7 @@
|
|
|
56
58
|
"notify-lark/",
|
|
57
59
|
"notify-notion/",
|
|
58
60
|
"sentry-triage/",
|
|
61
|
+
"pipeline-supervisor/",
|
|
59
62
|
"index.js",
|
|
60
63
|
"register-nodes.js",
|
|
61
64
|
"global-setup.js",
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# pipeline-supervisor
|
|
2
|
+
|
|
3
|
+
**Zibby managing Zibby.** A scheduled "监工 / supervisor" workflow that watches
|
|
4
|
+
the project's *other* pipelines, finds the ones that are failing or slow, and
|
|
5
|
+
posts a human-reviewable improvement proposal to Slack or Lark.
|
|
6
|
+
|
|
7
|
+
v1 is strictly **READ → PROPOSE → NOTIFY**. It never edits another workflow's
|
|
8
|
+
graph — that's the safe L3 starting point. The auto-PATCH step is a marked TODO
|
|
9
|
+
in `nodes/propose-node.js`, deliberately not implemented.
|
|
10
|
+
|
|
11
|
+
## Graph
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
scan_pipelines (deterministic + Zibby REST API, PAT-authed)
|
|
15
|
+
→ propose_improvements (LLM — one proposal per flagged pipeline)
|
|
16
|
+
→ notify (LLM + SKILLS.CHAT_NOTIFY — one review card)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
If `scan_pipelines` flags nothing, the graph short-circuits straight to
|
|
20
|
+
`notify`, which skips posting — so no Claude call is spent on the proposer.
|
|
21
|
+
|
|
22
|
+
## How it reads other pipelines
|
|
23
|
+
|
|
24
|
+
A direct authed `GET /executions?projectId=<id>&limit=200` against the Zibby
|
|
25
|
+
REST API (the same route the dashboard + remote MCP server use), carrying a
|
|
26
|
+
**user personal access token** in `Authorization: Bearer`.
|
|
27
|
+
|
|
28
|
+
It must be a USER PAT (`zby_pat_…`), **not** the Fargate-injected
|
|
29
|
+
`PROJECT_API_TOKEN`: every cross-pipeline read route (`/executions`, `/jobs`,
|
|
30
|
+
`/all`) requires a `userId` from the authorizer, and a project token carries
|
|
31
|
+
none — so it 401s. See the header comment in `nodes/scan-pipelines-node.js`
|
|
32
|
+
for the full rationale.
|
|
33
|
+
|
|
34
|
+
## Config (ENV tab)
|
|
35
|
+
|
|
36
|
+
Required:
|
|
37
|
+
- `ZIBBY_PAT` — user personal access token the supervisor reads executions with.
|
|
38
|
+
- `SLACK_CHANNEL` **or** `LARK_RECEIVE_ID` — where the review card goes.
|
|
39
|
+
|
|
40
|
+
Optional:
|
|
41
|
+
- `SUPERVISOR_PROJECT_ID` — project to supervise (defaults to the running project).
|
|
42
|
+
- `SLACK_MENTIONS` / `LARK_MENTIONS` — JSON array of mentions on the card.
|
|
43
|
+
|
|
44
|
+
## Input (per-run dials)
|
|
45
|
+
|
|
46
|
+
| field | default | meaning |
|
|
47
|
+
|---|---|---|
|
|
48
|
+
| `lookbackHours` | 24 | hours of execution history to scan |
|
|
49
|
+
| `minFailRate` | 0.4 | flag a pipeline failing ≥ this fraction of recent runs |
|
|
50
|
+
| `targetWorkflowTypes` | — | optional name filter (case-insensitive substring) |
|
|
51
|
+
| `maxPipelines` | 25 | cap on distinct pipelines analyzed per run |
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline-supervisor — Zibby's flagship "监工 / supervisor" workflow.
|
|
3
|
+
* "Zibby managing Zibby": a scheduled run that watches the project's OTHER
|
|
4
|
+
* pipelines and PROPOSES improvements. v1 is strictly READ + PROPOSE +
|
|
5
|
+
* NOTIFY — it never edits another workflow's graph (the safe L3 boundary).
|
|
6
|
+
*
|
|
7
|
+
* scan_pipelines (deterministic + Zibby REST) → pull recent
|
|
8
|
+
* executions across pipelines (PAT-authed), roll
|
|
9
|
+
* up per-pipeline health, flag failing/slow ones.
|
|
10
|
+
* ↓
|
|
11
|
+
* propose_improvements (LLM) → one concrete,
|
|
12
|
+
* reviewable proposal per flagged pipeline
|
|
13
|
+
* (add test gate / tweak prompt / add approval
|
|
14
|
+
* gate / drop redundant step).
|
|
15
|
+
* ↓
|
|
16
|
+
* notify (LLM + SKILLS.CHAT_NOTIFY) → ONE review card
|
|
17
|
+
* to Slack OR Lark: "Pipeline X failed 4/5 on step
|
|
18
|
+
* Y. I'd suggest <change>. (review)".
|
|
19
|
+
*
|
|
20
|
+
* Short-circuit: if scan_pipelines flags nothing, skip the LLM proposer and
|
|
21
|
+
* route straight to notify (which records a "skipped" no-op and posts nothing). The
|
|
22
|
+
* idle path is the common case — most runs find nothing wrong — so we don't
|
|
23
|
+
* spend a Claude call manufacturing proposals from an empty list.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { WorkflowAgent, WorkflowGraph } from '@zibby/core';
|
|
27
|
+
|
|
28
|
+
import { scanPipelinesNode } from './nodes/scan-pipelines-node.js';
|
|
29
|
+
import { proposeNode } from './nodes/propose-node.js';
|
|
30
|
+
import { notifyNode } from './nodes/notify-node.js';
|
|
31
|
+
|
|
32
|
+
import {
|
|
33
|
+
pipelineSupervisorInputSchema,
|
|
34
|
+
pipelineSupervisorContextSchema,
|
|
35
|
+
} from './state.js';
|
|
36
|
+
|
|
37
|
+
/**
 * Workflow agent for the pipeline-supervisor template.
 *
 * Wires three nodes into a READ → PROPOSE → NOTIFY graph:
 * scan_pipelines → propose_improvements → notify, with a conditional edge
 * that skips the proposer when the scan flagged nothing.
 */
export class PipelineSupervisorAgent extends WorkflowAgent {
  /**
   * Build the supervisor graph.
   *
   * @returns {WorkflowGraph} the configured graph, entry point `scan_pipelines`.
   */
  buildGraph() {
    const graph = new WorkflowGraph();
    graph
      .setInputSchema(pipelineSupervisorInputSchema)
      .setContextSchema(pipelineSupervisorContextSchema);

    graph.addNode('scan_pipelines', scanPipelinesNode);
    graph.addNode('propose_improvements', proposeNode);
    graph.addNode('notify', notifyNode);

    graph.setEntryPoint('scan_pipelines');

    // No flagged pipelines → skip the proposer and go straight to notify.
    // The notify node's no-op prompt only asks for the empty envelope (no
    // tool calls), so an all-healthy scan costs one Claude call at most.
    graph.addConditionalEdges('scan_pipelines', (state) => {
      const pipelines = state?.scan_pipelines?.pipelines || [];
      const anyFlagged = pipelines.some((p) => p.flagged);
      return anyFlagged ? 'propose_improvements' : 'notify';
    });
    graph.addEdge('propose_improvements', 'notify');
    graph.addEdge('notify', 'END');

    return graph;
  }

  /**
   * Log a one-line run summary once the graph has finished.
   *
   * Reads the scan, proposal and notify outputs from `result.state`; any
   * missing piece is reported as 0 rather than throwing.
   *
   * @param {object} result - run result; only `result.state` is read.
   */
  async onComplete(result) {
    const pipelines = result?.state?.scan_pipelines?.pipelines || [];
    const flagged = pipelines.filter((p) => p.flagged).length;
    const proposals = result?.state?.propose_improvements?.proposals?.length || 0;
    const s = result?.state?.notify?.summary || {};
    console.log(
      `[pipeline-supervisor] complete — pipelines=${pipelines.length}, flagged=${flagged}, ` +
      `proposals=${proposals}, sent=${s.sent || 0}, skipped=${s.skipped || 0}, failed=${s.failed || 0}`,
    );
  }
}
|
|
74
|
+
|
|
75
|
+
export default PipelineSupervisorAgent;
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":"4.1.7","results":[[":__tests__/graph.integration.test.js",{"duration":7.337875000000054,"failed":false}]]}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* notify — LLM + SKILLS.CHAT_NOTIFY. Posts ONE human-reviewable card
|
|
3
|
+
* summarizing the improvement proposals to the configured chat destination.
|
|
4
|
+
* This is the "NOTIFY" half of READ → PROPOSE → NOTIFY: the human reads the
|
|
5
|
+
* card and acts manually. The supervisor never applies a change itself in v1.
|
|
6
|
+
*
|
|
7
|
+
* Mirrors sentry-triage's dispatch node: chatNotifySkill.resolve() picks
|
|
8
|
+
* slack or lark from which env var is set (SLACK_CHANNEL vs LARK_RECEIVE_ID),
|
|
9
|
+
* so the LLM only ever sees one provider's tools.
|
|
10
|
+
*
|
|
11
|
+
* ENV tab config — required:
|
|
12
|
+
* SLACK_CHANNEL OR LARK_RECEIVE_ID — provider selector + destination
|
|
13
|
+
* ENV tab config — optional:
|
|
14
|
+
* SLACK_MENTIONS / LARK_MENTIONS — JSON array of mentions on the card
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { z, SKILLS } from '@zibby/core';
|
|
18
|
+
|
|
19
|
+
const DispatchedRecordSchema = z.object({
|
|
20
|
+
status: z.enum(['sent', 'skipped', 'failed']),
|
|
21
|
+
// nullish (not optional) on purpose — the LLM emits explicit null rather
|
|
22
|
+
// than omitting keys; .optional() would reject null and fail the node.
|
|
23
|
+
recipient: z.object({
|
|
24
|
+
kind: z.enum(['channel', 'user_dm', 'usergroup']).nullish(),
|
|
25
|
+
id: z.string().nullish(),
|
|
26
|
+
label: z.string().nullish(),
|
|
27
|
+
}).nullish(),
|
|
28
|
+
proposalCount: z.number().nullish(),
|
|
29
|
+
messageTs: z.string().nullish(), // Slack
|
|
30
|
+
messageId: z.string().nullish(), // Lark
|
|
31
|
+
detail: z.string().nullish(),
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
const NotifyOutputSchema = z.object({
|
|
35
|
+
dispatched: z.array(DispatchedRecordSchema),
|
|
36
|
+
summary: z.object({
|
|
37
|
+
total: z.number(),
|
|
38
|
+
sent: z.number(),
|
|
39
|
+
skipped: z.number(),
|
|
40
|
+
failed: z.number(),
|
|
41
|
+
}),
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
const CHANGE_KIND_LABEL = {
|
|
45
|
+
add_test_gate: 'Add a test gate',
|
|
46
|
+
tweak_prompt: 'Tweak the prompt',
|
|
47
|
+
add_human_approval_gate: 'Add a human-approval gate',
|
|
48
|
+
drop_redundant_step: 'Drop a redundant step',
|
|
49
|
+
other: 'Other',
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
const NOTIFY_PROMPT = (state = {}) => {
|
|
53
|
+
const proposals = state?.propose_improvements?.proposals || [];
|
|
54
|
+
const scan = state?.scan_pipelines || {};
|
|
55
|
+
const lookbackHours = scan.lookbackHours || state?.lookbackHours || 24;
|
|
56
|
+
|
|
57
|
+
const slackChannel = process.env.SLACK_CHANNEL || '';
|
|
58
|
+
const larkReceiveId = process.env.LARK_RECEIVE_ID || '';
|
|
59
|
+
|
|
60
|
+
// ── No-op short-circuit ─────────────────────────────────────────
|
|
61
|
+
// No proposals → keep the run green without a tool call or forcing
|
|
62
|
+
// channel setup. The prompt asks the model to return the empty envelope verbatim.
|
|
63
|
+
if (proposals.length === 0) {
|
|
64
|
+
return `pipeline-supervisor found no problem pipelines this run — nothing to propose.
|
|
65
|
+
|
|
66
|
+
Return this exact JSON envelope and call no tools:
|
|
67
|
+
|
|
68
|
+
\`\`\`json
|
|
69
|
+
{ "dispatched": [{ "status": "skipped", "proposalCount": 0, "detail": "no flagged pipelines" }], "summary": { "total": 0, "sent": 0, "skipped": 1, "failed": 0 } }
|
|
70
|
+
\`\`\`
|
|
71
|
+
`;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// ── Provider selection ──────────────────────────────────────────
|
|
75
|
+
let provider, postTool, channelId, mentionsRaw;
|
|
76
|
+
if (slackChannel) {
|
|
77
|
+
provider = 'slack';
|
|
78
|
+
postTool = 'slack_post_message';
|
|
79
|
+
channelId = slackChannel;
|
|
80
|
+
mentionsRaw = process.env.SLACK_MENTIONS || '[]';
|
|
81
|
+
} else if (larkReceiveId) {
|
|
82
|
+
provider = 'lark';
|
|
83
|
+
postTool = 'lark_send_message';
|
|
84
|
+
channelId = larkReceiveId;
|
|
85
|
+
mentionsRaw = process.env.LARK_MENTIONS || '[]';
|
|
86
|
+
} else {
|
|
87
|
+
throw new Error(
|
|
88
|
+
'pipeline-supervisor has proposals to post but no destination configured. ' +
|
|
89
|
+
'Go to Project Settings → ENV and set ONE of:\n' +
|
|
90
|
+
' - SLACK_CHANNEL=#your-channel (uses connected Slack integration)\n' +
|
|
91
|
+
' - LARK_RECEIVE_ID=oc_xxxxxxxx (uses connected Lark integration)'
|
|
92
|
+
);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
let mentions;
|
|
96
|
+
try { mentions = JSON.parse(mentionsRaw); } catch { mentions = []; }
|
|
97
|
+
if (!Array.isArray(mentions)) mentions = [];
|
|
98
|
+
|
|
99
|
+
const windowLabel = lookbackHours < 48
|
|
100
|
+
? `the past ${lookbackHours} hours`
|
|
101
|
+
: `the past ${Math.round(lookbackHours / 24)} days`;
|
|
102
|
+
|
|
103
|
+
const writeGuide = provider === 'slack'
|
|
104
|
+
? `# How to post it — a Slack review card
|
|
105
|
+
Post ONCE with \`slack_post_message({ channel, text, blocks })\`. \`text\` = a one-line fallback. \`blocks\` = real Block Kit objects only:
|
|
106
|
+
1. \`header\` — { "type": "header", "text": { "type": "plain_text", "text": "🛠️ Pipeline Supervisor — ${windowLabel}", "emoji": true } }
|
|
107
|
+
2. \`context\` — one line: how many pipelines flagged, scanned over ${windowLabel}.
|
|
108
|
+
3. Per proposal — a \`section\` then a small \`context\`:
|
|
109
|
+
{ "type": "divider" }
|
|
110
|
+
{ "type": "section", "text": { "type": "mrkdwn", "text": "*<pipeline>* — <problem>\\n*Suggestion (${'`'}<changeKind label>${'`'}):* <suggestion>" } }
|
|
111
|
+
{ "type": "context", "elements": [{ "type": "mrkdwn", "text": "↳ <evidence — the concrete metric>" }] }
|
|
112
|
+
4. final \`context\` — make clear these are PROPOSALS for a human to review and apply; the supervisor did NOT change anything.
|
|
113
|
+
- header text is plain_text; section & context are mrkdwn.
|
|
114
|
+
- Real Block Kit types only (header / section / divider / context).`
|
|
115
|
+
: `# How to write it — a Lark review note, talk like a teammate
|
|
116
|
+
Post ONCE with \`lark_send_message({ receive_id, msg_type:"text", content })\`. Open with one sentence about ${windowLabel} and how many pipelines you flagged. Then, per proposal: the pipeline name, the problem, your suggested change (say which of the four moves it is), and the evidence number. End by making clear these are PROPOSALS for a human to review and apply — the supervisor changed nothing. No form blocks; sound like a person.`;
|
|
117
|
+
|
|
118
|
+
return `You are the notify node of pipeline-supervisor. Post ONE chat message with the **${postTool}** tool summarizing the improvement proposals for a human to review.
|
|
119
|
+
|
|
120
|
+
# Destination
|
|
121
|
+
Channel/receive_id: ${JSON.stringify(channelId)} (${provider}). Post with \`${postTool}\`.
|
|
122
|
+
${mentions.length > 0 ? `Prepend these mentions: ${JSON.stringify(mentions.join(' '))}` : ''}
|
|
123
|
+
|
|
124
|
+
# Framing (important)
|
|
125
|
+
These are PROPOSALS. The supervisor read other pipelines' run history and is SUGGESTING changes a human will review and apply by hand. Do NOT imply anything was already changed. Each card line should read like "Pipeline X failed 4/5 runs on step Y — I'd suggest <change>. (review)".
|
|
126
|
+
|
|
127
|
+
# changeKind → human label
|
|
128
|
+
${Object.entries(CHANGE_KIND_LABEL).map(([k, v]) => `- ${k} → ${v}`).join('\n')}
|
|
129
|
+
|
|
130
|
+
${writeGuide}
|
|
131
|
+
|
|
132
|
+
# Output (outputSchema-enforced)
|
|
133
|
+
Return ONE record for the message you posted (status "sent"), or "failed" with a \`detail\`. \`proposalCount\` = number of proposals in the card. \`recipient\` records where it went.
|
|
134
|
+
|
|
135
|
+
\`\`\`json
|
|
136
|
+
{
|
|
137
|
+
"dispatched": [
|
|
138
|
+
{ "status": "sent", "recipient": { "kind": "channel", "id": ${JSON.stringify(channelId)} }, "proposalCount": ${proposals.length}${provider === 'slack' ? ',\n "messageTs": "1716109330.555"' : ',\n "messageId": "om_xxxxx"'} }
|
|
139
|
+
],
|
|
140
|
+
"summary": { "total": 1, "sent": 1, "skipped": 0, "failed": 0 }
|
|
141
|
+
}
|
|
142
|
+
\`\`\`
|
|
143
|
+
|
|
144
|
+
# Proposals to post
|
|
145
|
+
|
|
146
|
+
\`\`\`json
|
|
147
|
+
${JSON.stringify(proposals, null, 2)}
|
|
148
|
+
\`\`\`
|
|
149
|
+
|
|
150
|
+
# Rules
|
|
151
|
+
- ONE message → ONE \`sent\` record.
|
|
152
|
+
- Don't invent pipelines, metrics, or suggestions — only what's in the data above.
|
|
153
|
+
- Keep it tight. If there are 2 proposals, a short card is the right answer.
|
|
154
|
+
`;
|
|
155
|
+
};
|
|
156
|
+
|
|
157
|
+
export const notifyNode = {
|
|
158
|
+
name: 'notify',
|
|
159
|
+
skills: [SKILLS.CHAT_NOTIFY],
|
|
160
|
+
outputSchema: NotifyOutputSchema,
|
|
161
|
+
prompt: NOTIFY_PROMPT,
|
|
162
|
+
};
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* propose_improvements — LLM. Reads the per-pipeline health summary from
|
|
3
|
+
* scan_pipelines and emits ONE concrete, reviewable improvement proposal
|
|
4
|
+
* per FLAGGED pipeline. No tools — everything it needs is inlined as JSON.
|
|
5
|
+
*
|
|
6
|
+
* This is the "propose" half of READ → PROPOSE → NOTIFY. It does NOT touch
|
|
7
|
+
* any other workflow's graph. It only describes a change a human can apply.
|
|
8
|
+
*
|
|
9
|
+
* ─────────────────────────────────────────────────────────────────────────
|
|
10
|
+
* TODO (future, DELIBERATELY NOT IMPLEMENTED in v1 — the safe L3 boundary):
|
|
11
|
+
* Auto-PATCH the target pipeline's graph. When we promote this template
|
|
12
|
+
* from "notify only" to "self-iterating", a new node AFTER human approval
|
|
13
|
+
* would call the workflow-update API to actually apply an accepted
|
|
14
|
+
* `changeKind` (e.g. insert a test-gate node, edit a prompt). That step
|
|
15
|
+
* must be gated behind explicit human approval + snapshot/dry-run/verify/
|
|
16
|
+
* rollback (see MEMORY: app-upgrade-strategy-agentic). v1 stops at the
|
|
17
|
+
* proposal so a human reviews and applies the change by hand.
|
|
18
|
+
* ─────────────────────────────────────────────────────────────────────────
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { z } from '@zibby/core';
|
|
22
|
+
|
|
23
|
+
const ProposalSchema = z.object({
|
|
24
|
+
workflowType: z.string(),
|
|
25
|
+
problem: z.string(),
|
|
26
|
+
changeKind: z.enum([
|
|
27
|
+
'add_test_gate',
|
|
28
|
+
'tweak_prompt',
|
|
29
|
+
'add_human_approval_gate',
|
|
30
|
+
'drop_redundant_step',
|
|
31
|
+
'other',
|
|
32
|
+
]),
|
|
33
|
+
suggestion: z.string(),
|
|
34
|
+
evidence: z.string().optional(),
|
|
35
|
+
confidence: z.number().min(0).max(1).optional(),
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
const ProposeOutputSchema = z.object({
|
|
39
|
+
proposals: z.array(ProposalSchema),
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
const GUIDE = `You are the propose_improvements node of pipeline-supervisor — a workflow that watches a Zibby project's OTHER pipelines and proposes concrete fixes. This is "Zibby managing Zibby."
|
|
43
|
+
|
|
44
|
+
You are given a per-pipeline health summary as JSON below. Each entry is one pipeline (a workflow type) with its recent run stats: total / failed / succeeded / running, failRate, medianDurationMs, a worstRun example, and whether it's \`flagged\` (+ \`flagReason\`).
|
|
45
|
+
|
|
46
|
+
# Your job
|
|
47
|
+
For EACH pipeline where \`flagged === true\`, emit ONE proposal. Do NOT propose anything for un-flagged pipelines. If nothing is flagged, return an empty \`proposals\` array.
|
|
48
|
+
|
|
49
|
+
# Pick ONE concrete change per problem — \`changeKind\` must be one of:
|
|
50
|
+
- **add_test_gate** — the pipeline ships broken output / fails late. Propose inserting a validation/test step that catches the failure earlier (before the expensive/irreversible step).
|
|
51
|
+
- **tweak_prompt** — an LLM node is making the same mistake repeatedly (e.g. wrong format, hallucinated tool call). Propose a specific prompt change.
|
|
52
|
+
- **add_human_approval_gate** — the pipeline takes a risky/irreversible action and keeps getting it wrong. Propose a human-approval gate before that step.
|
|
53
|
+
- **drop_redundant_step** — a step adds latency or failure surface with no value (e.g. an LLM round-trip that adds no judgment). Propose dropping it. Use this for clear "slow outlier" flags.
|
|
54
|
+
- **other** — only when none of the above fit; explain in \`suggestion\`.
|
|
55
|
+
|
|
56
|
+
# Each proposal must be:
|
|
57
|
+
- **Specific**: name the pipeline, the symptom, and the exact change. Not "improve reliability" — instead "add a JSON-schema validation gate after the 'generate' node; 3 of the last 4 runs failed there with a malformed-output error."
|
|
58
|
+
- **Evidence-backed**: put the concrete number / worstRun detail in \`evidence\` ("failRate 75% over 4 runs; worst run exec_abc failed on step 'deploy'"). Pull it straight from the data — never invent a metric.
|
|
59
|
+
- **Reviewable, not auto-applied**: phrase \`suggestion\` as a recommendation a human will read and apply. You are NOT editing any graph.
|
|
60
|
+
- **confidence** reflects how clean the signal is. A pipeline failing 4/4 on the same step → 0.9. A borderline slow outlier → 0.5.
|
|
61
|
+
|
|
62
|
+
# Rules
|
|
63
|
+
- ONE proposal per flagged pipeline. No duplicates.
|
|
64
|
+
- Only use pipelines/numbers present in the data block. Don't invent pipelines, steps, or error messages.
|
|
65
|
+
- \`problem\` is one sentence (the symptom). \`suggestion\` is one-to-three sentences (the fix).
|
|
66
|
+
- Temperature 0. This is analysis, not creative writing.
|
|
67
|
+
- Call NO tools — you have everything you need below.`;
|
|
68
|
+
|
|
69
|
+
const PROPOSE_PROMPT = (state = {}) => {
|
|
70
|
+
const pipelines = state?.scan_pipelines?.pipelines || [];
|
|
71
|
+
const flagged = pipelines.filter((p) => p.flagged);
|
|
72
|
+
return `${GUIDE}
|
|
73
|
+
|
|
74
|
+
## Context for this run
|
|
75
|
+
- Scanned project: ${state?.scan_pipelines?.projectId || '(unknown)'}
|
|
76
|
+
- Lookback: ${state?.scan_pipelines?.lookbackHours || '?'}h
|
|
77
|
+
- Pipelines analyzed: ${pipelines.length}; flagged as problems: ${flagged.length}
|
|
78
|
+
|
|
79
|
+
## Pipeline health summary (propose ONLY for flagged === true)
|
|
80
|
+
|
|
81
|
+
\`\`\`json
|
|
82
|
+
${JSON.stringify(pipelines, null, 2)}
|
|
83
|
+
\`\`\`
|
|
84
|
+
`;
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
export const proposeNode = {
|
|
88
|
+
name: 'propose_improvements',
|
|
89
|
+
outputSchema: ProposeOutputSchema,
|
|
90
|
+
prompt: PROPOSE_PROMPT,
|
|
91
|
+
};
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* scan_pipelines — DETERMINISTIC. Reads the project's recent executions
|
|
3
|
+
* across ALL pipelines via the Zibby REST API, then rolls them up per
|
|
4
|
+
* pipeline (workflow type) into a health summary the proposer reasons over.
|
|
5
|
+
*
|
|
6
|
+
* ── How the supervisor reads OTHER pipelines' results (the chosen mechanism) ──
|
|
7
|
+
*
|
|
8
|
+
* Mechanism: a DIRECT, authed HTTPS call to the Zibby REST API
|
|
9
|
+
* GET {ZIBBY_ACCOUNT_API_URL|api-prod.zibby.app}/executions?projectId=<id>&limit=<n>
|
|
10
|
+
* (the same `listExecutions` route the dashboard + MCP server use), carrying
|
|
11
|
+
* Authorization: Bearer <ZIBBY_PAT>
|
|
12
|
+
*
|
|
13
|
+
* Auth — why a user PAT, and NOT the injected PROJECT_API_TOKEN:
|
|
14
|
+
* The executor injects PROJECT_API_TOKEN (a `zby_*` PROJECT token) into
|
|
15
|
+
* every Fargate task. That token authenticates as the PROJECT
|
|
16
|
+
* (authType:'project') and carries NO userId. But every cross-pipeline
|
|
17
|
+
* READ route — /executions, /jobs/:projectId, /all/:projectId — pulls
|
|
18
|
+
* `userId` out of the authorizer context and 401/403s when it's absent
|
|
19
|
+
* (executions.js listExecutions: `if (!userId) return 401`;
|
|
20
|
+
* workflow-logs.js: verifyProjectAccess(userId, …)). The remote MCP server
|
|
21
|
+
* (mcp-server.js) goes further and validates a `zby_pat_*` PAT specifically.
|
|
22
|
+
* So the project token literally cannot read these routes.
|
|
23
|
+
*
|
|
24
|
+
* The credential that works is a USER personal access token (zby_pat_…),
|
|
25
|
+
* supplied at deploy time as ZIBBY_PAT in the ENV tab. It resolves to a
|
|
26
|
+
* userId via the authorizer's PAT path, and verifyProjectAccess then
|
|
27
|
+
* confirms that user can see the supervised project. This is the same
|
|
28
|
+
* credential class the MCP server requires, so the auth model is identical
|
|
29
|
+
* whether you reach the data via REST (this node) or via the MCP tools
|
|
30
|
+
* (zibby_list_executions / zibby_get_all_workflow_logs).
|
|
31
|
+
*
|
|
32
|
+
* Why REST over the MCP tools:
|
|
33
|
+
* - No MCP client to stand up inside the workflow process; one fetch().
|
|
34
|
+
* - The MCP `zibby_list_executions` tool is a thin proxy to THIS SAME
|
|
35
|
+
* REST route, so we lose nothing by calling it directly.
|
|
36
|
+
* - Deterministic + free: no LLM round-trip to drive a tool call for a
|
|
37
|
+
* pure data pull.
|
|
38
|
+
*
|
|
39
|
+
* Per-pipeline rollup:
|
|
40
|
+
* - "pipeline" = one workflow type/slug in the project. We group the
|
|
41
|
+
* recent executions by `workflowType` and compute total / failed /
|
|
42
|
+
* succeeded / running, failRate, and a median completed-run duration.
|
|
43
|
+
* - A pipeline is `flagged` when failRate >= minFailRate (with >= 3 runs
|
|
44
|
+
* so a single fluke doesn't trip it) OR it's a clear "slow" outlier.
|
|
45
|
+
* - worstRun cites the single worst recent run so the proposer has a
|
|
46
|
+
* concrete example to point at. failedStep/errorSummary are best-effort
|
|
47
|
+
* — populated from whatever the execution row carries; absent is fine.
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
import { z } from 'zod';
|
|
51
|
+
|
|
52
|
+
const PipelineHealthSchema = z.object({
|
|
53
|
+
workflowType: z.string(),
|
|
54
|
+
workflowUuid: z.string().optional(),
|
|
55
|
+
total: z.number(),
|
|
56
|
+
failed: z.number(),
|
|
57
|
+
succeeded: z.number(),
|
|
58
|
+
running: z.number(),
|
|
59
|
+
failRate: z.number(),
|
|
60
|
+
medianDurationMs: z.number().optional(),
|
|
61
|
+
worstRun: z.object({
|
|
62
|
+
executionId: z.string().optional(),
|
|
63
|
+
status: z.string().optional(),
|
|
64
|
+
durationMs: z.number().optional(),
|
|
65
|
+
failedStep: z.string().optional(),
|
|
66
|
+
errorSummary: z.string().optional(),
|
|
67
|
+
startedAt: z.string().optional(),
|
|
68
|
+
}).optional(),
|
|
69
|
+
flagged: z.boolean(),
|
|
70
|
+
flagReason: z.string().optional(),
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
const ScanOutputSchema = z.object({
|
|
74
|
+
projectId: z.string(),
|
|
75
|
+
lookbackHours: z.number(),
|
|
76
|
+
scannedAt: z.string(),
|
|
77
|
+
totalExecutions: z.number(),
|
|
78
|
+
pipelines: z.array(PipelineHealthSchema),
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
// Statuses the executions API uses. Anything in FAILED_STATUSES counts
|
|
82
|
+
// against the pipeline; SUCCEEDED is the clean path; the rest are in-flight.
|
|
83
|
+
const FAILED_STATUSES = new Set(['failed', 'cancelled', 'blocked', 'insufficient_context']);
|
|
84
|
+
const SUCCEEDED_STATUSES = new Set(['completed']);
|
|
85
|
+
const RUNNING_STATUSES = new Set(['running', 'queued', 'starting', 'uploading']);
|
|
86
|
+
|
|
87
|
+
/**
 * Resolve the base URL of the Zibby account REST API.
 *
 * Precedence:
 *   1. ZIBBY_ACCOUNT_API_URL (explicit override)
 *   2. http://localhost:3001 when ZIBBY_ENV === 'local'
 *   3. ZIBBY_PROD_ACCOUNT_API_URL, else https://api-prod.zibby.app
 *
 * Trailing slashes are stripped from every env-supplied value so callers can
 * append `/executions` without producing a double slash.
 *
 * @returns {string} base URL with no trailing slash.
 */
function getAccountApiUrl() {
  const stripTrailingSlashes = (url) => url.replace(/\/+$/, '');

  const override = process.env.ZIBBY_ACCOUNT_API_URL;
  if (override) return stripTrailingSlashes(override);

  const env = process.env.ZIBBY_ENV || 'prod';
  if (env === 'local') return 'http://localhost:3001';

  return stripTrailingSlashes(
    process.env.ZIBBY_PROD_ACCOUNT_API_URL || 'https://api-prod.zibby.app',
  );
}
|
|
94
|
+
|
|
95
|
+
/**
 * Median of the finite numbers in `nums`.
 *
 * Non-numbers, NaN and ±Infinity are ignored. For an even count the two
 * middle values are averaged and rounded to the nearest integer.
 *
 * @param {Array<*>} nums - candidate values; not mutated.
 * @returns {number|undefined} the median, or undefined when no finite
 *   number is present.
 */
function median(nums) {
  // Number.isFinite does not coerce, so it also rejects non-number entries.
  const sorted = nums.filter((n) => Number.isFinite(n)).sort((a, b) => a - b);
  if (sorted.length === 0) return undefined;
  const mid = Math.floor(sorted.length / 2);
  if (sorted.length % 2 === 1) return sorted[mid];
  return Math.round((sorted[mid - 1] + sorted[mid]) / 2);
}
|
|
101
|
+
|
|
102
|
+
/**
 * Best-effort run duration in milliseconds.
 *
 * Execution rows don't carry a uniform `durationMs`, so this prefers the
 * reported value when it is usable and otherwise derives the duration from
 * a start timestamp (`startedAt` / `createdAt`) and an end timestamp
 * (`completedAt` / `finishedAt` / `updatedAt`).
 *
 * A reported `durationMs` that is NaN, infinite or negative is treated as
 * absent, matching the check applied to the timestamp-derived value.
 *
 * @param {object} exec - execution row.
 * @returns {number|undefined} non-negative finite duration, or undefined
 *   when none can be determined.
 */
function durationMsOf(exec) {
  const reported = exec.durationMs;
  if (Number.isFinite(reported) && reported >= 0) return reported;

  const start = exec.startedAt || exec.createdAt;
  const end = exec.completedAt || exec.finishedAt || exec.updatedAt;
  if (start && end) {
    const elapsed = new Date(end).getTime() - new Date(start).getTime();
    // Unparseable timestamps yield NaN; end-before-start yields a negative.
    if (Number.isFinite(elapsed) && elapsed >= 0) return elapsed;
  }
  return undefined;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Best-effort "what step failed / why" for an execution row.
 *
 * Rows vary, so this surfaces whatever is present without inventing
 * anything: the step comes from `failedStep` / `currentStep` / `lastNode`,
 * the summary from `error` / `errorMessage` / `failureReason`.
 *
 * An object-valued error is reduced to its `message` when it has one, and
 * otherwise serialized as JSON, so the summary never degrades to
 * "[object Object]". The summary is capped at 280 characters.
 *
 * @param {object} exec - execution row.
 * @returns {{failedStep: (*|undefined), errorSummary: (string|undefined)}}
 */
function failureDetail(exec) {
  const rawError = exec.error || exec.errorMessage || exec.failureReason || '';

  let text;
  if (typeof rawError === 'string') {
    text = rawError;
  } else if (typeof rawError.message === 'string' && rawError.message) {
    text = rawError.message;
  } else if (typeof rawError === 'object') {
    try {
      // JSON.stringify can return undefined (e.g. toJSON() → undefined).
      text = JSON.stringify(rawError) ?? String(rawError);
    } catch {
      // Circular structures / BigInt: fall back to plain string coercion.
      text = String(rawError);
    }
  } else {
    text = String(rawError);
  }

  return {
    failedStep: exec.failedStep || exec.currentStep || exec.lastNode || undefined,
    errorSummary: text.slice(0, 280) || undefined,
  };
}
|
|
124
|
+
|
|
125
|
+
export const scanPipelinesNode = {
|
|
126
|
+
name: 'scan_pipelines',
|
|
127
|
+
outputSchema: ScanOutputSchema,
|
|
128
|
+
// 2 min — a single paginated /executions pull is usually <2s; headroom
|
|
129
|
+
// for a large project's history + transient API slowness.
|
|
130
|
+
timeout: 2 * 60 * 1000,
|
|
131
|
+
execute: async (context) => {
|
|
132
|
+
const state = (context?.state && typeof context.state.getAll === 'function')
|
|
133
|
+
? context.state.getAll()
|
|
134
|
+
: context;
|
|
135
|
+
|
|
136
|
+
const lookbackHours = Number(state?.lookbackHours) || 24;
|
|
137
|
+
const minFailRate = typeof state?.minFailRate === 'number' ? state.minFailRate : 0.4;
|
|
138
|
+
const maxPipelines = Number(state?.maxPipelines) || 25;
|
|
139
|
+
const filters = Array.isArray(state?.targetWorkflowTypes)
|
|
140
|
+
? state.targetWorkflowTypes.map((s) => String(s).toLowerCase())
|
|
141
|
+
: null;
|
|
142
|
+
|
|
143
|
+
// Supervised project: explicit override, else the running project.
|
|
144
|
+
const projectId = process.env.SUPERVISOR_PROJECT_ID || process.env.PROJECT_ID;
|
|
145
|
+
const pat = process.env.ZIBBY_PAT || process.env.ZIBBY_USER_TOKEN;
|
|
146
|
+
|
|
147
|
+
if (!projectId) {
|
|
148
|
+
throw new Error(
|
|
149
|
+
'pipeline-supervisor: no project to supervise. PROJECT_ID is injected by the ' +
|
|
150
|
+
'executor; set SUPERVISOR_PROJECT_ID in the ENV tab to point at a different project.'
|
|
151
|
+
);
|
|
152
|
+
}
|
|
153
|
+
if (!pat) {
|
|
154
|
+
throw new Error(
|
|
155
|
+
'pipeline-supervisor: ZIBBY_PAT is not set. The supervisor reads OTHER pipelines\' ' +
|
|
156
|
+
'executions via the Zibby REST API, which requires a USER personal access token ' +
|
|
157
|
+
'(zby_pat_…). The Fargate-injected PROJECT_API_TOKEN is a project token and the ' +
|
|
158
|
+
'/executions route rejects it (no userId). Create a PAT in the dashboard and set it ' +
|
|
159
|
+
'as ZIBBY_PAT in Project Settings → ENV.'
|
|
160
|
+
);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
const base = getAccountApiUrl();
|
|
164
|
+
// limit=200 is the API ceiling; one page covers lookback for any realistic
|
|
165
|
+
// project. We post-filter by lookbackHours below rather than relying on a
|
|
166
|
+
// server-side time filter the route doesn't expose.
|
|
167
|
+
const url = `${base}/executions?projectId=${encodeURIComponent(projectId)}&limit=200`;
|
|
168
|
+
console.log(`Scanning executions: ${url}`);
|
|
169
|
+
console.log(`Lookback: ${lookbackHours}h · minFailRate: ${minFailRate} · maxPipelines: ${maxPipelines}`);
|
|
170
|
+
|
|
171
|
+
const res = await fetch(url, {
|
|
172
|
+
method: 'GET',
|
|
173
|
+
headers: { Authorization: `Bearer ${pat}` },
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
if (!res.ok) {
|
|
177
|
+
const body = await res.text().catch(() => '');
|
|
178
|
+
if (res.status === 401 || res.status === 403) {
|
|
179
|
+
throw new Error(
|
|
180
|
+
`pipeline-supervisor: ${res.status} reading /executions. ZIBBY_PAT is invalid, ` +
|
|
181
|
+
`expired, or its owner can't access project ${projectId}. ${body.slice(0, 200)}`
|
|
182
|
+
);
|
|
183
|
+
}
|
|
184
|
+
throw new Error(`pipeline-supervisor: /executions returned ${res.status}: ${body.slice(0, 300)}`);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
const payload = await res.json().catch(() => ({}));
|
|
188
|
+
const all = Array.isArray(payload?.executions) ? payload.executions : [];
|
|
189
|
+
|
|
190
|
+
// Window + name filter.
|
|
191
|
+
const cutoff = Date.now() - lookbackHours * 3600 * 1000;
|
|
192
|
+
const inWindow = all.filter((e) => {
|
|
193
|
+
const t = new Date(e.createdAt || e.startedAt || 0).getTime();
|
|
194
|
+
return isFinite(t) && t >= cutoff;
|
|
195
|
+
});
|
|
196
|
+
const considered = filters
|
|
197
|
+
? inWindow.filter((e) => {
|
|
198
|
+
const wt = String(e.workflowType || '').toLowerCase();
|
|
199
|
+
return filters.some((f) => wt.includes(f));
|
|
200
|
+
})
|
|
201
|
+
: inWindow;
|
|
202
|
+
|
|
203
|
+
console.log(
|
|
204
|
+
`Fetched ${all.length} execution(s); ${inWindow.length} in the last ${lookbackHours}h` +
|
|
205
|
+
`${filters ? `, ${considered.length} after type filter` : ''}.`
|
|
206
|
+
);
|
|
207
|
+
|
|
208
|
+
// ── Group by pipeline (workflowType) ──────────────────────────────
|
|
209
|
+
const byPipeline = new Map();
|
|
210
|
+
for (const e of considered) {
|
|
211
|
+
const wt = e.workflowType || '(unknown)';
|
|
212
|
+
if (!byPipeline.has(wt)) byPipeline.set(wt, []);
|
|
213
|
+
byPipeline.get(wt).push(e);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
let pipelines = [];
|
|
217
|
+
for (const [workflowType, runs] of byPipeline.entries()) {
|
|
218
|
+
const failed = runs.filter((r) => FAILED_STATUSES.has(r.status));
|
|
219
|
+
const succeeded = runs.filter((r) => SUCCEEDED_STATUSES.has(r.status));
|
|
220
|
+
const running = runs.filter((r) => RUNNING_STATUSES.has(r.status));
|
|
221
|
+
// Fail rate over TERMINAL runs only — in-flight runs aren't a verdict yet.
|
|
222
|
+
const terminal = failed.length + succeeded.length;
|
|
223
|
+
const failRate = terminal > 0 ? failed.length / terminal : 0;
|
|
224
|
+
const durations = succeeded.map(durationMsOf).filter((d) => typeof d === 'number');
|
|
225
|
+
const medianDurationMs = median(durations);
|
|
226
|
+
|
|
227
|
+
// Worst run = the most recent failure (by createdAt) if any, else the slowest succeeded run.
|
|
228
|
+
let worstRun;
|
|
229
|
+
const worstFail = failed
|
|
230
|
+
.slice()
|
|
231
|
+
.sort((a, b) => new Date(b.createdAt || 0) - new Date(a.createdAt || 0))[0];
|
|
232
|
+
if (worstFail) {
|
|
233
|
+
const det = failureDetail(worstFail);
|
|
234
|
+
worstRun = {
|
|
235
|
+
executionId: worstFail.executionId,
|
|
236
|
+
status: worstFail.status,
|
|
237
|
+
durationMs: durationMsOf(worstFail),
|
|
238
|
+
startedAt: worstFail.createdAt || worstFail.startedAt,
|
|
239
|
+
...det,
|
|
240
|
+
};
|
|
241
|
+
} else {
|
|
242
|
+
const slow = succeeded
|
|
243
|
+
.slice()
|
|
244
|
+
.sort((a, b) => (durationMsOf(b) || 0) - (durationMsOf(a) || 0))[0];
|
|
245
|
+
if (slow) {
|
|
246
|
+
worstRun = {
|
|
247
|
+
executionId: slow.executionId,
|
|
248
|
+
status: slow.status,
|
|
249
|
+
durationMs: durationMsOf(slow),
|
|
250
|
+
startedAt: slow.createdAt || slow.startedAt,
|
|
251
|
+
};
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
// Flag: enough terminal runs AND failRate at or above the threshold. The >= 3
|
|
256
|
+
// guard keeps a single failed run from flagging a pipeline that's
|
|
257
|
+
// otherwise fine.
|
|
258
|
+
let flagged = false;
|
|
259
|
+
let flagReason;
|
|
260
|
+
if (terminal >= 3 && failRate >= minFailRate) {
|
|
261
|
+
flagged = true;
|
|
262
|
+
flagReason = `failRate ${(failRate * 100).toFixed(0)}% over ${terminal} terminal run(s) (≥ ${(minFailRate * 100).toFixed(0)}% threshold)`;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
pipelines.push({
|
|
266
|
+
workflowType,
|
|
267
|
+
total: runs.length,
|
|
268
|
+
failed: failed.length,
|
|
269
|
+
succeeded: succeeded.length,
|
|
270
|
+
running: running.length,
|
|
271
|
+
failRate: Number(failRate.toFixed(3)),
|
|
272
|
+
medianDurationMs,
|
|
273
|
+
worstRun,
|
|
274
|
+
flagged,
|
|
275
|
+
flagReason,
|
|
276
|
+
});
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
// ── Slow-outlier flag (cross-pipeline) ────────────────────────────
|
|
280
|
+
// A pipeline whose median run is > 3× the median-of-medians is "slow",
|
|
281
|
+
// even if it's not failing. Only meaningful with a few pipelines that
|
|
282
|
+
// actually have durations.
|
|
283
|
+
const meds = pipelines.map((p) => p.medianDurationMs).filter((d) => typeof d === 'number');
|
|
284
|
+
const globalMed = median(meds);
|
|
285
|
+
if (globalMed && globalMed > 0) {
|
|
286
|
+
for (const p of pipelines) {
|
|
287
|
+
if (!p.flagged && typeof p.medianDurationMs === 'number' && p.medianDurationMs > globalMed * 3) {
|
|
288
|
+
p.flagged = true;
|
|
289
|
+
p.flagReason = `median run ${(p.medianDurationMs / 1000).toFixed(0)}s is >3× the project median (${(globalMed / 1000).toFixed(0)}s) — slow outlier`;
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
// Flagged first, then worst failRate, then most runs. Cap to maxPipelines.
|
|
295
|
+
pipelines.sort((a, b) =>
|
|
296
|
+
(Number(b.flagged) - Number(a.flagged)) ||
|
|
297
|
+
(b.failRate - a.failRate) ||
|
|
298
|
+
(b.total - a.total)
|
|
299
|
+
);
|
|
300
|
+
pipelines = pipelines.slice(0, maxPipelines);
|
|
301
|
+
|
|
302
|
+
const flaggedCount = pipelines.filter((p) => p.flagged).length;
|
|
303
|
+
console.log(`Rolled up ${pipelines.length} pipeline(s); ${flaggedCount} flagged.`);
|
|
304
|
+
for (const p of pipelines.filter((x) => x.flagged)) {
|
|
305
|
+
console.log(` ⚠ ${p.workflowType}: ${p.flagReason}`);
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
return {
|
|
309
|
+
projectId,
|
|
310
|
+
lookbackHours,
|
|
311
|
+
scannedAt: new Date().toISOString(),
|
|
312
|
+
totalExecutions: considered.length,
|
|
313
|
+
pipelines,
|
|
314
|
+
};
|
|
315
|
+
},
|
|
316
|
+
};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pipeline-supervisor",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"private": true,
|
|
5
|
+
"type": "module",
|
|
6
|
+
"description": "Zibby managing Zibby — a scheduled supervisor that scans the project's other pipelines, finds the failing/slow ones, and posts human-reviewable improvement proposals to Slack or Lark. Read + propose + notify only (v1 never edits other workflows).",
|
|
7
|
+
"main": "graph.mjs",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"test": "vitest run"
|
|
10
|
+
},
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@zibby/core": "^0.5.1",
|
|
13
|
+
"@zibby/skills": "^0.1.26",
|
|
14
|
+
"zod": "^3.23.0"
|
|
15
|
+
},
|
|
16
|
+
"devDependencies": {
|
|
17
|
+
"vitest": "^2.1.5"
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline-supervisor — input + context schemas.
|
|
3
|
+
*
|
|
4
|
+
* "Zibby managing Zibby." A scheduled workflow that watches the project's
|
|
5
|
+
* OTHER pipelines, finds the ones that are failing / slow / repeatedly
|
|
6
|
+
* erroring, and posts a human-reviewable improvement proposal to Slack or
|
|
7
|
+
* Lark. v1 is strictly READ + PROPOSE + NOTIFY — it never edits another
|
|
8
|
+
* workflow's graph. That's the safe L3 starting point; the auto-PATCH step
|
|
9
|
+
* is a clearly-marked TODO in propose-node.js, deliberately NOT implemented.
|
|
10
|
+
*
|
|
11
|
+
* Trigger payload (inputSchema) carries the three per-run dials a human
|
|
12
|
+
* would actually want to tune at schedule time: how far back to look, how
|
|
13
|
+
* bad a pipeline has to be before we flag it, and an optional name filter.
|
|
14
|
+
* Everything else (the chat destination, the supervisor's read credential)
|
|
15
|
+
* is deploy-time ENV-tab config:
|
|
16
|
+
*
|
|
17
|
+
* Required:
|
|
18
|
+
* ZIBBY_PAT Personal access token (zby_pat_…) the supervisor
|
|
19
|
+
* uses to READ this project's executions across
|
|
20
|
+
* ALL pipelines. The Fargate-injected
|
|
21
|
+
* PROJECT_API_TOKEN is a PROJECT token (authType
|
|
22
|
+
* 'project', no userId) and the /executions +
|
|
23
|
+
* /jobs + /all read routes all require a user
|
|
24
|
+
* identity — so they 401 for a project token.
|
|
25
|
+
* A user PAT is the credential that works. See
|
|
26
|
+
* nodes/scan-pipelines-node.js for the full
|
|
27
|
+
* auth rationale.
|
|
28
|
+
* SLACK_CHANNEL channel id "C012345" or "#name" ─┐ set
|
|
29
|
+
* LARK_RECEIVE_ID oc_… chat id, ou_… open id, or email ─┘ ONE
|
|
30
|
+
*
|
|
31
|
+
* Optional:
|
|
32
|
+
* SUPERVISOR_PROJECT_ID Project UUID to supervise. Defaults to the
|
|
33
|
+
* running project (PROJECT_ID env, injected by the
|
|
34
|
+
* executor) — i.e. the supervisor watches its own
|
|
35
|
+
* project's other pipelines. Set this to point it
|
|
36
|
+
* at a DIFFERENT project the PAT owner can access.
|
|
37
|
+
* SLACK_MENTIONS JSON array — appended to the proposal card.
|
|
38
|
+
* LARK_MENTIONS JSON array — appended to the proposal card.
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
import { z } from 'zod';
|
|
42
|
+
|
|
43
|
+
/**
 * Trigger-payload schema for the pipeline-supervisor workflow.
 *
 * Every field carries a default or is optional, so an empty payload `{}`
 * parses successfully. Each dial is declared as its own constant below and
 * then assembled into the exported object schema.
 */

// Size of the history window, in whole hours (1 to 720, default 24).
const lookbackHoursField = z.number().int().min(1).max(720).default(24)
  .describe('How many hours of execution history to scan across pipelines (1–720, default 24).');

// Failure-rate threshold for calling a pipeline a "problem". The default of
// 0.4 flags anything failing 2 runs in 5 or worse. A noisy team can raise it
// to hear only about near-total breakage; a strict team can lower it to
// catch flakiness early.
const minFailRateField = z.number().min(0).max(1).default(0.4)
  .describe('Minimum failure rate (0–1) for a pipeline to be flagged as a problem. Default 0.4 = failing ≥40% of recent runs.');

// Optional allow-list of workflow type / slug fragments (case-insensitive
// substring match). Lets you supervise just "the deploy ones" without noise
// from every test run. Leave unset to consider every pipeline in the project.
const targetWorkflowTypesField = z.array(z.string().min(1)).optional()
  .describe('Optional: only supervise pipelines whose workflow type/slug matches one of these (case-insensitive substring). Omit to scan all.');

// Upper bound on the number of distinct pipelines handled in a single run.
// Listing executions is cheap; the deeper per-pipeline detail is the
// expensive part, so it is bounded. 25 covers any realistic project.
const maxPipelinesField = z.number().int().min(1).max(100).default(25)
  .describe('Max number of distinct pipelines to analyze in one run (1–100, default 25).');

export const pipelineSupervisorInputSchema = z.object({
  lookbackHours: lookbackHoursField,
  minFailRate: minFailRateField,
  targetWorkflowTypes: targetWorkflowTypesField,
  maxPipelines: maxPipelinesField,
});
|
|
67
|
+
|
|
68
|
+
/**
 * Shared-context schema for the pipeline-supervisor graph.
 *
 * One optional slot per node output (scan_pipelines, propose_improvements,
 * notify) plus the runner-injected workspace path. The nested shapes are
 * declared as named sub-schemas first and composed at the bottom.
 */

// One concrete run the proposer can cite as an example
// ("failed on step Y at 14:02"). Every field is optional.
const worstRunSchema = z.object({
  executionId: z.string().optional(),
  status: z.string().optional(),
  durationMs: z.number().optional(),
  failedStep: z.string().optional(),
  errorSummary: z.string().optional(),
  startedAt: z.string().optional(),
});

// Health roll-up for a single pipeline. A "pipeline" is one workflow
// type/slug within the project; executions are grouped by that identity.
const pipelineHealthSchema = z.object({
  workflowType: z.string(),
  workflowUuid: z.string().optional(),
  total: z.number(),
  failed: z.number(),
  succeeded: z.number(),
  running: z.number(),
  failRate: z.number(),
  // Median wall-clock duration (ms) of completed runs; this is the "slow" signal.
  medianDurationMs: z.number().optional(),
  worstRun: worstRunSchema.optional(),
  // True when the pipeline crossed minFailRate (or the slow threshold).
  flagged: z.boolean(),
  flagReason: z.string().optional(),
});

// scan_pipelines output (deterministic node): recent executions pulled via
// the Zibby REST API (PAT-authed) and rolled up per pipeline into the health
// summary the proposer reasons over.
const scanPipelinesSchema = z.object({
  projectId: z.string().optional(),
  lookbackHours: z.number().optional(),
  scannedAt: z.string().optional(),
  totalExecutions: z.number().optional(),
  pipelines: z.array(pipelineHealthSchema),
});

// A single reviewable improvement proposal for one flagged pipeline.
const proposalSchema = z.object({
  workflowType: z.string(),
  problem: z.string(),
  // Constrained so the UI / a future auto-PATCH step can route on the kind
  // of change. Maps to the four moves in the brief, plus a catch-all.
  changeKind: z.enum([
    'add_test_gate',
    'tweak_prompt',
    'add_human_approval_gate',
    'drop_redundant_step',
    'other',
  ]),
  suggestion: z.string(),
  evidence: z.string().optional(),
  confidence: z.number().min(0).max(1).optional(),
});

// Where a notification went; the whole object and each field may be null or absent.
const dispatchRecipientSchema = z.object({
  kind: z.enum(['channel', 'user_dm', 'usergroup']).nullish(),
  id: z.string().nullish(),
  label: z.string().nullish(),
}).nullish();

// Outcome of one delivery attempt.
const dispatchEntrySchema = z.object({
  status: z.enum(['sent', 'skipped', 'failed']),
  recipient: dispatchRecipientSchema,
  proposalCount: z.number().nullish(),
  messageTs: z.string().nullish(), // Slack
  messageId: z.string().nullish(), // Lark
  detail: z.string().nullish(),
});

// Counters over all delivery attempts.
const dispatchSummarySchema = z.object({
  total: z.number(),
  sent: z.number(),
  skipped: z.number(),
  failed: z.number(),
});

export const pipelineSupervisorContextSchema = z.object({
  workspace: z.string().optional()
    .describe('Workspace path — runner-injected; the supervisor doesn\'t need it but graph.run requires it.'),

  scan_pipelines: scanPipelinesSchema.optional(),

  // propose_improvements output (LLM node): one concrete, reviewable
  // improvement proposal per flagged pipeline.
  propose_improvements: z.object({
    proposals: z.array(proposalSchema),
  }).optional(),

  // notify output (LLM + SKILLS.CHAT_NOTIFY): posts ONE review card
  // summarizing the proposals to the configured Slack or Lark destination.
  notify: z.object({
    dispatched: z.array(dispatchEntrySchema),
    summary: dispatchSummarySchema,
  }).optional(),
});
|
package/sentry-triage/icon.png
CHANGED
|
Binary file
|