@zibby/workflow-templates 0.9.2 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,6 @@
8
8
  import { WorkflowAgent, WorkflowGraph } from '@zibby/core';
9
9
  import {
10
10
  preflightNode,
11
- cacheReplayNode,
12
11
  executeLiveNode,
13
12
  generateScriptNode,
14
13
  } from './nodes/index.mjs';
@@ -25,40 +24,40 @@ export class BrowserTestAutomationAgent extends WorkflowAgent {
25
24
  .setInputSchema(browserTestAutomationInputSchema)
26
25
  .setContextSchema(browserTestAutomationContextSchema);
27
26
 
27
+ // Branch route fns — shared by each decision node's condition AND its
28
+ // labeled edges so the logic lives in exactly one place.
29
+ const routeHasAssertions = (state) =>
30
+ (state.preflight?.assertions || []).length > 0 ? 'execute_live' : 'END';
31
+ const routeRanOk = (state) => {
32
+ const r = state.execute_live;
33
+ return ((r?.steps?.length > 0) || (r?.actions?.length > 0)) ? 'generate_script' : 'END';
34
+ };
35
+
28
36
  graph.addNode('preflight', preflightNode);
29
- graph.addNode('cache_replay', cacheReplayNode);
37
+ // Explicit decision nodes → each branch renders as a clean Condition diamond
38
+ // instead of hanging off a work node.
39
+ graph.addConditionalNode('has_assertions', { condition: routeHasAssertions });
30
40
  graph.addNode('execute_live', executeLiveNode);
41
+ graph.addConditionalNode('ran_ok', { condition: routeRanOk });
31
42
  graph.addNode('generate_script', generateScriptNode);
32
43
 
33
44
  graph.setEntryPoint('preflight');
34
45
 
35
- // Short-circuit when preflight produced nothing usable. Triggered when:
36
- // - the user invoked `zibby workflow run browser-tests` with no spec
37
- // (state.input is undefined / empty), so preflight had nothing to
38
- // analyze and the LLM came back with `assertions: []`
39
- // - the spec is so vague the LLM can't extract any assertions
40
- // Without this gate the graph would barrel into execute_live, fire up
41
- // a real browser session + a second expensive LLM call, then waste
42
- // ~30s before failing — bad UX and bad bill.
43
- graph.addConditionalEdges('preflight', (state) => {
44
- const assertions = state.preflight?.assertions || [];
45
- return assertions.length > 0 ? 'cache_replay' : 'END';
46
- });
47
-
48
- // Lever-#2 fork: cache_replay attempted a Playwright-only replay of
49
- // a prior successful action sequence. On hit it side-wrote
50
- // state.execute_live with synthesized output, so we can skip
51
- // execute_live and jump straight to generate_script — zero LLM
52
- // tokens. On miss / replay failure / cold cache, fall through to
53
- // the normal LLM-driven execute_live path.
54
- graph.addConditionalEdges('cache_replay', (state) => {
55
- return state.cache_replay?.hit === true ? 'generate_script' : 'execute_live';
46
+ // Short-circuit when preflight produced nothing usable (no spec / too vague →
47
+ // assertions:[]). Without this the graph would barrel into execute_live,
48
+ // fire up a real browser + a second expensive LLM call, and waste ~30s
49
+ // before failing. Routed through an explicit decision node so it renders as
50
+ // a Condition diamond.
51
+ graph.addEdge('preflight', 'has_assertions');
52
+ graph.addConditionalEdges('has_assertions', routeHasAssertions, {
53
+ labels: { execute_live: 'has assertions', END: 'no assertions' },
56
54
  });
57
55
 
58
- graph.addConditionalEdges('execute_live', (state) => {
59
- const result = state.execute_live;
60
- const hasExecution = (result?.steps?.length > 0) || (result?.actions?.length > 0);
61
- return hasExecution ? 'generate_script' : 'END';
56
+ // Did the live run actually capture any browser steps? If yes, turn them
57
+ // into a Playwright script; if it produced nothing, end (nothing to script).
58
+ graph.addEdge('execute_live', 'ran_ok');
59
+ graph.addConditionalEdges('ran_ok', routeRanOk, {
60
+ labels: { generate_script: 'captured steps', END: 'nothing to script' },
62
61
  });
63
62
 
64
63
  graph.addEdge('generate_script', 'END');
@@ -1,4 +1,3 @@
1
1
  export { preflightNode } from './preflight.mjs';
2
- export { cacheReplayNode } from './cache-replay.mjs';
3
2
  export { executeLiveNode } from './execute-live.mjs';
4
3
  export { generateScriptNode } from './generate-script.mjs';
@@ -50,7 +50,9 @@ export function buildAnalysisGraph(graph) {
50
50
  prompt: generateTestCasesPrompt
51
51
  })
52
52
  .addNode('finalize', finalizeNode)
53
- .setNodeType('validation_check', 'decision')
53
+ // No setNodeType('validation_check','decision') needed — addConditionalNode
54
+ // makes it render as a Condition diamond automatically (engine derives the
55
+ // 'decision' display type from the ConditionalNode class).
54
56
  .setEntryPoint('setup')
55
57
  .addEdge('setup', 'analyze_ticket')
56
58
  .addEdge('analyze_ticket', 'validation_check')
package/index.js CHANGED
@@ -9,7 +9,7 @@ export const TEMPLATES = {
9
9
  'browser-test-automation': {
10
10
  name: 'browser-test-automation',
11
11
  displayName: 'Browser Test Automation (Full Workflow)',
12
- description: 'Complete browser test automation workflow with title generation, live execution, and script generation',
12
+ description: 'End-to-end browser tests authored as agent workflows. Extracts a title and assertion checklist from a plain-English spec, runs it live with AI + a real browser, then emits a Playwright script with stable selectors.',
13
13
  path: join(__dirname, 'browser-test-automation'),
14
14
  default: true,
15
15
  // Suggested slug for `zibby workflow new <slug> -t <name>`. Used in
@@ -73,7 +73,7 @@ export const TEMPLATES = {
73
73
  'code-analysis': {
74
74
  name: 'code-analysis',
75
75
  displayName: 'Code Analysis (Ticket → Code + Tests)',
76
- description: 'Multi-node workflow that analyzes a Jira ticket against a code repo, generates code changes, and emits test cases',
76
+ description: 'Takes a Jira ticket end-to-end against a code repo: analyzes feasibility, generates scoped code changes, and emits matching test cases. An LLM gate skips code-gen when the ticket can\'t be implemented as-is.',
77
77
  path: join(__dirname, 'code-analysis'),
78
78
  defaultSlug: 'ticket-analyzer',
79
79
  // Runtime deps the scaffolded copy needs in addition to @zibby/core.
@@ -123,7 +123,7 @@ export const TEMPLATES = {
123
123
  'generate-test-cases': {
124
124
  name: 'generate-test-cases',
125
125
  displayName: 'Generate Test Cases (Diff → Test Specs)',
126
- description: 'Standalone slice takes an existing code diff and generates plain-English test specifications for it. Skips ticket-analysis and code-gen.',
126
+ description: 'Hand it a PR diff; get back prioritized, plain-English test specs. Skips ticket-analysis and code-gen entirely, exploring the codebase to ground each spec in real components. Specs are runnable by any browser-driving agent.',
127
127
  path: join(__dirname, 'generate-test-cases'),
128
128
  defaultSlug: 'tests-from-diff',
129
129
  deps: {
@@ -148,7 +148,7 @@ export const TEMPLATES = {
148
148
  'Mood: friendly, approachable, slightly handmade. Like a children\'s book illustration applied to a developer tool.',
149
149
  'NO text, NO photo-realism, NO sleek 3D render — this one is hand-drawn and warm.',
150
150
  ].join('\n'),
151
- tags: ['Code Review', 'Testing'],
151
+ tags: ['Testing'],
152
152
  capabilities: [
153
153
  'Skips ticket analysis — feed it the diff directly',
154
154
  'LLM explores the codebase to ground test steps in real components',
@@ -170,7 +170,7 @@ export const TEMPLATES = {
170
170
  'notify-slack': {
171
171
  name: 'notify-slack',
172
172
  displayName: 'Notify Slack',
173
- description: 'Reusable child workflow — posts a structured Block Kit alert to a Slack channel. Dispatched by other workflows (Sentry triage, autofix, incident) via sub-graph.',
173
+ description: 'Reusable child workflow — posts a severity-coded Block Kit alert to a Slack channel. Dispatched as a sub-graph from any parent (Sentry triage, autofix, incident). Returns the message timestamp so the parent can thread follow-ups.',
174
174
  path: join(__dirname, 'notify-slack'),
175
175
  defaultSlug: 'alert-slack',
176
176
  deps: { zod: '^3.23.0 || ^4.0.0', '@zibby/skills': '^0.1.28' },
@@ -212,7 +212,7 @@ export const TEMPLATES = {
212
212
  'notify-lark': {
213
213
  name: 'notify-lark',
214
214
  displayName: 'Notify Lark',
215
- description: 'Reusable child workflow — posts a structured Interactive Card to a Lark / Feishu chat. Dispatched by other workflows via sub-graph.',
215
+ description: 'Reusable child workflow — posts a severity-coded Interactive Card to a Lark / Feishu chat. Dispatched as a sub-graph from any parent. Auto-detects the recipient type and returns the message id for threaded replies.',
216
216
  path: join(__dirname, 'notify-lark'),
217
217
  defaultSlug: 'alert-lark',
218
218
  deps: { zod: '^3.23.0 || ^4.0.0', '@zibby/skills': '^0.1.28' },
@@ -257,7 +257,7 @@ export const TEMPLATES = {
257
257
  'notify-notion': {
258
258
  name: 'notify-notion',
259
259
  displayName: 'Notify Notion',
260
- description: 'Reusable child workflow — creates a Notion page in a database OR appends blocks to an existing page. Dispatched by other workflows (digest, incident archives, weekly reports) via sub-graph.',
260
+ description: 'Reusable child workflow — creates a Notion page in a database OR appends blocks to an existing page. Renders rich report objects to native Notion blocks and returns pageId + pageUrl. Dispatched as a sub-graph from any parent (digests, incident archives, weekly reports).',
261
261
  path: join(__dirname, 'notify-notion'),
262
262
  defaultSlug: 'archive-notion',
263
263
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
@@ -283,8 +283,7 @@ export const TEMPLATES = {
283
283
  ].join('\n'),
284
284
  tags: ['Notifications', 'Docs', 'Reports'],
285
285
  capabilities: [
286
- 'Create a new page in a Notion database (POST /v1/pages)',
287
- 'Append blocks to an existing page (PATCH /v1/blocks/{pageId}/children)',
286
+ 'Create a new page in a Notion database, or append blocks to an existing page',
288
287
  'Renders rich report-objects to native Notion blocks (headings, callouts, tables, code, embeds)',
289
288
  'Severity-mapped page-icon emoji + colored callout backgrounds',
290
289
  'Sub-graph dispatchable from any parent workflow',
@@ -303,7 +302,7 @@ export const TEMPLATES = {
303
302
  'sentry-triage': {
304
303
  name: 'sentry-triage',
305
304
  displayName: 'Sentry Triage Bot',
306
- description: 'Hourly Sentry triage that classifies new issues with an LLM rubric (CRITICAL/HIGH/MEDIUM/LOW/NOISE) and posts above-threshold alerts to your Slack or Lark — whichever you have connected.',
305
+ description: 'Hourly Sentry triage that classifies new issues with an auditable LLM rubric (CRITICAL…NOISE) and posts above-threshold alerts to your Slack or Lark — whichever you have connected. Batches related issues and @-mentions on-call only for CRITICAL.',
307
306
  path: join(__dirname, 'sentry-triage'),
308
307
  defaultSlug: 'sentry-triage',
309
308
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
@@ -347,7 +346,7 @@ export const TEMPLATES = {
347
346
  'ai-spend-weekly-digest': {
348
347
  name: 'ai-spend-weekly-digest',
349
348
  displayName: 'AI Spend Weekly Digest',
350
- description: 'Weekly digest of OpenAI / Anthropic / Cursor admin billing pulls trailing-28d cost+usage across all three providers, detects per-project anomalies vs 3-week baseline, and posts a rich report card to Lark and/or Slack via in-process sub-graph dispatch.',
349
+ description: 'Weekly digest of OpenAI / Anthropic / Cursor admin billing. Pulls cost+usage across all three providers in parallel, detects per-project anomalies vs a 3-week baseline, and posts a rich report card to Lark and/or Slack. Partial-failure resilient — one provider being down doesn\'t kill the run.',
351
350
  path: join(__dirname, 'ai-spend-weekly-digest'),
352
351
  defaultSlug: 'ai-spend-weekly-digest',
353
352
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
@@ -376,9 +375,8 @@ export const TEMPLATES = {
376
375
  'Pulls org-wide cost+usage from OpenAI, Anthropic, and Cursor admin APIs in parallel',
377
376
  'Joins customer attribution from provider-native project / workspace / member metadata',
378
377
  'Detects per-project anomalies (σ + ratio) against a 3-week rolling baseline',
379
- 'Drafts the leadership-grade narrative with an LLM, falls back to deterministic copy if model is unavailable',
380
- 'Posts a rich Block-Kit / Lark Card report (trend bars, top spenders table, anomalies, provider breakdown)',
381
- 'Fan-out to Lark + Slack in parallel — partial-failure resilient',
378
+ 'Drafts a leadership-grade narrative with an LLM, with deterministic copy as fallback',
379
+ 'Posts a rich Block-Kit / Lark Card report and fans out to Slack + Lark in parallel',
382
380
  ],
383
381
  conversationStarters: [
384
382
  'Run a weekly AI spend digest every Monday morning',
@@ -393,7 +391,7 @@ export const TEMPLATES = {
393
391
  'pipeline-supervisor': {
394
392
  name: 'pipeline-supervisor',
395
393
  displayName: 'Pipeline Supervisor',
396
- description: 'Zibby managing Zibby — a scheduled supervisor that scans the project\'s other pipelines, flags the ones failing or running slow, and posts human-reviewable improvement proposals (add a test gate / tweak a prompt / add an approval gate / drop a redundant step) to Slack or Lark. Read + propose + notify only; it never edits another workflow.',
394
+ description: 'Zibby managing Zibby — a scheduled supervisor that scans the project\'s other pipelines, flags the ones failing or running slow, and posts one evidence-backed improvement proposal per problem pipeline to Slack or Lark. Read + propose + notify only; a human applies the change, the supervisor never edits a workflow.',
397
395
  path: join(__dirname, 'pipeline-supervisor'),
398
396
  defaultSlug: 'pipeline-supervisor',
399
397
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
@@ -439,7 +437,7 @@ export const TEMPLATES = {
439
437
  'ticket-triage': {
440
438
  name: 'ticket-triage',
441
439
  displayName: 'Ticket Triage',
442
- description: 'Tracker-neutral triage building block — LLM-classifies one ticket (from any tracker) into a severity (CRITICAL…NOISE), a shouldAutofix decision, and a human summary. The first block of the bug-autofix pipeline; usable on its own.',
440
+ description: 'Tracker-neutral triage building block — an LLM classifies one ticket into a severity (CRITICAL…NOISE), a shouldAutofix decision, and a human summary, all with auditable reasoning. The first block of the bug-autofix pipeline, usable on its own (Jira flows end-to-end in v1).',
443
441
  path: join(__dirname, 'ticket-triage'),
444
442
  defaultSlug: 'ticket-triage',
445
443
  deps: { zod: '^3.23.0' },
@@ -479,7 +477,7 @@ export const TEMPLATES = {
479
477
  'code-fix': {
480
478
  name: 'code-fix',
481
479
  displayName: 'Code Fix (clone → fix → PR)',
482
- description: 'Clones a repo into an isolated workspace, has an agent fix one ticket with an inline test-gate (run the tests, feed failures back for one retry), and opens a PR. Output: { pr_url, branch }. The "do the work" block of the bug-autofix pipeline.',
480
+ description: 'Hand it a ticket + a repo; get back a tested fix PR. Clones the repo into an isolated workspace, has an agent fix the ticket behind an inline test-gate (run the suite, feed failures back for one retry), and opens a GitHub PR { pr_url, branch }. Stops at the PR a human reviews and merges.',
483
481
  path: join(__dirname, 'code-fix'),
484
482
  defaultSlug: 'code-fix',
485
483
  deps: { zod: '^3.23.0', axios: '^1.6.0' },
@@ -521,7 +519,7 @@ export const TEMPLATES = {
521
519
  'tracker-writeback': {
522
520
  name: 'tracker-writeback',
523
521
  displayName: 'Tracker Writeback',
524
- description: 'Closes the loop after triage/fix — transitions the tracker issue (Jira → In Review when a PR opened), comments the PR link + verdict, and posts a Slack or Lark note. The writeback block of the bug-autofix pipeline (Jira in v1; GitHub / Linear are extension points).',
522
+ description: 'Closes the loop after triage/fix — transitions the Jira issue (→ In Review when a PR opened), comments the PR link + verdict, and posts a short Slack or Lark note. Runs on both the autofixed and notify-only branches. The writeback block of the bug-autofix pipeline (Jira in v1).',
525
523
  path: join(__dirname, 'tracker-writeback'),
526
524
  defaultSlug: 'tracker-writeback',
527
525
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
@@ -563,7 +561,7 @@ export const TEMPLATES = {
563
561
  'bug-autofix': {
564
562
  name: 'bug-autofix',
565
563
  displayName: 'Bug Autofix Pipeline',
566
- description: 'The composable bug-autofix SDLC pipeline. Polls a tracker, then connects three reusable building blocks via sub-graph dispatch: ticket-triage → (autofixable?) → code-fix → tracker-writeback. High-severity autofixable bugs get a tested fix PR opened and the ticket moved to In Review; everything else is triaged and a human is notified. Stops at the PR — a human merges.',
564
+ description: 'The composable bug-autofix pipeline. Polls a tracker, then chains three reusable blocks via sub-graph: ticket-triage → code-fix → tracker-writeback. High-severity autofixable bugs get a tested fix PR and the ticket moved to In Review; everything else is triaged and a human notified. Stops at the PR — a human merges.',
567
565
  path: join(__dirname, 'bug-autofix'),
568
566
  defaultSlug: 'bug-autofix',
569
567
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
@@ -607,7 +605,7 @@ export const TEMPLATES = {
607
605
  'github-ai-scout': {
608
606
  name: 'github-ai-scout',
609
607
  displayName: 'Daily GitHub AI Scout',
610
- description: 'A daily scout that searches GitHub for new/trending AI projects, scores them against YOUR configurable rubric with an LLM, and posts a Slack shortlist for a human to review. General + config-driven — the query, recency/star thresholds, and rubric are all deploy-time inputs. Proposes a shortlist; never stars, forks, or auto-adds anything.',
608
+ description: 'A daily scout that searches GitHub for trending AI projects, scores each against YOUR plain-English rubric with an LLM, and posts a Slack shortlist for a human to review. Query, recency/star thresholds, and rubric are all deploy-time inputs. Proposes a shortlist only — never stars, forks, or auto-adds.',
611
609
  path: join(__dirname, 'github-ai-scout'),
612
610
  defaultSlug: 'github-ai-scout',
613
611
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
@@ -633,12 +631,11 @@ export const TEMPLATES = {
633
631
  ].join('\n'),
634
632
  tags: ['Reports', 'Notifications'],
635
633
  capabilities: [
636
- 'Searches GitHub daily for newly-created, trending repositories matching your query',
637
- 'Filters by recency (created within N days) and a minimum star count',
634
+ 'Searches GitHub daily for trending repos matching your query, filtered by recency + minimum stars',
638
635
  'Scores every candidate 1-5 against your plain-English rubric with an LLM',
639
636
  'Keeps a tight shortlist of the best finds — drops abandoned demos and trivial wrappers',
640
637
  'Dedups against repos you already track via an excludeRepos allow-list',
641
- 'Posts a numbered Block-Kit shortlist to Slack — stars, language, license, one-line reason, link',
638
+ 'Posts a numbered Block-Kit shortlist to Slack — stars, language, license, reason, link',
642
639
  ],
643
640
  conversationStarters: [
644
641
  'Scout new AI agent frameworks on GitHub every morning',
@@ -653,15 +650,17 @@ export const TEMPLATES = {
653
650
  'github-code-review': {
654
651
  name: 'github-code-review',
655
652
  displayName: 'GitHub Code Review',
656
- description: 'Reviews a GitHub pull request with an LLM and posts the review back to the PR — a summary plus inline comments and an APPROVE / COMMENT / REQUEST_CHANGES verdict. If the PR is linked to a Jira or Linear ticket (and that integration is connected), it ALSO validates the change against the ticket\'s acceptance criteria and renders an objectives-met table, the way CodeRabbit does. GitHub required; Jira/Linear optional (ticket context only).',
653
+ description: 'Reviews a GitHub PR with an LLM and posts back a summary, inline comments, and an APPROVE / COMMENT / REQUEST_CHANGES verdict. Agent-driven: supply your own review rules; it can pull context from a linked Jira/Linear ticket and a Notion page, validating against acceptance criteria CodeRabbit-style. GitHub required; the rest optional.',
657
654
  path: join(__dirname, 'github-code-review'),
658
655
  defaultSlug: 'github-code-review',
659
656
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
660
657
  features: [
661
- '3-node graph: fetch_pr (github) → fetch_ticket (optional) → review (LLM)',
658
+ 'Graph: fetch_pr (github) → fetch_ticket (optional) → fetch_context (optional Notion) → review (LLM) → notify (optional Slack/Lark)',
662
659
  'Posts a real PR review via github_create_review — summary body + inline comments + verdict',
660
+ 'AGENT-DRIVEN: supply your own authoritative review rules with conditions (reviewRules) the agent treats as binding',
663
661
  'DYNAMIC prompt: linked ticket → objectives-met table vs ticket acceptance criteria; no ticket → standalone diff review (never mentions a ticket)',
664
- 'Jira/Linear are OPTIONAL — fetched via direct tool calls so they don\'t gate deploy',
662
+ 'OPTIONAL context sources — Jira/Linear ticket + a Notion page all fetched via direct tool calls so none gate deploy',
663
+ 'OPTIONAL notify — posts the verdict + summary to Slack or Lark when done (never blocks the review)',
665
664
  'Webhook-triggered on a PR: { owner, repo, prNumber }',
666
665
  'Reviews + comments only — never merges, closes, or pushes',
667
666
  ],
@@ -678,17 +677,16 @@ export const TEMPLATES = {
678
677
  ].join('\n'),
679
678
  tags: ['Code Review'],
680
679
  capabilities: [
681
- 'Reads the PR diff + changed files and reviews correctness, bugs, security, tests, design, and style',
680
+ 'Reviews the PR diff for correctness, bugs, security, tests, design, and style',
682
681
  'Posts the review back to the PR: a summary, inline comments on specific lines, and a verdict',
683
682
  'When a ticket is linked, validates the change against its acceptance criteria with an objectives-met table',
684
- 'Optionally pulls ticket context from Jira or Linear but neither is required to run',
685
- 'Webhook-driven: point a PR-opened/synchronize hook at it',
686
- 'Proposes feedback only — a human still decides and merges',
683
+ 'Apply your own authoritative review rules with conditions (e.g. "REQUEST_CHANGES if billing/ changes without tests")',
684
+ 'Optionally pull Jira/Linear ticket + Notion context and notify Slack or Lark — none required, none block the review',
687
685
  ],
688
686
  conversationStarters: [
689
687
  'Review pull request #412 in acme/web-app',
690
- 'Auto-review every new PR and post inline comments',
691
- 'Review this PR and check it against the linked Jira ticket\'s acceptance criteria',
688
+ 'Auto-review every PR against our team rules and ping #code-reviews on Slack',
689
+ 'Review this PR against the linked Jira ticket and our Notion engineering standards',
692
690
  'Block the merge if a PR introduces a security or missing-test issue',
693
691
  ],
694
692
  },
@@ -698,15 +696,17 @@ export const TEMPLATES = {
698
696
  'gitlab-code-review': {
699
697
  name: 'gitlab-code-review',
700
698
  displayName: 'GitLab Code Review',
701
- description: 'Reviews a GitLab merge request with an LLM and posts the review back to the MR — a summary note plus inline discussion comments and a clear verdict. If the MR is linked to a Jira or Linear ticket (and that integration is connected), it ALSO validates the change against the ticket\'s acceptance criteria and renders an objectives-met table, the way CodeRabbit does. GitLab required; Jira/Linear optional (ticket context only).',
699
+ description: 'Reviews a GitLab MR with an LLM and posts back a summary note, inline discussion comments, and a verdict. Agent-driven: supply your own review rules; it can pull context from a linked Jira/Linear ticket and a Notion page, validating against acceptance criteria CodeRabbit-style. Works on gitlab.com + self-hosted. GitLab required; the rest optional.',
702
700
  path: join(__dirname, 'gitlab-code-review'),
703
701
  defaultSlug: 'gitlab-code-review',
704
702
  deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
705
703
  features: [
706
- '3-node graph: fetch_mr (gitlab) → fetch_ticket (optional) → review (LLM)',
704
+ 'Graph: fetch_mr (gitlab) → fetch_ticket (optional) → fetch_context (optional Notion) → review (LLM) → notify (optional Slack/Lark)',
707
705
  'Posts a real MR review via gitlab_create_mr_review — summary note + inline discussions',
706
+ 'AGENT-DRIVEN: supply your own authoritative review rules with conditions (reviewRules) the agent treats as binding',
708
707
  'DYNAMIC prompt: linked ticket → objectives-met table vs ticket acceptance criteria; no ticket → standalone diff review (never mentions a ticket)',
709
- 'Jira/Linear are OPTIONAL — fetched via direct tool calls so they don\'t gate deploy',
708
+ 'OPTIONAL context sources — Jira/Linear ticket + a Notion page all fetched via direct tool calls so none gate deploy',
709
+ 'OPTIONAL notify — posts the verdict + summary to Slack or Lark when done (never blocks the review)',
710
710
  'Webhook-triggered on an MR: { projectId, mrIid }',
711
711
  'Works against gitlab.com and self-hosted instances',
712
712
  ],
@@ -723,21 +723,82 @@ export const TEMPLATES = {
723
723
  ].join('\n'),
724
724
  tags: ['Code Review'],
725
725
  capabilities: [
726
- 'Reads the MR diff + changed files and reviews correctness, bugs, security, tests, design, and style',
727
- 'Posts the review back to the MR: a summary note, inline discussion comments on specific lines, and a verdict',
726
+ 'Reviews the MR diff for correctness, bugs, security, tests, design, and style',
727
+ 'Posts the review back to the MR: a summary note, inline discussion comments, and a verdict',
728
728
  'When a ticket is linked, validates the change against its acceptance criteria with an objectives-met table',
729
- 'Optionally pulls ticket context from Jira or Linear but neither is required to run',
730
- 'Works against gitlab.com and self-hosted GitLab instances',
731
- 'Proposes feedback only — a human still decides, approves, and merges',
729
+ 'Apply your own authoritative review rules with conditions (e.g. "REQUEST_CHANGES if billing/ changes without tests")',
730
+ 'Works on gitlab.com + self-hosted; optionally pulls Jira/Linear + Notion context and notifies Slack or Lark',
732
731
  ],
733
732
  conversationStarters: [
734
733
  'Review merge request !73 in acme/web-app',
735
- 'Auto-review every new MR and post inline discussions',
736
- 'Review this MR and check it against the linked Jira ticket\'s acceptance criteria',
734
+ 'Auto-review every MR against our team rules and ping our Lark group',
735
+ 'Review this MR against the linked Jira ticket and our Notion engineering standards',
737
736
  'Flag any MR that introduces a security or missing-test issue',
738
737
  ],
739
738
  },
740
739
  },
740
+
741
+ // ── github-comment-response: in-thread reply companion to github-code-review ─
742
+ // The CodeRabbit-style conversational back-and-forth: a HUMAN replies to the
743
+ // review bot's comment in a PR thread, and this agent replies IN THAT SAME
744
+ // THREAD (concede if they're right, clarify with code evidence if not) — NOT a
745
+ // fresh full review. Resolved by github-webhook.js as the reply agent
746
+ // (link.commentResponseSlug || GITHUB_COMMENT_RESPONSE_SLUG || link.agentSlug).
747
+ // A COMPANION to github-code-review — kept off the public browse grid by omitting
748
+ // `marketplace` (NOTE: no `child-workflow` tag is declared on this entry; confirm vs notify-slack / ticket-triage).
749
+ // requiredIntegrations (GitHub) is DERIVED from graph.mjs at sync time, like
750
+ // every other template — not declared here.
751
+ 'github-comment-response': {
752
+ name: 'github-comment-response',
753
+ displayName: 'GitHub Comment Response',
754
+ description: 'Replies conversationally, in-thread, to a human\'s reply on a GitHub PR review comment — the CodeRabbit-style back-and-forth. Reads the thread (bot comment + human reply + anchored diff) and posts a focused reply to the SAME thread, not a fresh review. The conversational companion to GitHub Code Review. GitHub required.',
755
+ path: join(__dirname, 'github-comment-response'),
756
+ defaultSlug: 'github-comment-response',
757
+ deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
758
+ features: [
759
+ 'Single-node graph: respond (github) — reads the review-comment thread + replies in-thread',
760
+ 'Replies in the SAME thread (github_reply_review_thread) — not a fresh full review',
761
+ 'Top-level/summary comments answered via github_reply_issue_comment',
762
+ 'Concedes when the human is right; clarifies with code evidence when not',
763
+ 'Webhook-triggered on a reply to a bot thread: { owner, repo, prNumber, commentId, threadType, triggeringComment }',
764
+ 'Graceful degradation: posts an honest acknowledge-and-clarify reply if the thread read fails',
765
+ ],
766
+ // COMPANION agent — intentionally NO `marketplace` field so the sync
767
+ // (backend/scripts/marketplace-sync-from-templates.mjs) NEVER publishes it
768
+ // to the public browse grid. Registry entry kept so `zibby template add
769
+ // github-comment-response` resolves it for CLI direct-deploy alongside
770
+ // github-code-review. Resolved at runtime by gitlab/github-webhook.js as the
771
+ // reply agent (link.commentResponseSlug || *_COMMENT_RESPONSE_SLUG || agentSlug).
772
+ },
773
+
774
+ // ── gitlab-comment-response: in-thread reply companion to gitlab-code-review ─
775
+ // The GitLab analog of github-comment-response. A HUMAN replies to the review
776
+ // bot's note in an MR discussion, and this agent replies IN THAT SAME
777
+ // DISCUSSION — NOT a fresh review. Resolved by gitlab-webhook.js as the reply
778
+ // agent (link.commentResponseSlug || GITLAB_COMMENT_RESPONSE_SLUG ||
779
+ // link.agentSlug). A COMPANION to gitlab-code-review — declares no `marketplace`
780
+ // field, so it never reaches the public browse grid. requiredIntegrations (GitLab) is
781
+ // DERIVED from graph.mjs at sync time, like every other template.
782
+ 'gitlab-comment-response': {
783
+ name: 'gitlab-comment-response',
784
+ displayName: 'GitLab Comment Response',
785
+ description: 'Replies conversationally, in-thread, to a human\'s reply on a GitLab MR review note — the CodeRabbit-style back-and-forth. Reads the discussion (bot note + human reply + anchored diff) and posts a focused reply to the SAME discussion, not a fresh review. The conversational companion to GitLab Code Review. GitLab required.',
786
+ path: join(__dirname, 'gitlab-comment-response'),
787
+ defaultSlug: 'gitlab-comment-response',
788
+ deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
789
+ features: [
790
+ 'Single-node graph: respond (gitlab) — reads the MR discussion + replies in-thread',
791
+ 'Replies in the SAME discussion (gitlab_reply_discussion) — not a fresh full review',
792
+ 'General/non-threaded MR comments answered via gitlab_post_mr_note',
793
+ 'Concedes when the human is right; clarifies with code evidence when not',
794
+ 'Webhook-triggered on a reply to a bot discussion: { projectId, mrIid, discussionId, threadType, triggeringComment }',
795
+ 'Works against gitlab.com and self-hosted instances; graceful degradation on discussion-read failure',
796
+ ],
797
+ // COMPANION agent — intentionally NO `marketplace` field (see the
798
+ // github-comment-response note above): never synced to the public grid,
799
+ // but kept in the registry so `zibby template add gitlab-comment-response`
800
+ // resolves it for CLI direct-deploy alongside gitlab-code-review.
801
+ },
741
802
  };
742
803
 
743
804
  export class TemplateFactory {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zibby/workflow-templates",
3
- "version": "0.9.2",
3
+ "version": "0.9.5",
4
4
  "description": "Built-in workflow templates for Zibby — browser-test-automation, code-analysis, generate-test-cases, notify-slack, notify-lark, notify-notion, sentry-triage.",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -45,10 +45,10 @@
45
45
  "homepage": "https://zibby.dev",
46
46
  "repository": {
47
47
  "type": "git",
48
- "url": "https://github.com/ZibbyHQ/workflow-templates"
48
+ "url": "https://github.com/ZibbyDev/workflow-templates"
49
49
  },
50
50
  "bugs": {
51
- "url": "https://github.com/ZibbyHQ/workflow-templates/issues"
51
+ "url": "https://github.com/ZibbyDev/workflow-templates/issues"
52
52
  },
53
53
  "files": [
54
54
  "browser-test-automation/",
@@ -74,15 +74,15 @@
74
74
  },
75
75
  "dependencies": {
76
76
  "@anthropic-ai/sdk": "^0.88.0",
77
- "@zibby/agent-workflow": "^0.4.2",
78
- "@zibby/skills": "^0.1.27",
77
+ "@zibby/agent-workflow": "^0.4.13",
78
+ "@zibby/skills": "^0.1.41",
79
79
  "axios": "^1.15.0",
80
80
  "handlebars": "^4.7.9",
81
81
  "zod": "^3.23.0 || ^4.0.0"
82
82
  },
83
83
  "peerDependencies": {
84
84
  "@playwright/test": ">=1.49.0",
85
- "@zibby/core": ">=0.5.0",
85
+ "@zibby/core": ">=0.5.9",
86
86
  "playwright": ">=1.49.0"
87
87
  },
88
88
  "devDependencies": {
@@ -50,21 +50,28 @@ export class SentryTriageAgent extends WorkflowAgent {
50
50
  .setInputSchema(sentryTriageInputSchema)
51
51
  .setContextSchema(sentryTriageContextSchema);
52
52
 
53
+ // Route OUT of the decision: skip everything when Sentry returned nothing
54
+ // this window, else classify. (Shared by the decision node's condition and
55
+ // its labeled edges so the logic lives in one place.)
56
+ const routeHasIssues = (state) =>
57
+ (state?.fetch_issues?.issues || []).length === 0 ? 'END' : 'classify';
58
+
53
59
  graph.addNode('fetch_issues', fetchIssuesNode);
60
+ // Explicit decision node → renders as a clean Condition diamond. The branch
61
+ // comes OUT of this, not hung off the fetch_issues work node.
62
+ graph.addConditionalNode('has_issues', { condition: routeHasIssues });
54
63
  graph.addNode('classify', classifyNode);
55
64
  graph.addNode('dispatch_alerts', dispatchNode);
56
65
 
57
66
  graph.setEntryPoint('fetch_issues');
58
- // Short-circuit when Sentry returned nothing for this window. The
59
- // empty-list case is the common idle path (steady-state apps don't
60
- // throw new errors every hour), and running classify + dispatch on
61
- // an empty input wastes two Claude calls per run — at hourly cadence
62
- // across many tenants that adds up. Cleaner to route directly to END
63
- // at the graph level than to short-circuit inside each downstream
64
- // node's prompt (which still spends a model round-trip).
65
- graph.addConditionalEdges('fetch_issues', (state) => {
66
- const issues = state?.fetch_issues?.issues || [];
67
- return issues.length === 0 ? 'END' : 'classify';
67
+ graph.addEdge('fetch_issues', 'has_issues');
68
+ // Short-circuit when Sentry returned nothing for this window. The empty-list
69
+ // case is the common idle path, and running classify + dispatch on empty
70
+ // input wastes two Claude calls per run — at hourly cadence across many
71
+ // tenants that adds up. Routing to END at the graph level (vs short-circuit
72
+ // inside each prompt) skips the model round-trips entirely.
73
+ graph.addConditionalEdges('has_issues', routeHasIssues, {
74
+ labels: { classify: 'has issues', END: 'no issues' },
68
75
  });
69
76
  graph.addEdge('classify', 'dispatch_alerts');
70
77
  graph.addEdge('dispatch_alerts', 'END');
@@ -1,213 +0,0 @@
1
- /**
2
- * cache_replay node — lever-#2 read path inside the workflow.
3
- *
4
- * Sits between `preflight` and `execute_live` in the graph. Tries to
5
- * replay a prior successful run's action sequence via Playwright
6
- * directly, completely skipping the LLM. On a clean cache hit it
7
- * populates `state.execute_live` with the result so downstream
8
- * `generate_script` works exactly as if execute_live had run.
9
- *
10
- * Conditional edge after this node:
11
- * - state.cache_replay.hit === true → skip execute_live → generate_script
12
- * - state.cache_replay.hit === false → execute_live (LLM-driven path)
13
- *
14
- * Not user-configurable per-spec — the cache key derivation handles
15
- * staleness (page fingerprint drift invalidates) and replay failures
16
- * fall through cleanly to the LLM path.
17
- */
18
-
19
- import { z } from '@zibby/core';
20
- import { chromium } from 'playwright';
21
- import { spawn } from 'child_process';
22
- import { extractDomain, replayActions } from '@zibby/ui-memory';
23
- import { join } from 'path';
24
-
25
- const REPLAY_TIMEOUT_MS = 60_000;
26
-
27
- export const cacheReplayNode = {
28
- name: 'cache_replay',
29
- skills: [],
30
- timeout: 90000,
31
- outputSchema: z.object({
32
- hit: z.boolean(),
33
- elapsed_ms: z.number().nullish(),
34
- executed: z.number().nullish(),
35
- total: z.number().nullish(),
36
- cache_key: z.string().nullish(),
37
- error: z.string().nullish(),
38
- // When hit, we also write a synthesized execute_live block so the
39
- // downstream generate_script node sees what it expects.
40
- execute_live_synthesized: z.boolean().nullish(),
41
- }),
42
-
43
- execute: async (context) => {
44
- // graph.js builds nodeContext as `{ state, invokeAgent, _coreInvokeAgent,
45
- // ...state.getAll() }`. So `context.testSpec` works (spread) AND
46
- // `context.state.get('testSpec')` works (instance). Reading from the
47
- // spread is the natural shape — `context.state` is reserved for the
48
- // .set(key, value) side-write below.
49
- const cwd = context.cwd || context.workspace || process.cwd();
50
- const testSpec = context.testSpec || '';
51
- const specPath = context.specPath || '';
52
-
53
- // Derive domain from the spec text (no DOM access yet — pure parse).
54
- const domain = extractDomainFromSpec(testSpec);
55
- if (!domain) {
56
- return { hit: false, error: 'cannot derive domain from spec' };
57
- }
58
-
59
- // Cache key requires page_fingerprint, which is page-state-dependent
60
- // and only available AFTER navigation. We compute a key WITHOUT
61
- // fingerprint first and look up by (domain, spec_path) prefix —
62
- // the persister wrote spec_path too. If we find a candidate, we
63
- // use its stored fingerprint to compute the full key and verify.
64
- //
65
- // Lookup order:
66
- // 1. Exact (domain, spec_path) match in action_cache.
67
- // 2. If found, use its actions for replay attempt.
68
- // 3. On replay success: signal hit, populate state.execute_live.
69
- // 4. On replay failure (or cache miss): hit=false, fall back to LLM.
70
- const cached = await lookupCacheByDomainAndSpec({ cwd, domain, specPath });
71
- if (!cached) {
72
- return { hit: false, error: 'no cached actions for this spec' };
73
- }
74
-
75
- // Run the replay in a freshly-launched Playwright browser. Cleanly
76
- // independent from the @zibby/mcp-browser path execute_live uses.
77
- const t0 = Date.now();
78
- const browser = await chromium.launch({ headless: true });
79
- const page = await browser.newPage();
80
- let replayResult;
81
- try {
82
- replayResult = await Promise.race([
83
- replayActions({
84
- actions: cached.actions,
85
- page,
86
- log: (m) => console.log(`[cache_replay] ${m}`),
87
- }),
88
- new Promise((_, reject) =>
89
- setTimeout(() => reject(new Error('replay timeout')), REPLAY_TIMEOUT_MS),
90
- ),
91
- ]);
92
- } catch (err) {
93
- replayResult = { success: false, error: err.message, executed: 0, total: cached.actions.length };
94
- }
95
- const finalUrl = page.url();
96
- await browser.close().catch(() => {});
97
- const elapsedMs = Date.now() - t0;
98
-
99
- if (!replayResult.success) {
100
- // Increment failure_count so we can drop chronic misses later.
101
- await incrementCacheFailure({ cwd, cacheKey: cached.cache_key });
102
- return {
103
- hit: false,
104
- elapsed_ms: elapsedMs,
105
- executed: replayResult.executed,
106
- total: replayResult.total,
107
- cache_key: cached.cache_key,
108
- error: replayResult.error,
109
- };
110
- }
111
-
112
- // HIT path. Side-write the synthesized execute_live output via
113
- // context.state.set so downstream generate_script reads the same
114
- // shape it expects (actions[], finalUrl, …). The customExecute
115
- // return-value lands in state.cache_replay; the execute_live slot
116
- // has to be populated separately.
117
- if (typeof context.state?.set === 'function') {
118
- context.state.set('execute_live', {
119
- success: true,
120
- steps: cached.actions.map((a) => a.description),
121
- actions: cached.actions,
122
- assertions: [],
123
- finalUrl,
124
- browserClosed: true,
125
- notes: 'cache_replay hit — actions replayed via Playwright, no LLM',
126
- });
127
- }
128
-
129
- return {
130
- hit: true,
131
- elapsed_ms: elapsedMs,
132
- executed: replayResult.executed,
133
- total: replayResult.total,
134
- cache_key: cached.cache_key,
135
- execute_live_synthesized: true,
136
- };
137
- },
138
- };
139
-
140
- // ─── helpers ────────────────────────────────────────────────────────────
141
-
142
- function extractDomainFromSpec(spec) {
143
- if (!spec) return null;
144
- // Find the first http(s) URL in the spec and run it through the
145
- // SAME `extractDomain` the persister uses, so the cache-key lookup
146
- // matches what was actually written (notably: `www.` is stripped).
147
- const m = String(spec).match(/https?:\/\/[^\s"'<>]+/);
148
- if (!m) return null;
149
- return extractDomain(m[0]);
150
- }
151
-
152
- /**
153
- * Find a cached row by (domain, spec_path). Picks the row with
154
- * highest success_count if multiple match.
155
- * Uses dolt via subprocess (matching the rest of the codebase's
156
- * Dolt-access pattern).
157
- */
158
- async function lookupCacheByDomainAndSpec({ cwd, domain, specPath }) {
159
- const dbDir = join(cwd, '.zibby', 'memory');
160
- const safeDomain = escapeSql(domain);
161
- const safeSpec = escapeSql(specPath);
162
- const sql = `SELECT cache_key, actions_json, page_fingerprint
163
- FROM action_cache
164
- WHERE domain = ${safeDomain} AND spec_path = ${safeSpec}
165
- ORDER BY success_count DESC, last_used_at DESC
166
- LIMIT 1`;
167
- const rows = await runDoltJson(dbDir, sql);
168
- if (!rows || rows.length === 0) return null;
169
- try {
170
- const actions = JSON.parse(rows[0].actions_json);
171
- return { cache_key: rows[0].cache_key, actions, fingerprint: rows[0].page_fingerprint };
172
- } catch {
173
- return null;
174
- }
175
- }
176
-
177
- async function incrementCacheFailure({ cwd, cacheKey }) {
178
- const dbDir = join(cwd, '.zibby', 'memory');
179
- const sql = `UPDATE action_cache
180
- SET failure_count = failure_count + 1, last_replay_status = 'replay-failed'
181
- WHERE cache_key = ${escapeSql(cacheKey)}`;
182
- await runDoltExec(dbDir, sql).catch(() => { /* non-fatal */ });
183
- }
184
-
185
- function escapeSql(v) {
186
- if (v == null) return 'NULL';
187
- return `'${String(v).replace(/'/g, "''")}'`;
188
- }
189
-
190
- function runDoltJson(dir, sql) {
191
- return new Promise((resolve) => {
192
- const child = spawn('dolt', ['sql', '-r', 'json', '-q', sql], { cwd: dir });
193
- let out = '';
194
- child.stdout.on('data', (d) => { out += d; });
195
- child.on('close', () => {
196
- try {
197
- const parsed = JSON.parse(out);
198
- resolve(parsed.rows || []);
199
- } catch {
200
- resolve([]);
201
- }
202
- });
203
- child.on('error', () => resolve([]));
204
- });
205
- }
206
-
207
- function runDoltExec(dir, sql) {
208
- return new Promise((resolve, reject) => {
209
- const child = spawn('dolt', ['sql', '-q', sql], { cwd: dir });
210
- child.on('close', (code) => (code === 0 ? resolve() : reject(new Error(`dolt exit ${code}`))));
211
- child.on('error', reject);
212
- });
213
- }