npm - @zibby/workflow-templates - Versions diffs - 0.9.2 → 0.9.5 - Mend

@zibby/workflow-templates 0.9.2 → 0.9.5

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (7) hide show

package/browser-test-automation/graph.mjs +26 -27
package/browser-test-automation/nodes/index.mjs +0 -1
package/code-analysis/graph.js +3 -1
package/index.js +103 -42
package/package.json +6 -6
package/sentry-triage/graph.mjs +17 -10
package/browser-test-automation/nodes/cache-replay.mjs +0 -213

package/browser-test-automation/graph.mjs CHANGED Viewed

@@ -8,7 +8,6 @@
 import { WorkflowAgent, WorkflowGraph } from '@zibby/core';
 import {
   preflightNode,
-  cacheReplayNode,
   executeLiveNode,
   generateScriptNode,
 } from './nodes/index.mjs';
@@ -25,40 +24,40 @@ export class BrowserTestAutomationAgent extends WorkflowAgent {
       .setInputSchema(browserTestAutomationInputSchema)
       .setContextSchema(browserTestAutomationContextSchema);
+    // Branch route fns — shared by each decision node's condition AND its
+    // labeled edges so the logic lives in exactly one place.
+    const routeHasAssertions = (state) =>
+      (state.preflight?.assertions || []).length > 0 ? 'execute_live' : 'END';
+    const routeRanOk = (state) => {
+      const r = state.execute_live;
+      return ((r?.steps?.length > 0) || (r?.actions?.length > 0)) ? 'generate_script' : 'END';
+    };
     graph.addNode('preflight', preflightNode);
-    graph.addNode('cache_replay', cacheReplayNode);
+    // Explicit decision nodes → each branch renders as a clean Condition diamond
+    // instead of hanging off a work node.
+    graph.addConditionalNode('has_assertions', { condition: routeHasAssertions });
     graph.addNode('execute_live', executeLiveNode);
+    graph.addConditionalNode('ran_ok', { condition: routeRanOk });
     graph.addNode('generate_script', generateScriptNode);
     graph.setEntryPoint('preflight');
-    // Short-circuit when preflight produced nothing usable. Triggered when:
-    //   - the user invoked `zibby workflow run browser-tests` with no spec
-    //     (state.input is undefined / empty), so preflight had nothing to
-    //     analyze and the LLM came back with `assertions: []`
-    //   - the spec is so vague the LLM can't extract any assertions
-    // Without this gate the graph would barrel into execute_live, fire up
-    // a real browser session + a second expensive LLM call, then waste
-    // ~30s before failing — bad UX and bad bill.
-    graph.addConditionalEdges('preflight', (state) => {
-      const assertions = state.preflight?.assertions || [];
-      return assertions.length > 0 ? 'cache_replay' : 'END';
-    });
-    // Lever-#2 fork: cache_replay attempted a Playwright-only replay of
-    // a prior successful action sequence. On hit it side-wrote
-    // state.execute_live with synthesized output, so we can skip
-    // execute_live and jump straight to generate_script — zero LLM
-    // tokens. On miss / replay failure / cold cache, fall through to
-    // the normal LLM-driven execute_live path.
-    graph.addConditionalEdges('cache_replay', (state) => {
-      return state.cache_replay?.hit === true ? 'generate_script' : 'execute_live';
+    // Short-circuit when preflight produced nothing usable (no spec / too vague
+    // → assertions:[]). Without this the graph would barrel into execute_live,
+    // fire up a real browser + a second expensive LLM call, and waste ~30s
+    // before failing. Routed through an explicit decision node so it renders as
+    // a Condition diamond.
+    graph.addEdge('preflight', 'has_assertions');
+    graph.addConditionalEdges('has_assertions', routeHasAssertions, {
+      labels: { execute_live: 'has assertions', END: 'no assertions' },
     });
-    graph.addConditionalEdges('execute_live', (state) => {
-      const result = state.execute_live;
-      const hasExecution = (result?.steps?.length > 0) || (result?.actions?.length > 0);
-      return hasExecution ? 'generate_script' : 'END';
+    // Did the live run actually capture any browser steps? If yes, turn them
+    // into a Playwright script; if it produced nothing, end (nothing to script).
+    graph.addEdge('execute_live', 'ran_ok');
+    graph.addConditionalEdges('ran_ok', routeRanOk, {
+      labels: { generate_script: 'captured steps', END: 'nothing to script' },
     });
     graph.addEdge('generate_script', 'END');

package/browser-test-automation/nodes/index.mjs CHANGED Viewed

@@ -1,4 +1,3 @@
 export { preflightNode } from './preflight.mjs';
-export { cacheReplayNode } from './cache-replay.mjs';
 export { executeLiveNode } from './execute-live.mjs';
 export { generateScriptNode } from './generate-script.mjs';

package/code-analysis/graph.js CHANGED Viewed

@@ -50,7 +50,9 @@ export function buildAnalysisGraph(graph) {
       prompt: generateTestCasesPrompt
     })
     .addNode('finalize', finalizeNode)
-    .setNodeType('validation_check', 'decision')
+    // No setNodeType('validation_check','decision') needed — addConditionalNode
+    // makes it render as a Condition diamond automatically (engine derives the
+    // 'decision' display type from the ConditionalNode class).
     .setEntryPoint('setup')
     .addEdge('setup', 'analyze_ticket')
     .addEdge('analyze_ticket', 'validation_check')

package/index.js CHANGED Viewed

@@ -9,7 +9,7 @@ export const TEMPLATES = {
   'browser-test-automation': {
     name: 'browser-test-automation',
     displayName: 'Browser Test Automation (Full Workflow)',
-    description: 'Complete browser test automation workflow with title generation, live execution, and script generation',
+    description: 'End-to-end browser tests authored as agent workflows. Extracts a title and assertion checklist from a plain-English spec, runs it live with AI + a real browser, then emits a Playwright script with stable selectors.',
     path: join(__dirname, 'browser-test-automation'),
     default: true,
     // Suggested slug for `zibby workflow new <slug> -t <name>`. Used in
@@ -73,7 +73,7 @@ export const TEMPLATES = {
   'code-analysis': {
     name: 'code-analysis',
     displayName: 'Code Analysis (Ticket → Code + Tests)',
-    description: 'Multi-node workflow that analyzes a Jira ticket against a code repo, generates code changes, and emits test cases',
+    description: 'Takes a Jira ticket end-to-end against a code repo: analyzes feasibility, generates scoped code changes, and emits matching test cases. An LLM gate skips code-gen when the ticket can\'t be implemented as-is.',
     path: join(__dirname, 'code-analysis'),
     defaultSlug: 'ticket-analyzer',
     // Runtime deps the scaffolded copy needs in addition to @zibby/core.
@@ -123,7 +123,7 @@ export const TEMPLATES = {
   'generate-test-cases': {
     name: 'generate-test-cases',
     displayName: 'Generate Test Cases (Diff → Test Specs)',
-    description: 'Standalone slice — takes an existing code diff and generates plain-English test specifications for it. Skips ticket-analysis and code-gen.',
+    description: 'Hand it a PR diff; get back prioritized, plain-English test specs. Skips ticket-analysis and code-gen entirely, exploring the codebase to ground each spec in real components. Specs are runnable by any browser-driving agent.',
     path: join(__dirname, 'generate-test-cases'),
     defaultSlug: 'tests-from-diff',
     deps: {
@@ -148,7 +148,7 @@ export const TEMPLATES = {
         'Mood: friendly, approachable, slightly handmade. Like a children\'s book illustration applied to a developer tool.',
         'NO text, NO photo-realism, NO sleek 3D render — this one is hand-drawn and warm.',
       ].join('\n'),
-      tags: ['Code Review', 'Testing'],
+      tags: ['Testing'],
       capabilities: [
         'Skips ticket analysis — feed it the diff directly',
         'LLM explores the codebase to ground test steps in real components',
@@ -170,7 +170,7 @@ export const TEMPLATES = {
   'notify-slack': {
     name: 'notify-slack',
     displayName: 'Notify Slack',
-    description: 'Reusable child workflow — posts a structured Block Kit alert to a Slack channel. Dispatched by other workflows (Sentry triage, autofix, incident) via sub-graph.',
+    description: 'Reusable child workflow — posts a severity-coded Block Kit alert to a Slack channel. Dispatched as a sub-graph from any parent (Sentry triage, autofix, incident). Returns the message timestamp so the parent can thread follow-ups.',
     path: join(__dirname, 'notify-slack'),
     defaultSlug: 'alert-slack',
     deps: { zod: '^3.23.0 || ^4.0.0', '@zibby/skills': '^0.1.28' },
@@ -212,7 +212,7 @@ export const TEMPLATES = {
   'notify-lark': {
     name: 'notify-lark',
     displayName: 'Notify Lark',
-    description: 'Reusable child workflow — posts a structured Interactive Card to a Lark / Feishu chat. Dispatched by other workflows via sub-graph.',
+    description: 'Reusable child workflow — posts a severity-coded Interactive Card to a Lark / Feishu chat. Dispatched as a sub-graph from any parent. Auto-detects the recipient type and returns the message id for threaded replies.',
     path: join(__dirname, 'notify-lark'),
     defaultSlug: 'alert-lark',
     deps: { zod: '^3.23.0 || ^4.0.0', '@zibby/skills': '^0.1.28' },
@@ -257,7 +257,7 @@ export const TEMPLATES = {
   'notify-notion': {
     name: 'notify-notion',
     displayName: 'Notify Notion',
-    description: 'Reusable child workflow — creates a Notion page in a database OR appends blocks to an existing page. Dispatched by other workflows (digest, incident archives, weekly reports) via sub-graph.',
+    description: 'Reusable child workflow — creates a Notion page in a database OR appends blocks to an existing page. Renders rich report objects to native Notion blocks and returns pageId + pageUrl. Dispatched as a sub-graph from any parent (digests, incident archives, weekly reports).',
     path: join(__dirname, 'notify-notion'),
     defaultSlug: 'archive-notion',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
@@ -283,8 +283,7 @@ export const TEMPLATES = {
       ].join('\n'),
       tags: ['Notifications', 'Docs', 'Reports'],
       capabilities: [
-        'Create a new page in a Notion database (POST /v1/pages)',
-        'Append blocks to an existing page (PATCH /v1/blocks/{pageId}/children)',
+        'Create a new page in a Notion database, or append blocks to an existing page',
         'Renders rich report-objects to native Notion blocks (headings, callouts, tables, code, embeds)',
         'Severity-mapped page-icon emoji + colored callout backgrounds',
         'Sub-graph dispatchable from any parent workflow',
@@ -303,7 +302,7 @@ export const TEMPLATES = {
   'sentry-triage': {
     name: 'sentry-triage',
     displayName: 'Sentry Triage Bot',
-    description: 'Hourly Sentry triage that classifies new issues with an LLM rubric (CRITICAL/HIGH/MEDIUM/LOW/NOISE) and posts above-threshold alerts to your Slack or Lark — whichever you have connected.',
+    description: 'Hourly Sentry triage that classifies new issues with an auditable LLM rubric (CRITICAL…NOISE) and posts above-threshold alerts to your Slack or Lark — whichever you have connected. Batches related issues and @-mentions on-call only for CRITICAL.',
     path: join(__dirname, 'sentry-triage'),
     defaultSlug: 'sentry-triage',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
@@ -347,7 +346,7 @@ export const TEMPLATES = {
   'ai-spend-weekly-digest': {
     name: 'ai-spend-weekly-digest',
     displayName: 'AI Spend Weekly Digest',
-    description: 'Weekly digest of OpenAI / Anthropic / Cursor admin billing — pulls trailing-28d cost+usage across all three providers, detects per-project anomalies vs 3-week baseline, and posts a rich report card to Lark and/or Slack via in-process sub-graph dispatch.',
+    description: 'Weekly digest of OpenAI / Anthropic / Cursor admin billing. Pulls cost+usage across all three providers in parallel, detects per-project anomalies vs a 3-week baseline, and posts a rich report card to Lark and/or Slack. Partial-failure resilient — one provider being down doesn\'t kill the run.',
     path: join(__dirname, 'ai-spend-weekly-digest'),
     defaultSlug: 'ai-spend-weekly-digest',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
@@ -376,9 +375,8 @@ export const TEMPLATES = {
         'Pulls org-wide cost+usage from OpenAI, Anthropic, and Cursor admin APIs in parallel',
         'Joins customer attribution from provider-native project / workspace / member metadata',
         'Detects per-project anomalies (σ + ratio) against a 3-week rolling baseline',
-        'Drafts the leadership-grade narrative with an LLM, falls back to deterministic copy if model is unavailable',
-        'Posts a rich Block-Kit / Lark Card report (trend bars, top spenders table, anomalies, provider breakdown)',
-        'Fan-out to Lark + Slack in parallel — partial-failure resilient',
+        'Drafts a leadership-grade narrative with an LLM, with deterministic copy as fallback',
+        'Posts a rich Block-Kit / Lark Card report and fans out to Slack + Lark in parallel',
       ],
       conversationStarters: [
         'Run a weekly AI spend digest every Monday morning',
@@ -393,7 +391,7 @@ export const TEMPLATES = {
   'pipeline-supervisor': {
     name: 'pipeline-supervisor',
     displayName: 'Pipeline Supervisor',
-    description: 'Zibby managing Zibby — a scheduled supervisor that scans the project\'s other pipelines, flags the ones failing or running slow, and posts human-reviewable improvement proposals (add a test gate / tweak a prompt / add an approval gate / drop a redundant step) to Slack or Lark. Read + propose + notify only; it never edits another workflow.',
+    description: 'Zibby managing Zibby — a scheduled supervisor that scans the project\'s other pipelines, flags the ones failing or running slow, and posts one evidence-backed improvement proposal per problem pipeline to Slack or Lark. Read + propose + notify only; a human applies the change, the supervisor never edits a workflow.',
     path: join(__dirname, 'pipeline-supervisor'),
     defaultSlug: 'pipeline-supervisor',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
@@ -439,7 +437,7 @@ export const TEMPLATES = {
   'ticket-triage': {
     name: 'ticket-triage',
     displayName: 'Ticket Triage',
-    description: 'Tracker-neutral triage building block — LLM-classifies one ticket (from any tracker) into a severity (CRITICAL…NOISE), a shouldAutofix decision, and a human summary. The first block of the bug-autofix pipeline; usable on its own.',
+    description: 'Tracker-neutral triage building block — an LLM classifies one ticket into a severity (CRITICAL…NOISE), a shouldAutofix decision, and a human summary, all with auditable reasoning. The first block of the bug-autofix pipeline, usable on its own (Jira flows end-to-end in v1).',
     path: join(__dirname, 'ticket-triage'),
     defaultSlug: 'ticket-triage',
     deps: { zod: '^3.23.0' },
@@ -479,7 +477,7 @@ export const TEMPLATES = {
   'code-fix': {
     name: 'code-fix',
     displayName: 'Code Fix (clone → fix → PR)',
-    description: 'Clones a repo into an isolated workspace, has an agent fix one ticket with an inline test-gate (run the tests, feed failures back for one retry), and opens a PR. Output: { pr_url, branch }. The "do the work" block of the bug-autofix pipeline.',
+    description: 'Hand it a ticket + a repo; get back a tested fix PR. Clones the repo into an isolated workspace, has an agent fix the ticket behind an inline test-gate (run the suite, feed failures back for one retry), and opens a GitHub PR → { pr_url, branch }. Stops at the PR — a human reviews and merges.',
     path: join(__dirname, 'code-fix'),
     defaultSlug: 'code-fix',
     deps: { zod: '^3.23.0', axios: '^1.6.0' },
@@ -521,7 +519,7 @@ export const TEMPLATES = {
   'tracker-writeback': {
     name: 'tracker-writeback',
     displayName: 'Tracker Writeback',
-    description: 'Closes the loop after triage/fix — transitions the tracker issue (Jira → In Review when a PR opened), comments the PR link + verdict, and posts a Slack or Lark note. The writeback block of the bug-autofix pipeline (Jira in v1; GitHub / Linear are extension points).',
+    description: 'Closes the loop after triage/fix — transitions the Jira issue (→ In Review when a PR opened), comments the PR link + verdict, and posts a short Slack or Lark note. Runs on both the autofixed and notify-only branches. The writeback block of the bug-autofix pipeline (Jira in v1).',
     path: join(__dirname, 'tracker-writeback'),
     defaultSlug: 'tracker-writeback',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
@@ -563,7 +561,7 @@ export const TEMPLATES = {
   'bug-autofix': {
     name: 'bug-autofix',
     displayName: 'Bug Autofix Pipeline',
-    description: 'The composable bug-autofix SDLC pipeline. Polls a tracker, then connects three reusable building blocks via sub-graph dispatch: ticket-triage → (autofixable?) → code-fix → tracker-writeback. High-severity autofixable bugs get a tested fix PR opened and the ticket moved to In Review; everything else is triaged and a human is notified. Stops at the PR — a human merges.',
+    description: 'The composable bug-autofix pipeline. Polls a tracker, then chains three reusable blocks via sub-graph: ticket-triage → code-fix → tracker-writeback. High-severity autofixable bugs get a tested fix PR and the ticket moved to In Review; everything else is triaged and a human notified. Stops at the PR — a human merges.',
     path: join(__dirname, 'bug-autofix'),
     defaultSlug: 'bug-autofix',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.26' },
@@ -607,7 +605,7 @@ export const TEMPLATES = {
   'github-ai-scout': {
     name: 'github-ai-scout',
     displayName: 'Daily GitHub AI Scout',
-    description: 'A daily scout that searches GitHub for new/trending AI projects, scores them against YOUR configurable rubric with an LLM, and posts a Slack shortlist for a human to review. General + config-driven — the query, recency/star thresholds, and rubric are all deploy-time inputs. Proposes a shortlist; never stars, forks, or auto-adds anything.',
+    description: 'A daily scout that searches GitHub for trending AI projects, scores each against YOUR plain-English rubric with an LLM, and posts a Slack shortlist for a human to review. Query, recency/star thresholds, and rubric are all deploy-time inputs. Proposes a shortlist only — never stars, forks, or auto-adds.',
     path: join(__dirname, 'github-ai-scout'),
     defaultSlug: 'github-ai-scout',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.25' },
@@ -633,12 +631,11 @@ export const TEMPLATES = {
       ].join('\n'),
       tags: ['Reports', 'Notifications'],
       capabilities: [
-        'Searches GitHub daily for newly-created, trending repositories matching your query',
-        'Filters by recency (created within N days) and a minimum star count',
+        'Searches GitHub daily for trending repos matching your query, filtered by recency + minimum stars',
         'Scores every candidate 1-5 against your plain-English rubric with an LLM',
         'Keeps a tight shortlist of the best finds — drops abandoned demos and trivial wrappers',
         'Dedups against repos you already track via an excludeRepos allow-list',
-        'Posts a numbered Block-Kit shortlist to Slack — stars, language, license, one-line reason, link',
+        'Posts a numbered Block-Kit shortlist to Slack — stars, language, license, reason, link',
       ],
       conversationStarters: [
         'Scout new AI agent frameworks on GitHub every morning',
@@ -653,15 +650,17 @@ export const TEMPLATES = {
   'github-code-review': {
     name: 'github-code-review',
     displayName: 'GitHub Code Review',
-    description: 'Reviews a GitHub pull request with an LLM and posts the review back to the PR — a summary plus inline comments and an APPROVE / COMMENT / REQUEST_CHANGES verdict. If the PR is linked to a Jira or Linear ticket (and that integration is connected), it ALSO validates the change against the ticket\'s acceptance criteria and renders an objectives-met table, the way CodeRabbit does. GitHub required; Jira/Linear optional (ticket context only).',
+    description: 'Reviews a GitHub PR with an LLM and posts back a summary, inline comments, and an APPROVE / COMMENT / REQUEST_CHANGES verdict. Agent-driven: supply your own review rules; it can pull context from a linked Jira/Linear ticket and a Notion page, validating against acceptance criteria CodeRabbit-style. GitHub required; the rest optional.',
     path: join(__dirname, 'github-code-review'),
     defaultSlug: 'github-code-review',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
     features: [
-      '3-node graph: fetch_pr (github) → fetch_ticket (optional) → review (LLM)',
+      'Graph: fetch_pr (github) → fetch_ticket (optional) → fetch_context (optional Notion) → review (LLM) → notify (optional Slack/Lark)',
       'Posts a real PR review via github_create_review — summary body + inline comments + verdict',
+      'AGENT-DRIVEN: supply your own authoritative review rules with conditions (reviewRules) the agent treats as binding',
       'DYNAMIC prompt: linked ticket → objectives-met table vs ticket acceptance criteria; no ticket → standalone diff review (never mentions a ticket)',
-      'Jira/Linear are OPTIONAL — fetched via direct tool calls so they don\'t gate deploy',
+      'OPTIONAL context sources — Jira/Linear ticket + a Notion page — all fetched via direct tool calls so none gate deploy',
+      'OPTIONAL notify — posts the verdict + summary to Slack or Lark when done (never blocks the review)',
       'Webhook-triggered on a PR: { owner, repo, prNumber }',
       'Reviews + comments only — never merges, closes, or pushes',
     ],
@@ -678,17 +677,16 @@ export const TEMPLATES = {
       ].join('\n'),
       tags: ['Code Review'],
       capabilities: [
-        'Reads the PR diff + changed files and reviews correctness, bugs, security, tests, design, and style',
+        'Reviews the PR diff for correctness, bugs, security, tests, design, and style',
         'Posts the review back to the PR: a summary, inline comments on specific lines, and a verdict',
         'When a ticket is linked, validates the change against its acceptance criteria with an objectives-met table',
-        'Optionally pulls ticket context from Jira or Linear — but neither is required to run',
-        'Webhook-driven: point a PR-opened/synchronize hook at it',
-        'Proposes feedback only — a human still decides and merges',
+        'Apply your own authoritative review rules with conditions (e.g. "REQUEST_CHANGES if billing/ changes without tests")',
+        'Optionally pull Jira/Linear ticket + Notion context and notify Slack or Lark — none required, none block the review',
       ],
       conversationStarters: [
         'Review pull request #412 in acme/web-app',
-        'Auto-review every new PR and post inline comments',
-        'Review this PR and check it against the linked Jira ticket\'s acceptance criteria',
+        'Auto-review every PR against our team rules and ping #code-reviews on Slack',
+        'Review this PR against the linked Jira ticket and our Notion engineering standards',
         'Block the merge if a PR introduces a security or missing-test issue',
       ],
     },
@@ -698,15 +696,17 @@ export const TEMPLATES = {
   'gitlab-code-review': {
     name: 'gitlab-code-review',
     displayName: 'GitLab Code Review',
-    description: 'Reviews a GitLab merge request with an LLM and posts the review back to the MR — a summary note plus inline discussion comments and a clear verdict. If the MR is linked to a Jira or Linear ticket (and that integration is connected), it ALSO validates the change against the ticket\'s acceptance criteria and renders an objectives-met table, the way CodeRabbit does. GitLab required; Jira/Linear optional (ticket context only).',
+    description: 'Reviews a GitLab MR with an LLM and posts back a summary note, inline discussion comments, and a verdict. Agent-driven: supply your own review rules; it can pull context from a linked Jira/Linear ticket and a Notion page, validating against acceptance criteria CodeRabbit-style. Works on gitlab.com + self-hosted. GitLab required; the rest optional.',
     path: join(__dirname, 'gitlab-code-review'),
     defaultSlug: 'gitlab-code-review',
     deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
     features: [
-      '3-node graph: fetch_mr (gitlab) → fetch_ticket (optional) → review (LLM)',
+      'Graph: fetch_mr (gitlab) → fetch_ticket (optional) → fetch_context (optional Notion) → review (LLM) → notify (optional Slack/Lark)',
       'Posts a real MR review via gitlab_create_mr_review — summary note + inline discussions',
+      'AGENT-DRIVEN: supply your own authoritative review rules with conditions (reviewRules) the agent treats as binding',
       'DYNAMIC prompt: linked ticket → objectives-met table vs ticket acceptance criteria; no ticket → standalone diff review (never mentions a ticket)',
-      'Jira/Linear are OPTIONAL — fetched via direct tool calls so they don\'t gate deploy',
+      'OPTIONAL context sources — Jira/Linear ticket + a Notion page — all fetched via direct tool calls so none gate deploy',
+      'OPTIONAL notify — posts the verdict + summary to Slack or Lark when done (never blocks the review)',
       'Webhook-triggered on an MR: { projectId, mrIid }',
       'Works against gitlab.com and self-hosted instances',
     ],
@@ -723,21 +723,82 @@ export const TEMPLATES = {
       ].join('\n'),
       tags: ['Code Review'],
       capabilities: [
-        'Reads the MR diff + changed files and reviews correctness, bugs, security, tests, design, and style',
-        'Posts the review back to the MR: a summary note, inline discussion comments on specific lines, and a verdict',
+        'Reviews the MR diff for correctness, bugs, security, tests, design, and style',
+        'Posts the review back to the MR: a summary note, inline discussion comments, and a verdict',
         'When a ticket is linked, validates the change against its acceptance criteria with an objectives-met table',
-        'Optionally pulls ticket context from Jira or Linear — but neither is required to run',
-        'Works against gitlab.com and self-hosted GitLab instances',
-        'Proposes feedback only — a human still decides, approves, and merges',
+        'Apply your own authoritative review rules with conditions (e.g. "REQUEST_CHANGES if billing/ changes without tests")',
+        'Works on gitlab.com + self-hosted; optionally pulls Jira/Linear + Notion context and notifies Slack or Lark',
       ],
       conversationStarters: [
         'Review merge request !73 in acme/web-app',
-        'Auto-review every new MR and post inline discussions',
-        'Review this MR and check it against the linked Jira ticket\'s acceptance criteria',
+        'Auto-review every MR against our team rules and ping our Lark group',
+        'Review this MR against the linked Jira ticket and our Notion engineering standards',
         'Flag any MR that introduces a security or missing-test issue',
       ],
     },
   },
+  // ── github-comment-response: in-thread reply companion to github-code-review ─
+  // The CodeRabbit-style conversational back-and-forth: a HUMAN replies to the
+  // review bot's comment in a PR thread, and this agent replies IN THAT SAME
+  // THREAD (concede if they're right, clarify with code evidence if not) — NOT a
+  // fresh full review. Resolved by github-webhook.js as the reply agent
+  // (link.commentResponseSlug || GITHUB_COMMENT_RESPONSE_SLUG || link.agentSlug).
+  // A COMPANION to github-code-review — tagged `child-workflow` so it's dropped
+  // from the browse facet row (same convention as notify-slack / ticket-triage).
+  // requiredIntegrations (GitHub) is DERIVED from graph.mjs at sync time, like
+  // every other template — not declared here.
+  'github-comment-response': {
+    name: 'github-comment-response',
+    displayName: 'GitHub Comment Response',
+    description: 'Replies conversationally, in-thread, to a human\'s reply on a GitHub PR review comment — the CodeRabbit-style back-and-forth. Reads the thread (bot comment + human reply + anchored diff) and posts a focused reply to the SAME thread, not a fresh review. The conversational companion to GitHub Code Review. GitHub required.',
+    path: join(__dirname, 'github-comment-response'),
+    defaultSlug: 'github-comment-response',
+    deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
+    features: [
+      'Single-node graph: respond (github) — reads the review-comment thread + replies in-thread',
+      'Replies in the SAME thread (github_reply_review_thread) — not a fresh full review',
+      'Top-level/summary comments answered via github_reply_issue_comment',
+      'Concedes when the human is right; clarifies with code evidence when not',
+      'Webhook-triggered on a reply to a bot thread: { owner, repo, prNumber, commentId, threadType, triggeringComment }',
+      'Graceful degradation: posts an honest acknowledge-and-clarify reply if the thread read fails',
+    ],
+    // COMPANION agent — intentionally NO `marketplace` field so the sync
+    // (backend/scripts/marketplace-sync-from-templates.mjs) NEVER publishes it
+    // to the public browse grid. Registry entry kept so `zibby template add
+    // github-comment-response` resolves it for CLI direct-deploy alongside
+    // github-code-review. Resolved at runtime by gitlab/github-webhook.js as the
+    // reply agent (link.commentResponseSlug || *_COMMENT_RESPONSE_SLUG || agentSlug).
+  },
+  // ── gitlab-comment-response: in-thread reply companion to gitlab-code-review ─
+  // The GitLab analog of github-comment-response. A HUMAN replies to the review
+  // bot's note in an MR discussion, and this agent replies IN THAT SAME
+  // DISCUSSION — NOT a fresh review. Resolved by gitlab-webhook.js as the reply
+  // agent (link.commentResponseSlug || GITLAB_COMMENT_RESPONSE_SLUG ||
+  // link.agentSlug). A COMPANION to gitlab-code-review — tagged `child-workflow`
+  // so it's dropped from the browse facet row. requiredIntegrations (GitLab) is
+  // DERIVED from graph.mjs at sync time, like every other template.
+  'gitlab-comment-response': {
+    name: 'gitlab-comment-response',
+    displayName: 'GitLab Comment Response',
+    description: 'Replies conversationally, in-thread, to a human\'s reply on a GitLab MR review note — the CodeRabbit-style back-and-forth. Reads the discussion (bot note + human reply + anchored diff) and posts a focused reply to the SAME discussion, not a fresh review. The conversational companion to GitLab Code Review. GitLab required.',
+    path: join(__dirname, 'gitlab-comment-response'),
+    defaultSlug: 'gitlab-comment-response',
+    deps: { zod: '^3.23.0', '@zibby/skills': '^0.1.33' },
+    features: [
+      'Single-node graph: respond (gitlab) — reads the MR discussion + replies in-thread',
+      'Replies in the SAME discussion (gitlab_reply_discussion) — not a fresh full review',
+      'General/non-threaded MR comments answered via gitlab_post_mr_note',
+      'Concedes when the human is right; clarifies with code evidence when not',
+      'Webhook-triggered on a reply to a bot discussion: { projectId, mrIid, discussionId, threadType, triggeringComment }',
+      'Works against gitlab.com and self-hosted instances; graceful degradation on discussion-read failure',
+    ],
+    // COMPANION agent — intentionally NO `marketplace` field (see the
+    // github-comment-response note above): never synced to the public grid,
+    // but kept in the registry so `zibby template add gitlab-comment-response`
+    // resolves it for CLI direct-deploy alongside gitlab-code-review.
+  },
 };
 export class TemplateFactory {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zibby/workflow-templates",
-  "version": "0.9.2",
+  "version": "0.9.5",
   "description": "Built-in workflow templates for Zibby — browser-test-automation, code-analysis, generate-test-cases, notify-slack, notify-lark, notify-notion, sentry-triage.",
   "type": "module",
   "main": "index.js",
@@ -45,10 +45,10 @@
   "homepage": "https://zibby.dev",
   "repository": {
     "type": "git",
-    "url": "https://github.com/ZibbyHQ/workflow-templates"
+    "url": "https://github.com/ZibbyDev/workflow-templates"
   },
   "bugs": {
-    "url": "https://github.com/ZibbyHQ/workflow-templates/issues"
+    "url": "https://github.com/ZibbyDev/workflow-templates/issues"
   },
   "files": [
     "browser-test-automation/",
@@ -74,15 +74,15 @@
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.88.0",
-    "@zibby/agent-workflow": "^0.4.2",
-    "@zibby/skills": "^0.1.27",
+    "@zibby/agent-workflow": "^0.4.13",
+    "@zibby/skills": "^0.1.41",
     "axios": "^1.15.0",
     "handlebars": "^4.7.9",
     "zod": "^3.23.0 || ^4.0.0"
   },
   "peerDependencies": {
     "@playwright/test": ">=1.49.0",
-    "@zibby/core": ">=0.5.0",
+    "@zibby/core": ">=0.5.9",
     "playwright": ">=1.49.0"
   },
   "devDependencies": {

package/sentry-triage/graph.mjs CHANGED Viewed

@@ -50,21 +50,28 @@ export class SentryTriageAgent extends WorkflowAgent {
       .setInputSchema(sentryTriageInputSchema)
       .setContextSchema(sentryTriageContextSchema);
+    // Route OUT of the decision: skip everything when Sentry returned nothing
+    // this window, else classify. (Shared by the decision node's condition and
+    // its labeled edges so the logic lives in one place.)
+    const routeHasIssues = (state) =>
+      (state?.fetch_issues?.issues || []).length === 0 ? 'END' : 'classify';
     graph.addNode('fetch_issues',    fetchIssuesNode);
+    // Explicit decision node → renders as a clean Condition diamond. The branch
+    // comes OUT of this, not hung off the fetch_issues work node.
+    graph.addConditionalNode('has_issues', { condition: routeHasIssues });
     graph.addNode('classify',        classifyNode);
     graph.addNode('dispatch_alerts', dispatchNode);
     graph.setEntryPoint('fetch_issues');
-    // Short-circuit when Sentry returned nothing for this window. The
-    // empty-list case is the common idle path (steady-state apps don't
-    // throw new errors every hour), and running classify + dispatch on
-    // an empty input wastes two Claude calls per run — at hourly cadence
-    // across many tenants that adds up. Cleaner to route directly to END
-    // at the graph level than to short-circuit inside each downstream
-    // node's prompt (which still spends a model round-trip).
-    graph.addConditionalEdges('fetch_issues', (state) => {
-      const issues = state?.fetch_issues?.issues || [];
-      return issues.length === 0 ? 'END' : 'classify';
+    graph.addEdge('fetch_issues', 'has_issues');
+    // Short-circuit when Sentry returned nothing for this window. The empty-list
+    // case is the common idle path, and running classify + dispatch on empty
+    // input wastes two Claude calls per run — at hourly cadence across many
+    // tenants that adds up. Routing to END at the graph level (vs short-circuit
+    // inside each prompt) skips the model round-trips entirely.
+    graph.addConditionalEdges('has_issues', routeHasIssues, {
+      labels: { classify: 'has issues', END: 'no issues' },
     });
     graph.addEdge('classify',        'dispatch_alerts');
     graph.addEdge('dispatch_alerts', 'END');

package/browser-test-automation/nodes/cache-replay.mjs DELETED Viewed

@@ -1,213 +0,0 @@
-/**
- * cache_replay node — lever-#2 read path inside the workflow.
- *
- * Sits between `preflight` and `execute_live` in the graph. Tries to
- * replay a prior successful run's action sequence via Playwright
- * directly, completely skipping the LLM. On a clean cache hit it
- * populates `state.execute_live` with the result so downstream
- * `generate_script` works exactly as if execute_live had run.
- *
- * Conditional edge after this node:
- *   - state.cache_replay.hit === true → skip execute_live → generate_script
- *   - state.cache_replay.hit === false → execute_live (LLM-driven path)
- *
- * Not user-configurable per-spec — the cache key derivation handles
- * staleness (page fingerprint drift invalidates) and replay failures
- * fall through cleanly to the LLM path.
- */
-import { z } from '@zibby/core';
-import { chromium } from 'playwright';
-import { spawn } from 'child_process';
-import { extractDomain, replayActions } from '@zibby/ui-memory';
-import { join } from 'path';
-const REPLAY_TIMEOUT_MS = 60_000;
-export const cacheReplayNode = {
-  name: 'cache_replay',
-  skills: [],
-  timeout: 90000,
-  outputSchema: z.object({
-    hit: z.boolean(),
-    elapsed_ms: z.number().nullish(),
-    executed: z.number().nullish(),
-    total: z.number().nullish(),
-    cache_key: z.string().nullish(),
-    error: z.string().nullish(),
-    // When hit, we also write a synthesized execute_live block so the
-    // downstream generate_script node sees what it expects.
-    execute_live_synthesized: z.boolean().nullish(),
-  }),
-  execute: async (context) => {
-    // graph.js builds nodeContext as `{ state, invokeAgent, _coreInvokeAgent,
-    // ...state.getAll() }`. So `context.testSpec` works (spread) AND
-    // `context.state.get('testSpec')` works (instance). Reading from the
-    // spread is the natural shape — `context.state` is reserved for the
-    // .set(key, value) side-write below.
-    const cwd = context.cwd || context.workspace || process.cwd();
-    const testSpec = context.testSpec || '';
-    const specPath = context.specPath || '';
-    // Derive domain from the spec text (no DOM access yet — pure parse).
-    const domain = extractDomainFromSpec(testSpec);
-    if (!domain) {
-      return { hit: false, error: 'cannot derive domain from spec' };
-    }
-    // Cache key requires page_fingerprint, which is page-state-dependent
-    // and only available AFTER navigation. We compute a key WITHOUT
-    // fingerprint first and look up by (domain, spec_path) prefix —
-    // the persister wrote spec_path too. If we find a candidate, we
-    // use its stored fingerprint to compute the full key and verify.
-    //
-    // Lookup order:
-    //   1. Exact (domain, spec_path) match in action_cache.
-    //   2. If found, use its actions for replay attempt.
-    //   3. On replay success: signal hit, populate state.execute_live.
-    //   4. On replay failure (or cache miss): hit=false, fall back to LLM.
-    const cached = await lookupCacheByDomainAndSpec({ cwd, domain, specPath });
-    if (!cached) {
-      return { hit: false, error: 'no cached actions for this spec' };
-    }
-    // Run the replay in a freshly-launched Playwright browser. Cleanly
-    // independent from the @zibby/mcp-browser path execute_live uses.
-    const t0 = Date.now();
-    const browser = await chromium.launch({ headless: true });
-    const page = await browser.newPage();
-    let replayResult;
-    try {
-      replayResult = await Promise.race([
-        replayActions({
-          actions: cached.actions,
-          page,
-          log: (m) => console.log(`[cache_replay] ${m}`),
-        }),
-        new Promise((_, reject) =>
-          setTimeout(() => reject(new Error('replay timeout')), REPLAY_TIMEOUT_MS),
-        ),
-      ]);
-    } catch (err) {
-      replayResult = { success: false, error: err.message, executed: 0, total: cached.actions.length };
-    }
-    const finalUrl = page.url();
-    await browser.close().catch(() => {});
-    const elapsedMs = Date.now() - t0;
-    if (!replayResult.success) {
-      // Increment failure_count so we can drop chronic misses later.
-      await incrementCacheFailure({ cwd, cacheKey: cached.cache_key });
-      return {
-        hit: false,
-        elapsed_ms: elapsedMs,
-        executed: replayResult.executed,
-        total: replayResult.total,
-        cache_key: cached.cache_key,
-        error: replayResult.error,
-      };
-    }
-    // HIT path. Side-write the synthesized execute_live output via
-    // context.state.set so downstream generate_script reads the same
-    // shape it expects (actions[], finalUrl, …). The customExecute
-    // return-value lands in state.cache_replay; the execute_live slot
-    // has to be populated separately.
-    if (typeof context.state?.set === 'function') {
-      context.state.set('execute_live', {
-        success: true,
-        steps: cached.actions.map((a) => a.description),
-        actions: cached.actions,
-        assertions: [],
-        finalUrl,
-        browserClosed: true,
-        notes: 'cache_replay hit — actions replayed via Playwright, no LLM',
-      });
-    }
-    return {
-      hit: true,
-      elapsed_ms: elapsedMs,
-      executed: replayResult.executed,
-      total: replayResult.total,
-      cache_key: cached.cache_key,
-      execute_live_synthesized: true,
-    };
-  },
-};
-// ─── helpers ────────────────────────────────────────────────────────────
-function extractDomainFromSpec(spec) {
-  if (!spec) return null;
-  // Find the first http(s) URL in the spec and run it through the
-  // SAME `extractDomain` the persister uses, so the cache-key lookup
-  // matches what was actually written (notably: `www.` is stripped).
-  const m = String(spec).match(/https?:\/\/[^\s"'<>]+/);
-  if (!m) return null;
-  return extractDomain(m[0]);
-}
-/**
- * Find a cached row by (domain, spec_path). Picks the row with
- * highest success_count if multiple match.
- * Uses dolt via subprocess (matching the rest of the codebase's
- * Dolt-access pattern).
- */
-async function lookupCacheByDomainAndSpec({ cwd, domain, specPath }) {
-  const dbDir = join(cwd, '.zibby', 'memory');
-  const safeDomain = escapeSql(domain);
-  const safeSpec = escapeSql(specPath);
-  const sql = `SELECT cache_key, actions_json, page_fingerprint
-    FROM action_cache
-    WHERE domain = ${safeDomain} AND spec_path = ${safeSpec}
-    ORDER BY success_count DESC, last_used_at DESC
-    LIMIT 1`;
-  const rows = await runDoltJson(dbDir, sql);
-  if (!rows || rows.length === 0) return null;
-  try {
-    const actions = JSON.parse(rows[0].actions_json);
-    return { cache_key: rows[0].cache_key, actions, fingerprint: rows[0].page_fingerprint };
-  } catch {
-    return null;
-  }
-}
-async function incrementCacheFailure({ cwd, cacheKey }) {
-  const dbDir = join(cwd, '.zibby', 'memory');
-  const sql = `UPDATE action_cache
-    SET failure_count = failure_count + 1, last_replay_status = 'replay-failed'
-    WHERE cache_key = ${escapeSql(cacheKey)}`;
-  await runDoltExec(dbDir, sql).catch(() => { /* non-fatal */ });
-}
-function escapeSql(v) {
-  if (v == null) return 'NULL';
-  return `'${String(v).replace(/'/g, "''")}'`;
-}
-function runDoltJson(dir, sql) {
-  return new Promise((resolve) => {
-    const child = spawn('dolt', ['sql', '-r', 'json', '-q', sql], { cwd: dir });
-    let out = '';
-    child.stdout.on('data', (d) => { out += d; });
-    child.on('close', () => {
-      try {
-        const parsed = JSON.parse(out);
-        resolve(parsed.rows || []);
-      } catch {
-        resolve([]);
-      }
-    });
-    child.on('error', () => resolve([]));
-  });
-}
-function runDoltExec(dir, sql) {
-  return new Promise((resolve, reject) => {
-    const child = spawn('dolt', ['sql', '-q', sql], { cwd: dir });
-    child.on('close', (code) => (code === 0 ? resolve() : reject(new Error(`dolt exit ${code}`))));
-    child.on('error', reject);
-  });
-}