npm - guild-agents - Versions diffs - 1.5.0 → 2.0.0 - Mend

guild-agents 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

package/README.md +71 -67
package/bin/guild.js +4 -85
package/package.json +1 -1
package/src/commands/doctor.js +11 -33
package/src/commands/init.js +1 -1
package/src/templates/skills/build-feature/SKILL.md +7 -38
package/src/templates/skills/build-feature/evals/evals.json +2 -2
package/src/templates/skills/council/SKILL.md +4 -14
package/src/templates/skills/council/evals/evals.json +3 -13
package/src/templates/skills/create-pr/SKILL.md +2 -5
package/src/templates/skills/guild-specialize/SKILL.md +2 -5
package/src/templates/skills/qa-cycle/SKILL.md +0 -7
package/src/templates/skills/re-specialize/SKILL.md +0 -3
package/src/templates/skills/session-end/SKILL.md +77 -30
package/src/templates/skills/session-start/SKILL.md +51 -20
package/src/utils/eval-runner.js +2 -8
package/src/utils/generators.js +3 -4
package/src/utils/skill-parser.js +83 -0
package/src/utils/trigger-runner.js +1 -1
package/src/commands/logs.js +0 -63
package/src/commands/reset-learnings.js +0 -44
package/src/commands/run.js +0 -105
package/src/commands/stats.js +0 -147
package/src/templates/agents/learnings-extractor.md +0 -49
package/src/templates/skills/dev-flow/SKILL.md +0 -81
package/src/templates/skills/dev-flow/evals/evals.json +0 -36
package/src/templates/skills/dev-flow/evals/triggers.json +0 -16
package/src/templates/skills/new-feature/SKILL.md +0 -119
package/src/templates/skills/new-feature/evals/evals.json +0 -41
package/src/templates/skills/new-feature/evals/triggers.json +0 -16
package/src/templates/skills/review/SKILL.md +0 -97
package/src/templates/skills/review/evals/evals.json +0 -43
package/src/templates/skills/review/evals/triggers.json +0 -16
package/src/templates/skills/status/SKILL.md +0 -100
package/src/templates/skills/status/evals/evals.json +0 -40
package/src/templates/skills/status/evals/triggers.json +0 -16
package/src/templates/skills/verify/SKILL.md +0 -114
package/src/templates/skills/verify/evals/triggers.json +0 -16
package/src/utils/accounting.js +0 -139
package/src/utils/dispatch-protocol.js +0 -71
package/src/utils/dispatch.js +0 -172
package/src/utils/executor.js +0 -293
package/src/utils/learnings-io.js +0 -76
package/src/utils/learnings.js +0 -204
package/src/utils/orchestrator-io.js +0 -356
package/src/utils/orchestrator.js +0 -590
package/src/utils/pricing.js +0 -28
package/src/utils/providers/claude-code.js +0 -43
package/src/utils/skill-loader.js +0 -83
package/src/utils/trace.js +0 -400
package/src/utils/workflow-parser.js +0 -225

package/src/templates/skills/review/SKILL.md DELETED Viewed

@@ -1,97 +0,0 @@
----
-name: review
-description: "Standalone code review on the current diff"
-user-invocable: true
-workflow:
-  version: 1
-  steps:
-    - id: gather-diff
-      role: system
-      intent: "Get current git diff (staged or unstaged) and run tests + lint for context."
-      commands: [git diff --staged, git diff, npm test, npm run lint]
-      produces: [diff-content, test-result, lint-result]
-    - id: review
-      role: code-reviewer
-      intent: "Review code quality, patterns, security, and tests. Classify findings by severity."
-      requires: [diff-content, test-result, lint-result]
-      produces: [review-report]
-      model-tier: reasoning
-    - id: present
-      role: system
-      intent: "Present findings organized by severity with final verdict."
-      requires: [review-report]
-      produces: [formatted-report]
-      gate: true
----
-# Review
-Runs an independent code review on the current project changes. Invokes the Code Reviewer agent to analyze quality, patterns, security, and technical debt.
-## When to use
-- Before creating a PR
-- To review your own changes before requesting review from others
-- When you want a second opinion on the code you wrote
-## Usage
-`/review`
-## Process
-### Step 1 — Get diff and verification state
-Get the current changes:
-1. First try `git diff --staged` (staged changes)
-2. If there are no staged changes, use `git diff` (unstaged changes)
-3. If there are no changes at all, report that there is nothing to review
-Run automated verification to give context to the reviewer:
-1. Run project tests (e.g., `npm test`) — capture result
-2. Run project lint (e.g., `npm run lint`) — capture result
-3. Include both results as context for the Code Reviewer
-Note: The Code Reviewer does not have access to Bash (only Read, Glob, Grep), so tests and lint are run here before invoking the reviewer.
-### Step 2 — Invoke Code Reviewer
-Invoke the Code Reviewer agent using Task tool with `model: "opus"` (reasoning tier):
-1. Read `.claude/agents/code-reviewer.md` to assume the role
-2. Read CLAUDE.md to understand the project conventions
-3. Receive the full diff + test and lint results from Step 1
-4. If tests or lint failed, this is automatically a Blocker finding
-5. Review the full diff
-6. Classify each finding by severity:
-   - **Blocker**: Must be fixed before merge
-   - **Warning**: Should be fixed, introduces technical debt
-   - **Suggestion**: Optional improvement
-### Step 3 — Present findings
-Present the report organized by severity:
-- Total count of findings by type
-- Detail of each finding: file, description, suggested fix
-- Final verdict: Approved / Approved with warnings / Blocked
-If there are blockers, suggest fixing them and running `/review` again.
-## Example Session
-```text
-User: /review
-Reviewing diff: 4 files changed, +127 -34
-Code Reviewer (opus) — Reviewing changes...
-Findings:
-- [Warning] src/api/users.js:45 — No input validation on email parameter
-- [Suggestion] src/utils/format.js:12 — Consider using Intl.DateTimeFormat
-- [Blocker] src/db/queries.js:78 — SQL injection vulnerability in raw query
-1 blocker, 1 warning, 1 suggestion.
-```

package/src/templates/skills/review/evals/evals.json DELETED Viewed

@@ -1,43 +0,0 @@
-{
-  "skill": "review",
-  "evals": [
-    {
-      "id": "rv-has-core-steps",
-      "description": "Review has gather-diff, review, and present steps",
-      "expectations": [
-        { "text": "Has gather-diff step", "assertion": "step-exists:gather-diff" },
-        { "text": "Has review step", "assertion": "step-exists:review" },
-        { "text": "Has present step", "assertion": "step-exists:present" }
-      ]
-    },
-    {
-      "id": "rv-reviewer-role",
-      "description": "Review step uses code-reviewer role",
-      "expectations": [
-        { "text": "review uses code-reviewer role", "assertion": "step-role:review:code-reviewer" }
-      ]
-    },
-    {
-      "id": "rv-reasoning-tier",
-      "description": "Code review uses reasoning tier (opus)",
-      "expectations": [
-        { "text": "review uses reasoning tier", "assertion": "step-model-tier:review:reasoning" }
-      ]
-    },
-    {
-      "id": "rv-presentation-gate",
-      "description": "Present step has gate for user review",
-      "expectations": [
-        { "text": "present has gate", "assertion": "gate-exists:present" }
-      ]
-    },
-    {
-      "id": "rv-dependencies",
-      "description": "Review requires diff content, present requires report",
-      "expectations": [
-        { "text": "review requires diff-content", "assertion": "step-requires:review:diff-content" },
-        { "text": "present requires review-report", "assertion": "step-requires:present:review-report" }
-      ]
-    }
-  ]
-}

package/src/templates/skills/review/evals/triggers.json DELETED Viewed

@@ -1,16 +0,0 @@
-{
-  "skill": "review",
-  "matcherType": "keyword",
-  "description": "Standalone code review on the current diff",
-  "threshold": 0.3,
-  "tests": [
-    { "prompt": "review my code", "shouldTrigger": true },
-    { "prompt": "do a code review on the current changes", "shouldTrigger": true },
-    { "prompt": "check my diff for issues", "shouldTrigger": true },
-    { "prompt": "review the current diff", "shouldTrigger": true },
-    { "prompt": "create a pull request", "shouldTrigger": false },
-    { "prompt": "save my session", "shouldTrigger": false },
-    { "prompt": "what phase am I in", "shouldTrigger": false },
-    { "prompt": "start a new feature", "shouldTrigger": false }
-  ]
-}

package/src/templates/skills/status/SKILL.md DELETED Viewed

@@ -1,100 +0,0 @@
----
-name: status
-description: "Shows current project and session state"
-user-invocable: true
-workflow:
-  version: 1
-  steps:
-    - id: read-state
-      role: system
-      intent: "Read CLAUDE.md, PROJECT.md, and SESSION.md for project state."
-      commands: [cat CLAUDE.md, cat PROJECT.md, cat SESSION.md]
-      produces: [claude-md, project-md, session-md]
-    - id: scan-resources
-      role: system
-      intent: "List available agents and skills from .claude/ directories."
-      commands: [ls .claude/agents/, ls .claude/skills/]
-      produces: [agent-list, skill-list]
-    - id: present-status
-      role: system
-      intent: "Display project summary: name, stack, session state, agents, skills, and suggested next steps."
-      requires: [project-md, session-md, agent-list, skill-list]
-      produces: [status-display]
-      gate: true
----
-# Status
-Shows a complete summary of the current project state, the active session, and the available Guild resources.
-## When to use
-- At the start of a session to get oriented
-- To verify that Guild is correctly configured
-- To see which agents and skills are available
-## Usage
-`/status`
-## Process
-### Step 1 — Read state files
-Read the Guild configuration files:
-- `CLAUDE.md` — project instructions and conventions
-- `PROJECT.md` — identity, stack, and configured agents
-- `SESSION.md` — current session state
-### Step 2 — Present state
-Show the summary in the following format:
-```text
-Guild v1 — [project name]
-Current session:
-- Date: [date from SESSION.md]
-- Task in progress: [task or "none"]
-- State: [current state]
-Stack:
-- [technologies listed in PROJECT.md]
-Available agents:
-- [list of .md files in .claude/agents/]
-Available skills:
-- [list of directories in .claude/skills/]
-Next steps:
-- [extracted from SESSION.md]
-```
-### Step 3 — Suggest actions
-If there is no task in progress, suggest:
-- `/build-feature` to implement something new
-- `/new-feature` to prepare the environment for a feature
-- `/council` to debate a decision
-If there is a task in progress, suggest continuing with the appropriate skill based on the state.
-## Example Session
-```text
-User: /status
-Guild — MyProject
-Stack: Node.js 20, React 18, PostgreSQL
-Session: 2026-02-23
-Task: Implementing user preferences
-State: Phase 4 — Developer implementing
-Agents: advisor, tech-lead, developer, code-reviewer, qa, bugfix, learnings-extractor
-Skills: guild-specialize, build-feature, new-feature, council, qa-cycle, review, dev-flow,
-  status, session-start, session-end
-```

package/src/templates/skills/status/evals/evals.json DELETED Viewed

@@ -1,40 +0,0 @@
-{
-  "skill": "status",
-  "evals": [
-    {
-      "id": "st-has-core-steps",
-      "description": "Status has read-state, scan-resources, present steps",
-      "expectations": [
-        { "text": "Has read-state step", "assertion": "step-exists:read-state" },
-        { "text": "Has scan-resources step", "assertion": "step-exists:scan-resources" },
-        { "text": "Has present-status step", "assertion": "step-exists:present-status" }
-      ]
-    },
-    {
-      "id": "st-all-system",
-      "description": "All steps are system role",
-      "expectations": [
-        { "text": "read-state is system", "assertion": "step-role:read-state:system" },
-        { "text": "scan-resources is system", "assertion": "step-role:scan-resources:system" },
-        { "text": "present-status is system", "assertion": "step-role:present-status:system" }
-      ]
-    },
-    {
-      "id": "st-presentation-gate",
-      "description": "Present-status has gate",
-      "expectations": [
-        { "text": "present-status has gate", "assertion": "gate-exists:present-status" }
-      ]
-    },
-    {
-      "id": "st-dependencies",
-      "description": "Present-status requires project and session data",
-      "expectations": [
-        { "text": "present-status requires project-md", "assertion": "step-requires:present-status:project-md" },
-        { "text": "present-status requires session-md", "assertion": "step-requires:present-status:session-md" },
-        { "text": "present-status requires agent-list", "assertion": "step-requires:present-status:agent-list" },
-        { "text": "present-status requires skill-list", "assertion": "step-requires:present-status:skill-list" }
-      ]
-    }
-  ]
-}

package/src/templates/skills/status/evals/triggers.json DELETED Viewed

@@ -1,16 +0,0 @@
-{
-  "skill": "status",
-  "matcherType": "keyword",
-  "description": "Shows current project and session state",
-  "threshold": 0.3,
-  "tests": [
-    { "prompt": "show the project status", "shouldTrigger": true },
-    { "prompt": "show current session state", "shouldTrigger": true },
-    { "prompt": "what is the current project state", "shouldTrigger": true },
-    { "prompt": "how is the project going", "shouldTrigger": true },
-    { "prompt": "create a pull request", "shouldTrigger": false },
-    { "prompt": "review my code", "shouldTrigger": false },
-    { "prompt": "debug this bug", "shouldTrigger": false },
-    { "prompt": "save my session", "shouldTrigger": false }
-  ]
-}

package/src/templates/skills/verify/SKILL.md DELETED Viewed

@@ -1,114 +0,0 @@
----
-name: verify
-description: "Discipline skill — verification before completion. Use when about to claim work is complete, fixed, or passing, before committing or creating PRs."
-user-invocable: true
----
-# Verification Before Completion
-Claiming work is complete without verification is dishonesty, not efficiency.
-**Core principle:** Evidence before claims, always.
-## Usage
-`/verify`
-Invoke this skill before claiming any work is done, before committing, and before creating PRs.
-## When to use
-**ALWAYS before:**
-- Any success or completion claim
-- Committing, pushing, or creating PRs
-- Moving to the next task
-- Expressing satisfaction about work state
-## The Iron Law
-```text
-NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE
-```
-If you haven't run the verification command in this step, you cannot claim it passes.
-## The Gate Function
-```text
-BEFORE claiming any status:
-1. IDENTIFY: What command proves this claim?
-2. RUN: Execute the FULL command (fresh, complete)
-3. READ: Full output, check exit code, count failures
-4. VERIFY: Does output confirm the claim?
-   - If NO: State actual status with evidence
-   - If YES: State claim WITH evidence
-5. ONLY THEN: Make the claim
-Skip any step = lying, not verifying
-```
-## What Each Claim Requires
-| Claim            | Requires                        | Not Sufficient                 |
-| ---------------- | ------------------------------- | ------------------------------ |
-| Tests pass       | Test command output: 0 failures | Previous run, "should pass"    |
-| Linter clean     | Linter output: 0 errors         | Partial check, extrapolation   |
-| Build succeeds   | Build command: exit 0           | Linter passing, logs look good |
-| Bug fixed        | Test original symptom: passes   | Code changed, assumed fixed    |
-| Regression test  | Red-green cycle verified        | Test passes once               |
-| Requirements met | Line-by-line checklist          | Tests passing                  |
-## Red Flags - STOP
-- Using "should", "probably", "seems to"
-- Expressing satisfaction before verification ("Great!", "Perfect!", "Done!")
-- About to commit/push/PR without verification
-- Relying on partial verification
-- Thinking "just this once"
-- ANY wording implying success without having run verification
-## Common Rationalizations
-| Excuse                       | Reality                        |
-| ---------------------------- | ------------------------------ |
-| "Should work now"            | RUN the verification           |
-| "I'm confident"              | Confidence is not evidence     |
-| "Just this once"             | No exceptions                  |
-| "Linter passed"              | Linter is not compiler         |
-| "Partial check is enough"    | Partial proves nothing         |
-## Verification Patterns
-**Tests:**
-```text
-OK:  [Run test command] [See: 34/34 pass] "All tests pass"
-BAD: "Should pass now" / "Looks correct"
-```
-**Build:**
-```text
-OK:  [Run build] [See: exit 0] "Build passes"
-BAD: "Linter passed" (linter doesn't check compilation)
-```
-**Requirements:**
-```text
-OK:  Re-read plan -> Create checklist -> Verify each -> Report gaps or completion
-BAD: "Tests pass, phase complete"
-```
-## The Bottom Line
-Run the command. Read the output. THEN claim the result.
-No shortcuts. Non-negotiable.
-## Related Skills
-- `/tdd` — TDD ensures tests exist before claiming code works
-- `/debug` — systematic debugging when verification reveals failures

package/src/templates/skills/verify/evals/triggers.json DELETED Viewed

@@ -1,16 +0,0 @@
-{
-  "skill": "verify",
-  "matcherType": "keyword",
-  "description": "Discipline skill — verification before completion. Use when about to claim work is complete, fixed, or passing, before committing or creating PRs.",
-  "threshold": 0.3,
-  "tests": [
-    { "prompt": "verify before committing", "shouldTrigger": true },
-    { "prompt": "run verification before completion", "shouldTrigger": true },
-    { "prompt": "verify the work is complete", "shouldTrigger": true },
-    { "prompt": "make sure everything passes before the PR", "shouldTrigger": true, "keywordExpected": false },
-    { "prompt": "create a pull request", "shouldTrigger": false },
-    { "prompt": "review my code", "shouldTrigger": false },
-    { "prompt": "debug this bug", "shouldTrigger": false },
-    { "prompt": "save my session", "shouldTrigger": false }
-  ]
-}

package/src/utils/accounting.js DELETED Viewed

@@ -1,139 +0,0 @@
-/**
- * accounting.js — Token usage recording, persistence, and aggregation.
- *
- * Persists usage data to .claude/guild/usage.json.
- */
-import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
-import { join, dirname } from 'path';
-import { estimateCost } from './pricing.js';
-const USAGE_PATH = join('.claude', 'guild', 'usage.json');
-export function emptyUsage() {
-  return {
-    version: 1,
-    lastUpdated: new Date().toISOString(),
-    entries: [],
-    totals: {
-      totalTokens: 0,
-      totalInputTokens: 0,
-      totalOutputTokens: 0,
-      totalCostUSD: 0,
-      tokensByModel: {},
-      tokensByTier: {},
-      tokensByWorkflow: {},
-      workflowCount: 0,
-    },
-  };
-}
-export function createEntry({ workflow, agent, tier, model, inputTokens, outputTokens }) {
-  const totalTokens = inputTokens + outputTokens;
-  return {
-    timestamp: new Date().toISOString(),
-    workflow,
-    agent,
-    tier,
-    model,
-    inputTokens,
-    outputTokens,
-    totalTokens,
-    estimatedCostUSD: estimateCost(model, inputTokens, outputTokens),
-  };
-}
-export function loadUsage(root) {
-  const filePath = join(root, USAGE_PATH);
-  if (!existsSync(filePath)) return emptyUsage();
-  try {
-    return JSON.parse(readFileSync(filePath, 'utf8'));
-  } catch {
-    return emptyUsage();
-  }
-}
-export function saveUsage(root, usage) {
-  const filePath = join(root, USAGE_PATH);
-  mkdirSync(dirname(filePath), { recursive: true });
-  usage.lastUpdated = new Date().toISOString();
-  writeFileSync(filePath, JSON.stringify(usage, null, 2) + '\n');
-}
-function updateTotals(totals, entry) {
-  totals.totalTokens += entry.totalTokens;
-  totals.totalInputTokens += entry.inputTokens;
-  totals.totalOutputTokens += entry.outputTokens;
-  totals.totalCostUSD += entry.estimatedCostUSD;
-  totals.tokensByModel[entry.model] = (totals.tokensByModel[entry.model] || 0) + entry.totalTokens;
-  totals.tokensByTier[entry.tier] = (totals.tokensByTier[entry.tier] || 0) + entry.totalTokens;
-  totals.tokensByWorkflow[entry.workflow] = (totals.tokensByWorkflow[entry.workflow] || 0) + entry.totalTokens;
-  totals.workflowCount += 1;
-}
-export function recordStep(root, params) {
-  const usage = loadUsage(root);
-  const entry = createEntry(params);
-  usage.entries.push(entry);
-  updateTotals(usage.totals, entry);
-  saveUsage(root, usage);
-}
-const PROFILES = {
-  max: { reasoning: 'claude-opus-4-6', execution: 'claude-sonnet-4-5', routine: 'claude-haiku-4-5' },
-  pro: { reasoning: 'claude-sonnet-4-5', execution: 'claude-sonnet-4-5', routine: 'claude-haiku-4-5' },
-  'all-opus': { reasoning: 'claude-opus-4-6', execution: 'claude-opus-4-6', routine: 'claude-opus-4-6' },
-};
-export function aggregate(root, period) {
-  const usage = loadUsage(root);
-  const now = new Date();
-  let cutoff;
-  switch (period) {
-    case 'today':
-      cutoff = new Date(now.getFullYear(), now.getMonth(), now.getDate());
-      break;
-    case 'week':
-      cutoff = new Date(now);
-      cutoff.setDate(cutoff.getDate() - 7);
-      break;
-    case 'month':
-      cutoff = new Date(now);
-      cutoff.setDate(cutoff.getDate() - 30);
-      break;
-    default:
-      cutoff = new Date(0);
-  }
-  const filtered = usage.entries.filter(e => new Date(e.timestamp) >= cutoff);
-  const totals = {
-    totalTokens: 0,
-    totalInputTokens: 0,
-    totalOutputTokens: 0,
-    totalCostUSD: 0,
-    tokensByModel: {},
-    tokensByTier: {},
-    tokensByWorkflow: {},
-    workflowCount: 0,
-  };
-  for (const entry of filtered) {
-    updateTotals(totals, entry);
-  }
-  return totals;
-}
-export function estimateWithProfile(entries, profileName) {
-  const profile = PROFILES[profileName];
-  if (!profile) return 0;
-  let cost = 0;
-  for (const entry of entries) {
-    const model = profile[entry.tier] || entry.model;
-    cost += estimateCost(model, entry.inputTokens, entry.outputTokens);
-  }
-  return cost;
-}

package/src/utils/dispatch-protocol.js DELETED Viewed

@@ -1,71 +0,0 @@
-/**
- * dispatch-protocol.js — Constants and type definitions for the Guild dispatch protocol.
- *
- * Defines the vocabulary shared by dispatch utilities, workflow parser,
- * model routing, and trace modules. Zero dependencies.
- */
-/**
- * Valid model tier values. Each tier maps to a class of model capability.
- * @type {readonly ['reasoning', 'execution', 'routine']}
- */
-export const MODEL_TIERS = ['reasoning', 'execution', 'routine'];
-/**
- * Valid failure strategy base values for workflow steps.
- * - abort: halt the workflow on failure (default)
- * - continue: skip this step and proceed
- * Additionally, `goto:<step-id>` is valid for redirecting to another step.
- * @type {readonly ['abort', 'continue']}
- */
-export const FAILURE_STRATEGIES = ['abort', 'continue'];
-/**
- * Default failure strategy when none is specified.
- * @type {string}
- */
-export const DEFAULT_FAILURE_STRATEGY = 'abort';
-/**
- * Default tier assignment for each Guild agent role.
- * Used as fallback when neither the workflow step nor the agent frontmatter
- * specifies a tier.
- * @type {Record<string, string>}
- */
-export const DEFAULT_AGENT_TIERS = {
-  'advisor': 'reasoning',
-  'tech-lead': 'reasoning',
-  'code-reviewer': 'reasoning',
-  'developer': 'execution',
-  'bugfix': 'execution',
-  'qa': 'execution',
-  'learnings-extractor': 'routine',
-};
-/**
- * Built-in model profiles mapping tiers to concrete model IDs.
- * @type {Record<string, Record<string, string>>}
- */
-export const DEFAULT_MODEL_PROFILES = {
-  max: {
-    reasoning: 'claude-opus-4-6',
-    execution: 'claude-sonnet-4-6',
-    routine: 'claude-haiku-4-5',
-  },
-  pro: {
-    reasoning: 'claude-sonnet-4-6',
-    execution: 'claude-sonnet-4-6',
-    routine: 'claude-haiku-4-5',
-  },
-};
-/**
- * Fallback chain for tier resolution. When a tier's model is unavailable,
- * fall back to the next tier. `null` means no further fallback.
- * @type {Record<string, string|null>}
- */
-export const FALLBACK_CHAIN = {
-  reasoning: 'execution',
-  execution: 'routine',
-  routine: null,
-};