guild-agents 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +19 -6
  2. package/bin/guild.js +46 -0
  3. package/package.json +2 -2
  4. package/src/commands/eval.js +225 -0
  5. package/src/commands/stats.js +147 -0
  6. package/src/templates/agents/advisor.md +0 -1
  7. package/src/templates/agents/developer.md +2 -2
  8. package/src/templates/agents/qa.md +1 -1
  9. package/src/templates/agents/tech-lead.md +2 -2
  10. package/src/templates/skills/build-feature/SKILL.md +53 -80
  11. package/src/templates/skills/build-feature/evals/evals.json +1 -2
  12. package/src/templates/skills/build-feature/evals/triggers.json +16 -0
  13. package/src/templates/skills/council/SKILL.md +2 -2
  14. package/src/templates/skills/council/evals/triggers.json +16 -0
  15. package/src/templates/skills/create-pr/evals/evals.json +44 -0
  16. package/src/templates/skills/create-pr/evals/triggers.json +16 -0
  17. package/src/templates/skills/debug/SKILL.md +1 -1
  18. package/src/templates/skills/debug/evals/triggers.json +16 -0
  19. package/src/templates/skills/dev-flow/SKILL.md +10 -12
  20. package/src/templates/skills/dev-flow/evals/evals.json +36 -0
  21. package/src/templates/skills/dev-flow/evals/triggers.json +16 -0
  22. package/src/templates/skills/guild-specialize/SKILL.md +0 -4
  23. package/src/templates/skills/guild-specialize/evals/evals.json +54 -0
  24. package/src/templates/skills/guild-specialize/evals/triggers.json +16 -0
  25. package/src/templates/skills/new-feature/evals/evals.json +41 -0
  26. package/src/templates/skills/new-feature/evals/triggers.json +16 -0
  27. package/src/templates/skills/qa-cycle/evals/evals.json +46 -0
  28. package/src/templates/skills/qa-cycle/evals/triggers.json +16 -0
  29. package/src/templates/skills/re-specialize/evals/evals.json +48 -0
  30. package/src/templates/skills/re-specialize/evals/triggers.json +16 -0
  31. package/src/templates/skills/review/evals/evals.json +43 -0
  32. package/src/templates/skills/review/evals/triggers.json +16 -0
  33. package/src/templates/skills/session-end/evals/evals.json +40 -0
  34. package/src/templates/skills/session-end/evals/triggers.json +16 -0
  35. package/src/templates/skills/session-start/evals/evals.json +50 -0
  36. package/src/templates/skills/session-start/evals/triggers.json +16 -0
  37. package/src/templates/skills/status/SKILL.md +1 -1
  38. package/src/templates/skills/status/evals/evals.json +40 -0
  39. package/src/templates/skills/status/evals/triggers.json +16 -0
  40. package/src/templates/skills/tdd/evals/triggers.json +16 -0
  41. package/src/templates/skills/verify/evals/triggers.json +16 -0
  42. package/src/utils/accounting.js +139 -0
  43. package/src/utils/benchmark.js +128 -0
  44. package/src/utils/description-analyzer.js +92 -0
  45. package/src/utils/dispatch-protocol.js +0 -3
  46. package/src/utils/executor.js +133 -23
  47. package/src/utils/pricing.js +28 -0
  48. package/src/utils/semantic-matcher.js +91 -0
  49. package/src/utils/trigger-matcher.js +64 -0
  50. package/src/utils/trigger-runner.js +132 -0
  51. package/src/templates/agents/db-migration.md +0 -51
  52. package/src/templates/agents/platform-expert.md +0 -92
  53. package/src/templates/agents/product-owner.md +0 -52
@@ -0,0 +1,128 @@
1
+ /**
2
+ * benchmark.js — Records, reports, and detects regressions in eval benchmarks.
3
+ *
4
+ * Persists results to benchmarks/benchmark.json with 30-entry rotation.
5
+ * Generates benchmarks/benchmark.md as a human-readable report.
6
+ */
7
+
8
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
9
+ import { dirname } from 'path';
10
+
11
// Upper bound on how many benchmark entries are kept in the history file.
const MAX_ENTRIES = 30;

/**
 * Adds one benchmark entry to the JSON history file and trims the history
 * so that only the newest MAX_ENTRIES entries are kept.
 *
 * The parent directory is created when it does not exist yet, and a missing
 * history file is treated as an empty history.
 *
 * @param {object} entry - Benchmark entry with timestamp, matcher, skills, aggregate
 * @param {string} filePath - Path to benchmark.json
 */
export function recordBenchmark(entry, filePath) {
  const parentDir = dirname(filePath);
  if (!existsSync(parentDir)) {
    mkdirSync(parentDir, { recursive: true });
  }

  const history = existsSync(filePath)
    ? JSON.parse(readFileSync(filePath, 'utf8'))
    : [];
  history.push(entry);

  // Rotation: once the cap is exceeded, drop the oldest entries from the front.
  const retained = history.length > MAX_ENTRIES ? history.slice(-MAX_ENTRIES) : history;

  writeFileSync(filePath, JSON.stringify(retained, null, 2));
}
37
+
38
/**
 * Renders a benchmark entry as a markdown report.
 *
 * The report has a title, a summary line, one table row per skill and an
 * aggregate section. When a previous entry is supplied, each skill row and
 * the aggregate accuracy show the change in percentage points; changes
 * smaller than 0.1 points are not shown, and per-skill drops of more than
 * 5 points are flagged with " !!".
 *
 * @param {object} current - Current benchmark entry
 * @param {object|null} previous - Previous entry for delta comparison
 * @returns {string} Markdown report
 */
export function generateReport(current, previous) {
  const asPercent = (ratio) => `${(ratio * 100).toFixed(1)}%`;
  const asSignedPoints = (points) => `${points > 0 ? '+' : ''}${points.toFixed(1)}%`;

  const modelSuffix = current.model ? ` (${current.model})` : '';
  const header = [
    `# Eval Benchmark — ${current.timestamp}`,
    `Matcher: ${current.matcher}${modelSuffix} | Skills: ${current.skills.length} | Total tests: ${current.aggregate.total}`,
    '',
    '| Skill | Accuracy | Precision | Recall | Delta |',
    '|-------|----------|-----------|--------|-------|',
  ];

  const rows = current.skills.map((skill) => {
    // Skills without a counterpart in the previous entry keep the dash.
    const baseline = previous
      ? previous.skills.find((candidate) => candidate.name === skill.name)
      : undefined;

    let deltaCell = '—';
    if (baseline) {
      const points = (skill.accuracy - baseline.accuracy) * 100;
      if (Math.abs(points) >= 0.1) {
        deltaCell = points < -5 ? `${asSignedPoints(points)} !!` : asSignedPoints(points);
      }
    }

    return `| ${skill.name} | ${asPercent(skill.accuracy)} | ${asPercent(skill.precision)} | ${asPercent(skill.recall)} | ${deltaCell} |`;
  });

  let aggregateNote = '';
  if (previous) {
    const points = (current.aggregate.accuracy - previous.aggregate.accuracy) * 100;
    if (Math.abs(points) >= 0.1) {
      aggregateNote = ` (Delta ${asSignedPoints(points)})`;
    }
  }

  const footer = [
    '',
    '## Aggregate',
    `Accuracy: ${asPercent(current.aggregate.accuracy)}${aggregateNote}`,
    `Precision: ${asPercent(current.aggregate.precision)}`,
    `Recall: ${asPercent(current.aggregate.recall)}`,
    '',
  ];

  return [...header, ...rows, ...footer].join('\n');
}
92
+
93
/**
 * Detects regressions between two benchmark entries.
 *
 * A skill is reported when BOTH conditions hold:
 *   1. its accuracy dropped by strictly more than 5 percentage points, and
 *   2. the number of correct results (tp + tn) changed by at least 2 tests.
 *
 * The accuracy comparison uses a small tolerance so that a drop of exactly
 * 5 points is never reported just because of floating-point noise
 * (e.g. 0.75 - 0.8 evaluates to -0.050000000000000044). This keeps the
 * boundary consistent with the "> 5%" contract and with the strict
 * `diff < -5` warning used in the markdown report.
 *
 * Skills that are absent from the previous entry are ignored.
 *
 * @param {object} current - Current benchmark entry (reads `skills[].name/accuracy/tp/tn`)
 * @param {object|null} previous - Previous benchmark entry, or null when there is none
 * @returns {Array<{ skill: string, currentAccuracy: number, previousAccuracy: number, delta: number, flippedTests: number }>}
 */
export function detectRegressions(current, previous) {
  if (!previous) return [];

  const DROP_THRESHOLD = 0.05;
  const FLOAT_TOLERANCE = 1e-9;
  const MIN_FLIPPED_TESTS = 2;

  const regressions = [];

  for (const skill of current.skills) {
    const prev = previous.skills.find((s) => s.name === skill.name);
    if (!prev) continue;

    const delta = skill.accuracy - prev.accuracy;
    // Only drops strictly larger than the threshold count; the tolerance
    // absorbs rounding error from the subtraction above.
    if (delta >= -(DROP_THRESHOLD + FLOAT_TOLERANCE)) continue;

    const currentCorrect = skill.tp + skill.tn;
    const prevCorrect = prev.tp + prev.tn;
    const flippedTests = Math.abs(currentCorrect - prevCorrect);

    // A single flipped test is treated as noise rather than a regression.
    if (flippedTests < MIN_FLIPPED_TESTS) continue;

    regressions.push({
      skill: skill.name,
      currentAccuracy: skill.accuracy,
      previousAccuracy: prev.accuracy,
      delta,
      flippedTests,
    });
  }

  return regressions;
}
@@ -0,0 +1,92 @@
1
+ /**
2
+ * description-analyzer.js — Analyzes keyword gaps in skill descriptions.
3
+ *
4
+ * Uses token analysis to identify which keywords are missing from
5
+ * skill descriptions based on failed trigger tests. No LLM required.
6
+ */
7
+
8
+ import { tokenize } from './trigger-matcher.js';
9
+
10
// Words ignored during gap analysis: common function words plus
// conversational filler ("want", "need", "help", ...) that says nothing
// about which skill a prompt is aiming at.
const STOP_WORDS = new Set(
  [
    'the is at in on to of for and or an',
    'it by as be do if no so up we my',
    'use when with from this that will can has',
    'not are was but all any its you your',
    'want need just let get make help me',
  ]
    .join(' ')
    .split(' '),
);
17
+
18
/**
 * Tells whether a prompt token is covered by the description.
 *
 * A token counts as covered when some description token is equal to it,
 * contains it, or is contained in it.
 *
 * @param {string} token - Token taken from a prompt
 * @param {string[]} descTokens - Tokens taken from the skill description
 * @returns {boolean} True when at least one description token overlaps
 */
function tokenMatchesDescription(token, descTokens) {
  // Equality is a special case of containment, so two checks are enough.
  return descTokens.some(
    (candidate) => candidate.includes(token) || token.includes(candidate),
  );
}
29
+
30
/**
 * Finds prompt keywords that the skill description does not cover.
 *
 * Only failed positives are inspected: results that were expected to trigger
 * the skill (`expected` truthy) but did not (`actual` falsy). For each of
 * their prompts, every non-stop-word token without an overlapping
 * description token is reported. Keywords are NOT de-duplicated, so a word
 * appears once per occurrence across the failed prompts.
 *
 * @param {Array} triggerResults - Results from runTriggerTests
 * @param {string} description - Skill description
 * @returns {{ missingKeywords: string[], failedPrompts: string[] }}
 */
export function analyzeGaps(triggerResults, description) {
  const isMeaningful = (word) => !STOP_WORDS.has(word);

  const misses = triggerResults.filter((result) => result.expected && !result.actual);
  if (misses.length === 0) {
    return { missingKeywords: [], failedPrompts: [] };
  }

  const knownTokens = tokenize(description).filter(isMeaningful);
  const failedPrompts = misses.map((result) => result.prompt);
  const missingKeywords = failedPrompts.flatMap((prompt) =>
    tokenize(prompt)
      .filter(isMeaningful)
      .filter((token) => !tokenMatchesDescription(token, knownTokens)),
  );

  return { missingKeywords, failedPrompts };
}
60
+
61
/**
 * Turns gap-analysis results into ranked keyword suggestions per skill.
 *
 * Skills with no missing keywords are left out. For the others, keywords
 * are ordered by how often they occurred (most frequent first, ties keep
 * first-seen order); a keyword seen at least twice is rated 'high',
 * otherwise 'medium'.
 *
 * @param {Array<{ skill: string, currentDescription: string, missingKeywords: string[], failedPrompts: string[] }>} gapsList
 * @returns {Array<{ skill: string, currentDescription: string, suggestedKeywords: Array<{ word: string, confidence: string }> }>}
 */
export function generateSuggestions(gapsList) {
  return gapsList
    .filter((gaps) => gaps.missingKeywords.length !== 0)
    .map((gaps) => {
      const occurrences = new Map();
      gaps.missingKeywords.forEach((keyword) => {
        occurrences.set(keyword, (occurrences.get(keyword) || 0) + 1);
      });

      const ranked = Array.from(occurrences);
      ranked.sort(([, countA], [, countB]) => countB - countA);

      return {
        skill: gaps.skill,
        currentDescription: gaps.currentDescription,
        suggestedKeywords: ranked.map(([word, count]) => ({
          word,
          confidence: count >= 2 ? 'high' : 'medium',
        })),
      };
    });
}
@@ -34,14 +34,11 @@ export const DEFAULT_FAILURE_STRATEGY = 'abort';
34
34
  */
35
35
  export const DEFAULT_AGENT_TIERS = {
36
36
  'advisor': 'reasoning',
37
- 'product-owner': 'reasoning',
38
37
  'tech-lead': 'reasoning',
39
38
  'code-reviewer': 'reasoning',
40
39
  'developer': 'execution',
41
40
  'bugfix': 'execution',
42
- 'db-migration': 'execution',
43
41
  'qa': 'execution',
44
- 'platform-expert': 'execution',
45
42
  'learnings-extractor': 'routine',
46
43
  };
47
44
 
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * Drives a plan to completion by iterating through steps, dispatching
5
5
  * agent steps to a provider function and system steps to local commands.
6
- * Sequential execution only (v1.1); parallel groups deferred to v1.2.
6
+ * Supports parallel execution (v1.2) and delegation to sub-skills.
7
7
  */
8
8
 
9
9
  import { execFile } from 'child_process';
@@ -11,8 +11,15 @@ import {
11
11
  advanceStep,
12
12
  getNextSteps,
13
13
  isPlanComplete,
14
+ MAX_DELEGATION_DEPTH,
15
+ createExecutionPlan,
14
16
  } from './orchestrator.js';
15
- import { buildStepContext, recordStepTrace } from './orchestrator-io.js';
17
+ import {
18
+ buildStepContext,
19
+ recordStepTrace,
20
+ loadWorkflow,
21
+ resolveStepDispatch,
22
+ } from './orchestrator-io.js';
16
23
 
17
24
  const SYSTEM_STEP_TIMEOUT = 120_000; // 2 minutes
18
25
 
@@ -70,7 +77,7 @@ async function executeSystemStep(step, options = {}) {
70
77
  }
71
78
 
72
79
  if (step.delegatesTo) {
73
- return { status: 'passed', output: `Delegation to "${step.delegatesTo}" skipped (v1.1)` };
80
+ return { status: 'passed', output: `System step with delegation — handled by executeDelegation` };
74
81
  }
75
82
 
76
83
  return { status: 'passed', output: 'System step completed' };
@@ -92,12 +99,111 @@ function findStepInPlan(plan, stepId) {
92
99
  return null;
93
100
  }
94
101
 
102
/**
 * Routes one step to the right executor and returns that executor's result.
 *
 * - system step with `delegatesTo` -> executeDelegation (runs the sub-skill)
 * - any other system step          -> executeSystemStep
 * - every other role               -> the provider, with a freshly built step context
 *
 * @param {object} step - Step definition
 * @param {object} dispatch - Dispatch info for this step
 * @param {object} context - Execution context
 * @param {import('./orchestrator.js').ExecutionPlan} context.currentPlan - Current plan state
 * @param {Function} context.provider - Agent step provider
 * @param {string} context.projectRoot - Working directory
 * @param {string} context.skillBody - Skill body text
 * @param {object} context.executeOptions - Full options passed to execute()
 * @returns {Promise<{ status: string, output: string, outcome?: object, error?: string }>}
 */
async function dispatchStep(step, dispatch, context) {
  const { currentPlan, provider, projectRoot, skillBody, executeOptions } = context;

  if (step.role !== 'system') {
    const stepContext = buildStepContext(step, currentPlan, { skillBody });
    return provider(step, dispatch, stepContext);
  }

  return step.delegatesTo
    ? executeDelegation(step, executeOptions)
    : executeSystemStep(step, { projectRoot });
}
129
+
130
/**
 * Runs a delegation step: loads the skill named by `step.delegatesTo`,
 * builds an execution plan and dispatch map for it, and executes that plan
 * one delegation level deeper than the parent.
 *
 * Failures are reported through the returned result rather than thrown in
 * two cases: the delegation depth limit has been reached, or the sub-skill
 * could not be loaded. A sub-plan that finishes with any status other than
 * 'completed' is also reported as a failed step.
 *
 * @param {object} step - Delegation step (with delegatesTo field)
 * @param {object} options - Execute options from parent
 * @returns {Promise<{ status: string, output: string, error?: string }>}
 */
async function executeDelegation(step, options) {
  const { provider, trace, projectRoot, onStepStart, onStepEnd } = options;
  const { profile = 'max', delegationDepth = 0 } = options;
  const target = step.delegatesTo;

  // Every failure path shares the same result shape.
  const failure = (error) => ({ status: 'failed', output: '', error });

  if (delegationDepth >= MAX_DELEGATION_DEPTH) {
    return failure(
      `Delegation depth limit (${MAX_DELEGATION_DEPTH}) exceeded at step "${step.id}" delegating to "${target}"`,
    );
  }

  let subSkill;
  try {
    subSkill = loadWorkflow(target);
  } catch (err) {
    return failure(`Failed to load delegated skill "${target}": ${err.message}`);
  }

  const subPlan = createExecutionPlan(subSkill.workflow, {
    skillName: subSkill.name || target,
  });

  // Resolve dispatch info for every step of the sub-plan up front.
  const subDispatchMap = {};
  for (const { steps } of subPlan.groups) {
    for (const subStep of steps) {
      subDispatchMap[subStep.id] = resolveStepDispatch(subStep, { profile, projectRoot });
    }
  }

  const finalSubPlan = await execute(subPlan, subDispatchMap, {
    provider,
    trace,
    projectRoot,
    skillBody: subSkill.body || '',
    onStepStart,
    onStepEnd,
    delegationDepth: delegationDepth + 1,
    profile,
  });

  if (finalSubPlan.status !== 'completed') {
    return failure(`Delegated skill "${target}" ended with status: ${finalSubPlan.status}`);
  }

  return { status: 'passed', output: `Delegation to "${target}" completed` };
}
199
+
95
200
  /**
96
201
  * Executes a workflow plan to completion.
97
202
  *
98
203
  * Drives the orchestrator state machine by repeatedly calling getNextSteps,
99
204
  * dispatching each step (agent via provider, system via local commands),
100
- * and advancing the plan with the result.
205
+ * and advancing the plan with the result. Parallel groups are dispatched
206
+ * concurrently via Promise.all.
101
207
  *
102
208
  * @param {import('./orchestrator.js').ExecutionPlan} plan - Initial execution plan
103
209
  * @param {Object.<string, import('./orchestrator-io.js').StepDispatchInfo>} dispatchInfoMap - Dispatch info per step
@@ -108,6 +214,8 @@ function findStepInPlan(plan, stepId) {
108
214
  * @param {string} [options.skillBody=''] - Skill body text for context building
109
215
  * @param {Function} [options.onStepStart] - Callback before each step: (step, dispatch) => void
110
216
  * @param {Function} [options.onStepEnd] - Callback after each step: (step, result) => void
217
+ * @param {number} [options.delegationDepth=0] - Current delegation nesting depth
218
+ * @param {string} [options.profile='max'] - Model profile for delegation dispatch
111
219
  * @returns {Promise<import('./orchestrator.js').ExecutionPlan>} Final plan state
112
220
  */
113
221
  export async function execute(plan, dispatchInfoMap, options = {}) {
@@ -127,7 +235,6 @@ export async function execute(plan, dispatchInfoMap, options = {}) {
127
235
  while (!isPlanComplete(currentPlan)) {
128
236
  const { steps, skipped } = getNextSteps(currentPlan);
129
237
 
130
- // Advance skipped steps first
131
238
  for (const stepId of skipped) {
132
239
  currentPlan = advanceStep(currentPlan, stepId, { status: 'skipped' });
133
240
 
@@ -140,7 +247,6 @@ export async function execute(plan, dispatchInfoMap, options = {}) {
140
247
  }
141
248
  }
142
249
 
143
- // If no executable steps remain, check completion again
144
250
  if (steps.length === 0) {
145
251
  if (isPlanComplete(currentPlan)) break;
146
252
  if (++emptyIterations > MAX_EMPTY_ITERATIONS) {
@@ -151,30 +257,34 @@ export async function execute(plan, dispatchInfoMap, options = {}) {
151
257
  }
152
258
  emptyIterations = 0;
153
259
 
154
- // v1.1: sequential execution — one step at a time
155
- const step = steps[0];
156
- const dispatch = dispatchInfoMap[step.id] || {};
260
+ const dispatchContext = {
261
+ currentPlan,
262
+ provider,
263
+ projectRoot,
264
+ skillBody,
265
+ executeOptions: options,
266
+ };
157
267
 
158
- onStepStart?.(step, dispatch);
268
+ const settled = await Promise.all(
269
+ steps.map(async (step) => {
270
+ const dispatch = dispatchInfoMap[step.id] || {};
271
+ onStepStart?.(step, dispatch);
272
+ const result = await dispatchStep(step, dispatch, dispatchContext);
273
+ return { step, dispatch, result };
274
+ })
275
+ );
159
276
 
160
- let result;
161
- if (step.role === 'system') {
162
- result = await executeSystemStep(step, { projectRoot });
163
- } else {
164
- const context = buildStepContext(step, currentPlan, { skillBody });
165
- result = await provider(step, dispatch, context);
166
- }
277
+ for (const { step, dispatch, result } of settled) {
278
+ currentPlan = advanceStep(currentPlan, step.id, result);
167
279
 
168
- currentPlan = advanceStep(currentPlan, step.id, result);
280
+ if (trace) {
281
+ recordStepTrace(trace, step, currentPlan.stepStates[step.id], dispatch);
282
+ }
169
283
 
170
- if (trace) {
171
- recordStepTrace(trace, step, currentPlan.stepStates[step.id], dispatch);
284
+ onStepEnd?.(step, result);
172
285
  }
173
-
174
- onStepEnd?.(step, result);
175
286
  }
176
287
 
177
- // Mark plan as completed if all steps reached terminal state and plan is still running
178
288
  if (currentPlan.status === 'running' && isPlanComplete(currentPlan)) {
179
289
  currentPlan = { ...currentPlan, status: 'completed' };
180
290
  }
@@ -0,0 +1,28 @@
1
+ /**
2
+ * pricing.js — Model pricing table and cost calculation.
3
+ *
4
+ * Prices per million tokens (USD).
5
+ * Source: https://docs.anthropic.com/en/docs/about-claude/models
6
+ */
7
+
8
// Price per million tokens (USD), keyed by undated model id.
// NOTE(review): these rates are hard-coded — confirm them against the
// provider's current pricing page before relying on cost figures.
export const DEFAULT_PRICING = {
  'claude-opus-4-6': { input: 15.00, output: 75.00 },
  'claude-sonnet-4-5': { input: 3.00, output: 15.00 },
  'claude-haiku-4-5': { input: 0.80, output: 4.00 },
};

// Human-friendly labels, keyed by the same undated model ids.
const SHORT_NAMES = {
  'claude-opus-4-6': 'Opus',
  'claude-sonnet-4-5': 'Sonnet',
  'claude-haiku-4-5': 'Haiku',
};

/**
 * Finds the key under which a model is listed in a lookup table.
 *
 * Tries the id as given first, then the id with a trailing `-YYYYMMDD`
 * snapshot suffix removed (so "claude-haiku-4-5-20251001" resolves to
 * "claude-haiku-4-5"). Only own properties are considered, so ids such as
 * "constructor" never resolve to inherited Object.prototype members.
 *
 * @param {unknown} model - Model id to look up
 * @param {object} table - Lookup table keyed by model id
 * @returns {string|null} Matching key, or null when the model is not listed
 */
function resolveModelKey(model, table) {
  if (typeof model !== 'string') return null;
  if (Object.hasOwn(table, model)) return model;

  const undated = model.replace(/-\d{8}$/, '');
  return Object.hasOwn(table, undated) ? undated : null;
}

/**
 * Estimates the cost in USD of a call from its token counts.
 *
 * @param {string} model - Model id, with or without a `-YYYYMMDD` suffix
 * @param {number} inputTokens - Number of input tokens
 * @param {number} outputTokens - Number of output tokens
 * @returns {number} Estimated cost in USD; 0 when the model has no pricing entry
 */
export function estimateCost(model, inputTokens, outputTokens) {
  const key = resolveModelKey(model, DEFAULT_PRICING);
  if (key === null) return 0;

  const { input, output } = DEFAULT_PRICING[key];
  return (inputTokens * input + outputTokens * output) / 1_000_000;
}

/**
 * Returns the short display name of a model.
 *
 * @param {string} model - Model id, with or without a `-YYYYMMDD` suffix
 * @returns {string} Short name, or the given id unchanged when it is not listed
 */
export function getModelShortName(model) {
  const key = resolveModelKey(model, SHORT_NAMES);
  return key === null ? model : SHORT_NAMES[key];
}
@@ -0,0 +1,91 @@
1
+ /**
2
+ * semantic-matcher.js — LLM-based trigger scoring via Anthropic Haiku.
3
+ *
4
+ * Calls the Anthropic Messages API to score how well a user prompt
5
+ * matches a skill. Optional complement to the keyword matcher.
6
+ */
7
+
8
// Model used for semantic scoring unless GUILD_SEMANTIC_MODEL overrides it.
export const SEMANTIC_MODEL_DEFAULT = 'claude-haiku-4-5-20251001';

// Endpoint of the Anthropic Messages API.
const ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages';

// Classifier instructions sent as the system prompt; the model is asked to
// answer with a single JSON object holding a 0-100 score and a reason.
const SYSTEM_PROMPT = [
  'You are a skill-routing classifier. Given a user prompt and a skill name + description, score how likely the user wants to trigger this skill.',
  '',
  'Respond with ONLY a JSON object, no other text:',
  '{"score": <0-100>, "reasoning": "<one sentence>"}',
  '',
  'Score guide:',
  '- 90-100: Clear, direct match',
  '- 60-89: Likely match, related intent',
  '- 30-59: Possible but ambiguous',
  '- 0-29: Unrelated',
].join('\n');
22
+
23
/**
 * Scores a prompt against a skill using the Anthropic Messages API.
 *
 * This function never throws: every failure is reported as
 * `{ score: 0, reasoning: <message>, error: true }`. That covers a missing
 * ANTHROPIC_API_KEY (checked up front, so no request is sent without
 * credentials), a non-2xx HTTP status, a response without a text block,
 * network errors, and replies that cannot be parsed.
 *
 * The model id comes from GUILD_SEMANTIC_MODEL when set, otherwise from
 * SEMANTIC_MODEL_DEFAULT.
 *
 * @param {string} prompt - User prompt to classify
 * @param {string} skillName - Skill identifier
 * @param {string} skillDescription - Skill description text
 * @returns {Promise<{ score: number, reasoning: string, error?: boolean }>}
 */
export async function scoreMatchSemantic(prompt, skillName, skillDescription) {
  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) {
    // Fail fast instead of sending a request that can only come back 401.
    return { score: 0, reasoning: 'ANTHROPIC_API_KEY is not set', error: true };
  }

  const model = process.env.GUILD_SEMANTIC_MODEL || SEMANTIC_MODEL_DEFAULT;

  try {
    const response = await fetch(ANTHROPIC_API_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model,
        max_tokens: 100,
        system: SYSTEM_PROMPT,
        messages: [
          {
            role: 'user',
            content: `User prompt: "${prompt}"\nSkill: ${skillName}\nDescription: ${skillDescription}`,
          },
        ],
      }),
    });

    if (!response.ok) {
      return { score: 0, reasoning: `API error: ${response.status} ${response.statusText}`, error: true };
    }

    const data = await response.json();
    const text = data?.content?.[0]?.text;
    if (typeof text !== 'string') {
      return { score: 0, reasoning: 'API response contained no text content', error: true };
    }

    return parseResponse(text);
  } catch (err) {
    return { score: 0, reasoning: err.message, error: true };
  }
}
67
+
68
/**
 * Parses the LLM reply into a normalized score.
 *
 * The whole text is tried as JSON first; if that does not yield a usable
 * result, the first `{...}` fragment found in the text is tried (this
 * covers replies wrapped in prose or a code fence).
 *
 * A result is usable only when it is a JSON object whose `score` is a
 * finite number (numeric strings are accepted). The score is clamped to
 * 0-100 and returned on a 0-1 scale. A missing or non-string `reasoning`
 * becomes an empty string. Anything else is reported as a parse error.
 *
 * @param {string} text - Raw text returned by the model
 * @returns {{ score: number, reasoning: string, error?: boolean }}
 */
function parseResponse(text) {
  // Converts one JSON candidate into a result, or null when it is unusable.
  const toResult = (candidate) => {
    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      return null;
    }
    if (parsed === null || typeof parsed !== 'object') return null;

    const rawScore =
      typeof parsed.score === 'string' && parsed.score.trim() !== ''
        ? Number(parsed.score)
        : parsed.score;
    // Without this check a missing score would surface as NaN with no error flag.
    if (typeof rawScore !== 'number' || !Number.isFinite(rawScore)) return null;

    const clamped = Math.min(100, Math.max(0, rawScore));
    return {
      score: clamped / 100,
      reasoning: typeof parsed.reasoning === 'string' ? parsed.reasoning : '',
    };
  };

  const direct = toResult(text);
  if (direct) return direct;

  // Fallback: extract first JSON object from text
  const embedded = typeof text === 'string' ? text.match(/\{[^}]+\}/) : null;
  const recovered = embedded ? toResult(embedded[0]) : null;
  if (recovered) return recovered;

  return { score: 0, reasoning: 'parse-error', error: true };
}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * trigger-matcher.js — Scores prompts against skill descriptions.
3
+ *
4
+ * Uses keyword overlap scoring to determine how well a user prompt
5
+ * matches a skill's description. No LLM calls — purely programmatic.
6
+ */
7
+
8
/**
 * Tokenizes text into lowercase words, stripping punctuation.
 *
 * Dashes (hyphen, en dash, em dash) and slashes act as word separators.
 * Letters, combining marks, digits and underscores are kept for any script,
 * so accented words such as "café" survive intact; all other non-whitespace
 * characters are removed. Tokens of a single character are dropped.
 * `null` and `undefined` are treated as empty text.
 *
 * @param {string} text
 * @returns {string[]}
 */
export function tokenize(text) {
  return String(text ?? '')
    .toLowerCase()
    .replace(/[—–\-/]/g, ' ')
    // Unicode-aware counterpart of /[^\w\s]/: \w only covers ASCII and
    // would cut accented or non-Latin letters out of the middle of words.
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
    .split(/\s+/)
    .filter((word) => word.length > 1);
}
21
+
22
// Words ignored when scoring: common function words plus "skill" and
// "discipline", which are excluded here so they never count as matches.
const STOP_WORDS = new Set(
  [
    'the is at in on to of for and or an',
    'it by as be do if no so up we my',
    'use when with from this that will can has',
    'not are was but all any its you your',
    'skill discipline',
  ]
    .join(' ')
    .split(' '),
);
29
+
30
/**
 * Scores how well a prompt matches a description.
 *
 * Each non-stop-word prompt token earns 1 point when the description
 * contains the same token, or 0.5 points when it merely overlaps a
 * description token as a substring (in either direction). The score is the
 * points earned divided by the number of prompt tokens.
 *
 * @param {string} prompt - User prompt
 * @param {string} description - Skill description
 * @returns {number} Score between 0 and 1; 0 when the prompt has no usable tokens
 */
export function scoreMatch(prompt, description) {
  const isMeaningful = (word) => !STOP_WORDS.has(word);

  const words = tokenize(prompt).filter(isMeaningful);
  if (words.length === 0) return 0;

  const vocabulary = new Set(tokenize(description).filter(isMeaningful));
  const vocabularyList = [...vocabulary];

  const creditFor = (word) => {
    if (vocabulary.has(word)) return 1;
    const overlaps = vocabularyList.some(
      (entry) => entry.includes(word) || word.includes(entry),
    );
    return overlaps ? 0.5 : 0;
  };

  const earned = words.reduce((sum, word) => sum + creditFor(word), 0);
  return earned / words.length;
}
56
+
57
/**
 * Ranks all skills by match score descending.
 *
 * Returns copies of the skill objects with a `score` property added; the
 * input array and its objects are left untouched.
 *
 * @param {string} prompt - User prompt
 * @param {Array<{ description: string }>} skills - Skills to rank
 * @returns {Array<object>} Scored skills, best match first
 */
export function rankSkills(prompt, skills) {
  const scored = skills.map((skill) => {
    const score = scoreMatch(prompt, skill.description);
    return { ...skill, score };
  });

  scored.sort((left, right) => right.score - left.score);
  return scored;
}
+ }