clementine-agent 1.18.160 → 1.18.162
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/approval-signals.d.ts +58 -0
- package/dist/agent/approval-signals.js +105 -0
- package/dist/agent/run-skill.d.ts +135 -0
- package/dist/agent/run-skill.js +267 -0
- package/dist/agent/self-improve.d.ts +1 -1
- package/dist/agent/self-improve.js +37 -1
- package/dist/cli/dashboard.js +19 -2
- package/dist/gateway/router.d.ts +1 -0
- package/dist/gateway/router.js +1 -1
- package/dist/tools/skill-tools.js +38 -0
- package/package.json +1 -1
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Owner-approval feedback loop for self-improve proposals (1.18.161).
|
|
3
|
+
*
|
|
4
|
+
* Background: the self-improve hypothesizer generates 1-3 proposals each
|
|
5
|
+
* cycle. The owner approves or denies each one in the dashboard. Today
|
|
6
|
+
* that decision is recorded only as a status change on the experiment row
|
|
7
|
+
* — the *implicit signal* ("this kind of fix is good / bad") is lost.
|
|
8
|
+
*
|
|
9
|
+
* This module captures the signal as an append-only JSONL log
|
|
10
|
+
* (`~/.clementine/self-improve/approval-signals.jsonl`) and exposes
|
|
11
|
+
* `formatApprovalSignalsForHypothesizer()` so the next cycle's prompt includes:
|
|
12
|
+
*
|
|
13
|
+
* ### Owner approval signals (recent)
|
|
14
|
+
* APPROVED (do more like these):
|
|
15
|
+
* - cron/insight-check: "Apply lean mode to reduce prompt size"
|
|
16
|
+
* - agent/sasha-the-cmo: "Add explicit citation requirement to system prompt"
|
|
17
|
+
*
|
|
18
|
+
* DENIED (avoid these patterns):
|
|
19
|
+
* - workflow/email-gen: "Replace template with LLM generation" ← owner note: "too generic; loses voice"
|
|
20
|
+
*
|
|
21
|
+
* The hypothesizer reads this and biases future proposals — favoring
|
|
22
|
+
* patterns the owner has approved, avoiding patterns they've denied.
|
|
23
|
+
*
|
|
24
|
+
* Closed-loop autonomy: the system learns from human feedback without
|
|
25
|
+
* needing the human to write rules. Just react to proposals as usual.
|
|
26
|
+
*/
|
|
27
|
+
export interface ApprovalSignal {
|
|
28
|
+
/** ISO timestamp of the decision. */
|
|
29
|
+
ts: string;
|
|
30
|
+
/** Self-improve experiment ID this decision applies to. */
|
|
31
|
+
experimentId: string;
|
|
32
|
+
/** The area the proposal targeted (cron, agent, skill, soul, etc.). */
|
|
33
|
+
area: string;
|
|
34
|
+
/** The specific target (e.g., "insight-check", "sasha-the-cmo"). */
|
|
35
|
+
target: string;
|
|
36
|
+
/** The proposal's one-sentence hypothesis (truncated to 200 chars). */
|
|
37
|
+
hypothesis: string;
|
|
38
|
+
/** Owner's decision. */
|
|
39
|
+
decision: 'approved' | 'denied';
|
|
40
|
+
/** Optional free-text note from the owner explaining the decision. */
|
|
41
|
+
noteFromOwner?: string;
|
|
42
|
+
}
|
|
43
|
+
/** Append a new signal to the log. Best-effort — never throws to the caller. */
|
|
44
|
+
export declare function recordApprovalSignal(signal: Omit<ApprovalSignal, 'ts'>): void;
|
|
45
|
+
/**
|
|
46
|
+
* Read the most recent N signals from the log. Returns newest-first.
|
|
47
|
+
* Defaults to 50 — enough for the hypothesizer to see patterns, not so
|
|
48
|
+
* many that we bloat its prompt.
|
|
49
|
+
*/
|
|
50
|
+
export declare function getRecentApprovalSignals(limit?: number): ApprovalSignal[];
|
|
51
|
+
/**
|
|
52
|
+
* Render a recent-signals prompt block for the hypothesizer. Returns the
|
|
53
|
+
* empty string when there are no signals (so the prompt stays clean for
|
|
54
|
+
* fresh installs). Caps at the most recent 8 of each kind to keep the
|
|
55
|
+
* block compact.
|
|
56
|
+
*/
|
|
57
|
+
export declare function formatApprovalSignalsForHypothesizer(): string;
|
|
58
|
+
//# sourceMappingURL=approval-signals.d.ts.map
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Owner-approval feedback loop for self-improve proposals (1.18.161).
|
|
3
|
+
*
|
|
4
|
+
* Background: the self-improve hypothesizer generates 1-3 proposals each
|
|
5
|
+
* cycle. The owner approves or denies each one in the dashboard. Today
|
|
6
|
+
* that decision is recorded only as a status change on the experiment row
|
|
7
|
+
* — the *implicit signal* ("this kind of fix is good / bad") is lost.
|
|
8
|
+
*
|
|
9
|
+
* This module captures the signal as an append-only JSONL log
|
|
10
|
+
* (`~/.clementine/self-improve/approval-signals.jsonl`) and exposes
|
|
11
|
+
* `formatApprovalSignalsForHypothesizer()` so the next cycle's prompt includes:
|
|
12
|
+
*
|
|
13
|
+
* ### Owner approval signals (recent)
|
|
14
|
+
* APPROVED (do more like these):
|
|
15
|
+
* - cron/insight-check: "Apply lean mode to reduce prompt size"
|
|
16
|
+
* - agent/sasha-the-cmo: "Add explicit citation requirement to system prompt"
|
|
17
|
+
*
|
|
18
|
+
* DENIED (avoid these patterns):
|
|
19
|
+
* - workflow/email-gen: "Replace template with LLM generation" ← owner note: "too generic; loses voice"
|
|
20
|
+
*
|
|
21
|
+
* The hypothesizer reads this and biases future proposals — favoring
|
|
22
|
+
* patterns the owner has approved, avoiding patterns they've denied.
|
|
23
|
+
*
|
|
24
|
+
* Closed-loop autonomy: the system learns from human feedback without
|
|
25
|
+
* needing the human to write rules. Just react to proposals as usual.
|
|
26
|
+
*/
|
|
27
|
+
import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs';
|
|
28
|
+
import path from 'node:path';
|
|
29
|
+
import { BASE_DIR } from '../config.js';
|
|
30
|
+
/**
 * Resolve the location of the append-only approval-signals log:
 * `<BASE_DIR>/self-improve/approval-signals.jsonl`.
 */
function signalsLogPath() {
    const logDir = path.join(BASE_DIR, 'self-improve');
    return path.join(logDir, 'approval-signals.jsonl');
}
|
|
34
|
+
/**
 * Append one owner decision to the signals log as a single JSON line.
 *
 * The entry is stamped with the current ISO time, and the hypothesis is
 * cut to 200 characters so the log stays compact. The parent directory
 * is created on demand. Any failure is swallowed on purpose: this is
 * telemetry and must never block the apply/deny path.
 *
 * @param signal The decision to record (everything except `ts`).
 */
export function recordApprovalSignal(signal) {
    try {
        const logFile = signalsLogPath();
        mkdirSync(path.dirname(logFile), { recursive: true });
        // Truncate before serializing; this overrides the spread value below.
        const hypothesis = (signal.hypothesis || '').slice(0, 200);
        const line = JSON.stringify({
            ts: new Date().toISOString(),
            ...signal,
            hypothesis,
        });
        appendFileSync(logFile, line + '\n');
    }
    catch {
        /* never block the apply/deny path on telemetry */
    }
}
|
|
49
|
+
/**
 * Read the most recent N signals from the log. Returns newest-first.
 * Defaults to 50 — enough for the hypothesizer to see patterns, not so
 * many that we bloat its prompt.
 *
 * Lines that are not valid JSON are skipped, and so are lines whose JSON
 * value is not a plain object (`null`, numbers, strings, arrays): callers
 * read fields such as `decision` off every entry, so a stray primitive
 * would otherwise crash them. A missing or unreadable log yields `[]`.
 *
 * @param limit Maximum number of signals to return.
 * @returns Up to `limit` parsed log entries, newest first.
 */
export function getRecentApprovalSignals(limit = 50) {
    const file = signalsLogPath();
    if (!existsSync(file))
        return [];
    try {
        const lines = readFileSync(file, 'utf-8').trim().split('\n').filter(Boolean);
        const recent = [];
        // Walk backwards so the newest entries are collected first.
        for (let i = lines.length - 1; i >= 0 && recent.length < limit; i--) {
            let parsed;
            try {
                parsed = JSON.parse(lines[i]);
            }
            catch {
                continue; // skip malformed lines
            }
            // JSON.parse accepts `null`, numbers, strings and arrays too;
            // only plain objects can be signals.
            if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
                recent.push(parsed);
            }
        }
        return recent;
    }
    catch {
        return [];
    }
}
|
|
73
|
+
/**
 * Render a recent-signals prompt block for the hypothesizer. Returns the
 * empty string when there are no signals (so the prompt stays clean for
 * fresh installs).
 *
 * Only the 40 most recent log entries are considered; of those, at most
 * the 8 newest approvals and the 8 newest denials are rendered. Entries
 * that are not objects are ignored, and an owner note is only shown when
 * it is a non-blank string (cut to 120 characters). Line breaks inside a
 * hypothesis or note are flattened to spaces so every signal stays on a
 * single bullet line.
 *
 * @returns The prompt block ending in a blank line, or `''`.
 */
export function formatApprovalSignalsForHypothesizer() {
    // Defensive: a hand-edited or corrupted log must not crash prompt building.
    const signals = getRecentApprovalSignals(40).filter(s => s !== null && typeof s === 'object');
    if (signals.length === 0)
        return '';
    const approved = signals.filter(s => s.decision === 'approved').slice(0, 8);
    const denied = signals.filter(s => s.decision === 'denied').slice(0, 8);
    if (approved.length === 0 && denied.length === 0)
        return '';
    // Flatten embedded line breaks so one signal never spans several lines.
    const oneLine = (value) => String(value ?? '').replace(/\s*[\r\n]+\s*/g, ' ');
    const fmt = (s) => {
        const hasNote = typeof s.noteFromOwner === 'string' && s.noteFromOwner.trim() !== '';
        const note = hasNote ? ` ← owner note: "${oneLine(s.noteFromOwner).slice(0, 120)}"` : '';
        return `- ${s.area}/${s.target}: "${oneLine(s.hypothesis)}"${note}`;
    };
    const parts = ['### Owner approval signals (recent)'];
    if (approved.length > 0) {
        parts.push('APPROVED (do more like these):');
        parts.push(approved.map(fmt).join('\n'));
    }
    if (denied.length > 0) {
        parts.push('DENIED (avoid these patterns):');
        parts.push(denied.map(fmt).join('\n'));
    }
    parts.push('Bias today\'s proposals toward the approved patterns and away from the denied ones. ' +
        'If a denied pattern reflects a misunderstanding (e.g. you proposed the wrong target), ' +
        'reframe — don\'t just avoid the area entirely.');
    return parts.join('\n') + '\n\n';
}
|
|
105
|
+
//# sourceMappingURL=approval-signals.js.map
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* runSkill — the canonical Skill execution primitive (1.18.162).
|
|
3
|
+
*
|
|
4
|
+
* Closes Skills Runtime C-2 from the Skills-First redesign.
|
|
5
|
+
*
|
|
6
|
+
* Today (pre-1.18.162) a pinned skill is fed into a cron prompt as a
|
|
7
|
+
* markdown context block and its `clementine.tools.allow` list is UNIONED
|
|
8
|
+
* into the cron's allowedTools (1.18.121 widening). That is permissive,
|
|
9
|
+
* not enforced — a skill that says "I only use Bash + WebFetch" can still
|
|
10
|
+
* call any tool the surrounding cron allows.
|
|
11
|
+
*
|
|
12
|
+
* `runSkill(name, options)` is the alternative path: a sub-call where the
|
|
13
|
+
* skill's `tools.allow` is a HARD allowlist (only those tools, plus a
|
|
14
|
+
* minimal core set), `{{var}}` placeholders in the body are substituted
|
|
15
|
+
* from `options.inputs`, and `clementine.success.schema` is ajv-validated
|
|
16
|
+
* post-run.
|
|
17
|
+
*
|
|
18
|
+
* Why a separate primitive (and not a flag on the existing widening path):
|
|
19
|
+
* - Caller intent is different. Pinned-skills-as-context is "give the LLM
|
|
20
|
+
* reference material"; runSkill is "do this specific procedure now."
|
|
21
|
+
* - Hard enforcement requires constructing the SDK call ourselves, not
|
|
22
|
+
* reusing a cron-job's effective allowlist.
|
|
23
|
+
* - Inputs/success are skill-call concepts, not cron concepts.
|
|
24
|
+
*
|
|
25
|
+
* Surfaced as the MCP tool `run_skill(name, inputs?)` so chat + cron +
|
|
26
|
+
* sub-agents converge on one primitive.
|
|
27
|
+
*/
|
|
28
|
+
import type { Skill } from '../types.js';
|
|
29
|
+
export interface RunSkillOptions {
|
|
30
|
+
/** Mustache-style `{{var}}` substitutions for the skill body. */
|
|
31
|
+
inputs?: Record<string, string | number | boolean>;
|
|
32
|
+
/** Optional caller context appended after the skill body
|
|
33
|
+
* (e.g. the user's request, the cron firing context). */
|
|
34
|
+
context?: string;
|
|
35
|
+
/** Stable session key for transcript mirroring. Defaults to a synthesized
|
|
36
|
+
* key derived from the skill name + timestamp. */
|
|
37
|
+
sessionKey?: string;
|
|
38
|
+
/** Source classification for telemetry. Defaults to 'skill'. */
|
|
39
|
+
source?: string;
|
|
40
|
+
/** Optional model override. */
|
|
41
|
+
model?: string;
|
|
42
|
+
/** Hard turn cap. Falls back to `clementine.limits.maxTurns` if set. */
|
|
43
|
+
maxTurns?: number;
|
|
44
|
+
/** Hard budget cap (USD). Falls back to `clementine.limits.maxBudgetUsd`. */
|
|
45
|
+
maxBudgetUsd?: number;
|
|
46
|
+
/** Project work dir for per-project skill precedence (mirrors getSkill's
|
|
47
|
+
* `projectWorkDir` parameter — when set, project-scoped skills shadow
|
|
48
|
+
* global ones with the same name). */
|
|
49
|
+
projectWorkDir?: string;
|
|
50
|
+
/** Skip success.schema validation even if the skill declares one. */
|
|
51
|
+
skipValidation?: boolean;
|
|
52
|
+
/** Streaming callback for partial assistant text. */
|
|
53
|
+
onText?: (chunk: string) => void | Promise<void>;
|
|
54
|
+
/** Abort signal — cancels the SDK stream when triggered. */
|
|
55
|
+
abortSignal?: AbortSignal;
|
|
56
|
+
}
|
|
57
|
+
export interface RunSkillResult {
|
|
58
|
+
ok: boolean;
|
|
59
|
+
/** Final text response from the SDK. */
|
|
60
|
+
output: string;
|
|
61
|
+
/** Cost in USD. */
|
|
62
|
+
cost?: number;
|
|
63
|
+
/** Number of agentic turns. */
|
|
64
|
+
turns?: number;
|
|
65
|
+
/** SDK session id — capture for resume. */
|
|
66
|
+
sessionId?: string;
|
|
67
|
+
/** SDK runId — joins to the Event store. */
|
|
68
|
+
runId?: string;
|
|
69
|
+
/** Schema validation result when the skill declared `clementine.success.schema`. */
|
|
70
|
+
validation?: {
|
|
71
|
+
/** True when validation actually ran (schema present + JSON extractable). */
|
|
72
|
+
tried: boolean;
|
|
73
|
+
/** True when the response validated against the schema. */
|
|
74
|
+
pass: boolean;
|
|
75
|
+
/** First few ajv error messages. */
|
|
76
|
+
errors: string[];
|
|
77
|
+
};
|
|
78
|
+
/** The hard allowlist that was passed to the SDK. */
|
|
79
|
+
effectiveTools?: string[];
|
|
80
|
+
/** Failure reason when ok=false. */
|
|
81
|
+
error?: string;
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Substitute `{{var}}` placeholders in `body` from `inputs`. Missing
|
|
85
|
+
* keys are left as-is (so the LLM still sees the placeholder and can
|
|
86
|
+
* complain) rather than silently dropped — a missing input is more
|
|
87
|
+
* recoverable as visible text than as a stripped string.
|
|
88
|
+
*/
|
|
89
|
+
export declare function applyMustache(body: string, inputs: Record<string, string | number | boolean> | undefined): string;
|
|
90
|
+
/**
|
|
91
|
+
* Compute the HARD allowlist for a skill call.
|
|
92
|
+
*
|
|
93
|
+
* Combines, in order:
|
|
94
|
+
* 1. The skill's `clementine.tools.allow` list (or [] if absent)
|
|
95
|
+
* 2. Tool names auto-extracted from the skill body matching `mcp__*__*`
|
|
96
|
+
* 3. SKILL_BASELINE_TOOLS so the SDK can read files / dispatch subagents
|
|
97
|
+
*
|
|
98
|
+
* Then subtracts anything in `clementine.tools.deny` (deny wins).
|
|
99
|
+
*
|
|
100
|
+
* Returns a deduped array. SKILL_BASELINE_TOOLS is always merged in, so an
|
|
101
|
+
* empty result (which the SDK treats as "deny everything") only occurs when
|
|
102
|
+
* `tools.deny` removes every entry; set a sensible `tools.allow` on the skill.
|
|
103
|
+
*/
|
|
104
|
+
export declare function computeSkillAllowlist(skill: Skill): string[];
|
|
105
|
+
/**
|
|
106
|
+
* Build the prompt the SDK actually executes for a skill call.
|
|
107
|
+
*
|
|
108
|
+
* Format:
|
|
109
|
+
* <skill body, with mustache substitutions applied>
|
|
110
|
+
*
|
|
111
|
+
* ## Caller context
|
|
112
|
+
* <options.context> ← when provided
|
|
113
|
+
*
|
|
114
|
+
* The skill body itself becomes the procedure; the optional context is
|
|
115
|
+
* the immediate "what triggered this call" frame. Bundled files (other
|
|
116
|
+
* .md siblings under the skill folder) are NOT inlined — the SDK can
|
|
117
|
+
* read them via `Read` if listed under tools.allow.
|
|
118
|
+
*/
|
|
119
|
+
export declare function buildSkillPrompt(skill: Skill, inputs: Record<string, string | number | boolean> | undefined, context: string | undefined): string;
|
|
120
|
+
/**
|
|
121
|
+
* Run a skill as a hard-allowlisted sub-call. Returns a structured result.
|
|
122
|
+
*
|
|
123
|
+
* The skill is loaded via `getSkill()` (project-precedence honored when
|
|
124
|
+
* `options.projectWorkDir` is passed). Its body is mustache-rendered
|
|
125
|
+
* with `inputs`, then sent to the SDK with an allowlist computed from
|
|
126
|
+
* `clementine.tools.allow` + auto-extracted MCP refs + a small baseline.
|
|
127
|
+
* After the SDK returns, `clementine.success.schema` (when set) is
|
|
128
|
+
* ajv-validated against the response.
|
|
129
|
+
*
|
|
130
|
+
* This function never throws — failures (skill not found, SDK error,
|
|
131
|
+
* timeout) are returned as `{ ok: false, error }`. The caller (chat,
|
|
132
|
+
* cron, sub-agent, MCP tool) decides how to surface that.
|
|
133
|
+
*/
|
|
134
|
+
export declare function runSkill(name: string, options?: RunSkillOptions): Promise<RunSkillResult>;
|
|
135
|
+
//# sourceMappingURL=run-skill.d.ts.map
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* runSkill — the canonical Skill execution primitive (1.18.162).
|
|
3
|
+
*
|
|
4
|
+
* Closes Skills Runtime C-2 from the Skills-First redesign.
|
|
5
|
+
*
|
|
6
|
+
* Today (pre-1.18.162) a pinned skill is fed into a cron prompt as a
|
|
7
|
+
* markdown context block and its `clementine.tools.allow` list is UNIONED
|
|
8
|
+
* into the cron's allowedTools (1.18.121 widening). That is permissive,
|
|
9
|
+
* not enforced — a skill that says "I only use Bash + WebFetch" can still
|
|
10
|
+
* call any tool the surrounding cron allows.
|
|
11
|
+
*
|
|
12
|
+
* `runSkill(name, options)` is the alternative path: a sub-call where the
|
|
13
|
+
* skill's `tools.allow` is a HARD allowlist (only those tools, plus a
|
|
14
|
+
* minimal core set), `{{var}}` placeholders in the body are substituted
|
|
15
|
+
* from `options.inputs`, and `clementine.success.schema` is ajv-validated
|
|
16
|
+
* post-run.
|
|
17
|
+
*
|
|
18
|
+
* Why a separate primitive (and not a flag on the existing widening path):
|
|
19
|
+
* - Caller intent is different. Pinned-skills-as-context is "give the LLM
|
|
20
|
+
* reference material"; runSkill is "do this specific procedure now."
|
|
21
|
+
* - Hard enforcement requires constructing the SDK call ourselves, not
|
|
22
|
+
* reusing a cron-job's effective allowlist.
|
|
23
|
+
* - Inputs/success are skill-call concepts, not cron concepts.
|
|
24
|
+
*
|
|
25
|
+
* Surfaced as the MCP tool `run_skill(name, inputs?)` so chat + cron +
|
|
26
|
+
* sub-agents converge on one primitive.
|
|
27
|
+
*/
|
|
28
|
+
import path from 'node:path';
|
|
29
|
+
import pino from 'pino';
|
|
30
|
+
import { getSkill } from './skill-store.js';
|
|
31
|
+
import { runAgent } from './run-agent.js';
|
|
32
|
+
const logger = pino({ name: 'clementine.run-skill' });
|
|
33
|
+
// ── Mustache substitution ─────────────────────────────────────────────
|
|
34
|
+
/**
 * Pattern for a `{{var_name}}` placeholder; whitespace inside the braces
 * is tolerated. The captured name has the shape
 * `[a-zA-Z_][a-zA-Z0-9_-]*`.
 */
const MUSTACHE = /\{\{\s*([a-zA-Z_][a-zA-Z0-9_-]*)\s*\}\}/g;
/**
 * Fill `{{var}}` placeholders in `body` with values from `inputs`.
 *
 * A placeholder whose name is not an own key of `inputs` is left exactly
 * as written, so a missing input stays visible in the prompt instead of
 * silently disappearing. With no inputs at all, `body` is returned as-is.
 *
 * @param body Text containing zero or more placeholders.
 * @param inputs Values keyed by placeholder name; each is stringified.
 * @returns `body` with every known placeholder replaced.
 */
export function applyMustache(body, inputs) {
    const nothingToApply = !inputs || Object.keys(inputs).length === 0;
    if (nothingToApply) {
        return body;
    }
    const ownsKey = (name) => Object.prototype.hasOwnProperty.call(inputs, name);
    return body.replace(MUSTACHE, (placeholder, name) => ownsKey(name) ? String(inputs[name]) : placeholder);
}
|
|
54
|
+
// ── Allowlist computation ─────────────────────────────────────────────
|
|
55
|
+
/**
 * Tools granted to every skill call regardless of its `tools.allow`:
 * Read/Glob/Grep for non-mutating project navigation, and Agent so the
 * SDK can dispatch its own internal subagents.
 */
const SKILL_BASELINE_TOOLS = ['Agent', 'Read', 'Glob', 'Grep'];
/**
 * Finds `mcp__<server>__<tool>` references inside a skill body, so MCP
 * tools the body mentions are allowed even when the author did not list
 * them under `tools.allow`.
 */
const MCP_TOOL_REF = /mcp__([A-Za-z0-9-]+(?:_[A-Za-z0-9-]+)*)__[A-Za-z0-9_-]+/g;
/**
 * Compute the HARD allowlist for a skill call.
 *
 * The result is the ordered, de-duplicated union of:
 *   1. the skill's `clementine.tools.allow` entries (none if absent),
 *   2. every `mcp__*__*` tool name mentioned in the skill body,
 *   3. SKILL_BASELINE_TOOLS,
 * minus everything listed in `clementine.tools.deny` (deny wins, and can
 * remove baseline tools too).
 *
 * @param skill Skill whose frontmatter and body are inspected.
 * @returns Tool names to hand to the SDK as the allowlist.
 */
export function computeSkillAllowlist(skill) {
    const toolConfig = skill.frontmatter?.clementine?.tools;
    const allowList = Array.isArray(toolConfig?.allow) ? toolConfig.allow : [];
    const denyList = Array.isArray(toolConfig?.deny) ? toolConfig.deny : [];
    // A global-flag match() returns every full match and starts from index 0.
    const referenced = String(skill.body).match(MCP_TOOL_REF) ?? [];
    const blocked = new Set(denyList);
    const candidates = new Set([...allowList, ...referenced, ...SKILL_BASELINE_TOOLS]);
    return [...candidates].filter(tool => !blocked.has(tool));
}
|
|
100
|
+
// ── Prompt builder ────────────────────────────────────────────────────
|
|
101
|
+
/**
 * Assemble the prompt the SDK executes for a skill call.
 *
 * Layout:
 *   <skill body with `{{var}}` placeholders filled from `inputs`>
 *
 *   ## Caller context
 *
 *   <context, trimmed>          ← only when `context` is non-blank
 *
 * The body is the procedure; the context is the "what triggered this
 * call" frame. Sibling files bundled with the skill are not inlined.
 *
 * @param skill Skill whose `body` supplies the procedure text.
 * @param inputs Placeholder values passed to `applyMustache`.
 * @param context Optional caller context appended after the body.
 * @returns The finished prompt string.
 */
export function buildSkillPrompt(skill, inputs, context) {
    const procedure = applyMustache(skill.body, inputs);
    const frame = context ? context.trim() : '';
    if (frame) {
        return `${procedure}\n\n## Caller context\n\n${frame}\n`;
    }
    return procedure;
}
|
|
121
|
+
// ── Schema validation ─────────────────────────────────────────────────
|
|
122
|
+
/**
 * Best-effort JSON extraction from free-form model output.
 *
 * Three candidates are tried in order, and the first that parses wins:
 *   1. the whole text, trimmed;
 *   2. the contents of the first fenced ```json block;
 *   3. the span from the first `{` to the last `}`.
 *
 * @param text Raw response text.
 * @returns The parsed value, or `null` when nothing parses or `text` is empty.
 */
function extractJson(text) {
    if (!text) {
        return null;
    }
    const candidateSources = [
        () => text.trim(),
        () => /```json\s*([\s\S]*?)```/i.exec(text)?.[1],
        () => {
            const open = text.indexOf('{');
            const close = text.lastIndexOf('}');
            return open !== -1 && close > open ? text.slice(open, close + 1) : undefined;
        },
    ];
    for (const pick of candidateSources) {
        const candidate = pick();
        if (!candidate) {
            continue; // nothing to try at this step
        }
        try {
            return JSON.parse(candidate);
        }
        catch {
            /* fall through to the next candidate */
        }
    }
    return null;
}
|
|
150
|
+
/**
 * Validate a skill's text output against its declared JSON schema.
 *
 * @param output Raw response text; JSON is pulled out with `extractJson`.
 * @param schema The schema handed to ajv's `compile`.
 * @returns `{ tried, pass, errors }`. `tried` is false when no JSON could
 *   be extracted. `errors` holds at most five messages, each prefixed
 *   with the instance path when ajv reports one. Any exception raised
 *   while loading ajv, compiling, or validating is reported as a single
 *   "schema compile error" entry rather than thrown.
 */
async function validateSkillOutput(output, schema) {
    const parsed = extractJson(output);
    if (parsed === null) {
        return { tried: false, pass: false, errors: [] };
    }
    try {
        // ajv is loaded lazily so callers without a schema never pay for it.
        // The constructor may sit on `.default` or on the module itself,
        // depending on how the CJS package is bridged into ESM.
        const ajvModule = await import('ajv');
        const Ajv = ajvModule.default ?? ajvModule;
        const instance = new Ajv({ allErrors: true, strict: false });
        const check = instance.compile(schema);
        const pass = !!check(parsed);
        const reported = check.errors ?? instance.errors ?? [];
        const describe = (issue) => {
            const where = issue.instancePath || '';
            const what = issue.message || 'invalid';
            return where ? `${where} ${what}` : what;
        };
        return { tried: true, pass, errors: reported.slice(0, 5).map(describe) };
    }
    catch (err) {
        return { tried: true, pass: false, errors: [`schema compile error: ${err}`] };
    }
}
|
|
178
|
+
// ── The primitive ─────────────────────────────────────────────────────
|
|
179
|
+
/**
 * Run a skill as a hard-allowlisted sub-call. Returns a structured result.
 *
 * The skill is loaded via `getSkill()`; `options.projectWorkDir`, when
 * set, is forwarded to that lookup. Its body is mustache-rendered with
 * `options.inputs`, then sent to the SDK with an allowlist computed from
 * `clementine.tools.allow` + auto-extracted MCP refs + a small baseline.
 * After the SDK returns, `clementine.success.schema` (when set and
 * `options.skipValidation` is not) is ajv-validated against the response.
 *
 * Skill lookup failures, preparation failures, a missing skill, and SDK
 * errors are all reported as `{ ok: false, error }` instead of being
 * thrown. A schema mismatch does NOT flip `ok` to false; it is surfaced
 * under `validation` so the caller can decide what to do.
 *
 * @param name Skill name to look up.
 * @param options Inputs, context, limits and streaming hooks for the call.
 * @returns The run outcome, including the allowlist that was applied.
 */
export async function runSkill(name, options = {}) {
    let skill;
    let effectiveTools;
    let prompt;
    let additionalDirectories;
    try {
        skill = getSkill(name, {
            ...(options.projectWorkDir ? { projectWorkDir: options.projectWorkDir } : {}),
        });
        if (skill) {
            effectiveTools = computeSkillAllowlist(skill);
            prompt = buildSkillPrompt(skill, options.inputs, options.context);
            // Surface the skill folder to the SDK via additionalDirectories so
            // bundled scripts (skill/scripts/*.py) are reachable for `Bash` calls.
            // Folder-form skills only — flat skills have no siblings worth surfacing.
            additionalDirectories = skill.layout === 'folder' ? [path.dirname(skill.filePath)] : undefined;
        }
    }
    catch (err) {
        // Honor the "never throws" contract when the skill store or a
        // malformed skill object raises during lookup/preparation.
        logger.error({ err, skill: name }, 'runSkill: skill load failed');
        return {
            ok: false,
            output: '',
            error: `Skill load error: ${err}`,
        };
    }
    if (!skill) {
        return {
            ok: false,
            output: '',
            error: `Skill not found: ${name}`,
        };
    }
    // Explicit options win over limits declared in the skill frontmatter.
    const limits = skill.frontmatter?.clementine?.limits;
    const maxTurns = options.maxTurns ?? limits?.maxTurns;
    const maxBudgetUsd = options.maxBudgetUsd ?? limits?.maxBudgetUsd;
    const sessionKey = options.sessionKey
        ?? `skill:${name}:${Date.now().toString(36)}`;
    logger.info({
        skill: name,
        tools: effectiveTools,
        maxTurns,
        maxBudgetUsd,
        inputKeys: Object.keys(options.inputs ?? {}),
        hasContext: !!options.context,
    }, 'runSkill: invoking');
    let runResult;
    try {
        // Optional settings are only included when actually provided.
        const sdkOpts = {
            sessionKey,
            source: options.source ?? 'skill',
            allowedTools: effectiveTools,
            ...(options.model ? { model: options.model } : {}),
            ...(typeof maxTurns === 'number' ? { maxTurns } : {}),
            ...(typeof maxBudgetUsd === 'number' ? { maxBudgetUsd } : {}),
            ...(additionalDirectories ? { additionalDirectories } : {}),
            ...(options.onText ? { onText: options.onText } : {}),
            ...(options.abortSignal ? { abortSignal: options.abortSignal } : {}),
        };
        runResult = await runAgent(prompt, sdkOpts);
    }
    catch (err) {
        logger.error({ err, skill: name }, 'runSkill: SDK call failed');
        return {
            ok: false,
            output: '',
            effectiveTools,
            error: `SDK error: ${err}`,
        };
    }
    // Schema validation — only when the skill declared one and the caller
    // didn't opt out. We do not flip ok=false on schema fail; we surface
    // the result so the caller can decide. (A cron may want to retry; a
    // chat user just sees a "schema mismatch" badge.)
    let validation;
    const successSchema = skill.frontmatter?.clementine?.success?.schema;
    if (!options.skipValidation && successSchema) {
        validation = await validateSkillOutput(runResult.text, successSchema);
    }
    return {
        ok: true,
        output: runResult.text,
        cost: runResult.totalCostUsd,
        turns: runResult.numTurns,
        sessionId: runResult.sessionId,
        runId: runResult.runId,
        effectiveTools,
        ...(validation ? { validation } : {}),
    };
}
|
|
267
|
+
//# sourceMappingURL=run-skill.js.map
|
|
@@ -58,7 +58,7 @@ export declare class SelfImproveLoop {
|
|
|
58
58
|
private savePendingChange;
|
|
59
59
|
applyApprovedChange(experimentId: string): Promise<string>;
|
|
60
60
|
/** Deny a pending change without applying it. */
|
|
61
|
-
denyChange(experimentId: string): string;
|
|
61
|
+
denyChange(experimentId: string, noteFromOwner?: string): string;
|
|
62
62
|
private runMemoryCleanup;
|
|
63
63
|
private synthesizeFeedbackPatterns;
|
|
64
64
|
/** Update the structured user model from interaction data. */
|
|
@@ -18,6 +18,7 @@ import { BASE_DIR, SELF_IMPROVE_DIR, SOUL_FILE, CRON_FILE, WORKFLOWS_DIR, VAULT_
|
|
|
18
18
|
import { listAllGoals } from '../tools/shared.js';
|
|
19
19
|
import { MemoryStore } from '../memory/store.js';
|
|
20
20
|
import { ANTHROPIC_SKILL_NAME_PATTERN } from './skill-store.js';
|
|
21
|
+
import { recordApprovalSignal, formatApprovalSignalsForHypothesizer } from './approval-signals.js';
|
|
21
22
|
const logger = pino({ name: 'clementine.self-improve' });
|
|
22
23
|
// ── Defaults ─────────────────────────────────────────────────────────
|
|
23
24
|
const DEFAULT_CONFIG = {
|
|
@@ -1097,6 +1098,10 @@ export class SelfImproveLoop {
|
|
|
1097
1098
|
}
|
|
1098
1099
|
}
|
|
1099
1100
|
catch { /* non-fatal */ }
|
|
1101
|
+
// Owner-approval feedback (1.18.161) — bias hypotheses toward patterns the
|
|
1102
|
+
// owner has approved, away from those they've denied. Empty string for
|
|
1103
|
+
// fresh installs, which keeps the prompt clean.
|
|
1104
|
+
const approvalSignalsText = formatApprovalSignalsForHypothesizer();
|
|
1100
1105
|
// ── Step 1: Analysis — identify top opportunities from metrics (no config dumps) ──
|
|
1101
1106
|
const analysisPrompt = `You are Clementine's self-improvement strategist. Analyze the performance data below and identify the top 3 improvement opportunities.\n\n` +
|
|
1102
1107
|
`## Recent Performance Data (last 7 days)\n` +
|
|
@@ -1114,6 +1119,7 @@ export class SelfImproveLoop {
|
|
|
1114
1119
|
diversityConstraint +
|
|
1115
1120
|
agentFocusText +
|
|
1116
1121
|
soulCandidatesText +
|
|
1122
|
+
(approvalSignalsText ? `\n${approvalSignalsText}` : '') +
|
|
1117
1123
|
`\n## Instructions\n` +
|
|
1118
1124
|
`Propose **1-3 concrete, high-impact improvements** the owner should review today — no fewer (aim for at least one actionable suggestion when data warrants it), no more (the owner reads each proposal manually and you'll overwhelm them). Rank by expected impact; drop anything below "solid idea".\n\n` +
|
|
1119
1125
|
`For each opportunity, specify:\n` +
|
|
@@ -1486,14 +1492,33 @@ export class SelfImproveLoop {
|
|
|
1486
1492
|
catch (err) {
|
|
1487
1493
|
logger.warn({ err }, 'Failed to schedule impact check');
|
|
1488
1494
|
}
|
|
1495
|
+
// 1.18.161 — record the implicit owner-approval signal so future
|
|
1496
|
+
// hypothesizer cycles can see "the owner approved fixes like this"
|
|
1497
|
+
// and bias proposals accordingly. Best-effort, never blocks apply.
|
|
1498
|
+
recordApprovalSignal({
|
|
1499
|
+
experimentId,
|
|
1500
|
+
area: pending.area,
|
|
1501
|
+
target: pending.target,
|
|
1502
|
+
hypothesis: pending.hypothesis,
|
|
1503
|
+
decision: 'approved',
|
|
1504
|
+
});
|
|
1489
1505
|
return `Applied change to ${pending.area}/${pending.target}`;
|
|
1490
1506
|
}
|
|
1491
1507
|
/** Deny a pending change without applying it. */
|
|
1492
|
-
denyChange(experimentId) {
|
|
1508
|
+
denyChange(experimentId, noteFromOwner) {
|
|
1493
1509
|
const pendingFile = path.join(PENDING_DIR, `${experimentId}.json`);
|
|
1494
1510
|
if (!existsSync(pendingFile)) {
|
|
1495
1511
|
return `Pending change not found: ${experimentId}`;
|
|
1496
1512
|
}
|
|
1513
|
+
// 1.18.161 — capture the area/target/hypothesis BEFORE we delete the
|
|
1514
|
+
// pending file so the approval-signal log gets a meaningful entry
|
|
1515
|
+
// (not just an experiment ID with no context).
|
|
1516
|
+
let signalContext = null;
|
|
1517
|
+
try {
|
|
1518
|
+
const pending = JSON.parse(readFileSync(pendingFile, 'utf-8'));
|
|
1519
|
+
signalContext = { area: pending.area, target: pending.target, hypothesis: pending.hypothesis };
|
|
1520
|
+
}
|
|
1521
|
+
catch { /* file may be malformed; record a minimal signal below */ }
|
|
1497
1522
|
this.updateExperimentStatus(experimentId, 'denied');
|
|
1498
1523
|
try {
|
|
1499
1524
|
unlinkSync(pendingFile);
|
|
@@ -1502,6 +1527,17 @@ export class SelfImproveLoop {
|
|
|
1502
1527
|
const state = this.loadState();
|
|
1503
1528
|
state.pendingApprovals = Math.max(0, state.pendingApprovals - 1);
|
|
1504
1529
|
this.saveState(state);
|
|
1530
|
+
// 1.18.161 — record the denial signal. Owner can pass an optional note
|
|
1531
|
+
// (via the dashboard Reason field, or via Discord) explaining why so
|
|
1532
|
+
// the hypothesizer learns more than just "no."
|
|
1533
|
+
recordApprovalSignal({
|
|
1534
|
+
experimentId,
|
|
1535
|
+
area: signalContext?.area ?? 'unknown',
|
|
1536
|
+
target: signalContext?.target ?? 'unknown',
|
|
1537
|
+
hypothesis: signalContext?.hypothesis ?? '(pending file unreadable at deny time)',
|
|
1538
|
+
decision: 'denied',
|
|
1539
|
+
...(noteFromOwner ? { noteFromOwner } : {}),
|
|
1540
|
+
});
|
|
1505
1541
|
return `Denied change: ${experimentId}`;
|
|
1506
1542
|
}
|
|
1507
1543
|
// ── Memory cleanup ───────────────────────────────────────────────
|
package/dist/cli/dashboard.js
CHANGED
|
@@ -11502,7 +11502,14 @@ If the tool returns nothing or errors, return an empty array \`[]\`.`,
|
|
|
11502
11502
|
app.post('/api/self-improve/deny/:id', async (req, res) => {
|
|
11503
11503
|
try {
|
|
11504
11504
|
const gw = await getGateway();
|
|
11505
|
-
|
|
11505
|
+
// 1.18.161 — accept an optional `noteFromOwner` in the body so the
|
|
11506
|
+
// approval-signal log captures the *reason* for denial (the
|
|
11507
|
+
// hypothesizer learns more from "too generic — loses voice" than
|
|
11508
|
+
// from a bare "no").
|
|
11509
|
+
const noteFromOwner = typeof req.body?.noteFromOwner === 'string'
|
|
11510
|
+
? req.body.noteFromOwner.slice(0, 500)
|
|
11511
|
+
: undefined;
|
|
11512
|
+
const result = await gw.handleSelfImprove('deny', { experimentId: req.params.id, noteFromOwner });
|
|
11506
11513
|
res.json({ ok: true, message: result });
|
|
11507
11514
|
}
|
|
11508
11515
|
catch (err) {
|
|
@@ -40687,7 +40694,17 @@ async function siApply(id) {
|
|
|
40687
40694
|
|
|
40688
40695
|
async function siDeny(id) {
|
|
40689
40696
|
try {
|
|
40690
|
-
|
|
40697
|
+
// 1.18.161 — invite an optional one-line reason. Cancel = abort (nothing is denied);
|
|
40698
|
+
// empty string = bare deny; non-empty = sent to the hypothesizer's
|
|
40699
|
+
// approval-signal log so future cycles avoid the rejected pattern.
|
|
40700
|
+
const note = window.prompt('Optional reason for denying (helps the hypothesizer learn — leave blank to skip):', '');
|
|
40701
|
+
if (note === null) return;
|
|
40702
|
+
const body = note.trim() ? JSON.stringify({ noteFromOwner: note.trim() }) : undefined;
|
|
40703
|
+
const r = await apiFetch('/api/self-improve/deny/' + id, {
|
|
40704
|
+
method: 'POST',
|
|
40705
|
+
headers: body ? { 'Content-Type': 'application/json' } : undefined,
|
|
40706
|
+
body,
|
|
40707
|
+
});
|
|
40691
40708
|
const d = await r.json();
|
|
40692
40709
|
if (d.ok) toast(d.message, 'success');
|
|
40693
40710
|
else toast(d.message || 'Failed', 'error');
|
package/dist/gateway/router.d.ts
CHANGED
|
@@ -308,6 +308,7 @@ export declare class Gateway {
|
|
|
308
308
|
getAllProvenance(): Map<string, SessionProvenance>;
|
|
309
309
|
handleSelfImprove(action: string, args?: {
|
|
310
310
|
experimentId?: string;
|
|
311
|
+
noteFromOwner?: string;
|
|
311
312
|
config?: Partial<SelfImproveConfig>;
|
|
312
313
|
}, onProposal?: (experiment: SelfImproveExperiment) => Promise<void>): Promise<string>;
|
|
313
314
|
/** Extract a procedural skill from a successful cron execution (fire-and-forget). */
|
package/dist/gateway/router.js
CHANGED
|
@@ -2437,7 +2437,7 @@ export class Gateway {
|
|
|
2437
2437
|
case 'deny': {
|
|
2438
2438
|
if (!args?.experimentId)
|
|
2439
2439
|
return 'Missing experiment ID.';
|
|
2440
|
-
return loop.denyChange(args.experimentId);
|
|
2440
|
+
return loop.denyChange(args.experimentId, args.noteFromOwner);
|
|
2441
2441
|
}
|
|
2442
2442
|
case 'run-agent': {
|
|
2443
2443
|
const slug = args?.experimentId; // Reuse experimentId field for agent slug
|
|
@@ -214,5 +214,43 @@ export function registerSkillTools(server) {
|
|
|
214
214
|
return textResult(`❌ Failed to list skills: ${err instanceof Error ? err.message : String(err)}`);
|
|
215
215
|
}
|
|
216
216
|
});
|
|
217
|
+
// ── run_skill (1.18.162) ────────────────────────────────────────────
|
|
218
|
+
// Invoke a skill as a hard-allowlisted sub-call. Mustache substitutes
|
|
219
|
+
// `{{var}}` placeholders in the skill body from `inputs`, runs through
|
|
220
|
+
// the SDK with ONLY the skill's clementine.tools.allow + a baseline
|
|
221
|
+
// (Agent/Read/Glob/Grep) + auto-extracted mcp__*__* refs from the body,
|
|
222
|
+
// and validates against clementine.success.schema if declared.
|
|
223
|
+
//
|
|
224
|
+
// Use this when a chat or another skill needs to *execute* a procedure
|
|
225
|
+
// (not just reference it). Pinned-skills-as-context (the existing 1.18.121
|
|
226
|
+
// widening path) is for the cron prompt; this is for callable execution.
|
|
227
|
+
server.tool('run_skill', 'Execute a named skill as a sub-call with a HARD tool allowlist. The skill body is rendered with optional {{var}} substitutions from `inputs`, then run with only the tools the skill declared under clementine.tools.allow (plus a small baseline). Returns the skill output + cost + schema validation result when applicable. Use when chat says "run my morning-briefing skill" or when one skill needs to invoke another.', {
|
|
228
|
+
name: z.string().regex(NAME_PATTERN).describe('Skill slug (e.g. "morning-briefing"). Must match an existing skill in the vault.'),
|
|
229
|
+
inputs: z.record(z.string(), z.union([z.string(), z.number(), z.boolean()])).optional()
|
|
230
|
+
.describe('Optional key→value map substituted into {{var}} placeholders in the skill body. Missing placeholders are left as-is so the LLM can complain.'),
|
|
231
|
+
context: z.string().optional()
|
|
232
|
+
.describe('Optional caller context appended after the skill body (e.g. "user said: do X right now"). Surfaced under a "## Caller context" heading.'),
|
|
233
|
+
}, async ({ name, inputs, context }) => {
|
|
234
|
+
try {
|
|
235
|
+
// Lazy import — runSkill pulls in run-agent + the SDK; only load on
|
|
236
|
+
// demand so `list_skills` etc stay fast and the MCP server boots
|
|
237
|
+
// without warming the whole agent path.
|
|
238
|
+
const { runSkill } = await import('../agent/run-skill.js');
|
|
239
|
+
const result = await runSkill(name, { inputs, context, source: 'mcp:run_skill' });
|
|
240
|
+
if (!result.ok) {
|
|
241
|
+
return textResult(`❌ run_skill(${name}) failed: ${result.error ?? 'unknown error'}`);
|
|
242
|
+
}
|
|
243
|
+
const validationLine = result.validation
|
|
244
|
+
? `\n\n**Schema:** ${result.validation.tried ? (result.validation.pass ? '✅ pass' : `❌ fail — ${result.validation.errors.slice(0, 2).join('; ')}`) : '(skipped — no JSON in output)'}`
|
|
245
|
+
: '';
|
|
246
|
+
const meta = `\n\n_${result.turns ?? 0} turns · $${(result.cost ?? 0).toFixed(4)} · ${result.effectiveTools?.length ?? 0} tools allowed_${validationLine}`;
|
|
247
|
+
return textResult(`${result.output}${meta}`);
|
|
248
|
+
}
|
|
249
|
+
catch (err) {
|
|
250
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
251
|
+
logger.error({ err, skill: name }, 'run_skill failed');
|
|
252
|
+
return textResult(`❌ run_skill(${name}) failed: ${msg}`);
|
|
253
|
+
}
|
|
254
|
+
});
|
|
217
255
|
}
|
|
218
256
|
//# sourceMappingURL=skill-tools.js.map
|