@vellumai/assistant 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +109 -0
- package/docs/architecture/memory.md +105 -0
- package/docs/skills.md +100 -0
- package/package.json +1 -1
- package/src/__tests__/archive-recall.test.ts +560 -0
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +7 -0
- package/src/__tests__/conversation-agent-loop.test.ts +7 -0
- package/src/__tests__/conversation-clear-safety.test.ts +259 -0
- package/src/__tests__/conversation-memory-dirty-tail.test.ts +150 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +7 -0
- package/src/__tests__/conversation-switch-memory-reduction.test.ts +474 -0
- package/src/__tests__/conversation-wipe.test.ts +226 -0
- package/src/__tests__/db-memory-archive-migration.test.ts +372 -0
- package/src/__tests__/db-memory-brief-state-migration.test.ts +213 -0
- package/src/__tests__/db-memory-reducer-checkpoints.test.ts +273 -0
- package/src/__tests__/db-schedule-syntax-migration.test.ts +3 -0
- package/src/__tests__/inline-command-runner.test.ts +311 -0
- package/src/__tests__/inline-skill-authoring-guard.test.ts +220 -0
- package/src/__tests__/inline-skill-load-permissions.test.ts +435 -0
- package/src/__tests__/list-messages-attachments.test.ts +96 -0
- package/src/__tests__/memory-brief-open-loops.test.ts +530 -0
- package/src/__tests__/memory-brief-time.test.ts +285 -0
- package/src/__tests__/memory-brief-wrapper.test.ts +311 -0
- package/src/__tests__/memory-chunk-archive.test.ts +400 -0
- package/src/__tests__/memory-chunk-dual-write.test.ts +453 -0
- package/src/__tests__/memory-episode-archive.test.ts +370 -0
- package/src/__tests__/memory-episode-dual-write.test.ts +626 -0
- package/src/__tests__/memory-observation-archive.test.ts +375 -0
- package/src/__tests__/memory-observation-dual-write.test.ts +318 -0
- package/src/__tests__/memory-recall-quality.test.ts +2 -2
- package/src/__tests__/memory-reducer-job.test.ts +538 -0
- package/src/__tests__/memory-reducer-scheduling.test.ts +473 -0
- package/src/__tests__/memory-reducer-store.test.ts +728 -0
- package/src/__tests__/memory-reducer-types.test.ts +707 -0
- package/src/__tests__/memory-reducer.test.ts +704 -0
- package/src/__tests__/memory-regressions.test.ts +30 -8
- package/src/__tests__/memory-simplified-config.test.ts +281 -0
- package/src/__tests__/parse-identity-fields.test.ts +129 -0
- package/src/__tests__/simplified-memory-e2e.test.ts +666 -0
- package/src/__tests__/simplified-memory-runtime.test.ts +616 -0
- package/src/__tests__/skill-load-inline-command.test.ts +598 -0
- package/src/__tests__/skill-load-inline-includes.test.ts +644 -0
- package/src/__tests__/skills-inline-command-expansions.test.ts +301 -0
- package/src/__tests__/skills-transitive-hash.test.ts +333 -0
- package/src/__tests__/vellum-self-knowledge-inline-command.test.ts +320 -0
- package/src/__tests__/workspace-migration-backfill-installation-id.test.ts +4 -4
- package/src/cli/commands/conversations.ts +18 -0
- package/src/config/bundled-skills/app-builder/SKILL.md +8 -8
- package/src/config/bundled-skills/schedule/TOOLS.json +8 -0
- package/src/config/bundled-skills/skill-management/SKILL.md +1 -1
- package/src/config/bundled-skills/skill-management/TOOLS.json +2 -2
- package/src/config/feature-flag-registry.json +16 -0
- package/src/config/raw-config-utils.ts +28 -0
- package/src/config/schema.ts +12 -0
- package/src/config/schemas/memory-simplified.ts +101 -0
- package/src/config/schemas/memory.ts +4 -0
- package/src/config/skills.ts +50 -4
- package/src/daemon/conversation-agent-loop-handlers.ts +8 -3
- package/src/daemon/conversation-agent-loop.ts +71 -1
- package/src/daemon/conversation-lifecycle.ts +11 -1
- package/src/daemon/conversation-memory.ts +117 -0
- package/src/daemon/conversation-runtime-assembly.ts +3 -1
- package/src/daemon/conversation-surfaces.ts +31 -8
- package/src/daemon/conversation.ts +40 -23
- package/src/daemon/handlers/config-embeddings.ts +10 -2
- package/src/daemon/handlers/config-model.ts +0 -9
- package/src/daemon/handlers/conversations.ts +11 -0
- package/src/daemon/handlers/identity.ts +12 -1
- package/src/daemon/lifecycle.ts +52 -1
- package/src/daemon/message-types/conversations.ts +0 -1
- package/src/daemon/server.ts +1 -1
- package/src/followups/followup-store.ts +47 -1
- package/src/memory/archive-recall.ts +516 -0
- package/src/memory/archive-store.ts +400 -0
- package/src/memory/brief-formatting.ts +33 -0
- package/src/memory/brief-open-loops.ts +266 -0
- package/src/memory/brief-time.ts +162 -0
- package/src/memory/brief.ts +75 -0
- package/src/memory/conversation-crud.ts +455 -101
- package/src/memory/conversation-key-store.ts +33 -4
- package/src/memory/db-init.ts +16 -0
- package/src/memory/indexer.ts +106 -15
- package/src/memory/job-handlers/backfill-simplified-memory.ts +462 -0
- package/src/memory/job-handlers/conversation-starters.ts +9 -3
- package/src/memory/job-handlers/embedding.test.ts +1 -0
- package/src/memory/job-handlers/embedding.ts +83 -0
- package/src/memory/job-handlers/reduce-conversation-memory.ts +229 -0
- package/src/memory/job-utils.ts +1 -1
- package/src/memory/jobs-store.ts +8 -0
- package/src/memory/jobs-worker.ts +20 -0
- package/src/memory/migrations/036-normalize-phone-identities.ts +49 -14
- package/src/memory/migrations/135-backfill-contact-interaction-stats.ts +9 -1
- package/src/memory/migrations/141-rename-verification-table.ts +8 -0
- package/src/memory/migrations/142-rename-verification-session-id-column.ts +7 -2
- package/src/memory/migrations/174-rename-thread-starters-table.ts +8 -0
- package/src/memory/migrations/185-memory-brief-state.ts +52 -0
- package/src/memory/migrations/186-memory-archive.ts +109 -0
- package/src/memory/migrations/187-memory-reducer-checkpoints.ts +19 -0
- package/src/memory/migrations/188-schedule-quiet-flag.ts +13 -0
- package/src/memory/migrations/index.ts +4 -0
- package/src/memory/qdrant-client.ts +23 -4
- package/src/memory/reducer-scheduler.ts +242 -0
- package/src/memory/reducer-store.ts +271 -0
- package/src/memory/reducer-types.ts +106 -0
- package/src/memory/reducer.ts +467 -0
- package/src/memory/schema/conversations.ts +3 -0
- package/src/memory/schema/index.ts +2 -0
- package/src/memory/schema/infrastructure.ts +1 -0
- package/src/memory/schema/memory-archive.ts +121 -0
- package/src/memory/schema/memory-brief.ts +55 -0
- package/src/memory/search/semantic.ts +17 -4
- package/src/oauth/oauth-store.ts +3 -1
- package/src/permissions/checker.ts +89 -6
- package/src/permissions/defaults.ts +14 -0
- package/src/runtime/auth/route-policy.ts +10 -1
- package/src/runtime/routes/conversation-management-routes.ts +94 -2
- package/src/runtime/routes/conversation-query-routes.ts +7 -0
- package/src/runtime/routes/conversation-routes.ts +52 -5
- package/src/runtime/routes/guardian-bootstrap-routes.ts +19 -7
- package/src/runtime/routes/identity-routes.ts +2 -35
- package/src/runtime/routes/llm-context-normalization.ts +14 -1
- package/src/runtime/routes/memory-item-routes.ts +90 -5
- package/src/runtime/routes/secret-routes.ts +3 -0
- package/src/runtime/routes/surface-action-routes.ts +68 -1
- package/src/schedule/schedule-store.ts +28 -0
- package/src/schedule/scheduler.ts +6 -2
- package/src/skills/inline-command-expansions.ts +204 -0
- package/src/skills/inline-command-render.ts +127 -0
- package/src/skills/inline-command-runner.ts +242 -0
- package/src/skills/transitive-version-hash.ts +88 -0
- package/src/tasks/task-store.ts +43 -1
- package/src/telemetry/usage-telemetry-reporter.ts +1 -1
- package/src/tools/filesystem/edit.ts +6 -1
- package/src/tools/filesystem/read.ts +6 -1
- package/src/tools/filesystem/write.ts +6 -1
- package/src/tools/memory/handlers.ts +129 -1
- package/src/tools/permission-checker.ts +8 -1
- package/src/tools/schedule/create.ts +3 -0
- package/src/tools/schedule/list.ts +5 -1
- package/src/tools/schedule/update.ts +6 -0
- package/src/tools/skills/load.ts +140 -6
- package/src/util/platform.ts +18 -0
- package/src/workspace/migrations/{002-backfill-installation-id.ts → 011-backfill-installation-id.ts} +1 -1
- package/src/workspace/migrations/registry.ts +1 -1
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical parser for inline command expansion tokens in skill bodies.
|
|
3
|
+
*
|
|
4
|
+
* Syntax: !\`command\`
|
|
5
|
+
*
|
|
6
|
+
* These tokens are parsed from the markdown body of a SKILL.md file (after
|
|
7
|
+
* frontmatter extraction). Tokens inside fenced code blocks are ignored so
|
|
8
|
+
* that documentation examples or literal snippets do not accidentally execute.
|
|
9
|
+
*
|
|
10
|
+
* The parser fails closed on malformed tokens: unmatched backticks, empty
|
|
11
|
+
* commands, or nested backticks that make the command text ambiguous are
|
|
12
|
+
* rejected rather than best-effort expanded.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { getLogger } from "../util/logger.js";
|
|
16
|
+
|
|
17
|
+
const log = getLogger("inline-command-expansions");
|
|
18
|
+
|
|
19
|
+
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * Descriptor for one well-formed inline command token found in a skill body.
 *
 * Offsets are JavaScript string indices (UTF-16 code units, as reported by the
 * regex match position), not byte offsets, so they can be used directly with
 * `String.prototype.slice` on the same body string.
 */
export interface InlineCommandExpansion {
  /** The command text between the backticks, with surrounding whitespace trimmed. */
  command: string;
  /** String index of the `!` character in the original body string. */
  startOffset: number;
  /** String index one past the closing backtick in the original body string. */
  endOffset: number;
  /** Stable placeholder ID derived from encounter order (0-indexed). */
  placeholderId: number;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Outcome of scanning a skill body for inline command tokens: the accepted
 * tokens plus every token that was rejected.
 */
export interface InlineCommandExpansionResult {
  /** Successfully parsed expansion descriptors, in encounter order. */
  expansions: InlineCommandExpansion[];
  /** Malformed tokens that were rejected rather than expanded (fail-closed). */
  errors: InlineCommandExpansionError[];
}
|
|
40
|
+
|
|
41
|
+
/** Describes one inline command token that the parser refused to accept. */
export interface InlineCommandExpansionError {
  /**
   * The rejected text. For a complete token this is the full match; for an
   * unmatched opening `!` + backtick it is a snippet of at most 40 characters
   * starting at the token.
   */
  raw: string;
  /** String index (UTF-16 code units, not bytes) of the token in the original body. */
  offset: number;
  /** Human-readable reason for rejection. */
  reason: string;
}
|
|
50
|
+
|
|
51
|
+
// ─── Fenced code block stripping ──────────────────────────────────────────────
|
|
52
|
+
|
|
53
|
+
/**
 * Compute the `[start, end)` string-index ranges of `body` that lie inside
 * fenced code blocks.
 *
 * A fence line consists of up to three spaces of indentation (as CommonMark
 * permits) followed by three or more backticks or tildes. The first fence
 * line seen opens a block; that line may carry an info string. The block is
 * closed by a later fence line that uses the same character, is at least as
 * long as the opener, and has nothing but whitespace after the delimiter.
 * Any other fence-like line inside an open block is treated as content.
 *
 * A block that is never closed extends to the end of `body`, so that
 * everything after a dangling fence is treated as code.
 *
 * @param body Markdown text to scan.
 * @returns Ranges in document order. Each range starts at the beginning of
 *   the opening fence line and ends just past the closing fence line (or at
 *   `body.length` for an unclosed block).
 */
function buildFencedCodeRanges(body: string): Array<[number, number]> {
  const ranges: Array<[number, number]> = [];
  // Group 1: the fence delimiter. Group 2: whatever follows it on the line.
  // Up to three leading spaces are allowed; four or more is not a fence.
  const fenceRe = /^ {0,3}(`{3,}|~{3,})(.*)$/gm;
  let open: { index: number; char: string; length: number } | undefined;

  let match: RegExpExecArray | null;
  while ((match = fenceRe.exec(body)) !== null) {
    const delimiter = match[1] ?? "";
    const trailing = match[2] ?? "";

    if (open === undefined) {
      open = {
        index: match.index,
        char: delimiter.charAt(0),
        length: delimiter.length,
      };
      continue;
    }

    const closesOpenFence =
      delimiter.charAt(0) === open.char &&
      delimiter.length >= open.length &&
      // A closing fence must be bare: no info string after the delimiter.
      trailing.trim() === "";

    if (closesOpenFence) {
      ranges.push([open.index, match.index + match[0].length]);
      open = undefined;
    }
    // Otherwise: a fence-like line nested inside the open block; ignore it.
  }

  // Unclosed fence: everything from the opener to EOF counts as code.
  if (open !== undefined) {
    ranges.push([open.index, body.length]);
  }

  return ranges;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Report whether `offset` falls within any of the given half-open
 * `[start, end)` ranges.
 *
 * @param offset String index to test.
 * @param ranges Ranges to test against; `start` is inclusive, `end` exclusive.
 * @returns `true` if at least one range contains `offset`.
 */
function isInsideFencedCode(
  offset: number,
  ranges: Array<[number, number]>,
): boolean {
  return ranges.some(
    ([rangeStart, rangeEnd]) => rangeStart <= offset && offset < rangeEnd,
  );
}
|
|
104
|
+
|
|
105
|
+
// ─── Parser ───────────────────────────────────────────────────────────────────
|
|
106
|
+
|
|
107
|
+
/**
 * Parse inline command expansion tokens (`!` followed by a backtick-quoted
 * command) from a skill body.
 *
 * The body must be the markdown content _after_ frontmatter has been stripped.
 *
 * Handling rules:
 * - A token that starts inside a fenced code block is skipped silently.
 * - A token that starts outside a fence but whose closing backtick lies in a
 *   fenced code block is rejected. This happens when an unclosed token
 *   "borrows" a backtick from a following fence.
 * - A token whose command is empty or whitespace-only is rejected.
 * - An opening `!` + backtick with no closing backtick anywhere after it is
 *   rejected.
 *
 * The command text can never contain a backtick: matching stops at the first
 * backtick after the opener, so any text following that backtick is ordinary
 * body text and is not part of the token.
 *
 * Rejected tokens are returned in `errors` and logged as a single warning.
 *
 * @param body Skill body to scan.
 * @returns Accepted expansions in encounter order (with sequential
 *   `placeholderId`s starting at 0) and the list of rejected tokens.
 */
export function parseInlineCommandExpansions(
  body: string,
): InlineCommandExpansionResult {
  const expansions: InlineCommandExpansion[] = [];
  const errors: InlineCommandExpansionError[] = [];

  const fencedRanges = buildFencedCodeRanges(body);

  // Opener, then everything up to (not including) the next backtick, then the
  // closing backtick. The negated class makes the match stop at the first
  // closing backtick and also allows the command to be empty.
  const tokenRe = /!`([^`]*)`/g;

  // Start index of every complete token, recorded during the main pass so the
  // unmatched-opener scan below does not need to re-run the token regex.
  const matchedStarts = new Set<number>();

  let placeholderCounter = 0;
  let match: RegExpExecArray | undefined;

  while ((match = tokenRe.exec(body) ?? undefined) !== undefined) {
    const startOffset = match.index;
    const endOffset = startOffset + match[0].length;
    const rawCommand = match[1] ?? "";
    matchedStarts.add(startOffset);

    // Skip tokens that begin inside a fenced code block.
    if (isInsideFencedCode(startOffset, fencedRanges)) {
      continue;
    }

    // Fail closed: the token starts in prose but runs into a fenced block.
    const overlapsFence = fencedRanges.some(
      ([fenceStart, fenceEnd]) =>
        startOffset < fenceEnd && endOffset > fenceStart,
    );
    if (overlapsFence) {
      errors.push({
        raw: body.slice(startOffset, Math.min(startOffset + 40, body.length)),
        offset: startOffset,
        reason: "Token extends into a fenced code block",
      });
      continue;
    }

    // Fail closed: empty command.
    const command = rawCommand.trim();
    if (command.length === 0) {
      errors.push({
        raw: match[0],
        offset: startOffset,
        reason: "Empty command text",
      });
      continue;
    }

    expansions.push({
      command,
      startOffset,
      endOffset,
      placeholderId: placeholderCounter++,
    });
  }

  // Detect openers that never found a closing backtick: any `!` + backtick
  // that is not the start of a complete token and is not inside a fence.
  const openerRe = /!`/g;
  let opener: RegExpExecArray | undefined;
  while ((opener = openerRe.exec(body) ?? undefined) !== undefined) {
    const offset = opener.index;

    if (matchedStarts.has(offset)) continue;
    if (isInsideFencedCode(offset, fencedRanges)) continue;

    errors.push({
      raw: body.slice(offset, Math.min(offset + 40, body.length)),
      offset,
      reason: "Unmatched opening backtick (no closing backtick found)",
    });
  }

  if (errors.length > 0) {
    log.warn(
      { errorCount: errors.length, errors },
      "Malformed inline command expansion tokens detected",
    );
  }

  return { expansions, errors };
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Renderer for inline command expansion tokens in skill bodies.
|
|
3
|
+
*
|
|
4
|
+
* Given a skill body and its parsed `InlineCommandExpansion` descriptors,
|
|
5
|
+
* replaces each `!\`command\`` token by executing the command through the
|
|
6
|
+
* sandbox-only runner and wrapping the result in XML tags:
|
|
7
|
+
*
|
|
8
|
+
* <inline_skill_command index="0">...output...</inline_skill_command>
|
|
9
|
+
*
|
|
10
|
+
* Render failures produce stable inline stubs rather than dumping raw
|
|
11
|
+
* shell stderr into the prompt:
|
|
12
|
+
*
|
|
13
|
+
* <inline_skill_command index="0">[inline command unavailable: <reason>]</inline_skill_command>
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { getLogger } from "../util/logger.js";
|
|
17
|
+
import type { InlineCommandExpansion } from "./inline-command-expansions.js";
|
|
18
|
+
import type { InlineCommandResult } from "./inline-command-runner.js";
|
|
19
|
+
import { runInlineCommand } from "./inline-command-runner.js";
|
|
20
|
+
|
|
21
|
+
const log = getLogger("inline-command-render");
|
|
22
|
+
|
|
23
|
+
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
/**
 * Summary returned after every inline command token in a skill body has been
 * replaced with either its output or a failure stub.
 */
export interface InlineCommandRenderResult {
  /** The body with all inline command tokens replaced. */
  renderedBody: string;
  /** Number of tokens whose command ran successfully and was expanded. */
  expandedCount: number;
  /** Number of tokens whose command failed and was rendered as a stub. */
  failedCount: number;
}
|
|
34
|
+
|
|
35
|
+
// ─── Failure reason mapping ───────────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
/** Fixed stub text for each known machine-readable failure reason. */
const STUB_BY_FAILURE_REASON: ReadonlyMap<string, string> = new Map([
  ["timeout", "command timed out"],
  ["non_zero_exit", "command failed"],
  ["binary_output", "command produced binary output"],
  ["spawn_failure", "command could not be started"],
]);

/**
 * Translate a failed command result into the short, fixed phrase that is
 * embedded in the prompt in place of the command output.
 *
 * The phrases are deliberately terse and deterministic; nothing from the
 * command itself is included.
 *
 * @param result The runner result whose `failureReason` is inspected.
 * @returns The phrase for the failure reason, or `"unknown error"` when the
 *   reason is absent or not one of the known values.
 */
function failureReasonToStub(result: InlineCommandResult): string {
  const reason = result.failureReason;
  if (reason === undefined) {
    return "unknown error";
  }
  return STUB_BY_FAILURE_REASON.get(reason) ?? "unknown error";
}
|
|
56
|
+
|
|
57
|
+
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
/**
 * Render all inline command expansion tokens in a skill body.
 *
 * Each token is executed through `runInlineCommand` and replaced with its
 * output wrapped in an `<inline_skill_command>` tag. A failed command is
 * replaced with a fixed `[inline command unavailable: …]` stub instead, and a
 * warning is logged.
 *
 * Commands run one at a time, in the order their tokens appear in the body
 * (ascending `startOffset`), so execution order matches reading order. The
 * rendered body is assembled from the untouched text between tokens plus the
 * replacements, using the offsets recorded against the original `body`.
 *
 * @param body       The skill body containing inline command tokens.
 * @param expansions Parsed expansion descriptors from
 *                   `parseInlineCommandExpansions`; offsets must refer to
 *                   `body` and must not overlap.
 * @param workingDir The conversation's working directory (repo root).
 * @returns The rendered body and counts of expanded and failed tokens.
 */
export async function renderInlineCommands(
  body: string,
  expansions: InlineCommandExpansion[],
  workingDir: string,
): Promise<InlineCommandRenderResult> {
  if (expansions.length === 0) {
    return { renderedBody: body, expandedCount: 0, failedCount: 0 };
  }

  let expandedCount = 0;
  let failedCount = 0;

  // Copy before sorting so the caller's array is not reordered.
  const inDocumentOrder = [...expansions].sort(
    (a, b) => a.startOffset - b.startOffset,
  );

  const segments: string[] = [];
  let cursor = 0;

  for (const expansion of inDocumentOrder) {
    // Sequential on purpose: one sandboxed command at a time.
    const commandResult = await runInlineCommand(expansion.command, workingDir);

    let replacement: string;
    if (commandResult.ok) {
      replacement = wrapInXml(expansion.placeholderId, commandResult.output);
      expandedCount++;
    } else {
      const stub = failureReasonToStub(commandResult);
      replacement = wrapInXml(
        expansion.placeholderId,
        `[inline command unavailable: ${stub}]`,
      );
      failedCount++;
      log.warn(
        {
          command: expansion.command,
          placeholderId: expansion.placeholderId,
          failureReason: commandResult.failureReason,
        },
        "Inline command expansion failed, rendering stub",
      );
    }

    // Keep the text between the previous token and this one, then the
    // replacement for this token.
    segments.push(body.slice(cursor, expansion.startOffset), replacement);
    cursor = expansion.endOffset;
  }

  // Whatever follows the last token is copied through unchanged.
  segments.push(body.slice(cursor));

  return { renderedBody: segments.join(""), expandedCount, failedCount };
}
|
|
122
|
+
|
|
123
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * Wrap `content` in an `<inline_skill_command>` element tagged with the
 * token's placeholder index.
 *
 * NOTE(review): `content` is inserted verbatim — it is not XML-escaped, so
 * content containing the closing tag would end the element early. Confirm
 * whether callers rely on the raw text before changing this.
 *
 * @param index   Placeholder index written to the `index` attribute.
 * @param content Text placed between the opening and closing tags.
 */
function wrapInXml(index: number, content: string): string {
  const openTag = `<inline_skill_command index="${index}">`;
  const closeTag = "</inline_skill_command>";
  return openTag + content + closeTag;
}
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sandbox-only runner for inline command expansions (`!\`command\``).
|
|
3
|
+
*
|
|
4
|
+
* Executes the literal command string in the sandbox without going through the
|
|
5
|
+
* general `bash` tool's permission path. Security constraints:
|
|
6
|
+
*
|
|
7
|
+
* - Network mode forced off (no outbound connections)
|
|
8
|
+
* - Sanitized environment variables only (no API keys, tokens, credentials)
|
|
9
|
+
* - No credential proxy, no CES client, no host fallback
|
|
10
|
+
* - Uses the conversation working directory as `cwd` so repo-local commands
|
|
11
|
+
* remain interoperable with externally authored skills that expect project
|
|
12
|
+
* context.
|
|
13
|
+
*
|
|
14
|
+
* Output handling:
|
|
15
|
+
* - Captures stdout only (stderr is discarded)
|
|
16
|
+
* - Strips ANSI escape sequences
|
|
17
|
+
* - Rejects binary-ish output
|
|
18
|
+
* - Clamps output to a fixed cap
|
|
19
|
+
* - Returns deterministic sanitized error results for timeout, non-zero exit,
|
|
20
|
+
* or spawn failures (no raw stderr dumps)
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { spawn } from "node:child_process";
|
|
24
|
+
|
|
25
|
+
import { getConfig } from "../config/loader.js";
|
|
26
|
+
import { buildSanitizedEnv } from "../tools/terminal/safe-env.js";
|
|
27
|
+
import { wrapCommand } from "../tools/terminal/sandbox.js";
|
|
28
|
+
import { getLogger } from "../util/logger.js";
|
|
29
|
+
|
|
30
|
+
const log = getLogger("inline-command-runner");
|
|
31
|
+
|
|
32
|
+
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
/** Maximum wall-clock time for an inline command before it is killed. */
const DEFAULT_TIMEOUT_MS = 10_000;

/** Maximum output characters before truncation. */
const MAX_OUTPUT_CHARS = 20_000;

/**
 * ANSI escape sequence pattern.
 *
 * Two alternatives:
 * - CSI sequences: `ESC [`, then any parameter bytes (0x30–0x3F, which covers
 *   digits, `;`, `:` and private markers such as `?`), then any intermediate
 *   bytes (0x20–0x2F), then one final byte (0x40–0x7E). This covers SGR,
 *   cursor movement, erase, and private-mode sequences like `ESC[?25l`.
 * - OSC sequences: `ESC ]`, then a payload, then a terminator — either BEL
 *   (0x07) or ST (`ESC \`). The payload excludes ESC as well as BEL so the
 *   match ends at the nearest terminator and cannot swallow ordinary text
 *   that sits between an ST-terminated sequence and a later BEL.
 */
const ANSI_RE = /\x1b\[[0-?]*[ -\/]*[@-~]|\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)/g;

/**
 * Heuristic for binary output: if more than 10% of the characters are
 * non-printable (control chars excluding \t, \n, \r) then reject.
 */
const BINARY_THRESHOLD = 0.1;
|
|
51
|
+
|
|
52
|
+
// ─── Result type ─────────────────────────────────────────────────────────────
|
|
53
|
+
|
|
54
|
+
/**
 * Result shape returned by the inline command runner for both success and
 * failure; the runner always resolves with one of these rather than rejecting.
 */
export interface InlineCommandResult {
  /**
   * On success, the sanitized stdout text. On failure, a fixed human-readable
   * description of what went wrong.
   */
  output: string;
  /** Whether the command completed successfully. */
  ok: boolean;
  /**
   * Machine-readable failure reason.
   * - `"timeout"` — command exceeded the wall-clock limit
   * - `"non_zero_exit"` — command exited with a non-zero code
   * - `"binary_output"` — stdout contained binary-ish data
   * - `"spawn_failure"` — the subprocess could not be spawned
   * - `undefined` — success
   */
  failureReason?:
    | "timeout"
    | "non_zero_exit"
    | "binary_output"
    | "spawn_failure";
}
|
|
74
|
+
|
|
75
|
+
// ─── Public API ──────────────────────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
/** Optional per-call overrides for the inline command runner's limits. */
export interface InlineCommandRunnerOptions {
  /** Wall-clock timeout in milliseconds; the runner's default is used when omitted. */
  timeoutMs?: number;
  /** Output cap in characters; the runner's default is used when omitted. */
  maxOutputChars?: number;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Run an inline command expansion in the sandbox.
 *
 * The command is wrapped with `wrapCommand` using the configured sandbox
 * settings with `enabled` forced on and network mode forced off, and is
 * spawned with a sanitized environment from which the gateway URL, workspace
 * dir, and data dir variables are removed. Only stdout is captured; stdin and
 * stderr are ignored.
 *
 * The returned promise never rejects and resolves exactly once:
 * - `spawn_failure` if the process cannot be started.
 * - `timeout` as soon as the wall-clock limit elapses. The child is sent
 *   SIGKILL and the result is delivered immediately, without waiting for the
 *   `close` event, which may be delayed while another process still holds the
 *   stdout pipe open.
 * - `non_zero_exit` if the process closes with any exit code other than 0
 *   (including `null` when it was terminated by a signal).
 * - `binary_output` if the ANSI-stripped stdout looks binary.
 * - Otherwise success, with ANSI sequences stripped, output clamped to the
 *   character cap (suffixed with `[output truncated]` when clamped), and
 *   trailing whitespace removed.
 *
 * Buffered stdout is bounded: once the capture limit is reached, further
 * data is still drained (so the child is not blocked on a full pipe) but is
 * discarded, and the output is marked as truncated.
 *
 * @param command    The literal command string from the inline command token.
 * @param workingDir The conversation's working directory (repo root).
 * @param options    Optional overrides for timeout and output cap.
 */
export async function runInlineCommand(
  command: string,
  workingDir: string,
  options?: InlineCommandRunnerOptions,
): Promise<InlineCommandResult> {
  const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const maxChars = options?.maxOutputChars ?? MAX_OUTPUT_CHARS;

  // Upper bound on buffered stdout bytes. Four bytes per character covers the
  // widest UTF-8 encoding; the 1 MiB floor leaves generous headroom for
  // escape sequences that are stripped before the character cap is applied.
  const maxCaptureBytes = Math.max(maxChars * 4, 1024 * 1024);

  // Build sandbox-wrapped command. Always use the sandbox config with
  // network forced off — inline commands never need network access.
  const config = getConfig();
  const sandboxConfig = { ...config.sandbox, enabled: true };

  const wrapped = wrapCommand(command, workingDir, sandboxConfig, {
    networkMode: "off",
  });

  // Build a minimal, sanitized environment. Explicitly exclude gateway URL,
  // workspace dir, and data dir since inline commands have no business calling
  // internal APIs, mutating workspace state, or accessing instance-scoped data.
  const env = buildSanitizedEnv();
  delete env.INTERNAL_GATEWAY_BASE_URL;
  delete env.VELLUM_WORKSPACE_DIR;
  delete env.VELLUM_DATA_DIR;

  return new Promise<InlineCommandResult>((resolve) => {
    let settled = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Single exit point: the first outcome wins, later events are ignored.
    const settle = (result: InlineCommandResult): void => {
      if (settled) return;
      settled = true;
      if (timer !== undefined) clearTimeout(timer);
      resolve(result);
    };

    let child: ReturnType<typeof spawn>;
    try {
      child = spawn(wrapped.command, wrapped.args, {
        cwd: workingDir,
        env,
        stdio: ["ignore", "pipe", "ignore"],
      });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      log.warn({ command, error: message }, "Failed to spawn inline command");
      settle({
        output: "Inline command could not be started.",
        ok: false,
        failureReason: "spawn_failure",
      });
      return;
    }

    const stdoutChunks: Buffer[] = [];
    let capturedBytes = 0;
    let captureOverflowed = false;

    timer = setTimeout(() => {
      log.debug({ command, timeoutMs }, "Inline command timed out");
      child.kill("SIGKILL");
      // Release our end of the pipe; we no longer care about further output.
      child.stdout?.destroy();
      settle({
        output: `Inline command timed out after ${timeoutMs}ms.`,
        ok: false,
        failureReason: "timeout",
      });
    }, timeoutMs);

    child.stdout?.on("data", (data: Buffer) => {
      const remaining = maxCaptureBytes - capturedBytes;
      if (remaining <= 0) {
        // Keep draining so the child never blocks, but drop the data.
        captureOverflowed = true;
        return;
      }
      if (data.length > remaining) {
        stdoutChunks.push(data.subarray(0, remaining));
        capturedBytes += remaining;
        captureOverflowed = true;
        return;
      }
      stdoutChunks.push(data);
      capturedBytes += data.length;
    });

    child.on("close", (code) => {
      // A timeout or spawn error has already produced the result.
      if (settled) return;

      // ── Non-zero exit ────────────────────────────────────────────────
      if (code !== 0) {
        log.debug(
          { command, exitCode: code },
          "Inline command exited with non-zero code",
        );
        settle({
          output: `Inline command failed (exit code ${code}).`,
          ok: false,
          failureReason: "non_zero_exit",
        });
        return;
      }

      // ── Process stdout ───────────────────────────────────────────────
      const raw = Buffer.concat(stdoutChunks).toString("utf-8");

      // Strip ANSI sequences first — these are terminal artifacts, not
      // binary data. Stripping before the binary check prevents legitimate
      // color-coded tool output from being rejected.
      let cleaned = raw.replace(ANSI_RE, "");

      // Reject binary-ish output (after ANSI stripping)
      if (isBinaryish(cleaned)) {
        log.debug({ command }, "Inline command produced binary-ish output");
        settle({
          output: "Inline command produced binary output.",
          ok: false,
          failureReason: "binary_output",
        });
        return;
      }

      // Clamp to max output; also flag output that was cut at capture time.
      if (cleaned.length > maxChars) {
        cleaned = cleaned.slice(0, maxChars) + "\n[output truncated]";
      } else if (captureOverflowed) {
        cleaned += "\n[output truncated]";
      }

      settle({
        output: cleaned.trimEnd(),
        ok: true,
      });
    });

    child.on("error", (err) => {
      log.warn({ command, error: err.message }, "Inline command spawn error");
      settle({
        output: "Inline command could not be started.",
        ok: false,
        failureReason: "spawn_failure",
      });
    });
  });
}
|
|
217
|
+
|
|
218
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
219
|
+
|
|
220
|
+
/**
 * Decide whether `text` looks like binary data.
 *
 * Counts control characters — code units 0x00–0x1F other than tab (0x09),
 * newline (0x0A) and carriage return (0x0D), plus DEL (0x7F) — and compares
 * their share of the total length against {@link BINARY_THRESHOLD}.
 *
 * @param text Text to inspect.
 * @returns `true` when the share of control characters is strictly greater
 *   than the threshold; always `false` for an empty string.
 */
function isBinaryish(text: string): boolean {
  const totalLength = text.length;
  if (totalLength === 0) {
    return false;
  }

  // 0x00–0x08, 0x0B, 0x0C, 0x0E–0x1F, and 0x7F: every control character
  // except \t, \n and \r.
  const controlMatches = text.match(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/g);
  const controlTotal = controlMatches === null ? 0 : controlMatches.length;

  return controlTotal / totalLength > BINARY_THRESHOLD;
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
import type { SkillSummary } from "../config/skills.js";
|
|
4
|
+
import { validateIncludes } from "./include-graph.js";
|
|
5
|
+
import { computeSkillVersionHash } from "./version-hash.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Raised when a skill's include graph cannot be hashed because it is invalid:
 * either a referenced node is missing or the graph contains a cycle.
 *
 * The permission layer depends on exact approval candidates, so callers get a
 * hard failure (fail closed) instead of a partial or potentially misleading
 * hash.
 */
export class TransitiveHashError extends Error {
  /** Which kind of graph problem caused the failure. */
  public readonly code: "missing" | "cycle";

  /**
   * @param message Human-readable description of the failure.
   * @param code    Failure category: `"missing"` or `"cycle"`.
   */
  constructor(message: string, code: "missing" | "cycle") {
    super(message);
    this.code = code;
    this.name = "TransitiveHashError";
  }
}
|
|
21
|
+
|
|
22
|
+
/**
 * Produce a single version hash that covers a skill together with every skill
 * it transitively includes.
 *
 * For each skill ID reported by `validateIncludes` (in the order it returns
 * them), the digest is fed:
 *   1. the skill ID followed by a NUL byte, so the graph structure matters;
 *   2. that skill's directory hash from `computeSkillVersionHash`, followed by
 *      a newline, so file changes propagate.
 *
 * Editing any included child therefore changes the parent's transitive hash,
 * which is required for version-pinned inline-command approval.
 *
 * @param rootSkillId  The skill ID to start traversal from.
 * @param catalogIndex A `Map<skillId, SkillSummary>` built via `indexCatalogById`.
 * @returns A canonical hash string in the format `tv1:<hex-sha256>`.
 * @throws TransitiveHashError with code `"missing"` when a child referenced in
 *   `includes` is absent from the catalog index, or with code `"cycle"` when
 *   the include graph contains a cycle.
 */
export function computeTransitiveSkillVersionHash(
  rootSkillId: string,
  catalogIndex: Map<string, SkillSummary>,
): string {
  // Fail closed: refuse to hash anything unless the include graph is valid.
  const graphCheck = validateIncludes(rootSkillId, catalogIndex);

  if (!graphCheck.ok) {
    if (graphCheck.error !== "cycle") {
      // The only other failure kind is a missing child.
      throw new TransitiveHashError(
        `Missing child skill "${graphCheck.missingChildId}" referenced by "${graphCheck.parentId}" (path: ${graphCheck.path.join(" -> ")})`,
        "missing",
      );
    }
    throw new TransitiveHashError(
      `Cycle detected in include graph: ${graphCheck.cyclePath.join(" -> ")}`,
      "cycle",
    );
  }

  // On success the validator hands back every skill ID in DFS pre-order.
  const digest = createHash("sha256");

  for (const id of graphCheck.visited) {
    // The ID itself, NUL-terminated, ties the hash to the graph structure.
    digest.update(id).update("\0");

    const entry = catalogIndex.get(id);
    if (!entry) {
      // Should be unreachable after successful validation, but fail closed.
      throw new TransitiveHashError(
        `Skill "${id}" disappeared from catalog index after validation`,
        "missing",
      );
    }

    // The per-directory content hash makes file edits change the result.
    digest.update(computeSkillVersionHash(entry.directoryPath)).update("\n");
  }

  return `tv1:${digest.digest("hex")}`;
}
|