@vellumai/cli 0.5.6 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/knip.json +3 -1
- package/package.json +1 -1
- package/src/commands/backup.ts +28 -13
- package/src/commands/hatch.ts +96 -60
- package/src/commands/retire.ts +5 -5
- package/src/commands/rollback.ts +298 -135
- package/src/commands/upgrade.ts +548 -200
- package/src/lib/assistant-config.ts +33 -6
- package/src/lib/aws.ts +2 -0
- package/src/lib/backup-ops.ts +213 -0
- package/src/lib/cli-error.ts +91 -0
- package/src/lib/config-utils.ts +59 -0
- package/src/lib/docker.ts +45 -37
- package/src/lib/doctor-client.ts +11 -1
- package/src/lib/gcp.ts +5 -1
- package/src/lib/local.ts +29 -9
- package/src/lib/platform-client.ts +17 -3
- package/src/lib/platform-releases.ts +112 -0
- package/src/lib/upgrade-lifecycle.ts +237 -0
- package/src/lib/workspace-git.ts +39 -0
|
@@ -1,4 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { randomBytes } from "crypto";
|
|
2
|
+
import {
|
|
3
|
+
existsSync,
|
|
4
|
+
mkdirSync,
|
|
5
|
+
readFileSync,
|
|
6
|
+
renameSync,
|
|
7
|
+
unlinkSync,
|
|
8
|
+
writeFileSync,
|
|
9
|
+
} from "fs";
|
|
2
10
|
import { homedir } from "os";
|
|
3
11
|
import { join } from "path";
|
|
4
12
|
|
|
@@ -82,6 +90,14 @@ export interface AssistantEntry {
|
|
|
82
90
|
previousServiceGroupVersion?: string;
|
|
83
91
|
/** Docker image metadata from before the last upgrade. Enables rollback to the prior version. */
|
|
84
92
|
previousContainerInfo?: ContainerInfo;
|
|
93
|
+
/** Path to the .vbundle backup created for the most recent upgrade. Used by rollback to restore
|
|
94
|
+
* only the backup from the specific upgrade being rolled back — never a stale backup from a
|
|
95
|
+
* previous upgrade cycle. */
|
|
96
|
+
preUpgradeBackupPath?: string;
|
|
97
|
+
/** Pre-upgrade DB migration version — used by rollback to know how far back to revert. */
|
|
98
|
+
previousDbMigrationVersion?: number;
|
|
99
|
+
/** Pre-upgrade workspace migration ID — used by rollback to know how far back to revert. */
|
|
100
|
+
previousWorkspaceMigrationId?: string;
|
|
85
101
|
[key: string]: unknown;
|
|
86
102
|
}
|
|
87
103
|
|
|
@@ -92,7 +108,7 @@ interface LockfileData {
|
|
|
92
108
|
[key: string]: unknown;
|
|
93
109
|
}
|
|
94
110
|
|
|
95
|
-
function getBaseDir(): string {
|
|
111
|
+
/**
 * Resolve the root directory for CLI data.
 *
 * @returns The trimmed `BASE_DATA_DIR` environment variable when it is set to
 *   a non-blank value, otherwise the current user's home directory.
 */
export function getBaseDir(): string {
  const configuredRoot = process.env.BASE_DATA_DIR?.trim();
  if (configuredRoot) {
    return configuredRoot;
  }
  return homedir();
}
|
|
98
114
|
|
|
@@ -124,7 +140,16 @@ function readLockfile(): LockfileData {
|
|
|
124
140
|
|
|
125
141
|
/**
 * Persist the lockfile as pretty-printed JSON.
 *
 * The data is first written to a sibling temp file whose name carries a random
 * hex suffix, then renamed over `.vellum.lock.json`. If either step fails the
 * temp file is removed on a best-effort basis and the original error is
 * rethrown.
 *
 * @param data - Lockfile contents to serialize.
 * @throws Whatever error the write or rename raised.
 */
function writeLockfile(data: LockfileData): void {
  const lockfilePath = join(getLockfileDir(), ".vellum.lock.json");
  const randomSuffix = randomBytes(4).toString("hex");
  const tmpPath = `${lockfilePath}.${randomSuffix}.tmp`;

  try {
    const serialized = `${JSON.stringify(data, null, 2)}\n`;
    writeFileSync(tmpPath, serialized);
    renameSync(tmpPath, lockfilePath);
  } catch (writeError) {
    try {
      unlinkSync(tmpPath);
    } catch {
      // The temp file may never have been created; nothing more to clean up.
    }
    throw writeError;
  }
}
|
|
129
154
|
|
|
130
155
|
/**
|
|
@@ -412,12 +437,14 @@ export async function allocateLocalResources(
|
|
|
412
437
|
instanceName: string,
|
|
413
438
|
): Promise<LocalInstanceResources> {
|
|
414
439
|
// First local assistant gets the home directory with default ports.
|
|
440
|
+
// Respect BASE_DATA_DIR when set (e.g. in e2e tests) so the daemon,
|
|
441
|
+
// gateway, and keychain broker all resolve paths under the same root.
|
|
415
442
|
const existingLocals = loadAllAssistants().filter((e) => e.cloud === "local");
|
|
416
443
|
if (existingLocals.length === 0) {
|
|
417
|
-
const
|
|
418
|
-
const vellumDir = join(
|
|
444
|
+
const baseDir = getBaseDir();
|
|
445
|
+
const vellumDir = join(baseDir, ".vellum");
|
|
419
446
|
return {
|
|
420
|
-
instanceDir:
|
|
447
|
+
instanceDir: baseDir,
|
|
421
448
|
daemonPort: DEFAULT_DAEMON_PORT,
|
|
422
449
|
gatewayPort: DEFAULT_GATEWAY_PORT,
|
|
423
450
|
qdrantPort: DEFAULT_QDRANT_PORT,
|
package/src/lib/aws.ts
CHANGED
|
@@ -374,6 +374,7 @@ export async function hatchAws(
|
|
|
374
374
|
species: Species,
|
|
375
375
|
detached: boolean,
|
|
376
376
|
name: string | null,
|
|
377
|
+
configValues: Record<string, string> = {},
|
|
377
378
|
): Promise<void> {
|
|
378
379
|
const startTime = Date.now();
|
|
379
380
|
try {
|
|
@@ -448,6 +449,7 @@ export async function hatchAws(
|
|
|
448
449
|
providerApiKeys,
|
|
449
450
|
instanceName,
|
|
450
451
|
"aws",
|
|
452
|
+
configValues,
|
|
451
453
|
);
|
|
452
454
|
const startupScriptPath = join(tmpdir(), `${instanceName}-startup.sh`);
|
|
453
455
|
writeFileSync(startupScriptPath, startupScript);
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import {
|
|
2
|
+
existsSync,
|
|
3
|
+
mkdirSync,
|
|
4
|
+
readdirSync,
|
|
5
|
+
readFileSync,
|
|
6
|
+
unlinkSync,
|
|
7
|
+
writeFileSync,
|
|
8
|
+
} from "fs";
|
|
9
|
+
import { homedir } from "os";
|
|
10
|
+
import { dirname, join } from "path";
|
|
11
|
+
|
|
12
|
+
import { loadGuardianToken, leaseGuardianToken } from "./guardian-token.js";
|
|
13
|
+
|
|
14
|
+
/**
 * Resolve the default directory for backup files.
 *
 * @returns `<data home>/vellum/backups`, where the data home is the trimmed
 *   `XDG_DATA_HOME` environment variable when non-blank, otherwise
 *   `~/.local/share`.
 */
export function getBackupsDir(): string {
  const xdgDataHome = process.env.XDG_DATA_HOME?.trim();
  const dataRoot = xdgDataHome
    ? xdgDataHome
    : join(homedir(), ".local", "share");
  return join(dataRoot, "vellum", "backups");
}
|
|
20
|
+
|
|
21
|
+
/**
 * Format a byte count for display.
 *
 * Values below 1024 are printed verbatim with a `B` suffix; values below
 * 1024 * 1024 are shown in `KB` and everything else in `MB`, both with one
 * decimal place.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size, e.g. `"1.5 KB"`.
 */
export function formatSize(bytes: number): string {
  const kib = 1024;
  const mib = kib * kib;

  if (bytes < kib) {
    return `${bytes} B`;
  }

  const fitsInKb = bytes < mib;
  const scaled = bytes / (fitsInKb ? kib : mib);
  return `${scaled.toFixed(1)} ${fitsInKb ? "KB" : "MB"}`;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Return a guardian access token for the assistant.
 *
 * Unless `forceRefresh` is set, the stored token is reused when its
 * `accessTokenExpiresAt` is still in the future. Otherwise a new token is
 * leased from the runtime.
 *
 * @param runtimeUrl - Base URL passed to `leaseGuardianToken`.
 * @param assistantId - Assistant whose token is looked up or leased.
 * @param forceRefresh - When truthy, skip the stored token and always lease.
 * @returns The access token string.
 */
async function getGuardianAccessToken(
  runtimeUrl: string,
  assistantId: string,
  forceRefresh?: boolean,
): Promise<string> {
  const cached = forceRefresh ? null : loadGuardianToken(assistantId);
  const cachedStillValid =
    cached != null && new Date(cached.accessTokenExpiresAt) > new Date();
  if (cached && cachedStillValid) {
    return cached.accessToken;
  }

  const leased = await leaseGuardianToken(runtimeUrl, assistantId);
  return leased.accessToken;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Export a `.vbundle` backup from a running assistant and save it to disk.
 *
 * Sends `POST {runtimeUrl}/v1/migrations/export` with a guardian bearer
 * token. A 401 response triggers exactly one retry with a freshly leased
 * token. The response body is written to
 * `<backups dir>/<prefix>-<ISO timestamp>.vbundle`, where `:` and `.` in the
 * timestamp are replaced with `-`.
 *
 * @param runtimeUrl - Base URL of the assistant runtime.
 * @param assistantId - Assistant to back up; also the default filename prefix.
 * @param options - Optional filename `prefix` and export `description`
 *   (defaults to `"CLI backup"`).
 * @returns The path of the saved backup, or `null` when anything failed.
 *   Failures are reported via `console.warn`; this function does not throw.
 */
export async function createBackup(
  runtimeUrl: string,
  assistantId: string,
  options?: { prefix?: string; description?: string },
): Promise<string | null> {
  try {
    // Each attempt gets its own 120 s abort signal.
    const requestExport = (token: string): Promise<Response> =>
      fetch(`${runtimeUrl}/v1/migrations/export`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${token}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          description: options?.description ?? "CLI backup",
        }),
        signal: AbortSignal.timeout(120_000),
      });

    const cachedToken = await getGuardianAccessToken(runtimeUrl, assistantId);
    let response = await requestExport(cachedToken);

    // Retry once with a fresh token on 401 — the cached token may be stale
    // after a container restart that generated a new gateway signing key.
    if (response.status === 401) {
      const freshToken = await getGuardianAccessToken(
        runtimeUrl,
        assistantId,
        true,
      );
      response = await requestExport(freshToken);
    }

    if (!response.ok) {
      const errorText = await response.text();
      console.warn(
        `Warning: backup export failed (${response.status}): ${errorText}`,
      );
      return null;
    }

    const bundle = new Uint8Array(await response.arrayBuffer());

    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    const fileName = `${options?.prefix ?? assistantId}-${stamp}.vbundle`;
    const outputPath = join(getBackupsDir(), fileName);

    mkdirSync(dirname(outputPath), { recursive: true });
    writeFileSync(outputPath, bundle);

    return outputPath;
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    console.warn(`Warning: backup failed: ${reason}`);
    return null;
  }
}
|
|
114
|
+
|
|
115
|
+
/**
 * Upload a `.vbundle` backup into a running assistant.
 *
 * Reads the bundle from `backupPath` and sends it as
 * `POST {runtimeUrl}/v1/migrations/import` with a guardian bearer token. A 401
 * response triggers exactly one retry with a freshly leased token. The restore
 * counts as successful only when the HTTP response is OK and its JSON body has
 * `success` set to a truthy value.
 *
 * @param runtimeUrl - Base URL of the assistant runtime.
 * @param assistantId - Assistant receiving the restore.
 * @param backupPath - Path of the `.vbundle` file to upload.
 * @returns `true` on success, `false` otherwise. Failures are reported via
 *   `console.warn`; this function does not throw.
 */
export async function restoreBackup(
  runtimeUrl: string,
  assistantId: string,
  backupPath: string,
): Promise<boolean> {
  try {
    if (!existsSync(backupPath)) {
      console.warn(`Warning: backup file not found: ${backupPath}`);
      return false;
    }

    const bundleData = readFileSync(backupPath);

    // Each attempt gets its own 120 s abort signal.
    const uploadBundle = (token: string): Promise<Response> =>
      fetch(`${runtimeUrl}/v1/migrations/import`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${token}`,
          "Content-Type": "application/octet-stream",
        },
        body: bundleData,
        signal: AbortSignal.timeout(120_000),
      });

    const cachedToken = await getGuardianAccessToken(runtimeUrl, assistantId);
    let response = await uploadBundle(cachedToken);

    // Retry once with a fresh token on 401 — the cached token may be stale
    // after a container restart that generated a new gateway signing key.
    if (response.status === 401) {
      const freshToken = await getGuardianAccessToken(
        runtimeUrl,
        assistantId,
        true,
      );
      response = await uploadBundle(freshToken);
    }

    if (!response.ok) {
      const errorText = await response.text();
      console.warn(
        `Warning: restore failed (${response.status}): ${errorText}`,
      );
      return false;
    }

    const outcome = (await response.json()) as {
      success: boolean;
      message?: string;
      reason?: string;
    };
    if (outcome.success) {
      return true;
    }

    console.warn(
      `Warning: restore failed — ${outcome.message ?? outcome.reason ?? "unknown reason"}`,
    );
    return false;
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    console.warn(`Warning: restore failed: ${reason}`);
    return false;
  }
}
|
|
184
|
+
|
|
185
|
+
/**
 * Delete all but the newest pre-upgrade backups of one assistant.
 *
 * Candidates are files in the backups directory named
 * `<assistantId>-pre-upgrade-*.vbundle`. They are sorted by name and the first
 * `length - keep` entries are deleted; the timestamp in the filename makes
 * name order match creation order.
 *
 * @param assistantId - Assistant whose pre-upgrade backups are pruned.
 * @param keep - Number of most recent backups to retain (default 3).
 *   Errors are swallowed: this cleanup is best-effort and never throws.
 */
export function pruneOldBackups(assistantId: string, keep: number = 3): void {
  try {
    const backupsDir = getBackupsDir();
    if (!existsSync(backupsDir)) {
      return;
    }

    const namePrefix = `${assistantId}-pre-upgrade-`;
    const candidates: string[] = [];
    for (const fileName of readdirSync(backupsDir)) {
      if (fileName.startsWith(namePrefix) && fileName.endsWith(".vbundle")) {
        candidates.push(fileName);
      }
    }
    candidates.sort();

    if (candidates.length <= keep) {
      return;
    }

    const stale = candidates.slice(0, candidates.length - keep);
    stale.forEach((fileName) => {
      try {
        unlinkSync(join(backupsDir, fileName));
      } catch {
        // Best-effort cleanup — ignore individual file errors
      }
    });
  } catch {
    // Best-effort cleanup — never block the upgrade
  }
}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured CLI error reporting for upgrade/rollback commands.
|
|
3
|
+
*
|
|
4
|
+
* When a CLI command fails, it can emit a machine-readable JSON object
|
|
5
|
+
* prefixed with `CLI_ERROR:` to stderr so that consumers (e.g. the
|
|
6
|
+
* desktop app) can parse it reliably. Modeled after the DAEMON_ERROR
|
|
7
|
+
* protocol in `assistant/src/daemon/startup-error.ts`.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * Machine-readable failure categories that CLI commands can report.
 * `"UNKNOWN"` is the catch-all used when no more specific category applies.
 */
export type CliErrorCategory =
  | "DOCKER_NOT_RUNNING"
  | "IMAGE_PULL_FAILED"
  | "READINESS_TIMEOUT"
  | "ROLLBACK_FAILED"
  | "ROLLBACK_NO_STATE"
  | "AUTH_FAILED"
  | "NETWORK_ERROR"
  | "UNSUPPORTED_TOPOLOGY"
  | "ASSISTANT_NOT_FOUND"
  | "PLATFORM_API_ERROR"
  | "UNKNOWN";
|
|
23
|
+
|
|
24
|
+
/** Shape of the JSON object serialized after the `CLI_ERROR:` prefix. */
interface CliErrorPayload {
  /** Category of the failure. */
  error: CliErrorCategory;
  /** Message describing the failure. */
  message: string;
  /** Optional additional detail; omitted from the JSON when undefined. */
  detail?: string;
}
|
|
29
|
+
|
|
30
|
+
/** Marker placed directly before the JSON payload on each structured stderr line. */
const CLI_ERROR_PREFIX = "CLI_ERROR:";
|
|
31
|
+
|
|
32
|
+
/**
 * Emit one structured error line on stderr.
 *
 * The line consists of the `CLI_ERROR:` prefix immediately followed by a JSON
 * object with `error`, `message` and (when provided) `detail`, terminated by a
 * newline. The prefix lets consumers pick the line out of other stderr output.
 *
 * @param category - Failure category stored in the payload's `error` field.
 * @param message - Message describing the failure.
 * @param detail - Optional additional detail.
 */
export function emitCliError(
  category: CliErrorCategory,
  message: string,
  detail?: string,
): void {
  const report: CliErrorPayload = { error: category, message, detail };
  const serialized = JSON.stringify(report);
  process.stderr.write(`${CLI_ERROR_PREFIX}${serialized}\n`);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Inspect an error and return the most appropriate
 * {@link CliErrorCategory} for common upgrade/rollback failures.
 *
 * The error's own string form is checked first, so any value that was
 * categorized from `String(err)` alone keeps its category. Only when that
 * yields `"UNKNOWN"` are further sources consulted, in order: the error's
 * string-valued `message`, `code` and `stderr` properties, then the same
 * sequence for each error in its `cause` chain. This covers failures whose
 * top-level text is generic (for example Node's `TypeError: fetch failed`,
 * where the real reason such as `ECONNREFUSED` lives on `cause`) and plain
 * objects, which stringify to `[object Object]`.
 *
 * @param err - Any thrown value.
 * @returns The first non-`"UNKNOWN"` category found, or `"UNKNOWN"`.
 */
export function categorizeUpgradeError(err: unknown): CliErrorCategory {
  for (const text of collectErrorTexts(err)) {
    const category = categorizeErrorText(text);
    if (category !== "UNKNOWN") {
      return category;
    }
  }
  return "UNKNOWN";
}

/**
 * List the texts worth matching for an error, most authoritative first:
 * its string form, its string `message`/`code`/`stderr` fields, then the same
 * for each nested `cause`. The walk is depth-limited so cyclic chains end.
 */
function collectErrorTexts(err: unknown): string[] {
  const maxDepth = 5;
  const texts: string[] = [];
  let current: unknown = err;

  for (let depth = 0; depth < maxDepth; depth++) {
    if (depth === 0) {
      texts.push(String(current));
    } else {
      try {
        texts.push(String(current));
      } catch {
        // A nested cause that cannot be stringified is skipped rather than
        // turning categorization itself into a failure.
      }
    }

    if (typeof current !== "object" || current === null) {
      break;
    }

    const fields = current as Record<string, unknown>;
    for (const key of ["message", "code", "stderr"]) {
      const value = fields[key];
      if (typeof value === "string") {
        texts.push(value);
      }
    }

    if (fields.cause == null) {
      break;
    }
    current = fields.cause;
  }

  return texts;
}

/** Map one piece of error text to a category using case-insensitive substring checks. */
function categorizeErrorText(text: string): CliErrorCategory {
  const msg = text.toLowerCase();

  if (
    msg.includes("cannot connect to the docker") ||
    msg.includes("is docker running")
  ) {
    return "DOCKER_NOT_RUNNING";
  }

  if (
    msg.includes("manifest unknown") ||
    msg.includes("manifest not found") ||
    msg.includes("pull access denied") ||
    msg.includes("repository does not exist")
  ) {
    return "IMAGE_PULL_FAILED";
  }

  if (msg.includes("timeout") || msg.includes("readyz")) {
    return "READINESS_TIMEOUT";
  }

  if (
    msg.includes("401") ||
    msg.includes("403") ||
    msg.includes("unauthorized")
  ) {
    return "AUTH_FAILED";
  }

  if (
    msg.includes("enotfound") ||
    msg.includes("econnrefused") ||
    msg.includes("network")
  ) {
    return "NETWORK_ERROR";
  }

  return "UNKNOWN";
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { writeFileSync } from "fs";
|
|
2
|
+
import { tmpdir } from "os";
|
|
3
|
+
import { join } from "path";
|
|
4
|
+
|
|
5
|
+
/**
 * Convert flat dot-notation key=value pairs into a nested config object.
 *
 * e.g. {"services.inference.provider": "anthropic", "services.inference.model": "claude-opus-4-6"}
 *   → {services: {inference: {provider: "anthropic", model: "claude-opus-4-6"}}}
 *
 * An intermediate segment that is missing, or whose current value is not a
 * plain container (null, a scalar, or an array), is replaced with a fresh
 * object, so a later key such as `a.b` overrides an earlier scalar `a`.
 *
 * Keys containing a `__proto__` segment are skipped entirely. Without that
 * guard, walking into `__proto__` would reach `Object.prototype` and the
 * final assignment would add a property to every object in the process.
 *
 * @param configValues - Flat map of dot-notation keys to string values.
 * @returns A newly built nested object; `configValues` is not modified.
 */
export function buildNestedConfig(
  configValues: Record<string, string>,
): Record<string, unknown> {
  const config: Record<string, unknown> = {};

  for (const [dotKey, value] of Object.entries(configValues)) {
    const parts = dotKey.split(".");
    if (parts.includes("__proto__")) {
      continue;
    }

    // split() always yields at least one segment; the check only narrows the type.
    const leaf = parts.pop();
    if (leaf === undefined) {
      continue;
    }

    let target = config;
    for (const part of parts) {
      const existing = target[part];
      if (
        existing == null ||
        typeof existing !== "object" ||
        Array.isArray(existing)
      ) {
        const created: Record<string, unknown> = {};
        target[part] = created;
        target = created;
      } else {
        target = existing as Record<string, unknown>;
      }
    }
    target[leaf] = value;
  }

  return config;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Serialize initial config values to a temporary JSON file.
 *
 * The flat dot-notation keys are expanded into a nested object, e.g.
 *   "services.inference.provider" → {services: {inference: {provider: ...}}}
 *   "services.inference.model"    → {services: {inference: {model: ...}}}
 * and written, pretty-printed with a trailing newline, to
 * `vellum-default-workspace-config-<pid>-<timestamp>.json` in the OS temp
 * directory. The caller hands the returned path to the daemon through the
 * VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH env var so the daemon can merge the
 * values into its workspace config on first boot.
 *
 * @param configValues - Flat map of dot-notation keys to string values.
 * @returns The path of the written file, or `undefined` when `configValues`
 *   has no keys (nothing is written in that case).
 */
export function writeInitialConfig(
  configValues: Record<string, string>,
): string | undefined {
  const hasValues = Object.keys(configValues).length > 0;
  if (!hasValues) {
    return undefined;
  }

  const nested = buildNestedConfig(configValues);
  const fileName = `vellum-default-workspace-config-${process.pid}-${Date.now()}.json`;
  const tempPath = join(tmpdir(), fileName);
  const serialized = `${JSON.stringify(nested, null, 2)}\n`;
  writeFileSync(tempPath, serialized);
  return tempPath;
}
|
package/src/lib/docker.ts
CHANGED
|
@@ -12,11 +12,13 @@ import {
|
|
|
12
12
|
setActiveAssistant,
|
|
13
13
|
} from "./assistant-config";
|
|
14
14
|
import type { AssistantEntry } from "./assistant-config";
|
|
15
|
+
import { writeInitialConfig } from "./config-utils";
|
|
15
16
|
import { DEFAULT_GATEWAY_PORT, PROVIDER_ENV_VAR_NAMES } from "./constants";
|
|
16
17
|
import type { Species } from "./constants";
|
|
17
18
|
import { leaseGuardianToken, saveBootstrapSecret } from "./guardian-token";
|
|
18
19
|
import { isVellumProcess, stopProcess } from "./process";
|
|
19
20
|
import { generateInstanceName } from "./random-name";
|
|
21
|
+
import { resolveImageRefs } from "./platform-releases.js";
|
|
20
22
|
import { exec, execOutput } from "./step-runner";
|
|
21
23
|
import {
|
|
22
24
|
closeLogFile,
|
|
@@ -464,16 +466,19 @@ async function buildAllImages(
|
|
|
464
466
|
* can be restarted independently.
|
|
465
467
|
*/
|
|
466
468
|
export function serviceDockerRunArgs(opts: {
|
|
469
|
+
signingKey?: string;
|
|
467
470
|
bootstrapSecret?: string;
|
|
468
471
|
cesServiceToken?: string;
|
|
469
472
|
extraAssistantEnv?: Record<string, string>;
|
|
470
473
|
gatewayPort: number;
|
|
471
474
|
imageTags: Record<ServiceName, string>;
|
|
475
|
+
defaultWorkspaceConfigPath?: string;
|
|
472
476
|
instanceName: string;
|
|
473
477
|
res: ReturnType<typeof dockerResourceNames>;
|
|
474
478
|
}): Record<ServiceName, () => string[]> {
|
|
475
479
|
const {
|
|
476
480
|
cesServiceToken,
|
|
481
|
+
defaultWorkspaceConfigPath,
|
|
477
482
|
extraAssistantEnv,
|
|
478
483
|
gatewayPort,
|
|
479
484
|
imageTags,
|
|
@@ -496,6 +501,8 @@ export function serviceDockerRunArgs(opts: {
|
|
|
496
501
|
"-e",
|
|
497
502
|
`VELLUM_ASSISTANT_NAME=${instanceName}`,
|
|
498
503
|
"-e",
|
|
504
|
+
"VELLUM_CLOUD=docker",
|
|
505
|
+
"-e",
|
|
499
506
|
"RUNTIME_HTTP_HOST=0.0.0.0",
|
|
500
507
|
"-e",
|
|
501
508
|
"WORKSPACE_DIR=/workspace",
|
|
@@ -504,9 +511,21 @@ export function serviceDockerRunArgs(opts: {
|
|
|
504
511
|
"-e",
|
|
505
512
|
`GATEWAY_INTERNAL_URL=http://${res.gatewayContainer}:${GATEWAY_INTERNAL_PORT}`,
|
|
506
513
|
];
|
|
514
|
+
if (defaultWorkspaceConfigPath) {
|
|
515
|
+
const containerPath = `/tmp/vellum-default-workspace-config-${Date.now()}.json`;
|
|
516
|
+
args.push(
|
|
517
|
+
"-v",
|
|
518
|
+
`${defaultWorkspaceConfigPath}:${containerPath}:ro`,
|
|
519
|
+
"-e",
|
|
520
|
+
`VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH=${containerPath}`,
|
|
521
|
+
);
|
|
522
|
+
}
|
|
507
523
|
if (cesServiceToken) {
|
|
508
524
|
args.push("-e", `CES_SERVICE_TOKEN=${cesServiceToken}`);
|
|
509
525
|
}
|
|
526
|
+
if (opts.signingKey) {
|
|
527
|
+
args.push("-e", `ACTOR_TOKEN_SIGNING_KEY=${opts.signingKey}`);
|
|
528
|
+
}
|
|
510
529
|
for (const envVar of [
|
|
511
530
|
...Object.values(PROVIDER_ENV_VAR_NAMES),
|
|
512
531
|
"VELLUM_PLATFORM_URL",
|
|
@@ -553,6 +572,9 @@ export function serviceDockerRunArgs(opts: {
|
|
|
553
572
|
...(cesServiceToken
|
|
554
573
|
? ["-e", `CES_SERVICE_TOKEN=${cesServiceToken}`]
|
|
555
574
|
: []),
|
|
575
|
+
...(opts.signingKey
|
|
576
|
+
? ["-e", `ACTOR_TOKEN_SIGNING_KEY=${opts.signingKey}`]
|
|
577
|
+
: []),
|
|
556
578
|
...(opts.bootstrapSecret
|
|
557
579
|
? ["-e", `GUARDIAN_BOOTSTRAP_SECRET=${opts.bootstrapSecret}`]
|
|
558
580
|
: []),
|
|
@@ -739,11 +761,13 @@ export const SERVICE_START_ORDER: ServiceName[] = [
|
|
|
739
761
|
/** Start all three containers in dependency order. */
|
|
740
762
|
export async function startContainers(
|
|
741
763
|
opts: {
|
|
764
|
+
signingKey?: string;
|
|
742
765
|
bootstrapSecret?: string;
|
|
743
766
|
cesServiceToken?: string;
|
|
744
767
|
extraAssistantEnv?: Record<string, string>;
|
|
745
768
|
gatewayPort: number;
|
|
746
769
|
imageTags: Record<ServiceName, string>;
|
|
770
|
+
defaultWorkspaceConfigPath?: string;
|
|
747
771
|
instanceName: string;
|
|
748
772
|
res: ReturnType<typeof dockerResourceNames>;
|
|
749
773
|
},
|
|
@@ -765,26 +789,6 @@ export async function stopContainers(
|
|
|
765
789
|
await removeContainer(res.assistantContainer);
|
|
766
790
|
}
|
|
767
791
|
|
|
768
|
-
/**
|
|
769
|
-
* Remove the signing-key-bootstrap lockfile from the gateway security volume.
|
|
770
|
-
* This allows the daemon to re-fetch the signing key from the gateway on the
|
|
771
|
-
* next startup — necessary during upgrades where the gateway may generate a
|
|
772
|
-
* new key.
|
|
773
|
-
*/
|
|
774
|
-
export async function clearSigningKeyBootstrapLock(
|
|
775
|
-
res: ReturnType<typeof dockerResourceNames>,
|
|
776
|
-
): Promise<void> {
|
|
777
|
-
await exec("docker", [
|
|
778
|
-
"run",
|
|
779
|
-
"--rm",
|
|
780
|
-
"-v",
|
|
781
|
-
`${res.gatewaySecurityVolume}:/gateway-security`,
|
|
782
|
-
"busybox",
|
|
783
|
-
"rm",
|
|
784
|
-
"-f",
|
|
785
|
-
"/gateway-security/signing-key-bootstrap.lock",
|
|
786
|
-
]);
|
|
787
|
-
}
|
|
788
792
|
|
|
789
793
|
/** Stop containers without removing them (preserves state for `docker start`). */
|
|
790
794
|
export async function sleepContainers(
|
|
@@ -1028,6 +1032,7 @@ export async function hatchDocker(
|
|
|
1028
1032
|
detached: boolean,
|
|
1029
1033
|
name: string | null,
|
|
1030
1034
|
watch: boolean = false,
|
|
1035
|
+
configValues: Record<string, string> = {},
|
|
1031
1036
|
): Promise<void> {
|
|
1032
1037
|
resetLogFile("hatch.log");
|
|
1033
1038
|
|
|
@@ -1074,14 +1079,16 @@ export async function hatchDocker(
|
|
|
1074
1079
|
} else {
|
|
1075
1080
|
const version = cliPkg.version;
|
|
1076
1081
|
const versionTag = version ? `v${version}` : "latest";
|
|
1077
|
-
|
|
1078
|
-
|
|
1082
|
+
log("🔍 Resolving image references...");
|
|
1083
|
+
const resolved = await resolveImageRefs(versionTag, log);
|
|
1084
|
+
imageTags.assistant = resolved.imageTags.assistant;
|
|
1085
|
+
imageTags.gateway = resolved.imageTags.gateway;
|
|
1079
1086
|
imageTags["credential-executor"] =
|
|
1080
|
-
|
|
1087
|
+
resolved.imageTags["credential-executor"];
|
|
1081
1088
|
|
|
1082
1089
|
log(`🥚 Hatching Docker assistant: ${instanceName}`);
|
|
1083
1090
|
log(` Species: ${species}`);
|
|
1084
|
-
log(` Images:`);
|
|
1091
|
+
log(` Images (${resolved.source}):`);
|
|
1085
1092
|
log(` assistant: ${imageTags.assistant}`);
|
|
1086
1093
|
log(` gateway: ${imageTags.gateway}`);
|
|
1087
1094
|
log(` credential-executor: ${imageTags["credential-executor"]}`);
|
|
@@ -1115,24 +1122,25 @@ export async function hatchDocker(
|
|
|
1115
1122
|
"/workspace",
|
|
1116
1123
|
]);
|
|
1117
1124
|
|
|
1118
|
-
//
|
|
1119
|
-
//
|
|
1120
|
-
|
|
1121
|
-
"run",
|
|
1122
|
-
"--rm",
|
|
1123
|
-
"-v",
|
|
1124
|
-
`${res.gatewaySecurityVolume}:/gateway-security`,
|
|
1125
|
-
"busybox",
|
|
1126
|
-
"rm",
|
|
1127
|
-
"-f",
|
|
1128
|
-
"/gateway-security/signing-key-bootstrap.lock",
|
|
1129
|
-
]);
|
|
1125
|
+
// Write --config key=value pairs to a temp file that gets bind-mounted
|
|
1126
|
+
// into the assistant container and read via VELLUM_DEFAULT_WORKSPACE_CONFIG_PATH.
|
|
1127
|
+
const defaultWorkspaceConfigPath = writeInitialConfig(configValues);
|
|
1130
1128
|
|
|
1131
1129
|
const cesServiceToken = randomBytes(32).toString("hex");
|
|
1130
|
+
const signingKey = randomBytes(32).toString("hex");
|
|
1132
1131
|
const bootstrapSecret = randomBytes(32).toString("hex");
|
|
1133
1132
|
saveBootstrapSecret(instanceName, bootstrapSecret);
|
|
1134
1133
|
await startContainers(
|
|
1135
|
-
{
|
|
1134
|
+
{
|
|
1135
|
+
signingKey,
|
|
1136
|
+
bootstrapSecret,
|
|
1137
|
+
cesServiceToken,
|
|
1138
|
+
gatewayPort,
|
|
1139
|
+
imageTags,
|
|
1140
|
+
defaultWorkspaceConfigPath,
|
|
1141
|
+
instanceName,
|
|
1142
|
+
res,
|
|
1143
|
+
},
|
|
1136
1144
|
log,
|
|
1137
1145
|
);
|
|
1138
1146
|
|
package/src/lib/doctor-client.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const DOCTOR_URL =
|
|
1
|
+
const DOCTOR_URL = process.env.DOCTOR_SERVICE_URL?.trim() || "";
|
|
2
2
|
|
|
3
3
|
export type ProgressPhase =
|
|
4
4
|
| "invoking_prompt"
|
|
@@ -107,6 +107,16 @@ async function callDoctorDaemon(
|
|
|
107
107
|
chatContext?: ChatLogEntry[],
|
|
108
108
|
onLog?: DoctorLogCallback,
|
|
109
109
|
): Promise<DoctorResult> {
|
|
110
|
+
if (!DOCTOR_URL) {
|
|
111
|
+
onLog?.("Doctor service not configured (DOCTOR_SERVICE_URL is not set)");
|
|
112
|
+
return {
|
|
113
|
+
assistantId,
|
|
114
|
+
diagnostics: null,
|
|
115
|
+
recommendation: null,
|
|
116
|
+
error: "Doctor service not configured",
|
|
117
|
+
};
|
|
118
|
+
}
|
|
119
|
+
|
|
110
120
|
const MAX_RETRIES = 2;
|
|
111
121
|
let lastError: unknown;
|
|
112
122
|
|
package/src/lib/gcp.ts
CHANGED
|
@@ -11,6 +11,7 @@ import {
|
|
|
11
11
|
} from "./constants";
|
|
12
12
|
import type { Species } from "./constants";
|
|
13
13
|
import { leaseGuardianToken } from "./guardian-token";
|
|
14
|
+
import { getPlatformUrl } from "./platform-client";
|
|
14
15
|
import { generateInstanceName } from "./random-name";
|
|
15
16
|
import { exec, execOutput } from "./step-runner";
|
|
16
17
|
|
|
@@ -455,6 +456,7 @@ export async function hatchGcp(
|
|
|
455
456
|
providerApiKeys: Record<string, string>,
|
|
456
457
|
instanceName: string,
|
|
457
458
|
cloud: "gcp",
|
|
459
|
+
configValues?: Record<string, string>,
|
|
458
460
|
) => Promise<string>,
|
|
459
461
|
watchHatching: (
|
|
460
462
|
pollFn: () => Promise<PollResult>,
|
|
@@ -462,6 +464,7 @@ export async function hatchGcp(
|
|
|
462
464
|
startTime: number,
|
|
463
465
|
species: Species,
|
|
464
466
|
) => Promise<WatchHatchingResult>,
|
|
467
|
+
configValues: Record<string, string> = {},
|
|
465
468
|
): Promise<void> {
|
|
466
469
|
const startTime = Date.now();
|
|
467
470
|
const account = process.env.GCP_ACCOUNT_EMAIL;
|
|
@@ -525,6 +528,7 @@ export async function hatchGcp(
|
|
|
525
528
|
providerApiKeys,
|
|
526
529
|
instanceName,
|
|
527
530
|
"gcp",
|
|
531
|
+
configValues,
|
|
528
532
|
);
|
|
529
533
|
const startupScriptPath = join(tmpdir(), `${instanceName}-startup.sh`);
|
|
530
534
|
writeFileSync(startupScriptPath, startupScript);
|
|
@@ -640,7 +644,7 @@ export async function hatchGcp(
|
|
|
640
644
|
species === "vellum" &&
|
|
641
645
|
(await checkCurlFailure(instanceName, project, zone, account))
|
|
642
646
|
) {
|
|
643
|
-
const installScriptUrl = `${
|
|
647
|
+
const installScriptUrl = `${getPlatformUrl()}/install.sh`;
|
|
644
648
|
console.log(
|
|
645
649
|
`\ud83d\udd04 Detected install script curl failure for ${installScriptUrl}, attempting recovery...`,
|
|
646
650
|
);
|