@vellumai/cli 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/gcp.ts CHANGED
@@ -1,7 +1,36 @@
1
1
  import { spawn } from "child_process";
2
+ import { randomBytes } from "crypto";
3
+ import { existsSync, mkdtempSync, rmSync, unlinkSync, writeFileSync } from "fs";
4
+ import { tmpdir, userInfo } from "os";
5
+ import { join } from "path";
2
6
 
7
+ import { saveAssistantEntry } from "./assistant-config";
8
+ import type { AssistantEntry } from "./assistant-config";
9
+ import { FIREWALL_TAG, GATEWAY_PORT } from "./constants";
10
+ import type { Species } from "./constants";
11
+ import { generateRandomSuffix } from "./random-name";
3
12
  import { exec, execOutput } from "./step-runner";
4
13
 
14
/**
 * Activates a GCP service account for subsequent `gcloud` invocations.
 *
 * Reads `GCP_ACCOUNT_EMAIL` and `GOOGLE_APPLICATION_CREDENTIALS`; when either
 * is missing nothing is changed and `null` is returned. Otherwise a fresh
 * temporary directory is created, `CLOUDSDK_CONFIG` is pointed at it, and
 * `gcloud auth activate-service-account` is run with the key file.
 *
 * @returns A cleanup callback that restores the previous `CLOUDSDK_CONFIG`
 *   value and removes the temporary directory, or `null` when no service
 *   account is configured. The callback is safe to call more than once.
 * @throws Whatever `exec` throws when activation fails; the temporary
 *   directory and the environment are already cleaned up in that case.
 */
export async function activateServiceAccount(): Promise<(() => void) | null> {
  const account = process.env.GCP_ACCOUNT_EMAIL;
  const keyFile = process.env.GOOGLE_APPLICATION_CREDENTIALS;
  if (!account || !keyFile) return null;

  // Remember any pre-existing setting so cleanup restores it instead of
  // silently dropping a value the user had configured.
  const previousConfigDir = process.env.CLOUDSDK_CONFIG;
  const gcpConfigDir = mkdtempSync(join(tmpdir(), "vellum-gcloud-"));

  let cleanedUp = false;
  const cleanup = (): void => {
    if (cleanedUp) return;
    cleanedUp = true;
    if (previousConfigDir === undefined) {
      delete process.env.CLOUDSDK_CONFIG;
    } else {
      process.env.CLOUDSDK_CONFIG = previousConfigDir;
    }
    // Best effort: a leftover temp directory must not mask the caller's result.
    try {
      rmSync(gcpConfigDir, { recursive: true, force: true });
    } catch {}
  };

  process.env.CLOUDSDK_CONFIG = gcpConfigDir;
  try {
    await exec("gcloud", [
      "auth",
      "activate-service-account",
      account,
      `--key-file=${keyFile}`,
    ]);
  } catch (error) {
    // Activation failed: the caller never receives the cleanup callback, so
    // undo the environment change and remove the directory here.
    cleanup();
    throw error;
  }

  return cleanup;
}
33
+
5
34
  export async function getActiveProject(): Promise<string> {
6
35
  const output = await execOutput("gcloud", [
7
36
  "config",
@@ -248,8 +277,12 @@ export async function instanceExists(
248
277
  if (account) args.push(`--account=${account}`);
249
278
  await execOutput("gcloud", args);
250
279
  return true;
251
- } catch {
252
- return false;
280
+ } catch (error) {
281
+ const msg = error instanceof Error ? error.message.toLowerCase() : "";
282
+ if (msg.includes("was not found") || msg.includes("could not fetch resource")) {
283
+ return false;
284
+ }
285
+ throw error;
253
286
  }
254
287
  }
255
288
 
@@ -313,13 +346,407 @@ export async function fetchAndDisplayStartupLogs(
313
346
  }
314
347
  }
315
348
 
349
/**
 * Reports whether the `gcloud` CLI can be invoked.
 *
 * @returns `true` when `gcloud --version` completes, `false` when it throws.
 */
async function checkGcloudAvailable(): Promise<boolean> {
  let available = true;
  try {
    await execOutput("gcloud", ["--version"]);
  } catch {
    available = false;
  }
  return available;
}
357
+
358
/**
 * Snapshot returned by a single poll of an instance that is still starting up.
 */
export interface PollResult {
  /** Most recent progress line reported by the instance, or `null` when none is available. */
  lastLine: string | null;
  /** `true` once the startup work is no longer running. */
  done: boolean;
  /** `true` when the startup work reported a failure. */
  failed: boolean;
  /** Error text collected from the instance; empty when there is none. */
  errorContent: string;
}
364
+
365
/**
 * Final outcome reported by the `watchHatching` callback.
 */
export interface WatchHatchingResult {
  /** `true` when hatching finished without a failure. */
  success: boolean;
  /** Error text to show the user on failure; empty when there is none. */
  errorContent: string;
}
369
+
370
/** Location on the instance where the install script is expected / uploaded. */
const INSTALL_SCRIPT_REMOTE_PATH = "/tmp/vellum-install.sh";

/** Compute Engine machine type used for new instances (4 vCPUs, 16 GB memory). */
const MACHINE_TYPE = "e2-standard-4";

/** Firewall rule admitting inbound TCP traffic on the gateway port. */
const GATEWAY_INGRESS_RULE: FirewallRuleSpec = {
  name: "allow-vellum-assistant-gateway",
  direction: "INGRESS",
  action: "ALLOW",
  rules: `tcp:${GATEWAY_PORT}`,
  sourceRanges: "0.0.0.0/0",
  targetTags: FIREWALL_TAG,
  description: `Allow gateway ingress on port ${GATEWAY_PORT} for vellum-assistant instances`,
};

/** Firewall rule permitting all outbound traffic. */
const ALL_EGRESS_RULE: FirewallRuleSpec = {
  name: "allow-vellum-assistant-egress",
  direction: "EGRESS",
  action: "ALLOW",
  rules: "all",
  destinationRanges: "0.0.0.0/0",
  targetTags: FIREWALL_TAG,
  description: "Allow all egress traffic for vellum-assistant instances",
};

/** Firewall rules passed to `syncFirewallRules` for instances tagged with `FIREWALL_TAG`. */
const DESIRED_FIREWALL_RULES: FirewallRuleSpec[] = [GATEWAY_INGRESS_RULE, ALL_EGRESS_RULE];
393
+
394
/**
 * Locates the bundled `install.sh` relative to this module
 * (`../adapters/install.sh`).
 *
 * @returns The script path when the file exists; otherwise logs a warning and
 *   returns `null`.
 */
async function resolveInstallScriptPath(): Promise<string | null> {
  const candidate = join(import.meta.dir, "..", "adapters", "install.sh");
  if (!existsSync(candidate)) {
    console.warn("\u26a0\ufe0f Install script not found at", candidate, "(expected in compiled binary)");
    return null;
  }
  return candidate;
}
402
+
403
/**
 * Polls an instance once over `gcloud compute ssh` for startup progress.
 *
 * The remote command prints three sections separated by marker lines: the
 * last line of `/var/log/startup-script.log`, the `systemctl is-active` state
 * of `google-startup-scripts.service`, and the contents of
 * `/var/log/startup-error`.
 *
 * @param instanceName Instance to connect to.
 * @param project GCP project passed as `--project`.
 * @param zone GCP zone passed as `--zone`.
 * @param account Optional account passed as `--account`.
 * @returns The parsed poll result. Any error (for example SSH not reachable
 *   yet) yields an "in progress" result with no last line rather than throwing.
 */
async function pollInstance(
  instanceName: string,
  project: string,
  zone: string,
  account?: string,
): Promise<PollResult> {
  const SEP_MARKER = "===HATCH_SEP===";
  const ERR_MARKER = "===HATCH_ERR===";

  try {
    const remoteCmd = [
      "L=$(tail -1 /var/log/startup-script.log 2>/dev/null || true); ",
      "S=$(systemctl is-active google-startup-scripts.service 2>/dev/null || true); ",
      "E=$(cat /var/log/startup-error 2>/dev/null || true); ",
      'printf "%s\\n===HATCH_SEP===\\n%s\\n===HATCH_ERR===\\n%s" "$L" "$S" "$E"',
    ].join("");

    const sshOptions = [
      "StrictHostKeyChecking=no",
      "UserKnownHostsFile=/dev/null",
      "ConnectTimeout=10",
      "LogLevel=ERROR",
    ];
    const gcloudArgs = [
      "compute",
      "ssh",
      instanceName,
      `--project=${project}`,
      `--zone=${zone}`,
      "--quiet",
      ...sshOptions.map((option) => `--ssh-flag=-o ${option}`),
      `--command=${remoteCmd}`,
    ];
    if (account) {
      gcloudArgs.push(`--account=${account}`);
    }

    const raw = await execOutput("gcloud", gcloudArgs);

    const sepAt = raw.indexOf(SEP_MARKER);
    if (sepAt === -1) {
      // No marker at all: treat whatever came back as a progress line.
      return { lastLine: raw.trim() || null, done: false, failed: false, errorContent: "" };
    }

    const errAt = raw.indexOf(ERR_MARKER);
    const hasErrSection = errAt !== -1;

    const lastLine = raw.substring(0, sepAt).trim() || null;
    const status = raw
      .substring(sepAt + SEP_MARKER.length, hasErrSection ? errAt : undefined)
      .trim();
    const errorContent = hasErrSection
      ? raw.substring(errAt + ERR_MARKER.length).trim()
      : "";

    const stillRunning = status === "active" || status === "activating";
    return {
      lastLine,
      done: lastLine !== null && !stillRunning,
      failed: status === "failed" || errorContent.length > 0,
      errorContent,
    };
  } catch {
    return { lastLine: null, done: false, failed: false, errorContent: "" };
  }
}
447
+
448
/**
 * Checks over `gcloud compute ssh` whether the install script is missing or
 * empty on the instance (`test -s` on the remote path).
 *
 * @param instanceName Instance to connect to.
 * @param project GCP project passed as `--project`.
 * @param zone GCP zone passed as `--zone`.
 * @param account Optional account passed as `--account`.
 * @returns `true` only when the remote command prints `MISSING`; `false` when
 *   the file exists or when the SSH command fails.
 */
async function checkCurlFailure(
  instanceName: string,
  project: string,
  zone: string,
  account?: string,
): Promise<boolean> {
  try {
    const probe = `test -s ${INSTALL_SCRIPT_REMOTE_PATH} && echo EXISTS || echo MISSING`;
    const sshOptions = [
      "StrictHostKeyChecking=no",
      "UserKnownHostsFile=/dev/null",
      "ConnectTimeout=10",
      "LogLevel=ERROR",
    ];
    const gcloudArgs = [
      "compute",
      "ssh",
      instanceName,
      `--project=${project}`,
      `--zone=${zone}`,
      "--quiet",
      ...sshOptions.map((option) => `--ssh-flag=-o ${option}`),
      `--command=${probe}`,
    ];
    if (account) {
      gcloudArgs.push(`--account=${account}`);
    }

    const stdout = await execOutput("gcloud", gcloudArgs);
    return stdout.trim() === "MISSING";
  } catch {
    return false;
  }
}
475
+
476
/**
 * Recovers from a failed install-script download by uploading the bundled
 * script with `gcloud compute scp` and then sourcing it over
 * `gcloud compute ssh`.
 *
 * Both steps address the instance as `sshUser@instanceName` so the upload and
 * the execution happen as the same remote user.
 *
 * @param instanceName Instance to repair.
 * @param project GCP project passed as `--project`.
 * @param zone GCP zone passed as `--zone`.
 * @param sshUser Remote user for both the upload and the remote command.
 * @param account Optional account passed as `--account`.
 * @throws Whatever `exec` throws when the upload or the remote command fails.
 */
async function recoverFromCurlFailure(
  instanceName: string,
  project: string,
  zone: string,
  sshUser: string,
  account?: string,
): Promise<void> {
  const installScriptPath = await resolveInstallScriptPath();
  if (!installScriptPath) {
    console.warn("\u26a0\ufe0f Skipping install script upload (not available in compiled binary)");
    return;
  }

  const remoteHost = `${sshUser}@${instanceName}`;
  const locationArgs = [`--zone=${zone}`, `--project=${project}`];
  const accountArgs = account ? [`--account=${account}`] : [];

  console.log("\ud83d\udccb Uploading install script to instance...");
  await exec("gcloud", [
    "compute",
    "scp",
    installScriptPath,
    // Same USER@INSTANCE form as the ssh step below, so both run as sshUser.
    `${remoteHost}:${INSTALL_SCRIPT_REMOTE_PATH}`,
    ...locationArgs,
    ...accountArgs,
  ]);

  console.log("\ud83d\udd27 Running install script on instance...");
  await exec("gcloud", [
    "compute",
    "ssh",
    remoteHost,
    ...locationArgs,
    `--command=source ${INSTALL_SCRIPT_REMOTE_PATH}`,
    ...accountArgs,
  ]);
}
513
+
514
/**
 * Sentinel thrown inside `hatchGcp` after a failure has already been reported
 * to the user; it only signals "clean up, then exit with status 1".
 */
class HatchGcpAbort extends Error {
  constructor() {
    super("hatchGcp aborted");
    this.name = "HatchGcpAbort";
  }
}

/** Prints the instance summary shown after an instance has been created. */
function printGcpInstanceDetails(
  instanceName: string,
  project: string,
  zone: string,
  externalIp: string | null,
): void {
  console.log("Instance details:");
  console.log(` Name: ${instanceName}`);
  console.log(` Project: ${project}`);
  console.log(` Zone: ${zone}`);
  if (externalIp) {
    console.log(` External IP: ${externalIp}`);
  }
}

/**
 * Creates ("hatches") a new assistant instance on GCP.
 *
 * Steps: optionally activate a service account, resolve project / zone /
 * instance name, build the startup script, create the Compute Engine
 * instance, sync firewall rules, look up the external IP, persist the
 * assistant entry, and — unless `detached` — watch the startup until it
 * finishes, attempting an install-script recovery for the `vellum` species.
 *
 * Failures are reported on the console and end the process with exit status 1.
 * The service-account cleanup always runs before the process exits.
 *
 * @param species Species of the assistant; also used as instance-name prefix and label.
 * @param detached When `true`, return right after creation instead of watching startup.
 * @param name Explicit instance name, or `null` to generate `<species>-<suffix>`.
 * @param buildStartupScript Produces the startup script for the instance.
 * @param watchHatching Drives polling until startup completes and reports the outcome.
 */
export async function hatchGcp(
  species: Species,
  detached: boolean,
  name: string | null,
  buildStartupScript: (
    species: Species,
    bearerToken: string,
    sshUser: string,
    anthropicApiKey: string,
    instanceName: string,
    cloud: "gcp",
  ) => Promise<string>,
  watchHatching: (
    pollFn: () => Promise<PollResult>,
    instanceName: string,
    startTime: number,
    species: Species,
  ) => Promise<WatchHatchingResult>,
): Promise<void> {
  const startTime = Date.now();
  const account = process.env.GCP_ACCOUNT_EMAIL;
  const cleanupServiceAccount = await activateServiceAccount();

  // process.exit() terminates immediately and would skip the `finally` below,
  // so failures are recorded here and the exit happens after cleanup.
  let failed = false;

  try {
    const project = process.env.GCP_PROJECT ?? (await getActiveProject());
    let instanceName = name ? name : `${species}-${generateRandomSuffix()}`;

    console.log(`\ud83e\udd5a Creating new assistant: ${instanceName}`);
    console.log(` Species: ${species}`);
    console.log(` Cloud: GCP`);
    console.log(` Project: ${project}`);
    const zone = process.env.GCP_DEFAULT_ZONE;
    if (!zone) {
      console.error("Error: GCP_DEFAULT_ZONE environment variable is not set.");
      throw new HatchGcpAbort();
    }

    console.log(` Zone: ${zone}`);
    console.log(` Machine type: ${MACHINE_TYPE}`);
    console.log("");

    if (name) {
      // An explicit name must be free; never silently pick a different one.
      if (await instanceExists(name, project, zone, account)) {
        console.error(
          `Error: Instance name '${name}' is already taken. Please choose a different name.`,
        );
        throw new HatchGcpAbort();
      }
    } else {
      while (await instanceExists(instanceName, project, zone, account)) {
        console.log(`\u26a0\ufe0f Instance name ${instanceName} already exists, generating a new name...`);
        instanceName = `${species}-${generateRandomSuffix()}`;
      }
    }

    const sshUser = userInfo().username;
    const bearerToken = randomBytes(32).toString("hex");
    const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
    if (!anthropicApiKey) {
      console.error("Error: ANTHROPIC_API_KEY environment variable is not set.");
      throw new HatchGcpAbort();
    }
    const startupScript = await buildStartupScript(
      species,
      bearerToken,
      sshUser,
      anthropicApiKey,
      instanceName,
      "gcp",
    );

    // The script embeds the API key and bearer token, so it goes into a
    // private mkdtemp directory as an owner-only file rather than a
    // predictable, default-permission path in the shared temp directory.
    const startupScriptDir = mkdtempSync(join(tmpdir(), "vellum-startup-"));
    const startupScriptPath = join(startupScriptDir, `${instanceName}-startup.sh`);

    console.log("\ud83d\udd28 Creating instance with startup script...");
    try {
      writeFileSync(startupScriptPath, startupScript, { mode: 0o600 });
      const createArgs = [
        "compute",
        "instances",
        "create",
        instanceName,
        `--project=${project}`,
        `--zone=${zone}`,
        `--machine-type=${MACHINE_TYPE}`,
        "--image-family=debian-11",
        "--image-project=debian-cloud",
        "--boot-disk-size=50GB",
        "--boot-disk-type=pd-standard",
        `--metadata-from-file=startup-script=${startupScriptPath}`,
        `--labels=species=${species},vellum-assistant=true`,
        // Must match the tag targeted by the firewall rules synced below.
        `--tags=${FIREWALL_TAG}`,
        "--no-service-account",
        "--no-scopes",
      ];
      if (account) createArgs.push(`--account=${account}`);
      await exec("gcloud", createArgs);
    } finally {
      // Remove the secret-bearing file first so it is gone even if removing
      // the directory fails; both steps are best effort.
      try {
        unlinkSync(startupScriptPath);
      } catch {}
      try {
        rmSync(startupScriptDir, { recursive: true, force: true });
      } catch {}
    }

    console.log("\ud83d\udd12 Syncing firewall rules...");
    await syncFirewallRules(DESIRED_FIREWALL_RULES, project, FIREWALL_TAG, account);

    console.log(`\u2705 Instance ${instanceName} created successfully\n`);

    let externalIp: string | null = null;
    try {
      const describeArgs = [
        "compute",
        "instances",
        "describe",
        instanceName,
        `--project=${project}`,
        `--zone=${zone}`,
        "--format=get(networkInterfaces[0].accessConfigs[0].natIP)",
      ];
      if (account) describeArgs.push(`--account=${account}`);
      const ipOutput = await execOutput("gcloud", describeArgs);
      externalIp = ipOutput.trim() || null;
    } catch {
      console.log("\u26a0\ufe0f Could not retrieve external IP yet (instance may still be starting)");
    }

    // Fall back to the instance name as host when no external IP is known yet.
    const runtimeUrl = externalIp
      ? `http://${externalIp}:${GATEWAY_PORT}`
      : `http://${instanceName}:${GATEWAY_PORT}`;
    const gcpEntry: AssistantEntry = {
      assistantId: instanceName,
      runtimeUrl,
      bearerToken,
      cloud: "gcp",
      project,
      zone,
      species,
      sshUser,
      hatchedAt: new Date().toISOString(),
    };
    saveAssistantEntry(gcpEntry);

    if (detached) {
      console.log("\ud83d\ude80 Startup script is running on the instance...");
      console.log("");
      console.log("\u2705 Assistant is hatching!\n");
      printGcpInstanceDetails(instanceName, project, zone, externalIp);
      console.log("");
    } else {
      console.log(" Press Ctrl+C to detach (instance will keep running)");
      console.log("");

      const result = await watchHatching(
        () => pollInstance(instanceName, project, zone, account),
        instanceName,
        startTime,
        species,
      );

      if (!result.success) {
        console.log("");
        if (result.errorContent) {
          console.log("\ud83d\udccb Startup error:");
          console.log(` ${result.errorContent}`);
          console.log("");
        }

        await fetchAndDisplayStartupLogs(instanceName, project, zone, account);

        const canRecover =
          species === "vellum" &&
          (await checkCurlFailure(instanceName, project, zone, account));
        if (!canRecover) {
          throw new HatchGcpAbort();
        }

        const installScriptUrl = `${process.env.VELLUM_ASSISTANT_PLATFORM_URL ?? "https://assistant.vellum.ai"}/install.sh`;
        console.log(`\ud83d\udd04 Detected install script curl failure for ${installScriptUrl}, attempting recovery...`);
        await recoverFromCurlFailure(instanceName, project, zone, sshUser, account);
        console.log("\u2705 Recovery successful!");
      }

      printGcpInstanceDetails(instanceName, project, zone, externalIp);
    }
  } catch (error) {
    failed = true;
    // HatchGcpAbort means the failure was already printed where it occurred.
    if (!(error instanceof HatchGcpAbort)) {
      console.error("\u274c Error:", error instanceof Error ? error.message : error);
    }
  } finally {
    cleanupServiceAccount?.();
  }

  if (failed) {
    process.exit(1);
  }
}
723
+
316
724
  export async function retireInstance(
317
725
  name: string,
318
726
  project: string,
319
727
  zone: string,
320
728
  source?: string,
321
729
  ): Promise<void> {
322
- const exists = await instanceExists(name, project, zone);
730
+ const gcloudOk = await checkGcloudAvailable();
731
+ if (!gcloudOk) {
732
+ throw new Error(
733
+ `Cannot retire GCP instance '${name}': gcloud CLI is not installed or not in PATH. ` +
734
+ `Please install the Google Cloud SDK and try again, or delete the instance manually ` +
735
+ `via the GCP Console (project=${project}, zone=${zone}).`,
736
+ );
737
+ }
738
+
739
+ let exists: boolean;
740
+ try {
741
+ exists = await instanceExists(name, project, zone);
742
+ } catch (error) {
743
+ const detail = error instanceof Error ? error.message : String(error);
744
+ throw new Error(
745
+ `Cannot verify GCP instance '${name}': gcloud authentication failed.\n` +
746
+ `Ensure you are authenticated with 'gcloud auth login' or provide valid credentials.\n\n` +
747
+ `Details: ${detail}`,
748
+ );
749
+ }
323
750
  if (!exists) {
324
751
  console.warn(
325
752
  `\u26a0\ufe0f Instance ${name} not found in GCP (project=${project}, zone=${zone}).`,