@vm0/runner 3.3.1 → 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +256 -199
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -5,7 +5,6 @@ import { program } from "commander";
5
5
 
6
6
  // src/commands/start.ts
7
7
  import { Command } from "commander";
8
- import { writeFileSync as writeFileSync2 } from "fs";
9
8
  import { dirname, join as join2 } from "path";
10
9
 
11
10
  // src/lib/config.ts
@@ -5915,7 +5914,8 @@ var runsMainContract = c3.router({
5915
5914
  201: createRunResponseSchema,
5916
5915
  400: apiErrorSchema,
5917
5916
  401: apiErrorSchema,
5918
- 404: apiErrorSchema
5917
+ 404: apiErrorSchema,
5918
+ 429: apiErrorSchema
5919
5919
  },
5920
5920
  summary: "Create and execute agent run"
5921
5921
  }
@@ -7477,8 +7477,10 @@ var publicApiErrorTypeSchema = z20.enum([
7477
7477
  // Auth failure (401)
7478
7478
  "not_found_error",
7479
7479
  // Resource missing (404)
7480
- "conflict_error"
7480
+ "conflict_error",
7481
7481
  // Resource conflict (409)
7482
+ "rate_limit_error"
7483
+ // Rate limit exceeded (429)
7482
7484
  ]);
7483
7485
  var publicApiErrorSchema = z20.object({
7484
7486
  error: z20.object({
@@ -7726,9 +7728,7 @@ var createRunRequestSchema = z23.object({
7726
7728
  // volume_name -> version
7727
7729
  });
7728
7730
  var runListQuerySchema = listQuerySchema.extend({
7729
- agentId: z23.string().optional(),
7730
- status: publicRunStatusSchema.optional(),
7731
- since: timestampSchema.optional()
7731
+ status: publicRunStatusSchema.optional()
7732
7732
  });
7733
7733
  var publicRunsListContract = c17.router({
7734
7734
  list: {
@@ -7742,7 +7742,7 @@ var publicRunsListContract = c17.router({
7742
7742
  500: publicApiErrorSchema
7743
7743
  },
7744
7744
  summary: "List runs",
7745
- description: "List runs with optional filtering by agent, status, and time"
7745
+ description: "List runs with optional filtering by status"
7746
7746
  },
7747
7747
  create: {
7748
7748
  method: "POST",
@@ -7755,6 +7755,7 @@ var publicRunsListContract = c17.router({
7755
7755
  400: publicApiErrorSchema,
7756
7756
  401: publicApiErrorSchema,
7757
7757
  404: publicApiErrorSchema,
7758
+ 429: publicApiErrorSchema,
7758
7759
  500: publicApiErrorSchema
7759
7760
  },
7760
7761
  summary: "Create run",
@@ -9628,9 +9629,9 @@ async function executeJob(context, config, options = {}) {
9628
9629
  }
9629
9630
  }
9630
9631
 
9631
- // src/commands/start.ts
9632
- var activeRuns = /* @__PURE__ */ new Set();
9633
- function writeStatusFile(statusFilePath, mode, startedAt) {
9632
+ // src/lib/runner/status.ts
9633
+ import { writeFileSync as writeFileSync2 } from "fs";
9634
+ function writeStatusFile(statusFilePath, mode, activeRuns, startedAt) {
9634
9635
  const status = {
9635
9636
  mode,
9636
9637
  active_runs: activeRuns.size,
@@ -9646,212 +9647,268 @@ function writeStatusFile(statusFilePath, mode, startedAt) {
9646
9647
  );
9647
9648
  }
9648
9649
  }
9649
- async function executeJob2(context, config) {
9650
- console.log(` Executing job ${context.runId}...`);
9651
- console.log(` Prompt: ${context.prompt.substring(0, 100)}...`);
9652
- console.log(` Compose version: ${context.agentComposeVersionId}`);
9653
- try {
9654
- const result = await executeJob(context, config);
9655
- console.log(
9656
- ` Job ${context.runId} execution completed with exit code ${result.exitCode}`
9650
+ function createStatusUpdater(statusFilePath, state) {
9651
+ return () => {
9652
+ writeStatusFile(
9653
+ statusFilePath,
9654
+ state.mode,
9655
+ state.activeRuns,
9656
+ state.startedAt
9657
9657
  );
9658
- if (result.exitCode !== 0 && result.error) {
9659
- console.log(` Job ${context.runId} failed: ${result.error}`);
9658
+ };
9659
+ }
9660
+
9661
+ // src/lib/runner/setup.ts
9662
+ async function setupEnvironment(options) {
9663
+ const { config } = options;
9664
+ const datasetSuffix = process.env.AXIOM_DATASET_SUFFIX;
9665
+ if (!datasetSuffix) {
9666
+ throw new Error(
9667
+ "AXIOM_DATASET_SUFFIX is required. Set to 'dev' or 'prod'."
9668
+ );
9669
+ }
9670
+ initMetrics({
9671
+ serviceName: "vm0-runner",
9672
+ runnerLabel: config.name,
9673
+ axiomToken: process.env.AXIOM_TOKEN,
9674
+ environment: datasetSuffix
9675
+ });
9676
+ const networkCheck = checkNetworkPrerequisites();
9677
+ if (!networkCheck.ok) {
9678
+ console.error("Network prerequisites not met:");
9679
+ for (const error of networkCheck.errors) {
9680
+ console.error(` - ${error}`);
9660
9681
  }
9682
+ process.exit(1);
9683
+ }
9684
+ console.log("Setting up network bridge...");
9685
+ await setupBridge();
9686
+ console.log("Flushing bridge ARP cache...");
9687
+ await flushBridgeArpCache();
9688
+ console.log("Cleaning up orphaned proxy rules...");
9689
+ await cleanupOrphanedProxyRules(config.name);
9690
+ console.log("Cleaning up orphaned IP allocations...");
9691
+ await cleanupOrphanedAllocations();
9692
+ console.log("Initializing network proxy...");
9693
+ initVMRegistry();
9694
+ const proxyManager = initProxyManager({
9695
+ apiUrl: config.server.url,
9696
+ port: config.proxy.port,
9697
+ caDir: config.proxy.ca_dir
9698
+ });
9699
+ let proxyEnabled = false;
9700
+ try {
9701
+ await proxyManager.start();
9702
+ proxyEnabled = true;
9703
+ console.log("Network proxy initialized successfully");
9661
9704
  } catch (err) {
9662
- const error = err instanceof Error ? err.message : "Unknown execution error";
9663
- console.error(` Job ${context.runId} execution failed: ${error}`);
9664
- const result = await completeJob(config.server.url, context, 1, error);
9665
- console.log(` Job ${context.runId} reported as ${result.status}`);
9705
+ console.warn(
9706
+ `Network proxy not available: ${err instanceof Error ? err.message : "Unknown error"}`
9707
+ );
9708
+ console.warn(
9709
+ "Jobs with experimentalFirewall enabled will run without network interception"
9710
+ );
9666
9711
  }
9712
+ return { proxyEnabled };
9667
9713
  }
9668
- var startCommand = new Command("start").description("Start the runner").option("--config <path>", "Config file path", "./runner.yaml").action(
9669
- // eslint-disable-next-line complexity -- TODO: refactor complex function
9670
- async (options) => {
9671
- try {
9672
- const config = loadConfig(options.config);
9673
- validateFirecrackerPaths(config.firecracker);
9674
- console.log("Config valid");
9675
- const datasetSuffix = process.env.AXIOM_DATASET_SUFFIX;
9676
- if (!datasetSuffix) {
9677
- throw new Error(
9678
- "AXIOM_DATASET_SUFFIX is required. Set to 'dev' or 'prod'."
9679
- );
9714
+ async function cleanupEnvironment(resources) {
9715
+ if (resources.proxyEnabled) {
9716
+ console.log("Stopping network proxy...");
9717
+ await getProxyManager().stop();
9718
+ }
9719
+ console.log("Flushing metrics...");
9720
+ await flushMetrics();
9721
+ await shutdownMetrics();
9722
+ }
9723
+
9724
+ // src/lib/runner/signals.ts
9725
+ function setupSignalHandlers(state, handlers) {
9726
+ process.on("SIGINT", () => {
9727
+ console.log("\nShutting down...");
9728
+ handlers.onShutdown();
9729
+ state.mode = "stopped";
9730
+ handlers.updateStatus();
9731
+ });
9732
+ process.on("SIGTERM", () => {
9733
+ console.log("\nShutting down...");
9734
+ handlers.onShutdown();
9735
+ state.mode = "stopped";
9736
+ handlers.updateStatus();
9737
+ });
9738
+ process.on("SIGUSR1", () => {
9739
+ if (state.mode === "running") {
9740
+ console.log("\n[Maintenance] Entering drain mode...");
9741
+ console.log(
9742
+ `[Maintenance] Active jobs: ${state.activeRuns.size} (will wait for completion)`
9743
+ );
9744
+ state.mode = "draining";
9745
+ handlers.updateStatus();
9746
+ }
9747
+ });
9748
+ }
9749
+
9750
+ // src/lib/runner/runner.ts
9751
+ var Runner = class {
9752
+ config;
9753
+ statusFilePath;
9754
+ state;
9755
+ resources = null;
9756
+ running = true;
9757
+ updateStatus;
9758
+ constructor(config, statusFilePath) {
9759
+ this.config = config;
9760
+ this.statusFilePath = statusFilePath;
9761
+ this.state = {
9762
+ mode: "running",
9763
+ activeRuns: /* @__PURE__ */ new Set(),
9764
+ jobPromises: /* @__PURE__ */ new Set(),
9765
+ startedAt: /* @__PURE__ */ new Date()
9766
+ };
9767
+ this.updateStatus = createStatusUpdater(statusFilePath, this.state);
9768
+ }
9769
+ async start() {
9770
+ this.resources = await setupEnvironment({ config: this.config });
9771
+ setupSignalHandlers(this.state, {
9772
+ onShutdown: () => {
9773
+ this.running = false;
9774
+ },
9775
+ updateStatus: this.updateStatus
9776
+ });
9777
+ console.log(
9778
+ `Starting runner '${this.config.name}' for group '${this.config.group}'...`
9779
+ );
9780
+ console.log(`Max concurrent jobs: ${this.config.sandbox.max_concurrent}`);
9781
+ console.log(`Status file: ${this.statusFilePath}`);
9782
+ console.log("Press Ctrl+C to stop");
9783
+ console.log("");
9784
+ this.updateStatus();
9785
+ await this.runMainLoop();
9786
+ if (this.state.jobPromises.size > 0) {
9787
+ console.log(
9788
+ `Waiting for ${this.state.jobPromises.size} active job(s) to complete...`
9789
+ );
9790
+ await Promise.all(this.state.jobPromises);
9791
+ }
9792
+ await cleanupEnvironment(this.resources);
9793
+ this.state.mode = "stopped";
9794
+ this.updateStatus();
9795
+ console.log("Runner stopped");
9796
+ process.exit(0);
9797
+ }
9798
+ async runMainLoop() {
9799
+ while (this.running) {
9800
+ if (this.state.mode === "draining") {
9801
+ if (this.state.activeRuns.size === 0) {
9802
+ console.log("[Maintenance] All jobs completed, exiting drain mode");
9803
+ this.running = false;
9804
+ break;
9805
+ }
9806
+ if (this.state.jobPromises.size > 0) {
9807
+ await Promise.race(this.state.jobPromises);
9808
+ this.updateStatus();
9809
+ }
9810
+ continue;
9680
9811
  }
9681
- initMetrics({
9682
- serviceName: "vm0-runner",
9683
- runnerLabel: config.name,
9684
- axiomToken: process.env.AXIOM_TOKEN,
9685
- environment: datasetSuffix
9686
- });
9687
- const networkCheck = checkNetworkPrerequisites();
9688
- if (!networkCheck.ok) {
9689
- console.error("Network prerequisites not met:");
9690
- for (const error of networkCheck.errors) {
9691
- console.error(` - ${error}`);
9812
+ if (this.state.activeRuns.size >= this.config.sandbox.max_concurrent) {
9813
+ if (this.state.jobPromises.size > 0) {
9814
+ await Promise.race(this.state.jobPromises);
9815
+ this.updateStatus();
9692
9816
  }
9693
- process.exit(1);
9817
+ continue;
9694
9818
  }
9695
- console.log("Setting up network bridge...");
9696
- await setupBridge();
9697
- console.log("Flushing bridge ARP cache...");
9698
- await flushBridgeArpCache();
9699
- console.log("Cleaning up orphaned proxy rules...");
9700
- await cleanupOrphanedProxyRules(config.name);
9701
- console.log("Cleaning up orphaned IP allocations...");
9702
- await cleanupOrphanedAllocations();
9703
- console.log("Initializing network proxy...");
9704
- initVMRegistry();
9705
- const proxyManager = initProxyManager({
9706
- apiUrl: config.server.url,
9707
- port: config.proxy.port,
9708
- caDir: config.proxy.ca_dir
9709
- });
9710
- let proxyEnabled = false;
9711
9819
  try {
9712
- await proxyManager.start();
9713
- proxyEnabled = true;
9714
- console.log("Network proxy initialized successfully");
9715
- } catch (err) {
9716
- console.warn(
9717
- `Network proxy not available: ${err instanceof Error ? err.message : "Unknown error"}`
9718
- );
9719
- console.warn(
9720
- "Jobs with experimentalFirewall enabled will run without network interception"
9820
+ const job = await withRunnerTiming(
9821
+ "poll",
9822
+ () => pollForJob(this.config.server, this.config.group)
9721
9823
  );
9722
- }
9723
- const statusFilePath = join2(dirname(options.config), "status.json");
9724
- const startedAt = /* @__PURE__ */ new Date();
9725
- const state = { mode: "running" };
9726
- const updateStatus = () => {
9727
- writeStatusFile(statusFilePath, state.mode, startedAt);
9728
- };
9729
- console.log(
9730
- `Starting runner '${config.name}' for group '${config.group}'...`
9731
- );
9732
- console.log(`Max concurrent jobs: ${config.sandbox.max_concurrent}`);
9733
- console.log(`Status file: ${statusFilePath}`);
9734
- console.log("Press Ctrl+C to stop");
9735
- console.log("");
9736
- updateStatus();
9737
- let running = true;
9738
- process.on("SIGINT", () => {
9739
- console.log("\nShutting down...");
9740
- running = false;
9741
- state.mode = "stopped";
9742
- updateStatus();
9743
- });
9744
- process.on("SIGTERM", () => {
9745
- console.log("\nShutting down...");
9746
- running = false;
9747
- state.mode = "stopped";
9748
- updateStatus();
9749
- });
9750
- process.on("SIGUSR1", () => {
9751
- if (state.mode === "running") {
9752
- console.log("\n[Maintenance] Entering drain mode...");
9753
- console.log(
9754
- `[Maintenance] Active jobs: ${activeRuns.size} (will wait for completion)`
9824
+ if (!job) {
9825
+ await new Promise(
9826
+ (resolve) => setTimeout(resolve, this.config.sandbox.poll_interval_ms)
9755
9827
  );
9756
- state.mode = "draining";
9757
- updateStatus();
9758
- }
9759
- });
9760
- const jobPromises = /* @__PURE__ */ new Set();
9761
- while (running) {
9762
- if (state.mode === "draining") {
9763
- if (activeRuns.size === 0) {
9764
- console.log(
9765
- "[Maintenance] All jobs completed, exiting drain mode"
9766
- );
9767
- running = false;
9768
- break;
9769
- }
9770
- if (jobPromises.size > 0) {
9771
- await Promise.race(jobPromises);
9772
- updateStatus();
9773
- }
9774
- continue;
9775
- }
9776
- if (activeRuns.size >= config.sandbox.max_concurrent) {
9777
- if (jobPromises.size > 0) {
9778
- await Promise.race(jobPromises);
9779
- updateStatus();
9780
- }
9781
9828
  continue;
9782
9829
  }
9783
- try {
9784
- const job = await withRunnerTiming(
9785
- "poll",
9786
- () => pollForJob(config.server, config.group)
9787
- );
9788
- if (!job) {
9789
- await new Promise(
9790
- (resolve) => setTimeout(resolve, config.sandbox.poll_interval_ms)
9791
- );
9792
- continue;
9793
- }
9794
- console.log(`Found job: ${job.runId}`);
9795
- try {
9796
- const context = await withRunnerTiming(
9797
- "claim",
9798
- () => claimJob(config.server, job.runId)
9799
- );
9800
- console.log(`Claimed job: ${context.runId}`);
9801
- activeRuns.add(context.runId);
9802
- updateStatus();
9803
- const jobPromise = executeJob2(context, config).catch((error) => {
9804
- console.error(
9805
- `Job ${context.runId} failed:`,
9806
- error instanceof Error ? error.message : "Unknown error"
9807
- );
9808
- }).finally(() => {
9809
- activeRuns.delete(context.runId);
9810
- jobPromises.delete(jobPromise);
9811
- updateStatus();
9812
- });
9813
- jobPromises.add(jobPromise);
9814
- } catch (error) {
9815
- console.log(
9816
- `Could not claim job ${job.runId}:`,
9817
- error instanceof Error ? error.message : "Unknown error"
9818
- );
9819
- }
9820
- } catch (error) {
9821
- console.error(
9822
- "Polling error:",
9823
- error instanceof Error ? error.message : "Unknown error"
9824
- );
9825
- await new Promise((resolve) => setTimeout(resolve, 2e3));
9826
- }
9830
+ console.log(`Found job: ${job.runId}`);
9831
+ await this.processJob(job.runId);
9832
+ } catch (error) {
9833
+ console.error(
9834
+ "Polling error:",
9835
+ error instanceof Error ? error.message : "Unknown error"
9836
+ );
9837
+ await new Promise((resolve) => setTimeout(resolve, 2e3));
9827
9838
  }
9828
- if (jobPromises.size > 0) {
9829
- console.log(
9830
- `Waiting for ${jobPromises.size} active job(s) to complete...`
9839
+ }
9840
+ }
9841
+ async processJob(runId) {
9842
+ try {
9843
+ const context = await withRunnerTiming(
9844
+ "claim",
9845
+ () => claimJob(this.config.server, runId)
9846
+ );
9847
+ console.log(`Claimed job: ${context.runId}`);
9848
+ this.state.activeRuns.add(context.runId);
9849
+ this.updateStatus();
9850
+ const jobPromise = this.executeJob(context).catch((error) => {
9851
+ console.error(
9852
+ `Job ${context.runId} failed:`,
9853
+ error instanceof Error ? error.message : "Unknown error"
9831
9854
  );
9832
- await Promise.all(jobPromises);
9833
- }
9834
- if (proxyEnabled) {
9835
- console.log("Stopping network proxy...");
9836
- await getProxyManager().stop();
9837
- }
9838
- console.log("Flushing metrics...");
9839
- await flushMetrics();
9840
- await shutdownMetrics();
9841
- state.mode = "stopped";
9842
- updateStatus();
9843
- console.log("Runner stopped");
9844
- process.exit(0);
9855
+ }).finally(() => {
9856
+ this.state.activeRuns.delete(context.runId);
9857
+ this.state.jobPromises.delete(jobPromise);
9858
+ this.updateStatus();
9859
+ });
9860
+ this.state.jobPromises.add(jobPromise);
9845
9861
  } catch (error) {
9846
- if (error instanceof Error) {
9847
- console.error(`Error: ${error.message}`);
9848
- } else {
9849
- console.error("An unknown error occurred");
9850
- }
9851
- process.exit(1);
9862
+ console.log(
9863
+ `Could not claim job ${runId}:`,
9864
+ error instanceof Error ? error.message : "Unknown error"
9865
+ );
9852
9866
  }
9853
9867
  }
9854
- );
9868
+ async executeJob(context) {
9869
+ console.log(` Executing job ${context.runId}...`);
9870
+ console.log(` Prompt: ${context.prompt.substring(0, 100)}...`);
9871
+ console.log(` Compose version: ${context.agentComposeVersionId}`);
9872
+ try {
9873
+ const result = await executeJob(context, this.config);
9874
+ console.log(
9875
+ ` Job ${context.runId} execution completed with exit code ${result.exitCode}`
9876
+ );
9877
+ if (result.exitCode !== 0 && result.error) {
9878
+ console.log(` Job ${context.runId} failed: ${result.error}`);
9879
+ }
9880
+ } catch (err) {
9881
+ const error = err instanceof Error ? err.message : "Unknown execution error";
9882
+ console.error(` Job ${context.runId} execution failed: ${error}`);
9883
+ const result = await completeJob(
9884
+ this.config.server.url,
9885
+ context,
9886
+ 1,
9887
+ error
9888
+ );
9889
+ console.log(` Job ${context.runId} reported as ${result.status}`);
9890
+ }
9891
+ }
9892
+ };
9893
+
9894
+ // src/commands/start.ts
9895
+ var startCommand = new Command("start").description("Start the runner").option("--config <path>", "Config file path", "./runner.yaml").action(async (options) => {
9896
+ try {
9897
+ const config = loadConfig(options.config);
9898
+ validateFirecrackerPaths(config.firecracker);
9899
+ console.log("Config valid");
9900
+ const statusFilePath = join2(dirname(options.config), "status.json");
9901
+ const runner = new Runner(config, statusFilePath);
9902
+ await runner.start();
9903
+ } catch (error) {
9904
+ if (error instanceof Error) {
9905
+ console.error(`Error: ${error.message}`);
9906
+ } else {
9907
+ console.error("An unknown error occurred");
9908
+ }
9909
+ process.exit(1);
9910
+ }
9911
+ });
9855
9912
 
9856
9913
  // src/commands/doctor.ts
9857
9914
  import { Command as Command2 } from "commander";
@@ -10437,7 +10494,7 @@ var benchmarkCommand = new Command4("benchmark").description(
10437
10494
  });
10438
10495
 
10439
10496
  // src/index.ts
10440
- var version = true ? "3.3.1" : "0.1.0";
10497
+ var version = true ? "3.3.2" : "0.1.0";
10441
10498
  program.name("vm0-runner").version(version).description("Self-hosted runner for VM0 agents");
10442
10499
  program.addCommand(startCommand);
10443
10500
  program.addCommand(doctorCommand);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vm0/runner",
3
- "version": "3.3.1",
3
+ "version": "3.3.2",
4
4
  "description": "Self-hosted runner for VM0 agents",
5
5
  "repository": {
6
6
  "type": "git",