@themoltnet/pi-extension 0.23.0 → 0.23.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -843,6 +843,12 @@ declare interface TaskReporter {
843
843
  * cancellation has been observed. Null until `cancelSignal` aborts.
844
844
  */
845
845
  readonly cancelReason: string | null;
846
+ /**
847
+ * Request local cancellation of the in-flight task. Runtime shutdown uses
848
+ * this to trip the same executor-facing signal as proposer cancellation,
849
+ * without waiting for the next server heartbeat.
850
+ */
851
+ requestCancel?(reason: string): void;
846
852
  }
847
853
 
848
854
  declare const TaskStatus: Type.TUnion<[Type.TLiteral<"waiting">, Type.TLiteral<"queued">, Type.TLiteral<"dispatched">, Type.TLiteral<"running">, Type.TLiteral<"completed">, Type.TLiteral<"failed">, Type.TLiteral<"cancelled">, Type.TLiteral<"expired">]>;
@@ -901,6 +907,8 @@ export declare interface VmConfig {
901
907
  extraAllowedHosts?: string[];
902
908
  /** Full sandbox config (vfs shadows, env overrides). */
903
909
  sandboxConfig?: SandboxConfig;
910
+ /** Abort resume/setup work, closing any live VM owned by resumeVm. */
911
+ signal?: AbortSignal;
904
912
  }
905
913
 
906
914
  export declare interface VmCredentials {
package/dist/index.js CHANGED
@@ -1674,6 +1674,124 @@ var updateRenderedPack = (options) => (options.client ?? client).patch({
1674
1674
  }
1675
1675
  });
1676
1676
  /**
1677
+ * List runtime profiles for the active team context.
1678
+ */
1679
+ var listRuntimeProfiles = (options) => (options?.client ?? client).get({
1680
+ security: [
1681
+ {
1682
+ scheme: "bearer",
1683
+ type: "http"
1684
+ },
1685
+ {
1686
+ name: "X-Moltnet-Session-Token",
1687
+ type: "apiKey"
1688
+ },
1689
+ {
1690
+ in: "cookie",
1691
+ name: "ory_kratos_session",
1692
+ type: "apiKey"
1693
+ }
1694
+ ],
1695
+ url: "/runtime-profiles",
1696
+ ...options
1697
+ });
1698
+ /**
1699
+ * Create a runtime profile for the active team context.
1700
+ */
1701
+ var createRuntimeProfile = (options) => (options?.client ?? client).post({
1702
+ security: [
1703
+ {
1704
+ scheme: "bearer",
1705
+ type: "http"
1706
+ },
1707
+ {
1708
+ name: "X-Moltnet-Session-Token",
1709
+ type: "apiKey"
1710
+ },
1711
+ {
1712
+ in: "cookie",
1713
+ name: "ory_kratos_session",
1714
+ type: "apiKey"
1715
+ }
1716
+ ],
1717
+ url: "/runtime-profiles",
1718
+ ...options,
1719
+ headers: {
1720
+ "Content-Type": "application/json",
1721
+ ...options?.headers
1722
+ }
1723
+ });
1724
+ /**
1725
+ * Delete one runtime profile.
1726
+ */
1727
+ var deleteRuntimeProfile = (options) => (options.client ?? client).delete({
1728
+ security: [
1729
+ {
1730
+ scheme: "bearer",
1731
+ type: "http"
1732
+ },
1733
+ {
1734
+ name: "X-Moltnet-Session-Token",
1735
+ type: "apiKey"
1736
+ },
1737
+ {
1738
+ in: "cookie",
1739
+ name: "ory_kratos_session",
1740
+ type: "apiKey"
1741
+ }
1742
+ ],
1743
+ url: "/runtime-profiles/{profileId}",
1744
+ ...options
1745
+ });
1746
+ /**
1747
+ * Get one runtime profile.
1748
+ */
1749
+ var getRuntimeProfile = (options) => (options.client ?? client).get({
1750
+ security: [
1751
+ {
1752
+ scheme: "bearer",
1753
+ type: "http"
1754
+ },
1755
+ {
1756
+ name: "X-Moltnet-Session-Token",
1757
+ type: "apiKey"
1758
+ },
1759
+ {
1760
+ in: "cookie",
1761
+ name: "ory_kratos_session",
1762
+ type: "apiKey"
1763
+ }
1764
+ ],
1765
+ url: "/runtime-profiles/{profileId}",
1766
+ ...options
1767
+ });
1768
+ /**
1769
+ * Update one runtime profile.
1770
+ */
1771
+ var updateRuntimeProfile = (options) => (options.client ?? client).patch({
1772
+ security: [
1773
+ {
1774
+ scheme: "bearer",
1775
+ type: "http"
1776
+ },
1777
+ {
1778
+ name: "X-Moltnet-Session-Token",
1779
+ type: "apiKey"
1780
+ },
1781
+ {
1782
+ in: "cookie",
1783
+ name: "ory_kratos_session",
1784
+ type: "apiKey"
1785
+ }
1786
+ ],
1787
+ url: "/runtime-profiles/{profileId}",
1788
+ ...options,
1789
+ headers: {
1790
+ "Content-Type": "application/json",
1791
+ ...options.headers
1792
+ }
1793
+ });
1794
+ /**
1677
1795
  * List tasks for a team with optional filters.
1678
1796
  */
1679
1797
  var listTasks = (options) => (options.client ?? client).get({
@@ -1788,6 +1906,32 @@ var listTaskAttempts = (options) => (options.client ?? client).get({
1788
1906
  ...options
1789
1907
  });
1790
1908
  /**
1909
+ * Claimant intentionally abandons this attempt (e.g. daemon shutdown). The attempt becomes aborted and the task requeues for another claim (or fails when retries are exhausted). Does NOT cancel the task.
1910
+ */
1911
+ var abortTaskAttempt = (options) => (options.client ?? client).post({
1912
+ security: [
1913
+ {
1914
+ scheme: "bearer",
1915
+ type: "http"
1916
+ },
1917
+ {
1918
+ name: "X-Moltnet-Session-Token",
1919
+ type: "apiKey"
1920
+ },
1921
+ {
1922
+ in: "cookie",
1923
+ name: "ory_kratos_session",
1924
+ type: "apiKey"
1925
+ }
1926
+ ],
1927
+ url: "/tasks/{id}/attempts/{n}/abort",
1928
+ ...options,
1929
+ headers: {
1930
+ "Content-Type": "application/json",
1931
+ ...options.headers
1932
+ }
1933
+ });
1934
+ /**
1791
1935
  * Mark an attempt as completed with output.
1792
1936
  */
1793
1937
  var completeTask = (options) => (options.client ?? client).post({
@@ -4696,6 +4840,54 @@ function createRecoveryNamespace(context) {
4696
4840
  };
4697
4841
  }
4698
4842
  //#endregion
4843
+ //#region ../sdk/src/namespaces/runtime-profiles.ts
4844
+ function createRuntimeProfilesNamespace(context) {
4845
+ const { client, auth } = context;
4846
+ return {
4847
+ async list(options) {
4848
+ return unwrapResult(await listRuntimeProfiles({
4849
+ client,
4850
+ auth,
4851
+ headers: teamHeaders(options)
4852
+ }));
4853
+ },
4854
+ async create(body, options) {
4855
+ return unwrapResult(await createRuntimeProfile({
4856
+ client,
4857
+ auth,
4858
+ headers: teamHeaders(options),
4859
+ body
4860
+ }));
4861
+ },
4862
+ async get(profileId) {
4863
+ return unwrapResult(await getRuntimeProfile({
4864
+ client,
4865
+ auth,
4866
+ path: { profileId }
4867
+ }));
4868
+ },
4869
+ async update(profileId, body) {
4870
+ return unwrapResult(await updateRuntimeProfile({
4871
+ client,
4872
+ auth,
4873
+ path: { profileId },
4874
+ body
4875
+ }));
4876
+ },
4877
+ async delete(profileId) {
4878
+ const result = await deleteRuntimeProfile({
4879
+ client,
4880
+ auth,
4881
+ path: { profileId }
4882
+ });
4883
+ if (result.error) unwrapResult(result);
4884
+ }
4885
+ };
4886
+ }
4887
+ function teamHeaders(options) {
4888
+ return options?.teamId ? { "x-moltnet-team-id": options.teamId } : void 0;
4889
+ }
4890
+ //#endregion
4699
4891
  //#region ../sdk/src/namespaces/signing-requests.ts
4700
4892
  function createSigningRequestsNamespace(context) {
4701
4893
  const { client, auth } = context;
@@ -4816,6 +5008,17 @@ function createTasksNamespace(context) {
4816
5008
  body
4817
5009
  }));
4818
5010
  },
5011
+ async abortAttempt(id, n, body) {
5012
+ return unwrapResult(await abortTaskAttempt({
5013
+ client,
5014
+ auth,
5015
+ path: {
5016
+ id,
5017
+ n
5018
+ },
5019
+ body
5020
+ }));
5021
+ },
4819
5022
  async cancel(id, body) {
4820
5023
  return unwrapResult(await cancelTask({
4821
5024
  client,
@@ -4999,6 +5202,7 @@ function createAgent(options) {
4999
5202
  legreffier: createLegreffierNamespace(context),
5000
5203
  problems: createProblemsNamespace(context),
5001
5204
  teams: createTeamsNamespace(context),
5205
+ runtimeProfiles: createRuntimeProfilesNamespace(context),
5002
5206
  tasks: createTasksNamespace(context),
5003
5207
  client,
5004
5208
  getToken: () => tokenManager.getToken()
@@ -8310,6 +8514,63 @@ function pruneOldSnapshots(maxCached, currentDir) {
8310
8514
  });
8311
8515
  }
8312
8516
  //#endregion
8517
+ //#region src/abort-utils.ts
8518
+ function throwIfAborted(signal, label) {
8519
+ if (!signal?.aborted) return;
8520
+ throw abortError(label, signal);
8521
+ }
8522
+ function abortError(label, signal) {
8523
+ const reason = signal.reason;
8524
+ const suffix = reason instanceof Error ? reason.message : reason === void 0 ? "aborted" : String(reason);
8525
+ const err = /* @__PURE__ */ new Error(`${label} aborted: ${suffix}`);
8526
+ err.name = "AbortError";
8527
+ return err;
8528
+ }
8529
+ function cleanupLateResource(resourcePromise, opts) {
8530
+ resourcePromise.then(async (resource) => {
8531
+ try {
8532
+ await opts.cleanup(resource);
8533
+ } catch (err) {
8534
+ opts.onCleanupError?.(err);
8535
+ }
8536
+ }, () => {});
8537
+ }
8538
+ async function abortableResource(opts) {
8539
+ const { signal } = opts;
8540
+ if (!signal) return opts.promise;
8541
+ throwIfAborted(signal, opts.label);
8542
+ const resourcePromise = Promise.resolve(opts.promise);
8543
+ const abortPromise = new Promise((_, reject) => {
8544
+ const abort = () => {
8545
+ cleanupLateResource(resourcePromise, opts);
8546
+ reject(abortError(opts.label, signal));
8547
+ };
8548
+ signal.addEventListener("abort", abort, { once: true });
8549
+ resourcePromise.then(() => signal.removeEventListener("abort", abort), () => signal.removeEventListener("abort", abort));
8550
+ });
8551
+ return Promise.race([resourcePromise, abortPromise]);
8552
+ }
8553
+ async function delay(ms, signal, label) {
8554
+ if (!signal) {
8555
+ await new Promise((resolve) => {
8556
+ setTimeout(resolve, ms);
8557
+ });
8558
+ return;
8559
+ }
8560
+ throwIfAborted(signal, label);
8561
+ await new Promise((resolve, reject) => {
8562
+ const listener = () => {
8563
+ clearTimeout(timeout);
8564
+ reject(abortError(label, signal));
8565
+ };
8566
+ const timeout = setTimeout(() => {
8567
+ signal.removeEventListener("abort", listener);
8568
+ resolve();
8569
+ }, ms);
8570
+ signal.addEventListener("abort", listener, { once: true });
8571
+ });
8572
+ }
8573
+ //#endregion
8313
8574
  //#region src/vm-manager.ts
8314
8575
  /**
8315
8576
  * Memory-backed VFS mount used by the daemon to inject task-context
@@ -8426,23 +8687,33 @@ var BASE_ALLOWED_HOSTS = [
8426
8687
  * surface immediately rather than fall through to cryptic agent
8427
8688
  * errors later.
8428
8689
  */
8429
- async function vmRun(vm, label, command) {
8690
+ async function vmRun(vm, label, command, signal) {
8430
8691
  const wrapped = `set -eu\nset -o pipefail\n${command}`;
8692
+ throwIfAborted(signal, `resume step "${label}"`);
8431
8693
  const r = await vm.exec([
8432
8694
  "sh",
8433
8695
  "-c",
8434
8696
  wrapped
8435
- ]);
8697
+ ], { signal });
8436
8698
  if (r.exitCode !== 0) {
8437
8699
  const tail = [r.stderr, r.stdout].filter(Boolean).join("\n").slice(-800);
8438
8700
  throw new Error(`resume step "${label}" failed (exit ${r.exitCode}):\n${tail}`);
8439
8701
  }
8440
8702
  }
8703
+ function nonErrorMessage(err) {
8704
+ if (typeof err === "string") return err;
8705
+ try {
8706
+ return JSON.stringify(err) ?? "unknown error";
8707
+ } catch {
8708
+ return "unknown error";
8709
+ }
8710
+ }
8441
8711
  /**
8442
8712
  * Resume a VM from a checkpoint, inject credentials, configure egress +
8443
8713
  * TLS. Returns the managed VM handle.
8444
8714
  */
8445
8715
  async function resumeVm(config) {
8716
+ throwIfAborted(config.signal, "VM resume");
8446
8717
  const mainRepo = findMainWorktree();
8447
8718
  const agentDir = path.join(mainRepo, ".moltnet", config.agentName);
8448
8719
  const guestWorkspace = path.resolve(config.mountPath);
@@ -8486,24 +8757,33 @@ async function resumeVm(config) {
8486
8757
  };
8487
8758
  const resources = config.sandboxConfig?.resources;
8488
8759
  const workspaceMode = config.workspaceMode ?? "shared_mount";
8489
- const vm = await VmCheckpoint.load(config.checkpointPath).resume({
8490
- httpHooks,
8491
- env: vmEnv,
8492
- ...resources?.memory && { memory: resources.memory },
8493
- ...resources?.cpus && { cpus: resources.cpus },
8494
- vfs: { mounts: {
8495
- [guestWorkspace]: workspaceProvider,
8496
- [GUEST_TASK_SKILLS_MOUNT]: new MemoryProvider()
8497
- } }
8760
+ const vm = await abortableResource({
8761
+ promise: VmCheckpoint.load(config.checkpointPath).resume({
8762
+ httpHooks,
8763
+ env: vmEnv,
8764
+ ...resources?.memory && { memory: resources.memory },
8765
+ ...resources?.cpus && { cpus: resources.cpus },
8766
+ vfs: { mounts: {
8767
+ [guestWorkspace]: workspaceProvider,
8768
+ [GUEST_TASK_SKILLS_MOUNT]: new MemoryProvider()
8769
+ } }
8770
+ }),
8771
+ signal: config.signal,
8772
+ label: "VM resume",
8773
+ cleanup: (resumedVm) => resumedVm.close(),
8774
+ onCleanupError: (err) => {
8775
+ const message = err instanceof Error ? err.message : String(err);
8776
+ process.stderr.write(`[vm] aborted resume late vm.close() failed: ${message}\n`);
8777
+ }
8498
8778
  });
8499
8779
  try {
8500
- await vm.exec(`sh -c '
8780
+ await vmRun(vm, "TLS certificates", `
8501
8781
  cp /etc/gondolin/mitm/ca.crt /usr/local/share/ca-certificates/gondolin-mitm.crt
8502
8782
  update-ca-certificates 2>/dev/null
8503
8783
  cat /etc/gondolin/mitm/ca.crt >> /etc/ssl/certs/ca-certificates.crt
8504
- '`);
8505
- await vmRun(vm, "DNS resolvers", `printf 'nameserver 8.8.8.8\\nnameserver 1.1.1.1\\n' > /etc/resolv.conf`);
8506
- await vmRun(vm, "git safe.directory", `git config --system --add safe.directory '*'`);
8784
+ `, config.signal);
8785
+ await vmRun(vm, "DNS resolvers", `printf 'nameserver 8.8.8.8\\nnameserver 1.1.1.1\\n' > /etc/resolv.conf`, config.signal);
8786
+ await vmRun(vm, "git safe.directory", `git config --system --add safe.directory '*'`, config.signal);
8507
8787
  for (const [i, entry] of (config.sandboxConfig?.resumeCommands ?? []).entries()) {
8508
8788
  if (!shouldRunResumeCommand(entry, { workspaceMode })) continue;
8509
8789
  const { run, retries, backoffMs } = typeof entry === "string" ? {
@@ -8518,34 +8798,67 @@ async function resumeVm(config) {
8518
8798
  const label = `resumeCommands[${i}]`;
8519
8799
  let lastErr;
8520
8800
  for (let attempt = 0; attempt <= retries; attempt++) try {
8521
- await vmRun(vm, label, run);
8801
+ await vmRun(vm, label, run, config.signal);
8522
8802
  lastErr = void 0;
8523
8803
  break;
8524
8804
  } catch (err) {
8525
8805
  lastErr = err;
8526
8806
  if (attempt === retries) break;
8527
- await new Promise((resolve) => {
8528
- setTimeout(resolve, (attempt + 1) * backoffMs);
8529
- });
8807
+ await delay((attempt + 1) * backoffMs, config.signal, label);
8530
8808
  }
8531
- if (lastErr) throw lastErr instanceof Error ? lastErr : new Error(String(lastErr));
8809
+ if (lastErr) throw lastErr instanceof Error ? lastErr : new Error(nonErrorMessage(lastErr));
8532
8810
  }
8533
8811
  const vmSshDir = `${vmAgentDir}/ssh`;
8534
- await vm.exec(`mkdir -p ${vmAgentDir}/ssh /home/agent/.pi/agent`);
8535
- if (creds.piAuthJson !== null) await vm.fs.writeFile("/home/agent/.pi/agent/auth.json", creds.piAuthJson, { mode: 384 });
8812
+ await vm.exec(`mkdir -p ${vmAgentDir}/ssh /home/agent/.pi/agent`, { signal: config.signal });
8813
+ if (creds.piAuthJson !== null) await vm.fs.writeFile("/home/agent/.pi/agent/auth.json", creds.piAuthJson, {
8814
+ mode: 384,
8815
+ signal: config.signal
8816
+ });
8536
8817
  const vmMoltnetJson = rewriteMoltnetJsonPaths(creds.moltnetJson, vmAgentDir, vmSshDir, creds.githubAppPemFilename);
8537
- await vm.fs.writeFile(`${vmAgentDir}/moltnet.json`, vmMoltnetJson, { mode: 384 });
8538
- await vm.fs.writeFile(`${vmAgentDir}/env`, creds.agentEnvRaw, { mode: 384 });
8818
+ await vm.fs.writeFile(`${vmAgentDir}/moltnet.json`, vmMoltnetJson, {
8819
+ mode: 384,
8820
+ signal: config.signal
8821
+ });
8822
+ await vm.fs.writeFile(`${vmAgentDir}/env`, creds.agentEnvRaw, {
8823
+ mode: 384,
8824
+ signal: config.signal
8825
+ });
8539
8826
  if (creds.gitconfig) {
8540
8827
  const vmSigningKey = `${vmSshDir}/id_ed25519`;
8541
8828
  const vmGitconfig = creds.gitconfig.replace(/signingKey\s*=\s*.+/g, `signingKey = ${vmSigningKey}`);
8542
- await vm.fs.writeFile(`${vmAgentDir}/gitconfig`, vmGitconfig, { mode: 420 });
8829
+ await vm.fs.writeFile(`${vmAgentDir}/gitconfig`, vmGitconfig, {
8830
+ mode: 420,
8831
+ signal: config.signal
8832
+ });
8543
8833
  }
8544
- if (creds.sshPrivateKey) await vm.fs.writeFile(`${vmSshDir}/id_ed25519`, creds.sshPrivateKey, { mode: 384 });
8545
- if (creds.sshPublicKey) await vm.fs.writeFile(`${vmSshDir}/id_ed25519.pub`, creds.sshPublicKey, { mode: 420 });
8546
- if (creds.allowedSigners) await vm.fs.writeFile(`${vmSshDir}/allowed_signers`, creds.allowedSigners, { mode: 420 });
8547
- if (creds.githubAppPem && creds.githubAppPemFilename) await vm.fs.writeFile(`${vmAgentDir}/${creds.githubAppPemFilename}`, creds.githubAppPem, { mode: 384 });
8548
- await vm.exec("chown -R agent:agent /home/agent/.pi /home/agent/.moltnet");
8834
+ if (creds.sshPrivateKey) await vm.fs.writeFile(`${vmSshDir}/id_ed25519`, creds.sshPrivateKey, {
8835
+ mode: 384,
8836
+ signal: config.signal
8837
+ });
8838
+ if (creds.sshPublicKey) await vm.fs.writeFile(`${vmSshDir}/id_ed25519.pub`, creds.sshPublicKey, {
8839
+ mode: 420,
8840
+ signal: config.signal
8841
+ });
8842
+ if (creds.allowedSigners) await vm.fs.writeFile(`${vmSshDir}/allowed_signers`, creds.allowedSigners, {
8843
+ mode: 420,
8844
+ signal: config.signal
8845
+ });
8846
+ if (creds.githubAppPem && creds.githubAppPemFilename) await vm.fs.writeFile(`${vmAgentDir}/${creds.githubAppPemFilename}`, creds.githubAppPem, {
8847
+ mode: 384,
8848
+ signal: config.signal
8849
+ });
8850
+ await vm.exec("chown -R agent:agent /home/agent/.pi /home/agent/.moltnet", { signal: config.signal });
8851
+ const gitCredHelperPath = `${vmSshDir}/git-credential-moltnet`;
8852
+ const credHelperScript = `#!/bin/sh
8853
+ echo "username=x-access-token"
8854
+ echo "password=$(moltnet github token --credentials ${vmSshDir}/moltnet.json)"
8855
+ `;
8856
+ await vm.fs.writeFile(gitCredHelperPath, credHelperScript, {
8857
+ mode: 493,
8858
+ signal: config.signal
8859
+ });
8860
+ await vmRun(vm, "git credential helper", `git config --global credential.helper ${gitCredHelperPath} && \
8861
+ git config --global url."https://github.com/".insteadOf "git@github.com:"`, config.signal);
8549
8862
  return {
8550
8863
  vm,
8551
8864
  credentials: creds,
@@ -13055,18 +13368,120 @@ var TaskContext = _Array_(_Object_({
13055
13368
  maxItems: 5
13056
13369
  });
13057
13370
  //#endregion
13058
- //#region ../tasks/src/daemon-profiles.ts
13059
- var DaemonProfileName = String$1({
13371
+ //#region ../tasks/src/rubric.ts
13372
+ /**
13373
+ * Rubric — structured acceptance criteria used by judgment tasks.
13374
+ *
13375
+ * Phase 1 (this PR): rubrics are embedded in task inputs. Their integrity
13376
+ * is pinned via the task's `input_cid` (which covers the whole input,
13377
+ * including the inline rubric). No separate storage, no CRUD.
13378
+ *
13379
+ * Phase 2 (see #881): rubrics become a first-class resource with their
13380
+ * own signed rows and CIDv1 lookup. The schema below is designed to
13381
+ * carry forward unchanged — only storage and addressing differ.
13382
+ *
13383
+ * Until Phase 2 lands, `rubricId` + `version` + `contentHash` are
13384
+ * informational fields the author fills in; no uniqueness is enforced.
13385
+ * `contentHash` is optional in Phase 1 because the *task*'s input_cid
13386
+ * is the authoritative commitment.
13387
+ */
13388
+ /**
13389
+ * How a judge must score a single criterion.
13390
+ *
13391
+ * - `llm_score`: 0..1 continuous, `rationale` required. Smooths failures
13392
+ * into the gradient — use `llm_checklist` instead for properties where
13393
+ * a single failure is a real failure (grounding, faithfulness).
13394
+ * - `llm_checklist`: judge enumerates per-claim assertions with
13395
+ * `{passed, evidence}`. The criterion's numeric `score` is derived:
13396
+ * `1` iff every assertion passes, else `0`. Per-claim evidence is the
13397
+ * dataset for cluster-analysis of failure modes. See #999.
13398
+ * - `boolean`: 0 or 1, `rationale` optional.
13399
+ * - `deterministic_signature_check`: judge runs a signature check;
13400
+ * result is 0 or 1. No LLM discretion.
13401
+ * - `deterministic_coverage_check`: every referenced source entry
13402
+ * appears in the rendered output; 0 or 1.
13403
+ */
13404
+ var RubricScoringMode = Union([
13405
+ Literal("llm_score"),
13406
+ Literal("llm_checklist"),
13407
+ Literal("boolean"),
13408
+ Literal("deterministic_signature_check"),
13409
+ Literal("deterministic_coverage_check")
13410
+ ], { $id: "RubricScoringMode" });
13411
+ /**
13412
+ * One binary check produced by an `llm_checklist`-mode criterion.
13413
+ *
13414
+ * `evidence` is REQUIRED for both PASS and FAIL — agentskills.io grading
13415
+ * principle: \"Don't give the benefit of the doubt.\" A PASS without
13416
+ * concrete evidence (a quoted span, an entry id, a source location)
13417
+ * cannot be audited. A FAIL without evidence cannot be clustered into
13418
+ * structural fixes. The same shape is reused by `judge-eval-variant`
13419
+ * (#943) so tooling, dashboards, and analysis stay uniform.
13420
+ */
13421
+ var AssertionResult = _Object_({
13422
+ id: String$1({ minLength: 1 }),
13423
+ text: String$1({ minLength: 1 }),
13424
+ passed: Boolean$1(),
13425
+ evidence: String$1({ minLength: 1 })
13426
+ }, {
13427
+ $id: "AssertionResult",
13428
+ additionalProperties: false
13429
+ });
13430
+ var RubricCriterion = _Object_({
13431
+ id: String$1({ minLength: 1 }),
13432
+ description: String$1({ minLength: 1 }),
13433
+ weight: Number$1({
13434
+ minimum: 0,
13435
+ maximum: 1
13436
+ }),
13437
+ scoring: RubricScoringMode
13438
+ }, {
13439
+ $id: "RubricCriterion",
13440
+ additionalProperties: false
13441
+ });
13442
+ /**
13443
+ * A complete rubric. Same shape used in Phase 1 (inline) and Phase 2
13444
+ * (stored row `body`); only the addressing mechanism differs.
13445
+ */
13446
+ var Rubric = _Object_({
13447
+ rubricId: String$1({ minLength: 1 }),
13448
+ version: String$1({ minLength: 1 }),
13449
+ preamble: Optional(String$1()),
13450
+ criteria: _Array_(RubricCriterion, { minItems: 1 }),
13451
+ scope: Optional(String$1()),
13452
+ contentHash: Optional(String$1())
13453
+ }, {
13454
+ $id: "Rubric",
13455
+ additionalProperties: false
13456
+ });
13457
+ /**
13458
+ * Verify rubric criteria weights sum to 1.0 within floating-point tolerance.
13459
+ * The schema constrains each weight to [0,1] but can't express a cross-field
13460
+ * sum constraint, so this is enforced programmatically by callers that
13461
+ * accept rubrics (task input validators, server-side task creation).
13462
+ *
13463
+ * Returns null when valid; otherwise an error message suitable for surfacing
13464
+ * to the caller. Tolerance is 1e-6 to accommodate JSON round-tripping of
13465
+ * decimal fractions (e.g. 0.1 + 0.2 + 0.3 + 0.4 ≠ 1.0 exactly).
13466
+ */
13467
+ function validateRubricWeights(rubric) {
13468
+ const sum = rubric.criteria.reduce((acc, c) => acc + c.weight, 0);
13469
+ if (Math.abs(sum - 1) > 1e-6) return `Rubric weights must sum to 1.0 (got ${sum.toFixed(6)})`;
13470
+ return null;
13471
+ }
13472
+ //#endregion
13473
+ //#region ../tasks/src/runtime-profiles.ts
13474
+ var RuntimeProfileName = String$1({
13060
13475
  minLength: 1,
13061
13476
  maxLength: 100,
13062
13477
  pattern: "^[a-zA-Z0-9][a-zA-Z0-9_-]{0,99}$"
13063
13478
  });
13064
- var DaemonProfileEnvName = String$1({
13479
+ var RuntimeProfileEnvName = String$1({
13065
13480
  minLength: 1,
13066
13481
  maxLength: 128,
13067
13482
  pattern: "^[A-Z_][A-Z0-9_]*$"
13068
13483
  });
13069
- var DaemonProfileToolName = String$1({
13484
+ var RuntimeProfileToolName = String$1({
13070
13485
  minLength: 1,
13071
13486
  maxLength: 128,
13072
13487
  pattern: "^[a-zA-Z0-9._/-]+$"
@@ -13079,7 +13494,7 @@ var SandboxResumeCommandWhenSchema = _Object_({ workspaceMode: Optional(_Array_(
13079
13494
  minItems: 1,
13080
13495
  maxItems: 3
13081
13496
  })) }, { additionalProperties: false });
13082
- var DaemonProfileSandboxResumeCommand = Union([String$1({
13497
+ var RuntimeProfileSandboxResumeCommand = Union([String$1({
13083
13498
  minLength: 1,
13084
13499
  maxLength: 4096
13085
13500
  }), _Object_({
@@ -13097,7 +13512,7 @@ var DaemonProfileSandboxResumeCommand = Union([String$1({
13097
13512
  maximum: 6e4
13098
13513
  }))
13099
13514
  }, { additionalProperties: false })]);
13100
- var DaemonProfileSandbox = _Object_({
13515
+ var RuntimeProfileSandbox = _Object_({
13101
13516
  snapshot: Optional(_Object_({
13102
13517
  setupCommands: Optional(_Array_(String$1({
13103
13518
  minLength: 1,
@@ -13113,7 +13528,7 @@ var DaemonProfileSandbox = _Object_({
13113
13528
  pattern: "^[0-9]+[KMGTP]?$"
13114
13529
  }))
13115
13530
  }, { additionalProperties: false })),
13116
- resumeCommands: Optional(_Array_(DaemonProfileSandboxResumeCommand, { maxItems: 30 })),
13531
+ resumeCommands: Optional(_Array_(RuntimeProfileSandboxResumeCommand, { maxItems: 30 })),
13117
13532
  vfs: Optional(_Object_({
13118
13533
  shadow: Optional(_Array_(String$1({
13119
13534
  minLength: 1,
@@ -13121,7 +13536,7 @@ var DaemonProfileSandbox = _Object_({
13121
13536
  }), { maxItems: 100 })),
13122
13537
  shadowMode: Optional(Union([Literal("deny"), Literal("tmpfs")]))
13123
13538
  }, { additionalProperties: false })),
13124
- env: Optional(Record(DaemonProfileEnvName, String$1({ maxLength: 4096 }))),
13539
+ env: Optional(Record(RuntimeProfileEnvName, String$1({ maxLength: 4096 }))),
13125
13540
  hostExec: Optional(_Object_({ autoApprove: Optional(Literal(false)) }, { additionalProperties: false })),
13126
13541
  resources: Optional(_Object_({
13127
13542
  memory: Optional(String$1({
@@ -13135,10 +13550,10 @@ var DaemonProfileSandbox = _Object_({
13135
13550
  }))
13136
13551
  }, { additionalProperties: false }))
13137
13552
  }, {
13138
- $id: "DaemonProfileSandbox",
13553
+ $id: "RuntimeProfileSandbox",
13139
13554
  additionalProperties: false
13140
13555
  });
13141
- var DaemonProfileContext = _Object_({
13556
+ var RuntimeProfileContext = _Object_({
13142
13557
  slug: String$1({
13143
13558
  minLength: 1,
13144
13559
  maxLength: 64,
@@ -13155,17 +13570,29 @@ var DaemonProfileContext = _Object_({
13155
13570
  maxLength: 65536
13156
13571
  })
13157
13572
  }, {
13158
- $id: "DaemonProfileContext",
13573
+ $id: "RuntimeProfileContext",
13159
13574
  additionalProperties: false
13160
13575
  });
13161
- var DaemonProfileRef = _Object_({ profileId: String$1({ format: "uuid" }) }, {
13162
- $id: "DaemonProfileRef",
13576
+ var RuntimeProfileRef = _Object_({ profileId: String$1({ format: "uuid" }) }, {
13577
+ $id: "RuntimeProfileRef",
13163
13578
  additionalProperties: false
13164
13579
  });
13580
+ var RuntimeProfileLeaseTtlSec = Integer({
13581
+ minimum: 1,
13582
+ maximum: 86400
13583
+ });
13584
+ var RuntimeProfileHeartbeatIntervalMs = Integer({
13585
+ minimum: 0,
13586
+ maximum: 36e5
13587
+ });
13588
+ var RuntimeProfileMaxBatchSize = Integer({
13589
+ minimum: 1,
13590
+ maximum: 1e3
13591
+ });
13165
13592
  _Object_({
13166
13593
  id: String$1({ format: "uuid" }),
13167
13594
  teamId: String$1({ format: "uuid" }),
13168
- name: DaemonProfileName,
13595
+ name: RuntimeProfileName,
13169
13596
  description: Union([String$1({ maxLength: 4096 }), Null()]),
13170
13597
  provider: String$1({
13171
13598
  minLength: 1,
@@ -13176,7 +13603,7 @@ _Object_({
13176
13603
  maxLength: 200
13177
13604
  }),
13178
13605
  runtimeKind: Literal("gondolin_pi"),
13179
- sandbox: DaemonProfileSandbox,
13606
+ sandbox: RuntimeProfileSandbox,
13180
13607
  sessionStorageMode: Literal("local"),
13181
13608
  workspaceStorageMode: Literal("local"),
13182
13609
  sessionTtlSec: Integer({
@@ -13187,9 +13614,12 @@ _Object_({
13187
13614
  minimum: 1,
13188
13615
  maximum: 86400
13189
13616
  }),
13190
- requiredEnv: _Array_(DaemonProfileEnvName, { maxItems: 100 }),
13191
- requiredTools: _Array_(DaemonProfileToolName, { maxItems: 100 }),
13192
- context: _Array_(DaemonProfileContext, { maxItems: 5 }),
13617
+ leaseTtlSec: RuntimeProfileLeaseTtlSec,
13618
+ heartbeatIntervalMs: RuntimeProfileHeartbeatIntervalMs,
13619
+ maxBatchSize: RuntimeProfileMaxBatchSize,
13620
+ requiredEnv: _Array_(RuntimeProfileEnvName, { maxItems: 100 }),
13621
+ requiredTools: _Array_(RuntimeProfileToolName, { maxItems: 100 }),
13622
+ context: _Array_(RuntimeProfileContext, { maxItems: 5 }),
13193
13623
  revision: Integer({ minimum: 1 }),
13194
13624
  definitionCid: String$1({
13195
13625
  minLength: 1,
@@ -13200,111 +13630,9 @@ _Object_({
13200
13630
  createdAt: String$1({ format: "date-time" }),
13201
13631
  updatedAt: String$1({ format: "date-time" })
13202
13632
  }, {
13203
- $id: "DaemonProfile",
13204
- additionalProperties: false
13205
- });
13206
- //#endregion
13207
- //#region ../tasks/src/rubric.ts
13208
- /**
13209
- * Rubric — structured acceptance criteria used by judgment tasks.
13210
- *
13211
- * Phase 1 (this PR): rubrics are embedded in task inputs. Their integrity
13212
- * is pinned via the task's `input_cid` (which covers the whole input,
13213
- * including the inline rubric). No separate storage, no CRUD.
13214
- *
13215
- * Phase 2 (see #881): rubrics become a first-class resource with their
13216
- * own signed rows and CIDv1 lookup. The schema below is designed to
13217
- * carry forward unchanged — only storage and addressing differ.
13218
- *
13219
- * Until Phase 2 lands, `rubricId` + `version` + `contentHash` are
13220
- * informational fields the author fills in; no uniqueness is enforced.
13221
- * `contentHash` is optional in Phase 1 because the *task*'s input_cid
13222
- * is the authoritative commitment.
13223
- */
13224
- /**
13225
- * How a judge must score a single criterion.
13226
- *
13227
- * - `llm_score`: 0..1 continuous, `rationale` required. Smooths failures
13228
- * into the gradient — use `llm_checklist` instead for properties where
13229
- * a single failure is a real failure (grounding, faithfulness).
13230
- * - `llm_checklist`: judge enumerates per-claim assertions with
13231
- * `{passed, evidence}`. The criterion's numeric `score` is derived:
13232
- * `1` iff every assertion passes, else `0`. Per-claim evidence is the
13233
- * dataset for cluster-analysis of failure modes. See #999.
13234
- * - `boolean`: 0 or 1, `rationale` optional.
13235
- * - `deterministic_signature_check`: judge runs a signature check;
13236
- * result is 0 or 1. No LLM discretion.
13237
- * - `deterministic_coverage_check`: every referenced source entry
13238
- * appears in the rendered output; 0 or 1.
13239
- */
13240
- var RubricScoringMode = Union([
13241
- Literal("llm_score"),
13242
- Literal("llm_checklist"),
13243
- Literal("boolean"),
13244
- Literal("deterministic_signature_check"),
13245
- Literal("deterministic_coverage_check")
13246
- ], { $id: "RubricScoringMode" });
13247
- /**
13248
- * One binary check produced by an `llm_checklist`-mode criterion.
13249
- *
13250
- * `evidence` is REQUIRED for both PASS and FAIL — agentskills.io grading
13251
- * principle: \"Don't give the benefit of the doubt.\" A PASS without
13252
- * concrete evidence (a quoted span, an entry id, a source location)
13253
- * cannot be audited. A FAIL without evidence cannot be clustered into
13254
- * structural fixes. The same shape is reused by `judge-eval-variant`
13255
- * (#943) so tooling, dashboards, and analysis stay uniform.
13256
- */
13257
- var AssertionResult = _Object_({
13258
- id: String$1({ minLength: 1 }),
13259
- text: String$1({ minLength: 1 }),
13260
- passed: Boolean$1(),
13261
- evidence: String$1({ minLength: 1 })
13262
- }, {
13263
- $id: "AssertionResult",
13264
- additionalProperties: false
13265
- });
13266
- var RubricCriterion = _Object_({
13267
- id: String$1({ minLength: 1 }),
13268
- description: String$1({ minLength: 1 }),
13269
- weight: Number$1({
13270
- minimum: 0,
13271
- maximum: 1
13272
- }),
13273
- scoring: RubricScoringMode
13274
- }, {
13275
- $id: "RubricCriterion",
13276
- additionalProperties: false
13277
- });
13278
- /**
13279
- * A complete rubric. Same shape used in Phase 1 (inline) and Phase 2
13280
- * (stored row `body`); only the addressing mechanism differs.
13281
- */
13282
- var Rubric = _Object_({
13283
- rubricId: String$1({ minLength: 1 }),
13284
- version: String$1({ minLength: 1 }),
13285
- preamble: Optional(String$1()),
13286
- criteria: _Array_(RubricCriterion, { minItems: 1 }),
13287
- scope: Optional(String$1()),
13288
- contentHash: Optional(String$1())
13289
- }, {
13290
- $id: "Rubric",
13633
+ $id: "RuntimeProfile",
13291
13634
  additionalProperties: false
13292
13635
  });
13293
- /**
13294
- * Verify rubric criteria weights sum to 1.0 within floating-point tolerance.
13295
- * The schema constrains each weight to [0,1] but can't express a cross-field
13296
- * sum constraint, so this is enforced programmatically by callers that
13297
- * accept rubrics (task input validators, server-side task creation).
13298
- *
13299
- * Returns null when valid; otherwise an error message suitable for surfacing
13300
- * to the caller. Tolerance is 1e-6 to accommodate JSON round-tripping of
13301
- * decimal fractions (e.g. 0.1 + 0.2 + 0.3 + 0.4 ≠ 1.0 exactly).
13302
- */
13303
- function validateRubricWeights(rubric) {
13304
- const sum = rubric.criteria.reduce((acc, c) => acc + c.weight, 0);
13305
- if (Math.abs(sum - 1) > 1e-6) return `Rubric weights must sum to 1.0 (got ${sum.toFixed(6)})`;
13306
- return null;
13307
- }
13308
13636
  //#endregion
13309
13637
  //#region ../tasks/src/success-criteria.ts
13310
13638
  /**
@@ -16997,6 +17325,7 @@ var TaskAttemptStatus = Union([
16997
17325
  Literal("completed"),
16998
17326
  Literal("failed"),
16999
17327
  Literal("cancelled"),
17328
+ Literal("aborted"),
17000
17329
  Literal("timed_out")
17001
17330
  ], { $id: "TaskAttemptStatus" });
17002
17331
  var ExecutorTrustLevel = Union([
@@ -17149,7 +17478,7 @@ _Object_({
17149
17478
  acceptedAttemptN: Union([Number$1(), Null()]),
17150
17479
  claimCondition: Union([Unsafe(Ref$1("ClaimCondition")), Null()]),
17151
17480
  requiredExecutorTrustLevel: ExecutorTrustLevel,
17152
- allowedProfiles: _Array_(DaemonProfileRef, { maxItems: 16 }),
17481
+ allowedProfiles: _Array_(RuntimeProfileRef, { maxItems: 16 }),
17153
17482
  status: TaskStatus,
17154
17483
  queuedAt: IsoTimestamp,
17155
17484
  completedAt: Union([IsoTimestamp, Null()]),
@@ -23129,6 +23458,20 @@ async function executePiTask(claimedTask, reporter, opts) {
23129
23458
  retryable: false
23130
23459
  }
23131
23460
  });
23461
+ const makeCancelledOutput = (message) => ({
23462
+ taskId: task.id,
23463
+ attemptN,
23464
+ status: "cancelled",
23465
+ output: null,
23466
+ outputCid: null,
23467
+ usage: finalUsage,
23468
+ durationMs: Date.now() - startTime,
23469
+ error: {
23470
+ code: "task_cancelled",
23471
+ message,
23472
+ retryable: false
23473
+ }
23474
+ });
23132
23475
  let onTurnEvent;
23133
23476
  if (opts.makeOnTurnEvent) try {
23134
23477
  onTurnEvent = opts.makeOnTurnEvent(claimedTask);
@@ -23191,10 +23534,15 @@ async function executePiTask(claimedTask, reporter, opts) {
23191
23534
  mountPath,
23192
23535
  workspaceMode: workspace.mode,
23193
23536
  extraAllowedHosts: opts.extraAllowedHosts,
23194
- sandboxConfig
23537
+ sandboxConfig,
23538
+ signal: reporter.cancelSignal
23195
23539
  });
23196
23540
  } catch (err) {
23197
23541
  const message = err instanceof Error ? err.message : String(err);
23542
+ if (reporter.cancelSignal.aborted) {
23543
+ await emitError("vm_resume", message, { cancelled: true });
23544
+ return makeCancelledOutput(reporter.cancelReason ?? "Task cancelled during VM resume.");
23545
+ }
23198
23546
  await emitError("vm_resume", message);
23199
23547
  return makeFailedOutput("vm_resume_failed", message);
23200
23548
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@themoltnet/pi-extension",
3
- "version": "0.23.0",
3
+ "version": "0.23.1",
4
4
  "type": "module",
5
5
  "description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
6
6
  "keywords": [
@@ -36,8 +36,8 @@
36
36
  "@earendil-works/gondolin": "^0.9.1",
37
37
  "@opentelemetry/api": "^1.9.0",
38
38
  "typebox": "^1.2.8",
39
- "@themoltnet/sdk": "0.107.0",
40
- "@themoltnet/agent-runtime": "0.23.0"
39
+ "@themoltnet/sdk": "0.108.0",
40
+ "@themoltnet/agent-runtime": "0.24.0"
41
41
  },
42
42
  "peerDependencies": {
43
43
  "@earendil-works/pi-coding-agent": ">=0.74.0",
@@ -61,8 +61,8 @@
61
61
  "vite": "^8.0.0",
62
62
  "vite-plugin-dts": "^4.5.4",
63
63
  "vitest": "^3.0.0",
64
- "@moltnet/tasks": "0.1.0",
65
- "@moltnet/crypto-service": "0.1.0"
64
+ "@moltnet/crypto-service": "0.1.0",
65
+ "@moltnet/tasks": "0.1.0"
66
66
  },
67
67
  "engines": {
68
68
  "node": ">=22"