@themoltnet/pi-extension 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -33,6 +33,8 @@ declare interface ClaimedTask {
33
33
  task: Task;
34
34
  /** Attempt number assigned by the source/queue. */
35
35
  attemptN: number;
36
+ /** W3C trace headers from the claim response for OTel context propagation. */
37
+ traceHeaders: Record<string, string>;
36
38
  }
37
39
 
38
40
  export declare function computePiJudgeRecipeCid(inputs: PiJudgeRecipeInputs): PiJudgeRecipeCid;
@@ -50,6 +52,8 @@ export declare function createGondolinWriteOps(vm: VM, localCwd: string): WriteO
50
52
  */
51
53
  export declare function createMoltNetTools(config: MoltNetToolsConfig): ToolDefinition<any, any>[];
52
54
 
55
+ export declare function createPiOtelExtension(options?: PiOtelOptions): (pi: ExtensionAPI) => void;
56
+
53
57
  /**
54
58
  * Factory that builds a pi-specific `executeTask` function suitable for
55
59
  * injection into `AgentRuntime`. The returned function caches the resolved
@@ -132,6 +136,7 @@ export default moltnetExtension;
132
136
  declare interface MoltNetToolsConfig {
133
137
  getAgent(): MoltNetAgent | null;
134
138
  getDiaryId(): string | null;
139
+ getTeamId(): string | null;
135
140
  getSessionErrors(): readonly TrackedError[];
136
141
  clearSessionErrors(): void;
137
142
  /** Host working directory for host-exec commands (worktree path or cwd). */
@@ -183,6 +188,17 @@ declare interface PiJudgeRecipeVersions {
183
188
  sdk: string | null;
184
189
  }
185
190
 
191
+ export declare interface PiOtelOptions {
192
+ /** Agent name for `gen_ai.agent.name` on the root span. */
193
+ agentName?: string;
194
+ /**
195
+ * Extra attributes merged onto every span. Use MoltNet-specific keys
196
+ * like `moltnet.task.id` — any `gen_ai.*` keys here are filtered out
197
+ * since the extension is authoritative for those.
198
+ */
199
+ spanAttributes?: Record<string, string | number | boolean>;
200
+ }
201
+
186
202
  export declare function resolvePiJudgeRecipeVersions(): PiJudgeRecipeVersions;
187
203
 
188
204
  /**
package/dist/index.js CHANGED
@@ -3,14 +3,15 @@ import { execFileSync } from "node:child_process";
3
3
  import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, statSync } from "node:fs";
4
4
  import path, { join } from "node:path";
5
5
  import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createWriteTool, createWriteToolDefinition, defineTool } from "@mariozechner/pi-coding-agent";
6
- import { createHash, randomUUID } from "node:crypto";
6
+ import { createHash } from "node:crypto";
7
7
  import crypto, { createHash as createHash$1 } from "crypto";
8
8
  import { readFile } from "node:fs/promises";
9
9
  import { homedir } from "node:os";
10
10
  import { Type, complete, getModel } from "@mariozechner/pi-ai";
11
- import { fileURLToPath } from "node:url";
12
11
  import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
13
12
  import { parseEnv } from "node:util";
13
+ import { fileURLToPath } from "node:url";
14
+ import { SpanStatusCode, context, trace } from "@opentelemetry/api";
14
15
  import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
15
16
  import { Value } from "@sinclair/typebox/value";
16
17
  //#region ../api-client/src/generated/core/bodySerializer.gen.ts
@@ -1415,80 +1416,6 @@ var updateRenderedPack = (options) => (options.client ?? client).patch({
1415
1416
  }
1416
1417
  });
1417
1418
  /**
1418
- * Trigger fidelity verification for an agent-rendered pack.
1419
- */
1420
- var verifyRenderedPack = (options) => (options.client ?? client).post({
1421
- security: [
1422
- {
1423
- scheme: "bearer",
1424
- type: "http"
1425
- },
1426
- {
1427
- name: "X-Moltnet-Session-Token",
1428
- type: "apiKey"
1429
- },
1430
- {
1431
- in: "cookie",
1432
- name: "ory_kratos_session",
1433
- type: "apiKey"
1434
- }
1435
- ],
1436
- url: "/rendered-packs/{id}/verify",
1437
- ...options,
1438
- headers: {
1439
- "Content-Type": "application/json",
1440
- ...options.headers
1441
- }
1442
- });
1443
- /**
1444
- * Judge claims verification payload (source entries, rendered content, and rubric).
1445
- */
1446
- var claimVerification = (options) => (options.client ?? client).post({
1447
- security: [
1448
- {
1449
- scheme: "bearer",
1450
- type: "http"
1451
- },
1452
- {
1453
- name: "X-Moltnet-Session-Token",
1454
- type: "apiKey"
1455
- },
1456
- {
1457
- in: "cookie",
1458
- name: "ory_kratos_session",
1459
- type: "apiKey"
1460
- }
1461
- ],
1462
- url: "/rendered-packs/{id}/verify/claim",
1463
- ...options
1464
- });
1465
- /**
1466
- * Judge submits fidelity scores and transcript.
1467
- */
1468
- var submitVerification = (options) => (options.client ?? client).post({
1469
- security: [
1470
- {
1471
- scheme: "bearer",
1472
- type: "http"
1473
- },
1474
- {
1475
- name: "X-Moltnet-Session-Token",
1476
- type: "apiKey"
1477
- },
1478
- {
1479
- in: "cookie",
1480
- name: "ory_kratos_session",
1481
- type: "apiKey"
1482
- }
1483
- ],
1484
- url: "/rendered-packs/{id}/verify/submit",
1485
- ...options,
1486
- headers: {
1487
- "Content-Type": "application/json",
1488
- ...options.headers
1489
- }
1490
- });
1491
- /**
1492
1419
  * Get an agent's public profile by key fingerprint (A1B2-C3D4-E5F6-G7H8).
1493
1420
  */
1494
1421
  var getAgentProfile = (options) => (options.client ?? client).get({
@@ -6445,29 +6372,6 @@ function createPacksNamespace(context) {
6445
6372
  body
6446
6373
  }));
6447
6374
  },
6448
- async verifyRendered(id, body) {
6449
- return unwrapResult(await verifyRenderedPack({
6450
- client,
6451
- auth,
6452
- path: { id },
6453
- body
6454
- }));
6455
- },
6456
- async claimVerification(id) {
6457
- return unwrapResult(await claimVerification({
6458
- client,
6459
- auth,
6460
- path: { id }
6461
- }));
6462
- },
6463
- async submitVerification(id, body) {
6464
- return unwrapResult(await submitVerification({
6465
- client,
6466
- auth,
6467
- path: { id },
6468
- body
6469
- }));
6470
- },
6471
6375
  async create(diaryId, body) {
6472
6376
  return unwrapResult(await createDiaryCustomPack({
6473
6377
  client,
@@ -6618,12 +6522,24 @@ function createTasksNamespace(context) {
6618
6522
  }));
6619
6523
  },
6620
6524
  async claim(id, body) {
6621
- return unwrapResult(await claimTask({
6525
+ const result = await claimTask({
6622
6526
  client,
6623
6527
  auth,
6624
6528
  path: { id },
6625
6529
  body
6626
- }));
6530
+ });
6531
+ const data = unwrapResult(result);
6532
+ const traceHeaders = {};
6533
+ const traceparent = result.response.headers.get("traceparent");
6534
+ if (traceparent) {
6535
+ traceHeaders["traceparent"] = traceparent;
6536
+ const tracestate = result.response.headers.get("tracestate");
6537
+ if (tracestate) traceHeaders["tracestate"] = tracestate;
6538
+ }
6539
+ return {
6540
+ ...data,
6541
+ traceHeaders
6542
+ };
6627
6543
  },
6628
6544
  async heartbeat(id, n, body) {
6629
6545
  return unwrapResult(await taskHeartbeat({
@@ -7155,21 +7071,6 @@ var registerSandboxCommand = (pi, state) => {
7155
7071
  };
7156
7072
  //#endregion
7157
7073
  //#region src/moltnet/judge/assets.ts
7158
- /**
7159
- * Judge assets — single source of truth.
7160
- *
7161
- * `DEFAULT_RUBRIC` and `JUDGE_SYSTEM_PROMPT` below ARE the assets. There are
7162
- * no companion `.md` files; tsc does not copy non-TS files into `dist/`, and
7163
- * keeping a parallel markdown copy invited drift between source-of-truth
7164
- * versions, which is what previously happened.
7165
- *
7166
- * The asset path constants are opaque identifiers used in the judge-recipe
7167
- * CID manifest so verifiers can trace which asset set a given Pi extension
7168
- * version emitted. They are NOT filesystem paths and are never read.
7169
- * Bump the version suffix when you change the corresponding constant.
7170
- */
7171
- var RUBRIC_ASSET_PATH = "pi-extension/judge/rubric@v1";
7172
- var JUDGE_PROMPT_ASSET_PATH = "pi-extension/judge/system-prompt@v1";
7173
7074
  /** Default fidelity rubric — kept verbatim from the Go judge. */
7174
7075
  var DEFAULT_RUBRIC = `Evaluate the rendered content against the source entries on three axes:
7175
7076
 
@@ -7322,135 +7223,6 @@ function buildSourceEntriesMarkdown(entries) {
7322
7223
  return parts.join("\n");
7323
7224
  }
7324
7225
  //#endregion
7325
- //#region src/moltnet/judge-recipe-cid.ts
7326
- var require = createRequire(import.meta.url);
7327
- var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
7328
- var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
7329
- var SDK_PACKAGE_NAME = "@themoltnet/sdk";
7330
- var CID_VERSION = 1;
7331
- var RAW_CODEC = 85;
7332
- var SHA2_256_CODE = 18;
7333
- var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
7334
- function findSelfPackageDir() {
7335
- const start = path.dirname(fileURLToPath(import.meta.url));
7336
- let dir = start;
7337
- while (true) {
7338
- const candidate = path.join(dir, "package.json");
7339
- if (existsSync(candidate)) {
7340
- if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
7341
- }
7342
- const parent = path.dirname(dir);
7343
- if (parent === dir) return start;
7344
- dir = parent;
7345
- }
7346
- }
7347
- var PACKAGE_DIR = findSelfPackageDir();
7348
- function sha256Hex(value) {
7349
- return createHash("sha256").update(value, "utf8").digest("hex");
7350
- }
7351
- function encodeVarint(value) {
7352
- const bytes = [];
7353
- let current = value >>> 0;
7354
- while (current >= 128) {
7355
- bytes.push(current & 127 | 128);
7356
- current >>>= 7;
7357
- }
7358
- bytes.push(current);
7359
- return bytes;
7360
- }
7361
- function base32Lower(bytes) {
7362
- let bits = 0;
7363
- let value = 0;
7364
- let output = "";
7365
- for (const byte of bytes) {
7366
- value = value << 8 | byte;
7367
- bits += 8;
7368
- while (bits >= 5) {
7369
- output += BASE32_ALPHABET[value >>> bits - 5 & 31];
7370
- bits -= 5;
7371
- }
7372
- }
7373
- if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
7374
- return `b${output}`;
7375
- }
7376
- function stableStringify(value) {
7377
- if (value === null || typeof value !== "object") return JSON.stringify(value);
7378
- if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
7379
- return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
7380
- }
7381
- function readPackageVersion(pkgPath, expectedName) {
7382
- if (!existsSync(pkgPath)) return null;
7383
- const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
7384
- if (expectedName && parsed.name !== expectedName) return null;
7385
- return typeof parsed.version === "string" ? parsed.version : null;
7386
- }
7387
- function resolveInstalledPackageVersion(packageName) {
7388
- const candidates = [];
7389
- try {
7390
- candidates.push(path.dirname(require.resolve(packageName)));
7391
- } catch {}
7392
- let dir = PACKAGE_DIR;
7393
- while (true) {
7394
- candidates.push(path.join(dir, "node_modules", packageName));
7395
- const parent = path.dirname(dir);
7396
- if (parent === dir) break;
7397
- dir = parent;
7398
- }
7399
- for (const start of candidates) {
7400
- let current = start;
7401
- while (true) {
7402
- const version = readPackageVersion(path.join(current, "package.json"), packageName);
7403
- if (version) return version;
7404
- const parent = path.dirname(current);
7405
- if (parent === current) break;
7406
- current = parent;
7407
- }
7408
- }
7409
- return null;
7410
- }
7411
- function resolvePiJudgeRecipeVersions() {
7412
- return {
7413
- pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
7414
- piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
7415
- sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
7416
- };
7417
- }
7418
- function buildPiJudgeRecipeManifest(inputs) {
7419
- return {
7420
- kind: "pi-judge-recipe/v1",
7421
- versions: {
7422
- ...resolvePiJudgeRecipeVersions(),
7423
- ...inputs.overrides
7424
- },
7425
- assets: {
7426
- promptAsset: inputs.promptAsset ?? null,
7427
- rubricAsset: inputs.rubricAsset ?? null,
7428
- skillSourcePath: inputs.skillSourcePath ?? null
7429
- },
7430
- hashes: {
7431
- judgePromptSha256: sha256Hex(inputs.judgePrompt),
7432
- rubricSha256: sha256Hex(inputs.rubric),
7433
- skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
7434
- implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
7435
- }
7436
- };
7437
- }
7438
- function computePiJudgeRecipeCid(inputs) {
7439
- const manifest = buildPiJudgeRecipeManifest(inputs);
7440
- const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
7441
- const digestBytes = createHash("sha256").update(manifestBytes).digest();
7442
- return {
7443
- cid: base32Lower(Uint8Array.from([
7444
- ...encodeVarint(CID_VERSION),
7445
- ...encodeVarint(RAW_CODEC),
7446
- ...encodeVarint(SHA2_256_CODE),
7447
- ...encodeVarint(digestBytes.length),
7448
- ...digestBytes
7449
- ])),
7450
- manifest
7451
- };
7452
- }
7453
- //#endregion
7454
7226
  //#region src/moltnet/render-phase6.ts
7455
7227
  function slugToTitle(value) {
7456
7228
  return value.split(/[:/_-]+/).filter(Boolean).map((part) => part[0]?.toUpperCase() + part.slice(1)).join(" ");
@@ -7592,7 +7364,8 @@ function ensureConnected(config) {
7592
7364
  if (!agent || !diaryId) throw new Error("MoltNet not connected");
7593
7365
  return {
7594
7366
  agent,
7595
- diaryId
7367
+ diaryId,
7368
+ teamId: config.getTeamId() ?? ""
7596
7369
  };
7597
7370
  }
7598
7371
  /**
@@ -7758,24 +7531,34 @@ function createMoltNetTools(config) {
7758
7531
  };
7759
7532
  }
7760
7533
  });
7761
- const verifyRenderedPack = defineTool({
7762
- name: "moltnet_rendered_pack_verify",
7763
- label: "Verify MoltNet Rendered Pack",
7764
- description: "Create a verification workflow for a rendered pack and return the verification ID and nonce.",
7534
+ const createJudgePackTask = defineTool({
7535
+ name: "moltnet_judge_pack_task_create",
7536
+ label: "Create Judge Pack Task",
7537
+ description: "Create a judge_pack task for a rendered pack. Returns a taskId that moltnet_rendered_pack_judge can claim and execute. The rubric is required — pass the structured rubric JSON from @moltnet/tasks Rubric schema.",
7765
7538
  parameters: Type.Object({
7766
- renderedPackId: Type.String({ description: "Rendered pack ID" }),
7767
- nonce: Type.Optional(Type.String({ description: "Caller-supplied idempotency nonce. Generated automatically if omitted." }))
7539
+ renderedPackId: Type.String({ description: "Rendered pack ID to judge" }),
7540
+ sourcePackId: Type.String({ description: "Source pack ID. Fetch it from the rendered pack if unknown." }),
7541
+ rubric: Type.Any({ description: "Structured rubric object (Rubric schema from @moltnet/tasks). Must have rubricId, version, criteria[]." }),
7542
+ diaryId: Type.Optional(Type.String({ description: "Diary ID to impose the task on. Defaults to the connected diary." }))
7768
7543
  }),
7769
7544
  async execute(_id, params) {
7770
- const { agent } = ensureConnected(config);
7771
- const nonce = params.nonce ?? randomUUID();
7772
- const verification = await agent.packs.verifyRendered(params.renderedPackId, { nonce });
7545
+ const { agent, diaryId: connectedDiaryId, teamId: connectedTeamId } = ensureConnected(config);
7546
+ const task = await agent.tasks.create({
7547
+ taskType: "judge_pack",
7548
+ input: {
7549
+ renderedPackId: params.renderedPackId,
7550
+ sourcePackId: params.sourcePackId,
7551
+ rubric: params.rubric
7552
+ },
7553
+ diaryId: params.diaryId ?? connectedDiaryId,
7554
+ teamId: connectedTeamId
7555
+ });
7773
7556
  return {
7774
7557
  content: [{
7775
7558
  type: "text",
7776
7559
  text: JSON.stringify({
7777
- ...verification,
7778
- nonce
7560
+ taskId: task.id,
7561
+ task
7779
7562
  }, null, 2)
7780
7563
  }],
7781
7564
  details: {}
@@ -7785,87 +7568,79 @@ function createMoltNetTools(config) {
7785
7568
  const judgeRenderedPack = defineTool({
7786
7569
  name: "moltnet_rendered_pack_judge",
7787
7570
  label: "Judge MoltNet Rendered Pack",
7788
- description: "Run the fidelity judge against a rendered pack. Local mode (no nonce): fetch the rendered pack + its source pack with entries, judge locally, return scores. Proctored mode (nonce): claim the verification payload from the API, judge, and submit scores with a Pi judge-recipe CID.",
7571
+ description: "Claim a judge_pack task, run the fidelity judge locally, complete the task with structured scores, and set verifiedTaskId on the rendered pack. Create the task first with moltnet_judge_pack_task_create.",
7789
7572
  parameters: Type.Object({
7790
- renderedPackId: Type.String({ description: "Rendered pack ID" }),
7791
- nonce: Type.Optional(Type.String({ description: "Verification nonce from moltnet_rendered_pack_verify. If set, runs proctored mode and submits scores. If omitted, runs local mode and does not submit." })),
7792
- rubric: Type.Optional(Type.String({ description: "Custom rubric override (local mode only). Defaults to the built-in rubric when omitted." }))
7573
+ taskId: Type.String({ description: "judge_pack task ID from moltnet_judge_pack_task_create" }),
7574
+ rubricOverride: Type.Optional(Type.String({ description: "Freeform rubric string override for the LLM judge prompt. When omitted the task rubric preamble (or built-in default) is used." }))
7793
7575
  }),
7794
7576
  async execute(_id, params, _signal, _onUpdate, ctx) {
7795
7577
  const { agent } = ensureConnected(config);
7796
7578
  const model = ctx?.model;
7797
7579
  if (!model) throw new Error("No active model in pi session — cannot run the fidelity judge.");
7798
- let sourceEntriesMd;
7799
- let renderedContent;
7800
- let rubric;
7801
- if (params.nonce) {
7802
- if (params.rubric) throw new Error("`rubric` is only supported in local mode (omit `nonce`).");
7803
- const claim = await agent.packs.claimVerification(params.renderedPackId);
7804
- sourceEntriesMd = buildSourceEntriesMarkdown(claim.sourceEntries);
7805
- renderedContent = claim.renderedContent;
7806
- rubric = claim.rubric?.trim() ? claim.rubric : DEFAULT_RUBRIC;
7807
- } else {
7808
- const rendered = await agent.packs.getRendered(params.renderedPackId);
7809
- if (!rendered.content?.trim()) throw new Error(`rendered pack ${params.renderedPackId} has empty content`);
7810
- const sourcePack = await agent.packs.get(rendered.sourcePackId, { expand: "entries" });
7811
- if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${rendered.sourcePackId} has no entries`);
7812
- sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
7813
- title: entry.entry.title,
7814
- content: entry.entry.content
7815
- })));
7816
- renderedContent = rendered.content;
7817
- rubric = params.rubric?.trim() ? params.rubric : DEFAULT_RUBRIC;
7818
- }
7580
+ const claimed = await agent.tasks.claim(params.taskId);
7581
+ const input = claimed.task.input;
7582
+ const rendered = await agent.packs.getRendered(input.renderedPackId);
7583
+ if (!rendered.content?.trim()) throw new Error(`rendered pack ${input.renderedPackId} has empty content`);
7584
+ const sourcePack = await agent.packs.get(input.sourcePackId, { expand: "entries" });
7585
+ if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${input.sourcePackId} has no entries`);
7586
+ const sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
7587
+ title: entry.entry.title,
7588
+ content: entry.entry.content
7589
+ })));
7590
+ const rubric = params.rubricOverride?.trim() || input.rubric?.preamble?.trim() || DEFAULT_RUBRIC;
7819
7591
  let scores;
7820
7592
  try {
7821
7593
  scores = await runFidelityJudge({
7822
7594
  model,
7823
7595
  sourceEntries: sourceEntriesMd,
7824
- renderedContent,
7596
+ renderedContent: rendered.content,
7825
7597
  rubric
7826
7598
  });
7827
7599
  } catch (err) {
7600
+ await agent.tasks.fail(params.taskId, claimed.attempt.attemptN, { error: {
7601
+ code: "judge_failed",
7602
+ message: err.message ?? String(err)
7603
+ } }).catch(() => {});
7828
7604
  throw new Error(`judge failed: ${err.message ?? String(err)}`);
7829
7605
  }
7830
- if (!params.nonce) return {
7831
- content: [{
7832
- type: "text",
7833
- text: JSON.stringify({
7834
- mode: "local",
7835
- renderedPackId: params.renderedPackId,
7836
- scores
7837
- }, null, 2)
7838
- }],
7839
- details: {}
7606
+ const modelId = model.provider && model.id ? `${model.provider}:${model.id}` : model.id ?? "pi:unknown";
7607
+ const output = {
7608
+ scores: [
7609
+ {
7610
+ criterionId: "coverage",
7611
+ score: scores.coverage
7612
+ },
7613
+ {
7614
+ criterionId: "grounding",
7615
+ score: scores.grounding
7616
+ },
7617
+ {
7618
+ criterionId: "faithfulness",
7619
+ score: scores.faithfulness
7620
+ }
7621
+ ],
7622
+ composite: scores.composite,
7623
+ verdict: scores.reasoning,
7624
+ judgeModel: modelId
7840
7625
  };
7841
- const recipe = computePiJudgeRecipeCid({
7842
- judgePrompt: JUDGE_SYSTEM_PROMPT,
7843
- rubric,
7844
- promptAsset: JUDGE_PROMPT_ASSET_PATH,
7845
- rubricAsset: RUBRIC_ASSET_PATH
7846
- });
7847
- const providerName = model.provider ?? "pi";
7848
- const modelId = model.id ?? "unknown";
7849
- const submit = await agent.packs.submitVerification(params.renderedPackId, {
7850
- nonce: params.nonce,
7851
- coverage: scores.coverage,
7852
- grounding: scores.grounding,
7853
- faithfulness: scores.faithfulness,
7854
- transcript: scores.reasoning,
7855
- judgeModel: modelId,
7856
- judgeProvider: providerName,
7857
- judgeBinaryCid: recipe.cid
7626
+ const outputCid = await computeJsonCid(output);
7627
+ const completed = await agent.tasks.complete(params.taskId, claimed.attempt.attemptN, {
7628
+ output,
7629
+ outputCid,
7630
+ usage: {
7631
+ inputTokens: 0,
7632
+ outputTokens: 0
7633
+ }
7858
7634
  });
7635
+ await agent.packs.updateRendered(input.renderedPackId, { verifiedTaskId: params.taskId });
7859
7636
  return {
7860
7637
  content: [{
7861
7638
  type: "text",
7862
7639
  text: JSON.stringify({
7863
- mode: "proctored",
7864
- renderedPackId: params.renderedPackId,
7640
+ renderedPackId: input.renderedPackId,
7641
+ taskId: params.taskId,
7865
7642
  scores,
7866
- submission: submit,
7867
- judgeRecipeCid: recipe.cid,
7868
- judgeRecipeManifest: recipe.manifest
7643
+ task: completed
7869
7644
  }, null, 2)
7870
7645
  }],
7871
7646
  details: {}
@@ -8081,7 +7856,7 @@ function createMoltNetTools(config) {
8081
7856
  renderPack,
8082
7857
  listRenderedPacks,
8083
7858
  getRenderedPack,
8084
- verifyRenderedPack,
7859
+ createJudgePackTask,
8085
7860
  judgeRenderedPack,
8086
7861
  diaryTags,
8087
7862
  listEntries,
@@ -8697,6 +8472,272 @@ function ensureRelativeWorktreePaths(gitconfig) {
8697
8472
  return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
8698
8473
  }
8699
8474
  //#endregion
8475
+ //#region src/moltnet/judge-recipe-cid.ts
8476
+ var require = createRequire(import.meta.url);
8477
+ var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
8478
+ var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
8479
+ var SDK_PACKAGE_NAME = "@themoltnet/sdk";
8480
+ var CID_VERSION = 1;
8481
+ var RAW_CODEC = 85;
8482
+ var SHA2_256_CODE = 18;
8483
+ var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
8484
+ function findSelfPackageDir() {
8485
+ const start = path.dirname(fileURLToPath(import.meta.url));
8486
+ let dir = start;
8487
+ while (true) {
8488
+ const candidate = path.join(dir, "package.json");
8489
+ if (existsSync(candidate)) {
8490
+ if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
8491
+ }
8492
+ const parent = path.dirname(dir);
8493
+ if (parent === dir) return start;
8494
+ dir = parent;
8495
+ }
8496
+ }
8497
+ var PACKAGE_DIR = findSelfPackageDir();
8498
+ function sha256Hex(value) {
8499
+ return createHash("sha256").update(value, "utf8").digest("hex");
8500
+ }
8501
+ function encodeVarint(value) {
8502
+ const bytes = [];
8503
+ let current = value >>> 0;
8504
+ while (current >= 128) {
8505
+ bytes.push(current & 127 | 128);
8506
+ current >>>= 7;
8507
+ }
8508
+ bytes.push(current);
8509
+ return bytes;
8510
+ }
8511
+ function base32Lower(bytes) {
8512
+ let bits = 0;
8513
+ let value = 0;
8514
+ let output = "";
8515
+ for (const byte of bytes) {
8516
+ value = value << 8 | byte;
8517
+ bits += 8;
8518
+ while (bits >= 5) {
8519
+ output += BASE32_ALPHABET[value >>> bits - 5 & 31];
8520
+ bits -= 5;
8521
+ }
8522
+ }
8523
+ if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
8524
+ return `b${output}`;
8525
+ }
8526
+ function stableStringify(value) {
8527
+ if (value === null || typeof value !== "object") return JSON.stringify(value);
8528
+ if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
8529
+ return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
8530
+ }
8531
+ function readPackageVersion(pkgPath, expectedName) {
8532
+ if (!existsSync(pkgPath)) return null;
8533
+ const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
8534
+ if (expectedName && parsed.name !== expectedName) return null;
8535
+ return typeof parsed.version === "string" ? parsed.version : null;
8536
+ }
8537
+ function resolveInstalledPackageVersion(packageName) {
8538
+ const candidates = [];
8539
+ try {
8540
+ candidates.push(path.dirname(require.resolve(packageName)));
8541
+ } catch {}
8542
+ let dir = PACKAGE_DIR;
8543
+ while (true) {
8544
+ candidates.push(path.join(dir, "node_modules", packageName));
8545
+ const parent = path.dirname(dir);
8546
+ if (parent === dir) break;
8547
+ dir = parent;
8548
+ }
8549
+ for (const start of candidates) {
8550
+ let current = start;
8551
+ while (true) {
8552
+ const version = readPackageVersion(path.join(current, "package.json"), packageName);
8553
+ if (version) return version;
8554
+ const parent = path.dirname(current);
8555
+ if (parent === current) break;
8556
+ current = parent;
8557
+ }
8558
+ }
8559
+ return null;
8560
+ }
8561
+ function resolvePiJudgeRecipeVersions() {
8562
+ return {
8563
+ pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
8564
+ piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
8565
+ sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
8566
+ };
8567
+ }
8568
+ function buildPiJudgeRecipeManifest(inputs) {
8569
+ return {
8570
+ kind: "pi-judge-recipe/v1",
8571
+ versions: {
8572
+ ...resolvePiJudgeRecipeVersions(),
8573
+ ...inputs.overrides
8574
+ },
8575
+ assets: {
8576
+ promptAsset: inputs.promptAsset ?? null,
8577
+ rubricAsset: inputs.rubricAsset ?? null,
8578
+ skillSourcePath: inputs.skillSourcePath ?? null
8579
+ },
8580
+ hashes: {
8581
+ judgePromptSha256: sha256Hex(inputs.judgePrompt),
8582
+ rubricSha256: sha256Hex(inputs.rubric),
8583
+ skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
8584
+ implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
8585
+ }
8586
+ };
8587
+ }
8588
+ function computePiJudgeRecipeCid(inputs) {
8589
+ const manifest = buildPiJudgeRecipeManifest(inputs);
8590
+ const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
8591
+ const digestBytes = createHash("sha256").update(manifestBytes).digest();
8592
+ return {
8593
+ cid: base32Lower(Uint8Array.from([
8594
+ ...encodeVarint(CID_VERSION),
8595
+ ...encodeVarint(RAW_CODEC),
8596
+ ...encodeVarint(SHA2_256_CODE),
8597
+ ...encodeVarint(digestBytes.length),
8598
+ ...digestBytes
8599
+ ])),
8600
+ manifest
8601
+ };
8602
+ }
8603
+ //#endregion
8604
+ //#region src/otel/index.ts
8605
+ var TRACER_NAME = "@themoltnet/pi-extension/otel";
8606
+ function stripReservedAttrs(attrs) {
8607
+ const out = {};
8608
+ for (const [k, v] of Object.entries(attrs)) {
8609
+ if (k.startsWith("gen_ai.")) continue;
8610
+ out[k] = v;
8611
+ }
8612
+ return out;
8613
+ }
8614
+ function createPiOtelExtension(options = {}) {
8615
+ return function piOtelExtension(pi) {
8616
+ const tracer = trace.getTracer(TRACER_NAME);
8617
+ const extraAttrs = stripReservedAttrs(options.spanAttributes ?? {});
8618
+ let sessionSpan;
8619
+ let sessionCtx = context.active();
8620
+ let turnSpan;
8621
+ let turnCtx = context.active();
8622
+ let currentModel;
8623
+ const toolSpans = /* @__PURE__ */ new Map();
8624
+ function drainToolSpans(reason) {
8625
+ for (const [, entry] of toolSpans) {
8626
+ entry.span.setStatus({
8627
+ code: SpanStatusCode.ERROR,
8628
+ message: reason
8629
+ });
8630
+ entry.span.end();
8631
+ }
8632
+ toolSpans.clear();
8633
+ }
8634
+ function endTurnSpan() {
8635
+ if (!turnSpan) return;
8636
+ drainToolSpans("tool span not closed before turn end");
8637
+ turnSpan.end();
8638
+ turnSpan = void 0;
8639
+ turnCtx = sessionCtx;
8640
+ }
8641
+ function endSessionSpan() {
8642
+ drainToolSpans("tool span not closed before session shutdown");
8643
+ endTurnSpan();
8644
+ if (sessionSpan) {
8645
+ sessionSpan.setStatus({ code: SpanStatusCode.OK });
8646
+ sessionSpan.end();
8647
+ sessionSpan = void 0;
8648
+ sessionCtx = context.active();
8649
+ }
8650
+ currentModel = void 0;
8651
+ }
8652
+ pi.on("session_start", (event, ctx) => {
8653
+ endSessionSpan();
8654
+ const agentName = options.agentName ?? "pi";
8655
+ sessionSpan = tracer.startSpan(`invoke_agent ${agentName}`, { attributes: {
8656
+ ...extraAttrs,
8657
+ "gen_ai.operation.name": "invoke_agent",
8658
+ "gen_ai.agent.name": agentName,
8659
+ "session.reason": event.reason,
8660
+ "session.cwd": ctx.cwd
8661
+ } }, context.active());
8662
+ sessionCtx = trace.setSpan(context.active(), sessionSpan);
8663
+ turnCtx = sessionCtx;
8664
+ });
8665
+ pi.on("session_shutdown", () => {
8666
+ endSessionSpan();
8667
+ });
8668
+ pi.on("model_select", (event) => {
8669
+ currentModel = {
8670
+ provider: event.model.provider,
8671
+ id: event.model.id
8672
+ };
8673
+ if (sessionSpan) {
8674
+ sessionSpan.setAttribute("gen_ai.request.model", event.model.id);
8675
+ sessionSpan.setAttribute("gen_ai.provider.name", event.model.provider);
8676
+ }
8677
+ });
8678
+ pi.on("turn_start", (event) => {
8679
+ if (!sessionSpan) return;
8680
+ const modelLabel = currentModel?.id ?? "unknown";
8681
+ turnSpan = tracer.startSpan(`chat ${modelLabel}`, { attributes: {
8682
+ ...extraAttrs,
8683
+ "gen_ai.operation.name": "chat",
8684
+ "gen_ai.request.model": currentModel?.id ?? "unknown",
8685
+ "gen_ai.provider.name": currentModel?.provider ?? "unknown",
8686
+ "turn.index": event.turnIndex
8687
+ } }, sessionCtx);
8688
+ turnCtx = trace.setSpan(sessionCtx, turnSpan);
8689
+ });
8690
+ pi.on("turn_end", (event) => {
8691
+ if (!turnSpan) return;
8692
+ const usage = extractUsage(event.message);
8693
+ if (usage) {
8694
+ turnSpan.setAttribute("gen_ai.usage.input_tokens", usage.input);
8695
+ turnSpan.setAttribute("gen_ai.usage.output_tokens", usage.output);
8696
+ }
8697
+ turnSpan.setAttribute("turn.tool_results", event.toolResults?.length ?? 0);
8698
+ turnSpan.setStatus({ code: SpanStatusCode.OK });
8699
+ endTurnSpan();
8700
+ });
8701
+ pi.on("tool_execution_start", (event) => {
8702
+ const parentCtx = turnSpan ? turnCtx : sessionCtx;
8703
+ const span = tracer.startSpan(`execute_tool ${event.toolName}`, { attributes: {
8704
+ ...extraAttrs,
8705
+ "gen_ai.operation.name": "execute_tool",
8706
+ "gen_ai.tool.name": event.toolName,
8707
+ "gen_ai.tool.call.id": event.toolCallId
8708
+ } }, parentCtx);
8709
+ toolSpans.set(event.toolCallId, {
8710
+ span,
8711
+ startedAt: Date.now()
8712
+ });
8713
+ });
8714
+ pi.on("tool_execution_end", (event) => {
8715
+ const entry = toolSpans.get(event.toolCallId);
8716
+ if (!entry) return;
8717
+ const durationMs = Date.now() - entry.startedAt;
8718
+ entry.span.setAttribute("tool.duration_ms", durationMs);
8719
+ if (event.isError) {
8720
+ entry.span.setAttribute("error.type", "tool_execution_error");
8721
+ entry.span.setStatus({
8722
+ code: SpanStatusCode.ERROR,
8723
+ message: "tool execution failed"
8724
+ });
8725
+ } else entry.span.setStatus({ code: SpanStatusCode.OK });
8726
+ entry.span.end();
8727
+ toolSpans.delete(event.toolCallId);
8728
+ });
8729
+ };
8730
+ }
8731
+ function extractUsage(message) {
8732
+ if (!message || typeof message !== "object" || !("usage" in message) || !("role" in message)) return null;
8733
+ const msg = message;
8734
+ if (msg.role !== "assistant" || !msg.usage) return null;
8735
+ return {
8736
+ input: msg.usage.input ?? 0,
8737
+ output: msg.usage.output ?? 0
8738
+ };
8739
+ }
8740
+ //#endregion
8700
8741
  //#region ../tasks/src/formats.ts
8701
8742
  /**
8702
8743
  * Register TypeBox string formats used across Task / TaskOutput / task-type
@@ -10001,6 +10042,7 @@ async function executePiTask(claimedTask, reporter, opts) {
10001
10042
  sandboxConfig: opts.sandboxConfig
10002
10043
  });
10003
10044
  const diaryId = task.diaryId ?? "";
10045
+ const taskTeamId = task.teamId ?? "";
10004
10046
  let reporterOpen = false;
10005
10047
  let session = null;
10006
10048
  const finalUsage = emptyUsage(opts.provider, opts.model);
@@ -10063,6 +10105,7 @@ async function executePiTask(claimedTask, reporter, opts) {
10063
10105
  const moltnetTools = createMoltNetTools({
10064
10106
  getAgent: () => moltnetAgent,
10065
10107
  getDiaryId: () => diaryId,
10108
+ getTeamId: () => taskTeamId,
10066
10109
  getSessionErrors: () => [],
10067
10110
  clearSessionErrors: () => {},
10068
10111
  getHostCwd: () => mountPath,
@@ -10072,7 +10115,15 @@ async function executePiTask(claimedTask, reporter, opts) {
10072
10115
  const modelHandle = getModel(opts.provider, opts.model);
10073
10116
  const resourceLoader = new DefaultResourceLoader({
10074
10117
  cwd: mountPath,
10075
- agentDir: piAuthDir
10118
+ agentDir: piAuthDir,
10119
+ extensionFactories: [createPiOtelExtension({
10120
+ agentName: opts.agentName,
10121
+ spanAttributes: {
10122
+ "moltnet.task.id": task.id,
10123
+ "moltnet.task.attempt": attemptN,
10124
+ "moltnet.task.type": task.taskType
10125
+ }
10126
+ })]
10076
10127
  });
10077
10128
  await resourceLoader.reload();
10078
10129
  session = (await createAgentSession({
@@ -10189,10 +10240,16 @@ async function executePiTask(claimedTask, reporter, opts) {
10189
10240
  if (reporterOpen) {
10190
10241
  try {
10191
10242
  await reporter.finalize(finalUsage);
10192
- } catch {}
10243
+ } catch (err) {
10244
+ const detail = err instanceof Error ? err.message : String(err);
10245
+ console.error(`executePiTask: reporter.finalize() failed for task ${task.id} attempt ${attemptN}: ${detail}`);
10246
+ }
10193
10247
  try {
10194
10248
  await reporter.close();
10195
- } catch {}
10249
+ } catch (err) {
10250
+ const detail = err instanceof Error ? err.message : String(err);
10251
+ console.error(`executePiTask: reporter.close() failed for task ${task.id} attempt ${attemptN}: ${detail}`);
10252
+ }
10196
10253
  }
10197
10254
  await managed.vm.close();
10198
10255
  }
@@ -10281,6 +10338,7 @@ function moltnetExtension(pi) {
10281
10338
  let worktreePath = null;
10282
10339
  let moltnetAgent = null;
10283
10340
  let diaryId = null;
10341
+ let teamId = null;
10284
10342
  let hostExecBaseEnv = HOST_EXEC_DEFAULT_BASE_ENV;
10285
10343
  async function ensureVm(ctx) {
10286
10344
  if (vm) return vm;
@@ -10335,6 +10393,7 @@ function moltnetExtension(pi) {
10335
10393
  activateAgentEnv(managed.credentials.agentEnv, mainRepo);
10336
10394
  moltnetAgent = await connect({ configDir: managed.agentDir });
10337
10395
  diaryId = managed.credentials.agentEnv.MOLTNET_DIARY_ID ?? null;
10396
+ teamId = managed.credentials.agentEnv.MOLTNET_TEAM_ID ?? null;
10338
10397
  hostExecBaseEnv = new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]);
10339
10398
  vm = managed.vm;
10340
10399
  const label = worktreePath ? `${mountPath} → ${GUEST_WORKSPACE}` : `${localCwd} → ${GUEST_WORKSPACE}`;
@@ -10356,6 +10415,7 @@ function moltnetExtension(pi) {
10356
10415
  vm = null;
10357
10416
  vmStarting = null;
10358
10417
  moltnetAgent = null;
10418
+ teamId = null;
10359
10419
  }
10360
10420
  });
10361
10421
  pi.on("before_agent_start", async (event, ctx) => {
@@ -10395,6 +10455,7 @@ function moltnetExtension(pi) {
10395
10455
  const moltnetTools = createMoltNetTools({
10396
10456
  getAgent: () => moltnetAgent,
10397
10457
  getDiaryId: () => diaryId,
10458
+ getTeamId: () => teamId,
10398
10459
  getSessionErrors: () => sessionErrors,
10399
10460
  clearSessionErrors: () => {
10400
10461
  sessionErrors.length = 0;
@@ -10506,4 +10567,4 @@ function moltnetExtension(pi) {
10506
10567
  registerMoltnetReflectCommand(pi, state);
10507
10568
  }
10508
10569
  //#endregion
10509
- export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildPiJudgeRecipeManifest, computePiJudgeRecipeCid, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resolvePiJudgeRecipeVersions, resumeVm, toGuestPath };
10570
+ export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildPiJudgeRecipeManifest, computePiJudgeRecipeCid, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resolvePiJudgeRecipeVersions, resumeVm, toGuestPath };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@themoltnet/pi-extension",
3
- "version": "0.5.0",
3
+ "version": "0.7.0",
4
4
  "type": "module",
5
5
  "description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
6
6
  "license": "MIT",
@@ -29,9 +29,10 @@
29
29
  },
30
30
  "dependencies": {
31
31
  "@earendil-works/gondolin": "^0.7.0",
32
+ "@opentelemetry/api": "^1.9.0",
32
33
  "@sinclair/typebox": "^0.34.0",
33
- "@themoltnet/sdk": "0.94.0",
34
- "@themoltnet/agent-runtime": "0.2.0"
34
+ "@themoltnet/agent-runtime": "0.3.0",
35
+ "@themoltnet/sdk": "0.95.0"
35
36
  },
36
37
  "peerDependencies": {
37
38
  "@mariozechner/pi-coding-agent": ">=0.67.0",
@@ -48,6 +49,7 @@
48
49
  "devDependencies": {
49
50
  "@mariozechner/pi-ai": "^0.67.68",
50
51
  "@mariozechner/pi-coding-agent": "^0.67.68",
52
+ "@opentelemetry/sdk-trace-base": "^2.5.1",
51
53
  "@types/node": "^20.11.0",
52
54
  "typescript": "^5.3.3",
53
55
  "vite": "^8.0.0",