@themoltnet/pi-extension 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +16 -0
- package/dist/index.js +381 -320
- package/package.json +5 -3
package/dist/index.d.ts
CHANGED
|
@@ -33,6 +33,8 @@ declare interface ClaimedTask {
|
|
|
33
33
|
task: Task;
|
|
34
34
|
/** Attempt number assigned by the source/queue. */
|
|
35
35
|
attemptN: number;
|
|
36
|
+
/** W3C trace headers from the claim response for OTel context propagation. */
|
|
37
|
+
traceHeaders: Record<string, string>;
|
|
36
38
|
}
|
|
37
39
|
|
|
38
40
|
export declare function computePiJudgeRecipeCid(inputs: PiJudgeRecipeInputs): PiJudgeRecipeCid;
|
|
@@ -50,6 +52,8 @@ export declare function createGondolinWriteOps(vm: VM, localCwd: string): WriteO
|
|
|
50
52
|
*/
|
|
51
53
|
export declare function createMoltNetTools(config: MoltNetToolsConfig): ToolDefinition<any, any>[];
|
|
52
54
|
|
|
55
|
+
export declare function createPiOtelExtension(options?: PiOtelOptions): (pi: ExtensionAPI) => void;
|
|
56
|
+
|
|
53
57
|
/**
|
|
54
58
|
* Factory that builds a pi-specific `executeTask` function suitable for
|
|
55
59
|
* injection into `AgentRuntime`. The returned function caches the resolved
|
|
@@ -132,6 +136,7 @@ export default moltnetExtension;
|
|
|
132
136
|
declare interface MoltNetToolsConfig {
|
|
133
137
|
getAgent(): MoltNetAgent | null;
|
|
134
138
|
getDiaryId(): string | null;
|
|
139
|
+
getTeamId(): string | null;
|
|
135
140
|
getSessionErrors(): readonly TrackedError[];
|
|
136
141
|
clearSessionErrors(): void;
|
|
137
142
|
/** Host working directory for host-exec commands (worktree path or cwd). */
|
|
@@ -183,6 +188,17 @@ declare interface PiJudgeRecipeVersions {
|
|
|
183
188
|
sdk: string | null;
|
|
184
189
|
}
|
|
185
190
|
|
|
191
|
+
export declare interface PiOtelOptions {
|
|
192
|
+
/** Agent name for `gen_ai.agent.name` on the root span. */
|
|
193
|
+
agentName?: string;
|
|
194
|
+
/**
|
|
195
|
+
* Extra attributes merged onto every span. Use MoltNet-specific keys
|
|
196
|
+
* like `moltnet.task.id` — any `gen_ai.*` keys here are filtered out
|
|
197
|
+
* since the extension is authoritative for those.
|
|
198
|
+
*/
|
|
199
|
+
spanAttributes?: Record<string, string | number | boolean>;
|
|
200
|
+
}
|
|
201
|
+
|
|
186
202
|
export declare function resolvePiJudgeRecipeVersions(): PiJudgeRecipeVersions;
|
|
187
203
|
|
|
188
204
|
/**
|
package/dist/index.js
CHANGED
|
@@ -3,14 +3,15 @@ import { execFileSync } from "node:child_process";
|
|
|
3
3
|
import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, statSync } from "node:fs";
|
|
4
4
|
import path, { join } from "node:path";
|
|
5
5
|
import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createWriteTool, createWriteToolDefinition, defineTool } from "@mariozechner/pi-coding-agent";
|
|
6
|
-
import { createHash
|
|
6
|
+
import { createHash } from "node:crypto";
|
|
7
7
|
import crypto, { createHash as createHash$1 } from "crypto";
|
|
8
8
|
import { readFile } from "node:fs/promises";
|
|
9
9
|
import { homedir } from "node:os";
|
|
10
10
|
import { Type, complete, getModel } from "@mariozechner/pi-ai";
|
|
11
|
-
import { fileURLToPath } from "node:url";
|
|
12
11
|
import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
|
|
13
12
|
import { parseEnv } from "node:util";
|
|
13
|
+
import { fileURLToPath } from "node:url";
|
|
14
|
+
import { SpanStatusCode, context, trace } from "@opentelemetry/api";
|
|
14
15
|
import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
|
|
15
16
|
import { Value } from "@sinclair/typebox/value";
|
|
16
17
|
//#region ../api-client/src/generated/core/bodySerializer.gen.ts
|
|
@@ -1415,80 +1416,6 @@ var updateRenderedPack = (options) => (options.client ?? client).patch({
|
|
|
1415
1416
|
}
|
|
1416
1417
|
});
|
|
1417
1418
|
/**
|
|
1418
|
-
* Trigger fidelity verification for an agent-rendered pack.
|
|
1419
|
-
*/
|
|
1420
|
-
var verifyRenderedPack = (options) => (options.client ?? client).post({
|
|
1421
|
-
security: [
|
|
1422
|
-
{
|
|
1423
|
-
scheme: "bearer",
|
|
1424
|
-
type: "http"
|
|
1425
|
-
},
|
|
1426
|
-
{
|
|
1427
|
-
name: "X-Moltnet-Session-Token",
|
|
1428
|
-
type: "apiKey"
|
|
1429
|
-
},
|
|
1430
|
-
{
|
|
1431
|
-
in: "cookie",
|
|
1432
|
-
name: "ory_kratos_session",
|
|
1433
|
-
type: "apiKey"
|
|
1434
|
-
}
|
|
1435
|
-
],
|
|
1436
|
-
url: "/rendered-packs/{id}/verify",
|
|
1437
|
-
...options,
|
|
1438
|
-
headers: {
|
|
1439
|
-
"Content-Type": "application/json",
|
|
1440
|
-
...options.headers
|
|
1441
|
-
}
|
|
1442
|
-
});
|
|
1443
|
-
/**
|
|
1444
|
-
* Judge claims verification payload (source entries, rendered content, and rubric).
|
|
1445
|
-
*/
|
|
1446
|
-
var claimVerification = (options) => (options.client ?? client).post({
|
|
1447
|
-
security: [
|
|
1448
|
-
{
|
|
1449
|
-
scheme: "bearer",
|
|
1450
|
-
type: "http"
|
|
1451
|
-
},
|
|
1452
|
-
{
|
|
1453
|
-
name: "X-Moltnet-Session-Token",
|
|
1454
|
-
type: "apiKey"
|
|
1455
|
-
},
|
|
1456
|
-
{
|
|
1457
|
-
in: "cookie",
|
|
1458
|
-
name: "ory_kratos_session",
|
|
1459
|
-
type: "apiKey"
|
|
1460
|
-
}
|
|
1461
|
-
],
|
|
1462
|
-
url: "/rendered-packs/{id}/verify/claim",
|
|
1463
|
-
...options
|
|
1464
|
-
});
|
|
1465
|
-
/**
|
|
1466
|
-
* Judge submits fidelity scores and transcript.
|
|
1467
|
-
*/
|
|
1468
|
-
var submitVerification = (options) => (options.client ?? client).post({
|
|
1469
|
-
security: [
|
|
1470
|
-
{
|
|
1471
|
-
scheme: "bearer",
|
|
1472
|
-
type: "http"
|
|
1473
|
-
},
|
|
1474
|
-
{
|
|
1475
|
-
name: "X-Moltnet-Session-Token",
|
|
1476
|
-
type: "apiKey"
|
|
1477
|
-
},
|
|
1478
|
-
{
|
|
1479
|
-
in: "cookie",
|
|
1480
|
-
name: "ory_kratos_session",
|
|
1481
|
-
type: "apiKey"
|
|
1482
|
-
}
|
|
1483
|
-
],
|
|
1484
|
-
url: "/rendered-packs/{id}/verify/submit",
|
|
1485
|
-
...options,
|
|
1486
|
-
headers: {
|
|
1487
|
-
"Content-Type": "application/json",
|
|
1488
|
-
...options.headers
|
|
1489
|
-
}
|
|
1490
|
-
});
|
|
1491
|
-
/**
|
|
1492
1419
|
* Get an agent's public profile by key fingerprint (A1B2-C3D4-E5F6-G7H8).
|
|
1493
1420
|
*/
|
|
1494
1421
|
var getAgentProfile = (options) => (options.client ?? client).get({
|
|
@@ -6445,29 +6372,6 @@ function createPacksNamespace(context) {
|
|
|
6445
6372
|
body
|
|
6446
6373
|
}));
|
|
6447
6374
|
},
|
|
6448
|
-
async verifyRendered(id, body) {
|
|
6449
|
-
return unwrapResult(await verifyRenderedPack({
|
|
6450
|
-
client,
|
|
6451
|
-
auth,
|
|
6452
|
-
path: { id },
|
|
6453
|
-
body
|
|
6454
|
-
}));
|
|
6455
|
-
},
|
|
6456
|
-
async claimVerification(id) {
|
|
6457
|
-
return unwrapResult(await claimVerification({
|
|
6458
|
-
client,
|
|
6459
|
-
auth,
|
|
6460
|
-
path: { id }
|
|
6461
|
-
}));
|
|
6462
|
-
},
|
|
6463
|
-
async submitVerification(id, body) {
|
|
6464
|
-
return unwrapResult(await submitVerification({
|
|
6465
|
-
client,
|
|
6466
|
-
auth,
|
|
6467
|
-
path: { id },
|
|
6468
|
-
body
|
|
6469
|
-
}));
|
|
6470
|
-
},
|
|
6471
6375
|
async create(diaryId, body) {
|
|
6472
6376
|
return unwrapResult(await createDiaryCustomPack({
|
|
6473
6377
|
client,
|
|
@@ -6618,12 +6522,24 @@ function createTasksNamespace(context) {
|
|
|
6618
6522
|
}));
|
|
6619
6523
|
},
|
|
6620
6524
|
async claim(id, body) {
|
|
6621
|
-
|
|
6525
|
+
const result = await claimTask({
|
|
6622
6526
|
client,
|
|
6623
6527
|
auth,
|
|
6624
6528
|
path: { id },
|
|
6625
6529
|
body
|
|
6626
|
-
})
|
|
6530
|
+
});
|
|
6531
|
+
const data = unwrapResult(result);
|
|
6532
|
+
const traceHeaders = {};
|
|
6533
|
+
const traceparent = result.response.headers.get("traceparent");
|
|
6534
|
+
if (traceparent) {
|
|
6535
|
+
traceHeaders["traceparent"] = traceparent;
|
|
6536
|
+
const tracestate = result.response.headers.get("tracestate");
|
|
6537
|
+
if (tracestate) traceHeaders["tracestate"] = tracestate;
|
|
6538
|
+
}
|
|
6539
|
+
return {
|
|
6540
|
+
...data,
|
|
6541
|
+
traceHeaders
|
|
6542
|
+
};
|
|
6627
6543
|
},
|
|
6628
6544
|
async heartbeat(id, n, body) {
|
|
6629
6545
|
return unwrapResult(await taskHeartbeat({
|
|
@@ -7155,21 +7071,6 @@ var registerSandboxCommand = (pi, state) => {
|
|
|
7155
7071
|
};
|
|
7156
7072
|
//#endregion
|
|
7157
7073
|
//#region src/moltnet/judge/assets.ts
|
|
7158
|
-
/**
|
|
7159
|
-
* Judge assets — single source of truth.
|
|
7160
|
-
*
|
|
7161
|
-
* `DEFAULT_RUBRIC` and `JUDGE_SYSTEM_PROMPT` below ARE the assets. There are
|
|
7162
|
-
* no companion `.md` files; tsc does not copy non-TS files into `dist/`, and
|
|
7163
|
-
* keeping a parallel markdown copy invited drift between source-of-truth
|
|
7164
|
-
* versions, which is what previously happened.
|
|
7165
|
-
*
|
|
7166
|
-
* The asset path constants are opaque identifiers used in the judge-recipe
|
|
7167
|
-
* CID manifest so verifiers can trace which asset set a given Pi extension
|
|
7168
|
-
* version emitted. They are NOT filesystem paths and are never read.
|
|
7169
|
-
* Bump the version suffix when you change the corresponding constant.
|
|
7170
|
-
*/
|
|
7171
|
-
var RUBRIC_ASSET_PATH = "pi-extension/judge/rubric@v1";
|
|
7172
|
-
var JUDGE_PROMPT_ASSET_PATH = "pi-extension/judge/system-prompt@v1";
|
|
7173
7074
|
/** Default fidelity rubric — kept verbatim from the Go judge. */
|
|
7174
7075
|
var DEFAULT_RUBRIC = `Evaluate the rendered content against the source entries on three axes:
|
|
7175
7076
|
|
|
@@ -7322,135 +7223,6 @@ function buildSourceEntriesMarkdown(entries) {
|
|
|
7322
7223
|
return parts.join("\n");
|
|
7323
7224
|
}
|
|
7324
7225
|
//#endregion
|
|
7325
|
-
//#region src/moltnet/judge-recipe-cid.ts
|
|
7326
|
-
var require = createRequire(import.meta.url);
|
|
7327
|
-
var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
|
|
7328
|
-
var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
|
|
7329
|
-
var SDK_PACKAGE_NAME = "@themoltnet/sdk";
|
|
7330
|
-
var CID_VERSION = 1;
|
|
7331
|
-
var RAW_CODEC = 85;
|
|
7332
|
-
var SHA2_256_CODE = 18;
|
|
7333
|
-
var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
|
|
7334
|
-
function findSelfPackageDir() {
|
|
7335
|
-
const start = path.dirname(fileURLToPath(import.meta.url));
|
|
7336
|
-
let dir = start;
|
|
7337
|
-
while (true) {
|
|
7338
|
-
const candidate = path.join(dir, "package.json");
|
|
7339
|
-
if (existsSync(candidate)) {
|
|
7340
|
-
if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
|
|
7341
|
-
}
|
|
7342
|
-
const parent = path.dirname(dir);
|
|
7343
|
-
if (parent === dir) return start;
|
|
7344
|
-
dir = parent;
|
|
7345
|
-
}
|
|
7346
|
-
}
|
|
7347
|
-
var PACKAGE_DIR = findSelfPackageDir();
|
|
7348
|
-
function sha256Hex(value) {
|
|
7349
|
-
return createHash("sha256").update(value, "utf8").digest("hex");
|
|
7350
|
-
}
|
|
7351
|
-
function encodeVarint(value) {
|
|
7352
|
-
const bytes = [];
|
|
7353
|
-
let current = value >>> 0;
|
|
7354
|
-
while (current >= 128) {
|
|
7355
|
-
bytes.push(current & 127 | 128);
|
|
7356
|
-
current >>>= 7;
|
|
7357
|
-
}
|
|
7358
|
-
bytes.push(current);
|
|
7359
|
-
return bytes;
|
|
7360
|
-
}
|
|
7361
|
-
function base32Lower(bytes) {
|
|
7362
|
-
let bits = 0;
|
|
7363
|
-
let value = 0;
|
|
7364
|
-
let output = "";
|
|
7365
|
-
for (const byte of bytes) {
|
|
7366
|
-
value = value << 8 | byte;
|
|
7367
|
-
bits += 8;
|
|
7368
|
-
while (bits >= 5) {
|
|
7369
|
-
output += BASE32_ALPHABET[value >>> bits - 5 & 31];
|
|
7370
|
-
bits -= 5;
|
|
7371
|
-
}
|
|
7372
|
-
}
|
|
7373
|
-
if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
|
|
7374
|
-
return `b${output}`;
|
|
7375
|
-
}
|
|
7376
|
-
function stableStringify(value) {
|
|
7377
|
-
if (value === null || typeof value !== "object") return JSON.stringify(value);
|
|
7378
|
-
if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
|
|
7379
|
-
return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
|
|
7380
|
-
}
|
|
7381
|
-
function readPackageVersion(pkgPath, expectedName) {
|
|
7382
|
-
if (!existsSync(pkgPath)) return null;
|
|
7383
|
-
const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
|
|
7384
|
-
if (expectedName && parsed.name !== expectedName) return null;
|
|
7385
|
-
return typeof parsed.version === "string" ? parsed.version : null;
|
|
7386
|
-
}
|
|
7387
|
-
function resolveInstalledPackageVersion(packageName) {
|
|
7388
|
-
const candidates = [];
|
|
7389
|
-
try {
|
|
7390
|
-
candidates.push(path.dirname(require.resolve(packageName)));
|
|
7391
|
-
} catch {}
|
|
7392
|
-
let dir = PACKAGE_DIR;
|
|
7393
|
-
while (true) {
|
|
7394
|
-
candidates.push(path.join(dir, "node_modules", packageName));
|
|
7395
|
-
const parent = path.dirname(dir);
|
|
7396
|
-
if (parent === dir) break;
|
|
7397
|
-
dir = parent;
|
|
7398
|
-
}
|
|
7399
|
-
for (const start of candidates) {
|
|
7400
|
-
let current = start;
|
|
7401
|
-
while (true) {
|
|
7402
|
-
const version = readPackageVersion(path.join(current, "package.json"), packageName);
|
|
7403
|
-
if (version) return version;
|
|
7404
|
-
const parent = path.dirname(current);
|
|
7405
|
-
if (parent === current) break;
|
|
7406
|
-
current = parent;
|
|
7407
|
-
}
|
|
7408
|
-
}
|
|
7409
|
-
return null;
|
|
7410
|
-
}
|
|
7411
|
-
function resolvePiJudgeRecipeVersions() {
|
|
7412
|
-
return {
|
|
7413
|
-
pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
|
|
7414
|
-
piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
|
|
7415
|
-
sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
|
|
7416
|
-
};
|
|
7417
|
-
}
|
|
7418
|
-
function buildPiJudgeRecipeManifest(inputs) {
|
|
7419
|
-
return {
|
|
7420
|
-
kind: "pi-judge-recipe/v1",
|
|
7421
|
-
versions: {
|
|
7422
|
-
...resolvePiJudgeRecipeVersions(),
|
|
7423
|
-
...inputs.overrides
|
|
7424
|
-
},
|
|
7425
|
-
assets: {
|
|
7426
|
-
promptAsset: inputs.promptAsset ?? null,
|
|
7427
|
-
rubricAsset: inputs.rubricAsset ?? null,
|
|
7428
|
-
skillSourcePath: inputs.skillSourcePath ?? null
|
|
7429
|
-
},
|
|
7430
|
-
hashes: {
|
|
7431
|
-
judgePromptSha256: sha256Hex(inputs.judgePrompt),
|
|
7432
|
-
rubricSha256: sha256Hex(inputs.rubric),
|
|
7433
|
-
skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
|
|
7434
|
-
implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
|
|
7435
|
-
}
|
|
7436
|
-
};
|
|
7437
|
-
}
|
|
7438
|
-
function computePiJudgeRecipeCid(inputs) {
|
|
7439
|
-
const manifest = buildPiJudgeRecipeManifest(inputs);
|
|
7440
|
-
const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
|
|
7441
|
-
const digestBytes = createHash("sha256").update(manifestBytes).digest();
|
|
7442
|
-
return {
|
|
7443
|
-
cid: base32Lower(Uint8Array.from([
|
|
7444
|
-
...encodeVarint(CID_VERSION),
|
|
7445
|
-
...encodeVarint(RAW_CODEC),
|
|
7446
|
-
...encodeVarint(SHA2_256_CODE),
|
|
7447
|
-
...encodeVarint(digestBytes.length),
|
|
7448
|
-
...digestBytes
|
|
7449
|
-
])),
|
|
7450
|
-
manifest
|
|
7451
|
-
};
|
|
7452
|
-
}
|
|
7453
|
-
//#endregion
|
|
7454
7226
|
//#region src/moltnet/render-phase6.ts
|
|
7455
7227
|
function slugToTitle(value) {
|
|
7456
7228
|
return value.split(/[:/_-]+/).filter(Boolean).map((part) => part[0]?.toUpperCase() + part.slice(1)).join(" ");
|
|
@@ -7592,7 +7364,8 @@ function ensureConnected(config) {
|
|
|
7592
7364
|
if (!agent || !diaryId) throw new Error("MoltNet not connected");
|
|
7593
7365
|
return {
|
|
7594
7366
|
agent,
|
|
7595
|
-
diaryId
|
|
7367
|
+
diaryId,
|
|
7368
|
+
teamId: config.getTeamId() ?? ""
|
|
7596
7369
|
};
|
|
7597
7370
|
}
|
|
7598
7371
|
/**
|
|
@@ -7758,24 +7531,34 @@ function createMoltNetTools(config) {
|
|
|
7758
7531
|
};
|
|
7759
7532
|
}
|
|
7760
7533
|
});
|
|
7761
|
-
const
|
|
7762
|
-
name: "
|
|
7763
|
-
label: "
|
|
7764
|
-
description: "Create a
|
|
7534
|
+
const createJudgePackTask = defineTool({
|
|
7535
|
+
name: "moltnet_judge_pack_task_create",
|
|
7536
|
+
label: "Create Judge Pack Task",
|
|
7537
|
+
description: "Create a judge_pack task for a rendered pack. Returns a taskId that moltnet_rendered_pack_judge can claim and execute. The rubric is required — pass the structured rubric JSON from @moltnet/tasks Rubric schema.",
|
|
7765
7538
|
parameters: Type.Object({
|
|
7766
|
-
renderedPackId: Type.String({ description: "Rendered pack ID" }),
|
|
7767
|
-
|
|
7539
|
+
renderedPackId: Type.String({ description: "Rendered pack ID to judge" }),
|
|
7540
|
+
sourcePackId: Type.String({ description: "Source pack ID. Fetch it from the rendered pack if unknown." }),
|
|
7541
|
+
rubric: Type.Any({ description: "Structured rubric object (Rubric schema from @moltnet/tasks). Must have rubricId, version, criteria[]." }),
|
|
7542
|
+
diaryId: Type.Optional(Type.String({ description: "Diary ID to impose the task on. Defaults to the connected diary." }))
|
|
7768
7543
|
}),
|
|
7769
7544
|
async execute(_id, params) {
|
|
7770
|
-
const { agent } = ensureConnected(config);
|
|
7771
|
-
const
|
|
7772
|
-
|
|
7545
|
+
const { agent, diaryId: connectedDiaryId, teamId: connectedTeamId } = ensureConnected(config);
|
|
7546
|
+
const task = await agent.tasks.create({
|
|
7547
|
+
taskType: "judge_pack",
|
|
7548
|
+
input: {
|
|
7549
|
+
renderedPackId: params.renderedPackId,
|
|
7550
|
+
sourcePackId: params.sourcePackId,
|
|
7551
|
+
rubric: params.rubric
|
|
7552
|
+
},
|
|
7553
|
+
diaryId: params.diaryId ?? connectedDiaryId,
|
|
7554
|
+
teamId: connectedTeamId
|
|
7555
|
+
});
|
|
7773
7556
|
return {
|
|
7774
7557
|
content: [{
|
|
7775
7558
|
type: "text",
|
|
7776
7559
|
text: JSON.stringify({
|
|
7777
|
-
|
|
7778
|
-
|
|
7560
|
+
taskId: task.id,
|
|
7561
|
+
task
|
|
7779
7562
|
}, null, 2)
|
|
7780
7563
|
}],
|
|
7781
7564
|
details: {}
|
|
@@ -7785,87 +7568,79 @@ function createMoltNetTools(config) {
|
|
|
7785
7568
|
const judgeRenderedPack = defineTool({
|
|
7786
7569
|
name: "moltnet_rendered_pack_judge",
|
|
7787
7570
|
label: "Judge MoltNet Rendered Pack",
|
|
7788
|
-
description: "
|
|
7571
|
+
description: "Claim a judge_pack task, run the fidelity judge locally, complete the task with structured scores, and set verifiedTaskId on the rendered pack. Create the task first with moltnet_judge_pack_task_create.",
|
|
7789
7572
|
parameters: Type.Object({
|
|
7790
|
-
|
|
7791
|
-
|
|
7792
|
-
rubric: Type.Optional(Type.String({ description: "Custom rubric override (local mode only). Defaults to the built-in rubric when omitted." }))
|
|
7573
|
+
taskId: Type.String({ description: "judge_pack task ID from moltnet_judge_pack_task_create" }),
|
|
7574
|
+
rubricOverride: Type.Optional(Type.String({ description: "Freeform rubric string override for the LLM judge prompt. When omitted the task rubric preamble (or built-in default) is used." }))
|
|
7793
7575
|
}),
|
|
7794
7576
|
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
7795
7577
|
const { agent } = ensureConnected(config);
|
|
7796
7578
|
const model = ctx?.model;
|
|
7797
7579
|
if (!model) throw new Error("No active model in pi session — cannot run the fidelity judge.");
|
|
7798
|
-
|
|
7799
|
-
|
|
7800
|
-
|
|
7801
|
-
if (
|
|
7802
|
-
|
|
7803
|
-
|
|
7804
|
-
|
|
7805
|
-
|
|
7806
|
-
|
|
7807
|
-
}
|
|
7808
|
-
|
|
7809
|
-
if (!rendered.content?.trim()) throw new Error(`rendered pack ${params.renderedPackId} has empty content`);
|
|
7810
|
-
const sourcePack = await agent.packs.get(rendered.sourcePackId, { expand: "entries" });
|
|
7811
|
-
if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${rendered.sourcePackId} has no entries`);
|
|
7812
|
-
sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
|
|
7813
|
-
title: entry.entry.title,
|
|
7814
|
-
content: entry.entry.content
|
|
7815
|
-
})));
|
|
7816
|
-
renderedContent = rendered.content;
|
|
7817
|
-
rubric = params.rubric?.trim() ? params.rubric : DEFAULT_RUBRIC;
|
|
7818
|
-
}
|
|
7580
|
+
const claimed = await agent.tasks.claim(params.taskId);
|
|
7581
|
+
const input = claimed.task.input;
|
|
7582
|
+
const rendered = await agent.packs.getRendered(input.renderedPackId);
|
|
7583
|
+
if (!rendered.content?.trim()) throw new Error(`rendered pack ${input.renderedPackId} has empty content`);
|
|
7584
|
+
const sourcePack = await agent.packs.get(input.sourcePackId, { expand: "entries" });
|
|
7585
|
+
if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${input.sourcePackId} has no entries`);
|
|
7586
|
+
const sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
|
|
7587
|
+
title: entry.entry.title,
|
|
7588
|
+
content: entry.entry.content
|
|
7589
|
+
})));
|
|
7590
|
+
const rubric = params.rubricOverride?.trim() || input.rubric?.preamble?.trim() || DEFAULT_RUBRIC;
|
|
7819
7591
|
let scores;
|
|
7820
7592
|
try {
|
|
7821
7593
|
scores = await runFidelityJudge({
|
|
7822
7594
|
model,
|
|
7823
7595
|
sourceEntries: sourceEntriesMd,
|
|
7824
|
-
renderedContent,
|
|
7596
|
+
renderedContent: rendered.content,
|
|
7825
7597
|
rubric
|
|
7826
7598
|
});
|
|
7827
7599
|
} catch (err) {
|
|
7600
|
+
await agent.tasks.fail(params.taskId, claimed.attempt.attemptN, { error: {
|
|
7601
|
+
code: "judge_failed",
|
|
7602
|
+
message: err.message ?? String(err)
|
|
7603
|
+
} }).catch(() => {});
|
|
7828
7604
|
throw new Error(`judge failed: ${err.message ?? String(err)}`);
|
|
7829
7605
|
}
|
|
7830
|
-
|
|
7831
|
-
|
|
7832
|
-
|
|
7833
|
-
|
|
7834
|
-
|
|
7835
|
-
|
|
7836
|
-
|
|
7837
|
-
|
|
7838
|
-
|
|
7839
|
-
|
|
7606
|
+
const modelId = model.provider && model.id ? `${model.provider}:${model.id}` : model.id ?? "pi:unknown";
|
|
7607
|
+
const output = {
|
|
7608
|
+
scores: [
|
|
7609
|
+
{
|
|
7610
|
+
criterionId: "coverage",
|
|
7611
|
+
score: scores.coverage
|
|
7612
|
+
},
|
|
7613
|
+
{
|
|
7614
|
+
criterionId: "grounding",
|
|
7615
|
+
score: scores.grounding
|
|
7616
|
+
},
|
|
7617
|
+
{
|
|
7618
|
+
criterionId: "faithfulness",
|
|
7619
|
+
score: scores.faithfulness
|
|
7620
|
+
}
|
|
7621
|
+
],
|
|
7622
|
+
composite: scores.composite,
|
|
7623
|
+
verdict: scores.reasoning,
|
|
7624
|
+
judgeModel: modelId
|
|
7840
7625
|
};
|
|
7841
|
-
const
|
|
7842
|
-
|
|
7843
|
-
|
|
7844
|
-
|
|
7845
|
-
|
|
7846
|
-
|
|
7847
|
-
|
|
7848
|
-
|
|
7849
|
-
const submit = await agent.packs.submitVerification(params.renderedPackId, {
|
|
7850
|
-
nonce: params.nonce,
|
|
7851
|
-
coverage: scores.coverage,
|
|
7852
|
-
grounding: scores.grounding,
|
|
7853
|
-
faithfulness: scores.faithfulness,
|
|
7854
|
-
transcript: scores.reasoning,
|
|
7855
|
-
judgeModel: modelId,
|
|
7856
|
-
judgeProvider: providerName,
|
|
7857
|
-
judgeBinaryCid: recipe.cid
|
|
7626
|
+
const outputCid = await computeJsonCid(output);
|
|
7627
|
+
const completed = await agent.tasks.complete(params.taskId, claimed.attempt.attemptN, {
|
|
7628
|
+
output,
|
|
7629
|
+
outputCid,
|
|
7630
|
+
usage: {
|
|
7631
|
+
inputTokens: 0,
|
|
7632
|
+
outputTokens: 0
|
|
7633
|
+
}
|
|
7858
7634
|
});
|
|
7635
|
+
await agent.packs.updateRendered(input.renderedPackId, { verifiedTaskId: params.taskId });
|
|
7859
7636
|
return {
|
|
7860
7637
|
content: [{
|
|
7861
7638
|
type: "text",
|
|
7862
7639
|
text: JSON.stringify({
|
|
7863
|
-
|
|
7864
|
-
|
|
7640
|
+
renderedPackId: input.renderedPackId,
|
|
7641
|
+
taskId: params.taskId,
|
|
7865
7642
|
scores,
|
|
7866
|
-
|
|
7867
|
-
judgeRecipeCid: recipe.cid,
|
|
7868
|
-
judgeRecipeManifest: recipe.manifest
|
|
7643
|
+
task: completed
|
|
7869
7644
|
}, null, 2)
|
|
7870
7645
|
}],
|
|
7871
7646
|
details: {}
|
|
@@ -8081,7 +7856,7 @@ function createMoltNetTools(config) {
|
|
|
8081
7856
|
renderPack,
|
|
8082
7857
|
listRenderedPacks,
|
|
8083
7858
|
getRenderedPack,
|
|
8084
|
-
|
|
7859
|
+
createJudgePackTask,
|
|
8085
7860
|
judgeRenderedPack,
|
|
8086
7861
|
diaryTags,
|
|
8087
7862
|
listEntries,
|
|
@@ -8697,6 +8472,272 @@ function ensureRelativeWorktreePaths(gitconfig) {
|
|
|
8697
8472
|
return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
|
|
8698
8473
|
}
|
|
8699
8474
|
//#endregion
|
|
8475
|
+
//#region src/moltnet/judge-recipe-cid.ts
|
|
8476
|
+
var require = createRequire(import.meta.url);
|
|
8477
|
+
var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
|
|
8478
|
+
var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
|
|
8479
|
+
var SDK_PACKAGE_NAME = "@themoltnet/sdk";
|
|
8480
|
+
var CID_VERSION = 1;
|
|
8481
|
+
var RAW_CODEC = 85;
|
|
8482
|
+
var SHA2_256_CODE = 18;
|
|
8483
|
+
var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
|
|
8484
|
+
function findSelfPackageDir() {
|
|
8485
|
+
const start = path.dirname(fileURLToPath(import.meta.url));
|
|
8486
|
+
let dir = start;
|
|
8487
|
+
while (true) {
|
|
8488
|
+
const candidate = path.join(dir, "package.json");
|
|
8489
|
+
if (existsSync(candidate)) {
|
|
8490
|
+
if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
|
|
8491
|
+
}
|
|
8492
|
+
const parent = path.dirname(dir);
|
|
8493
|
+
if (parent === dir) return start;
|
|
8494
|
+
dir = parent;
|
|
8495
|
+
}
|
|
8496
|
+
}
|
|
8497
|
+
var PACKAGE_DIR = findSelfPackageDir();
|
|
8498
|
+
function sha256Hex(value) {
|
|
8499
|
+
return createHash("sha256").update(value, "utf8").digest("hex");
|
|
8500
|
+
}
|
|
8501
|
+
function encodeVarint(value) {
|
|
8502
|
+
const bytes = [];
|
|
8503
|
+
let current = value >>> 0;
|
|
8504
|
+
while (current >= 128) {
|
|
8505
|
+
bytes.push(current & 127 | 128);
|
|
8506
|
+
current >>>= 7;
|
|
8507
|
+
}
|
|
8508
|
+
bytes.push(current);
|
|
8509
|
+
return bytes;
|
|
8510
|
+
}
|
|
8511
|
+
function base32Lower(bytes) {
|
|
8512
|
+
let bits = 0;
|
|
8513
|
+
let value = 0;
|
|
8514
|
+
let output = "";
|
|
8515
|
+
for (const byte of bytes) {
|
|
8516
|
+
value = value << 8 | byte;
|
|
8517
|
+
bits += 8;
|
|
8518
|
+
while (bits >= 5) {
|
|
8519
|
+
output += BASE32_ALPHABET[value >>> bits - 5 & 31];
|
|
8520
|
+
bits -= 5;
|
|
8521
|
+
}
|
|
8522
|
+
}
|
|
8523
|
+
if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
|
|
8524
|
+
return `b${output}`;
|
|
8525
|
+
}
|
|
8526
|
+
function stableStringify(value) {
|
|
8527
|
+
if (value === null || typeof value !== "object") return JSON.stringify(value);
|
|
8528
|
+
if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
|
|
8529
|
+
return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
|
|
8530
|
+
}
|
|
8531
|
+
function readPackageVersion(pkgPath, expectedName) {
|
|
8532
|
+
if (!existsSync(pkgPath)) return null;
|
|
8533
|
+
const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
|
|
8534
|
+
if (expectedName && parsed.name !== expectedName) return null;
|
|
8535
|
+
return typeof parsed.version === "string" ? parsed.version : null;
|
|
8536
|
+
}
|
|
8537
|
+
function resolveInstalledPackageVersion(packageName) {
|
|
8538
|
+
const candidates = [];
|
|
8539
|
+
try {
|
|
8540
|
+
candidates.push(path.dirname(require.resolve(packageName)));
|
|
8541
|
+
} catch {}
|
|
8542
|
+
let dir = PACKAGE_DIR;
|
|
8543
|
+
while (true) {
|
|
8544
|
+
candidates.push(path.join(dir, "node_modules", packageName));
|
|
8545
|
+
const parent = path.dirname(dir);
|
|
8546
|
+
if (parent === dir) break;
|
|
8547
|
+
dir = parent;
|
|
8548
|
+
}
|
|
8549
|
+
for (const start of candidates) {
|
|
8550
|
+
let current = start;
|
|
8551
|
+
while (true) {
|
|
8552
|
+
const version = readPackageVersion(path.join(current, "package.json"), packageName);
|
|
8553
|
+
if (version) return version;
|
|
8554
|
+
const parent = path.dirname(current);
|
|
8555
|
+
if (parent === current) break;
|
|
8556
|
+
current = parent;
|
|
8557
|
+
}
|
|
8558
|
+
}
|
|
8559
|
+
return null;
|
|
8560
|
+
}
|
|
8561
|
+
function resolvePiJudgeRecipeVersions() {
|
|
8562
|
+
return {
|
|
8563
|
+
pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
|
|
8564
|
+
piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
|
|
8565
|
+
sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
|
|
8566
|
+
};
|
|
8567
|
+
}
|
|
8568
|
+
function buildPiJudgeRecipeManifest(inputs) {
|
|
8569
|
+
return {
|
|
8570
|
+
kind: "pi-judge-recipe/v1",
|
|
8571
|
+
versions: {
|
|
8572
|
+
...resolvePiJudgeRecipeVersions(),
|
|
8573
|
+
...inputs.overrides
|
|
8574
|
+
},
|
|
8575
|
+
assets: {
|
|
8576
|
+
promptAsset: inputs.promptAsset ?? null,
|
|
8577
|
+
rubricAsset: inputs.rubricAsset ?? null,
|
|
8578
|
+
skillSourcePath: inputs.skillSourcePath ?? null
|
|
8579
|
+
},
|
|
8580
|
+
hashes: {
|
|
8581
|
+
judgePromptSha256: sha256Hex(inputs.judgePrompt),
|
|
8582
|
+
rubricSha256: sha256Hex(inputs.rubric),
|
|
8583
|
+
skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
|
|
8584
|
+
implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
|
|
8585
|
+
}
|
|
8586
|
+
};
|
|
8587
|
+
}
|
|
8588
|
+
function computePiJudgeRecipeCid(inputs) {
|
|
8589
|
+
const manifest = buildPiJudgeRecipeManifest(inputs);
|
|
8590
|
+
const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
|
|
8591
|
+
const digestBytes = createHash("sha256").update(manifestBytes).digest();
|
|
8592
|
+
return {
|
|
8593
|
+
cid: base32Lower(Uint8Array.from([
|
|
8594
|
+
...encodeVarint(CID_VERSION),
|
|
8595
|
+
...encodeVarint(RAW_CODEC),
|
|
8596
|
+
...encodeVarint(SHA2_256_CODE),
|
|
8597
|
+
...encodeVarint(digestBytes.length),
|
|
8598
|
+
...digestBytes
|
|
8599
|
+
])),
|
|
8600
|
+
manifest
|
|
8601
|
+
};
|
|
8602
|
+
}
|
|
8603
|
+
//#endregion
|
|
8604
|
+
//#region src/otel/index.ts
|
|
8605
|
+
var TRACER_NAME = "@themoltnet/pi-extension/otel";
|
|
8606
|
+
function stripReservedAttrs(attrs) {
|
|
8607
|
+
const out = {};
|
|
8608
|
+
for (const [k, v] of Object.entries(attrs)) {
|
|
8609
|
+
if (k.startsWith("gen_ai.")) continue;
|
|
8610
|
+
out[k] = v;
|
|
8611
|
+
}
|
|
8612
|
+
return out;
|
|
8613
|
+
}
|
|
8614
|
+
/**
 * Build a pi extension that mirrors agent lifecycle events as OpenTelemetry
 * spans: one `invoke_agent` span per session, one `chat` span per turn
 * (child of the session span) and one `execute_tool` span per tool call
 * (child of the active turn span, or of the session span when no turn is open).
 *
 * @param options Optional settings read by the extension:
 *   - `agentName`: label used in the session span name and in
 *     `gen_ai.agent.name`; defaults to "pi".
 *   - `spanAttributes`: extra attributes copied onto every span; keys in the
 *     `gen_ai.*` namespace are stripped first.
 * @returns A function that registers the event handlers on a pi extension API.
 */
function createPiOtelExtension(options = {}) {
	return function piOtelExtension(pi) {
		const tracer = trace.getTracer(TRACER_NAME);
		const extraAttrs = stripReservedAttrs(options.spanAttributes ?? {});
		let sessionSpan;
		let sessionCtx = context.active();
		let turnSpan;
		let turnCtx = context.active();
		let currentModel;
		// toolCallId -> { span, startedAt } for tool calls that have started but not ended.
		const toolSpans = /* @__PURE__ */ new Map();
		/** End every still-open tool span with an error status carrying `reason`. */
		function drainToolSpans(reason) {
			for (const [, entry] of toolSpans) {
				entry.span.setStatus({
					code: SpanStatusCode.ERROR,
					message: reason
				});
				entry.span.end();
			}
			toolSpans.clear();
		}
		/** End the open turn span (if any) together with its unfinished tool spans. */
		function endTurnSpan() {
			if (!turnSpan) return;
			drainToolSpans("tool span not closed before turn end");
			turnSpan.end();
			turnSpan = void 0;
			// With no turn open, new children attach to the session again.
			turnCtx = sessionCtx;
		}
		/** End tool, turn and session spans (in that order) and reset per-session state. */
		function endSessionSpan() {
			// Drain first so leftover tool spans carry the shutdown reason rather than the turn-end one.
			drainToolSpans("tool span not closed before session shutdown");
			endTurnSpan();
			if (sessionSpan) {
				sessionSpan.setStatus({ code: SpanStatusCode.OK });
				sessionSpan.end();
				sessionSpan = void 0;
				sessionCtx = context.active();
			}
			currentModel = void 0;
		}
		pi.on("session_start", (event, ctx) => {
			// Close anything left over from a previous session before opening a new one.
			endSessionSpan();
			const agentName = options.agentName ?? "pi";
			sessionSpan = tracer.startSpan(`invoke_agent ${agentName}`, { attributes: {
				...extraAttrs,
				"gen_ai.operation.name": "invoke_agent",
				"gen_ai.agent.name": agentName,
				"session.reason": event.reason,
				"session.cwd": ctx.cwd
			} }, context.active());
			sessionCtx = trace.setSpan(context.active(), sessionSpan);
			turnCtx = sessionCtx;
		});
		pi.on("session_shutdown", () => {
			endSessionSpan();
		});
		pi.on("model_select", (event) => {
			currentModel = {
				provider: event.model.provider,
				id: event.model.id
			};
			if (sessionSpan) {
				sessionSpan.setAttribute("gen_ai.request.model", event.model.id);
				sessionSpan.setAttribute("gen_ai.provider.name", event.model.provider);
			}
		});
		pi.on("turn_start", (event) => {
			if (!sessionSpan) return;
			// A previous turn that never received `turn_end` would otherwise be
			// overwritten below and its span (plus its tool spans) never ended.
			endTurnSpan();
			const modelLabel = currentModel?.id ?? "unknown";
			turnSpan = tracer.startSpan(`chat ${modelLabel}`, { attributes: {
				...extraAttrs,
				"gen_ai.operation.name": "chat",
				"gen_ai.request.model": modelLabel,
				"gen_ai.provider.name": currentModel?.provider ?? "unknown",
				"turn.index": event.turnIndex
			} }, sessionCtx);
			turnCtx = trace.setSpan(sessionCtx, turnSpan);
		});
		pi.on("turn_end", (event) => {
			if (!turnSpan) return;
			const usage = extractUsage(event.message);
			if (usage) {
				turnSpan.setAttribute("gen_ai.usage.input_tokens", usage.input);
				turnSpan.setAttribute("gen_ai.usage.output_tokens", usage.output);
			}
			turnSpan.setAttribute("turn.tool_results", event.toolResults?.length ?? 0);
			turnSpan.setStatus({ code: SpanStatusCode.OK });
			endTurnSpan();
		});
		pi.on("tool_execution_start", (event) => {
			// A repeated start for the same call id would replace the map entry and
			// leave the earlier span open forever; end it explicitly first.
			const stale = toolSpans.get(event.toolCallId);
			if (stale) {
				stale.span.setStatus({
					code: SpanStatusCode.ERROR,
					message: "tool span superseded by a new start for the same tool call id"
				});
				stale.span.end();
			}
			const parentCtx = turnSpan ? turnCtx : sessionCtx;
			const span = tracer.startSpan(`execute_tool ${event.toolName}`, { attributes: {
				...extraAttrs,
				"gen_ai.operation.name": "execute_tool",
				"gen_ai.tool.name": event.toolName,
				"gen_ai.tool.call.id": event.toolCallId
			} }, parentCtx);
			toolSpans.set(event.toolCallId, {
				span,
				startedAt: Date.now()
			});
		});
		pi.on("tool_execution_end", (event) => {
			const entry = toolSpans.get(event.toolCallId);
			// Unknown ids (never started, or already drained) are ignored.
			if (!entry) return;
			const durationMs = Date.now() - entry.startedAt;
			entry.span.setAttribute("tool.duration_ms", durationMs);
			if (event.isError) {
				entry.span.setAttribute("error.type", "tool_execution_error");
				entry.span.setStatus({
					code: SpanStatusCode.ERROR,
					message: "tool execution failed"
				});
			} else entry.span.setStatus({ code: SpanStatusCode.OK });
			entry.span.end();
			toolSpans.delete(event.toolCallId);
		});
	};
}
|
|
8731
|
+
/**
 * Pull token counts out of a turn's message.
 *
 * Only object messages that carry both a `role` and a `usage` member, whose
 * role is "assistant" and whose usage is truthy, yield a result; anything
 * else produces `null`. Missing or nullish counters are reported as 0.
 *
 * @param message Candidate message (any value).
 * @returns `{ input, output }` token counts, or `null` when unavailable.
 */
function extractUsage(message) {
	if (typeof message !== "object" || message === null) return null;
	const hasExpectedShape = "usage" in message && "role" in message;
	if (!hasExpectedShape) return null;
	const { role, usage } = message;
	if (role !== "assistant") return null;
	if (!usage) return null;
	const { input, output } = usage;
	return {
		input: input ?? 0,
		output: output ?? 0
	};
}
|
|
8740
|
+
//#endregion
|
|
8700
8741
|
//#region ../tasks/src/formats.ts
|
|
8701
8742
|
/**
|
|
8702
8743
|
* Register TypeBox string formats used across Task / TaskOutput / task-type
|
|
@@ -10001,6 +10042,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
10001
10042
|
sandboxConfig: opts.sandboxConfig
|
|
10002
10043
|
});
|
|
10003
10044
|
const diaryId = task.diaryId ?? "";
|
|
10045
|
+
const taskTeamId = task.teamId ?? "";
|
|
10004
10046
|
let reporterOpen = false;
|
|
10005
10047
|
let session = null;
|
|
10006
10048
|
const finalUsage = emptyUsage(opts.provider, opts.model);
|
|
@@ -10063,6 +10105,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
10063
10105
|
const moltnetTools = createMoltNetTools({
|
|
10064
10106
|
getAgent: () => moltnetAgent,
|
|
10065
10107
|
getDiaryId: () => diaryId,
|
|
10108
|
+
getTeamId: () => taskTeamId,
|
|
10066
10109
|
getSessionErrors: () => [],
|
|
10067
10110
|
clearSessionErrors: () => {},
|
|
10068
10111
|
getHostCwd: () => mountPath,
|
|
@@ -10072,7 +10115,15 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
10072
10115
|
const modelHandle = getModel(opts.provider, opts.model);
|
|
10073
10116
|
const resourceLoader = new DefaultResourceLoader({
|
|
10074
10117
|
cwd: mountPath,
|
|
10075
|
-
agentDir: piAuthDir
|
|
10118
|
+
agentDir: piAuthDir,
|
|
10119
|
+
extensionFactories: [createPiOtelExtension({
|
|
10120
|
+
agentName: opts.agentName,
|
|
10121
|
+
spanAttributes: {
|
|
10122
|
+
"moltnet.task.id": task.id,
|
|
10123
|
+
"moltnet.task.attempt": attemptN,
|
|
10124
|
+
"moltnet.task.type": task.taskType
|
|
10125
|
+
}
|
|
10126
|
+
})]
|
|
10076
10127
|
});
|
|
10077
10128
|
await resourceLoader.reload();
|
|
10078
10129
|
session = (await createAgentSession({
|
|
@@ -10189,10 +10240,16 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
10189
10240
|
if (reporterOpen) {
|
|
10190
10241
|
try {
|
|
10191
10242
|
await reporter.finalize(finalUsage);
|
|
10192
|
-
} catch {
|
|
10243
|
+
} catch (err) {
|
|
10244
|
+
const detail = err instanceof Error ? err.message : String(err);
|
|
10245
|
+
console.error(`executePiTask: reporter.finalize() failed for task ${task.id} attempt ${attemptN}: ${detail}`);
|
|
10246
|
+
}
|
|
10193
10247
|
try {
|
|
10194
10248
|
await reporter.close();
|
|
10195
|
-
} catch {
|
|
10249
|
+
} catch (err) {
|
|
10250
|
+
const detail = err instanceof Error ? err.message : String(err);
|
|
10251
|
+
console.error(`executePiTask: reporter.close() failed for task ${task.id} attempt ${attemptN}: ${detail}`);
|
|
10252
|
+
}
|
|
10196
10253
|
}
|
|
10197
10254
|
await managed.vm.close();
|
|
10198
10255
|
}
|
|
@@ -10281,6 +10338,7 @@ function moltnetExtension(pi) {
|
|
|
10281
10338
|
let worktreePath = null;
|
|
10282
10339
|
let moltnetAgent = null;
|
|
10283
10340
|
let diaryId = null;
|
|
10341
|
+
let teamId = null;
|
|
10284
10342
|
let hostExecBaseEnv = HOST_EXEC_DEFAULT_BASE_ENV;
|
|
10285
10343
|
async function ensureVm(ctx) {
|
|
10286
10344
|
if (vm) return vm;
|
|
@@ -10335,6 +10393,7 @@ function moltnetExtension(pi) {
|
|
|
10335
10393
|
activateAgentEnv(managed.credentials.agentEnv, mainRepo);
|
|
10336
10394
|
moltnetAgent = await connect({ configDir: managed.agentDir });
|
|
10337
10395
|
diaryId = managed.credentials.agentEnv.MOLTNET_DIARY_ID ?? null;
|
|
10396
|
+
teamId = managed.credentials.agentEnv.MOLTNET_TEAM_ID ?? null;
|
|
10338
10397
|
hostExecBaseEnv = new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]);
|
|
10339
10398
|
vm = managed.vm;
|
|
10340
10399
|
const label = worktreePath ? `${mountPath} → ${GUEST_WORKSPACE}` : `${localCwd} → ${GUEST_WORKSPACE}`;
|
|
@@ -10356,6 +10415,7 @@ function moltnetExtension(pi) {
|
|
|
10356
10415
|
vm = null;
|
|
10357
10416
|
vmStarting = null;
|
|
10358
10417
|
moltnetAgent = null;
|
|
10418
|
+
teamId = null;
|
|
10359
10419
|
}
|
|
10360
10420
|
});
|
|
10361
10421
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
@@ -10395,6 +10455,7 @@ function moltnetExtension(pi) {
|
|
|
10395
10455
|
const moltnetTools = createMoltNetTools({
|
|
10396
10456
|
getAgent: () => moltnetAgent,
|
|
10397
10457
|
getDiaryId: () => diaryId,
|
|
10458
|
+
getTeamId: () => teamId,
|
|
10398
10459
|
getSessionErrors: () => sessionErrors,
|
|
10399
10460
|
clearSessionErrors: () => {
|
|
10400
10461
|
sessionErrors.length = 0;
|
|
@@ -10506,4 +10567,4 @@ function moltnetExtension(pi) {
|
|
|
10506
10567
|
registerMoltnetReflectCommand(pi, state);
|
|
10507
10568
|
}
|
|
10508
10569
|
//#endregion
|
|
10509
|
-
export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildPiJudgeRecipeManifest, computePiJudgeRecipeCid, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resolvePiJudgeRecipeVersions, resumeVm, toGuestPath };
|
|
10570
|
+
export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildPiJudgeRecipeManifest, computePiJudgeRecipeCid, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resolvePiJudgeRecipeVersions, resumeVm, toGuestPath };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@themoltnet/pi-extension",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
|
|
6
6
|
"license": "MIT",
|
|
@@ -29,9 +29,10 @@
|
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
31
|
"@earendil-works/gondolin": "^0.7.0",
|
|
32
|
+
"@opentelemetry/api": "^1.9.0",
|
|
32
33
|
"@sinclair/typebox": "^0.34.0",
|
|
33
|
-
"@themoltnet/
|
|
34
|
-
"@themoltnet/
|
|
34
|
+
"@themoltnet/agent-runtime": "0.3.0",
|
|
35
|
+
"@themoltnet/sdk": "0.95.0"
|
|
35
36
|
},
|
|
36
37
|
"peerDependencies": {
|
|
37
38
|
"@mariozechner/pi-coding-agent": ">=0.67.0",
|
|
@@ -48,6 +49,7 @@
|
|
|
48
49
|
"devDependencies": {
|
|
49
50
|
"@mariozechner/pi-ai": "^0.67.68",
|
|
50
51
|
"@mariozechner/pi-coding-agent": "^0.67.68",
|
|
52
|
+
"@opentelemetry/sdk-trace-base": "^2.5.1",
|
|
51
53
|
"@types/node": "^20.11.0",
|
|
52
54
|
"typescript": "^5.3.3",
|
|
53
55
|
"vite": "^8.0.0",
|