@themoltnet/pi-extension 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +3 -0
- package/dist/index.js +225 -316
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -33,6 +33,8 @@ declare interface ClaimedTask {
|
|
|
33
33
|
task: Task;
|
|
34
34
|
/** Attempt number assigned by the source/queue. */
|
|
35
35
|
attemptN: number;
|
|
36
|
+
/** W3C trace headers from the claim response for OTel context propagation. */
|
|
37
|
+
traceHeaders: Record<string, string>;
|
|
36
38
|
}
|
|
37
39
|
|
|
38
40
|
export declare function computePiJudgeRecipeCid(inputs: PiJudgeRecipeInputs): PiJudgeRecipeCid;
|
|
@@ -132,6 +134,7 @@ export default moltnetExtension;
|
|
|
132
134
|
declare interface MoltNetToolsConfig {
|
|
133
135
|
getAgent(): MoltNetAgent | null;
|
|
134
136
|
getDiaryId(): string | null;
|
|
137
|
+
getTeamId(): string | null;
|
|
135
138
|
getSessionErrors(): readonly TrackedError[];
|
|
136
139
|
clearSessionErrors(): void;
|
|
137
140
|
/** Host working directory for host-exec commands (worktree path or cwd). */
|
package/dist/index.js
CHANGED
|
@@ -3,14 +3,14 @@ import { execFileSync } from "node:child_process";
|
|
|
3
3
|
import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, statSync } from "node:fs";
|
|
4
4
|
import path, { join } from "node:path";
|
|
5
5
|
import { DefaultResourceLoader, SessionManager, createAgentSession, createBashTool, createBashToolDefinition, createEditTool, createEditToolDefinition, createReadTool, createReadToolDefinition, createWriteTool, createWriteToolDefinition, defineTool } from "@mariozechner/pi-coding-agent";
|
|
6
|
-
import { createHash
|
|
6
|
+
import { createHash } from "node:crypto";
|
|
7
7
|
import crypto, { createHash as createHash$1 } from "crypto";
|
|
8
8
|
import { readFile } from "node:fs/promises";
|
|
9
9
|
import { homedir } from "node:os";
|
|
10
10
|
import { Type, complete, getModel } from "@mariozechner/pi-ai";
|
|
11
|
-
import { fileURLToPath } from "node:url";
|
|
12
11
|
import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
|
|
13
12
|
import { parseEnv } from "node:util";
|
|
13
|
+
import { fileURLToPath } from "node:url";
|
|
14
14
|
import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
|
|
15
15
|
import { Value } from "@sinclair/typebox/value";
|
|
16
16
|
//#region ../api-client/src/generated/core/bodySerializer.gen.ts
|
|
@@ -1415,80 +1415,6 @@ var updateRenderedPack = (options) => (options.client ?? client).patch({
|
|
|
1415
1415
|
}
|
|
1416
1416
|
});
|
|
1417
1417
|
/**
|
|
1418
|
-
* Trigger fidelity verification for an agent-rendered pack.
|
|
1419
|
-
*/
|
|
1420
|
-
var verifyRenderedPack = (options) => (options.client ?? client).post({
|
|
1421
|
-
security: [
|
|
1422
|
-
{
|
|
1423
|
-
scheme: "bearer",
|
|
1424
|
-
type: "http"
|
|
1425
|
-
},
|
|
1426
|
-
{
|
|
1427
|
-
name: "X-Moltnet-Session-Token",
|
|
1428
|
-
type: "apiKey"
|
|
1429
|
-
},
|
|
1430
|
-
{
|
|
1431
|
-
in: "cookie",
|
|
1432
|
-
name: "ory_kratos_session",
|
|
1433
|
-
type: "apiKey"
|
|
1434
|
-
}
|
|
1435
|
-
],
|
|
1436
|
-
url: "/rendered-packs/{id}/verify",
|
|
1437
|
-
...options,
|
|
1438
|
-
headers: {
|
|
1439
|
-
"Content-Type": "application/json",
|
|
1440
|
-
...options.headers
|
|
1441
|
-
}
|
|
1442
|
-
});
|
|
1443
|
-
/**
|
|
1444
|
-
* Judge claims verification payload (source entries, rendered content, and rubric).
|
|
1445
|
-
*/
|
|
1446
|
-
var claimVerification = (options) => (options.client ?? client).post({
|
|
1447
|
-
security: [
|
|
1448
|
-
{
|
|
1449
|
-
scheme: "bearer",
|
|
1450
|
-
type: "http"
|
|
1451
|
-
},
|
|
1452
|
-
{
|
|
1453
|
-
name: "X-Moltnet-Session-Token",
|
|
1454
|
-
type: "apiKey"
|
|
1455
|
-
},
|
|
1456
|
-
{
|
|
1457
|
-
in: "cookie",
|
|
1458
|
-
name: "ory_kratos_session",
|
|
1459
|
-
type: "apiKey"
|
|
1460
|
-
}
|
|
1461
|
-
],
|
|
1462
|
-
url: "/rendered-packs/{id}/verify/claim",
|
|
1463
|
-
...options
|
|
1464
|
-
});
|
|
1465
|
-
/**
|
|
1466
|
-
* Judge submits fidelity scores and transcript.
|
|
1467
|
-
*/
|
|
1468
|
-
var submitVerification = (options) => (options.client ?? client).post({
|
|
1469
|
-
security: [
|
|
1470
|
-
{
|
|
1471
|
-
scheme: "bearer",
|
|
1472
|
-
type: "http"
|
|
1473
|
-
},
|
|
1474
|
-
{
|
|
1475
|
-
name: "X-Moltnet-Session-Token",
|
|
1476
|
-
type: "apiKey"
|
|
1477
|
-
},
|
|
1478
|
-
{
|
|
1479
|
-
in: "cookie",
|
|
1480
|
-
name: "ory_kratos_session",
|
|
1481
|
-
type: "apiKey"
|
|
1482
|
-
}
|
|
1483
|
-
],
|
|
1484
|
-
url: "/rendered-packs/{id}/verify/submit",
|
|
1485
|
-
...options,
|
|
1486
|
-
headers: {
|
|
1487
|
-
"Content-Type": "application/json",
|
|
1488
|
-
...options.headers
|
|
1489
|
-
}
|
|
1490
|
-
});
|
|
1491
|
-
/**
|
|
1492
1418
|
* Get an agent's public profile by key fingerprint (A1B2-C3D4-E5F6-G7H8).
|
|
1493
1419
|
*/
|
|
1494
1420
|
var getAgentProfile = (options) => (options.client ?? client).get({
|
|
@@ -6445,29 +6371,6 @@ function createPacksNamespace(context) {
|
|
|
6445
6371
|
body
|
|
6446
6372
|
}));
|
|
6447
6373
|
},
|
|
6448
|
-
async verifyRendered(id, body) {
|
|
6449
|
-
return unwrapResult(await verifyRenderedPack({
|
|
6450
|
-
client,
|
|
6451
|
-
auth,
|
|
6452
|
-
path: { id },
|
|
6453
|
-
body
|
|
6454
|
-
}));
|
|
6455
|
-
},
|
|
6456
|
-
async claimVerification(id) {
|
|
6457
|
-
return unwrapResult(await claimVerification({
|
|
6458
|
-
client,
|
|
6459
|
-
auth,
|
|
6460
|
-
path: { id }
|
|
6461
|
-
}));
|
|
6462
|
-
},
|
|
6463
|
-
async submitVerification(id, body) {
|
|
6464
|
-
return unwrapResult(await submitVerification({
|
|
6465
|
-
client,
|
|
6466
|
-
auth,
|
|
6467
|
-
path: { id },
|
|
6468
|
-
body
|
|
6469
|
-
}));
|
|
6470
|
-
},
|
|
6471
6374
|
async create(diaryId, body) {
|
|
6472
6375
|
return unwrapResult(await createDiaryCustomPack({
|
|
6473
6376
|
client,
|
|
@@ -6618,12 +6521,24 @@ function createTasksNamespace(context) {
|
|
|
6618
6521
|
}));
|
|
6619
6522
|
},
|
|
6620
6523
|
async claim(id, body) {
|
|
6621
|
-
return unwrapResult(await claimTask({
|
|
6524
|
+
const result = await claimTask({
|
|
6622
6525
|
client,
|
|
6623
6526
|
auth,
|
|
6624
6527
|
path: { id },
|
|
6625
6528
|
body
|
|
6626
|
-
}));
|
|
6529
|
+
});
|
|
6530
|
+
const data = unwrapResult(result);
|
|
6531
|
+
const traceHeaders = {};
|
|
6532
|
+
const traceparent = result.response.headers.get("traceparent");
|
|
6533
|
+
if (traceparent) {
|
|
6534
|
+
traceHeaders["traceparent"] = traceparent;
|
|
6535
|
+
const tracestate = result.response.headers.get("tracestate");
|
|
6536
|
+
if (tracestate) traceHeaders["tracestate"] = tracestate;
|
|
6537
|
+
}
|
|
6538
|
+
return {
|
|
6539
|
+
...data,
|
|
6540
|
+
traceHeaders
|
|
6541
|
+
};
|
|
6627
6542
|
},
|
|
6628
6543
|
async heartbeat(id, n, body) {
|
|
6629
6544
|
return unwrapResult(await taskHeartbeat({
|
|
@@ -7155,21 +7070,6 @@ var registerSandboxCommand = (pi, state) => {
|
|
|
7155
7070
|
};
|
|
7156
7071
|
//#endregion
|
|
7157
7072
|
//#region src/moltnet/judge/assets.ts
|
|
7158
|
-
/**
|
|
7159
|
-
* Judge assets — single source of truth.
|
|
7160
|
-
*
|
|
7161
|
-
* `DEFAULT_RUBRIC` and `JUDGE_SYSTEM_PROMPT` below ARE the assets. There are
|
|
7162
|
-
* no companion `.md` files; tsc does not copy non-TS files into `dist/`, and
|
|
7163
|
-
* keeping a parallel markdown copy invited drift between source-of-truth
|
|
7164
|
-
* versions, which is what previously happened.
|
|
7165
|
-
*
|
|
7166
|
-
* The asset path constants are opaque identifiers used in the judge-recipe
|
|
7167
|
-
* CID manifest so verifiers can trace which asset set a given Pi extension
|
|
7168
|
-
* version emitted. They are NOT filesystem paths and are never read.
|
|
7169
|
-
* Bump the version suffix when you change the corresponding constant.
|
|
7170
|
-
*/
|
|
7171
|
-
var RUBRIC_ASSET_PATH = "pi-extension/judge/rubric@v1";
|
|
7172
|
-
var JUDGE_PROMPT_ASSET_PATH = "pi-extension/judge/system-prompt@v1";
|
|
7173
7073
|
/** Default fidelity rubric — kept verbatim from the Go judge. */
|
|
7174
7074
|
var DEFAULT_RUBRIC = `Evaluate the rendered content against the source entries on three axes:
|
|
7175
7075
|
|
|
@@ -7322,135 +7222,6 @@ function buildSourceEntriesMarkdown(entries) {
|
|
|
7322
7222
|
return parts.join("\n");
|
|
7323
7223
|
}
|
|
7324
7224
|
//#endregion
|
|
7325
|
-
//#region src/moltnet/judge-recipe-cid.ts
|
|
7326
|
-
var require = createRequire(import.meta.url);
|
|
7327
|
-
var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
|
|
7328
|
-
var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
|
|
7329
|
-
var SDK_PACKAGE_NAME = "@themoltnet/sdk";
|
|
7330
|
-
var CID_VERSION = 1;
|
|
7331
|
-
var RAW_CODEC = 85;
|
|
7332
|
-
var SHA2_256_CODE = 18;
|
|
7333
|
-
var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
|
|
7334
|
-
function findSelfPackageDir() {
|
|
7335
|
-
const start = path.dirname(fileURLToPath(import.meta.url));
|
|
7336
|
-
let dir = start;
|
|
7337
|
-
while (true) {
|
|
7338
|
-
const candidate = path.join(dir, "package.json");
|
|
7339
|
-
if (existsSync(candidate)) {
|
|
7340
|
-
if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
|
|
7341
|
-
}
|
|
7342
|
-
const parent = path.dirname(dir);
|
|
7343
|
-
if (parent === dir) return start;
|
|
7344
|
-
dir = parent;
|
|
7345
|
-
}
|
|
7346
|
-
}
|
|
7347
|
-
var PACKAGE_DIR = findSelfPackageDir();
|
|
7348
|
-
function sha256Hex(value) {
|
|
7349
|
-
return createHash("sha256").update(value, "utf8").digest("hex");
|
|
7350
|
-
}
|
|
7351
|
-
function encodeVarint(value) {
|
|
7352
|
-
const bytes = [];
|
|
7353
|
-
let current = value >>> 0;
|
|
7354
|
-
while (current >= 128) {
|
|
7355
|
-
bytes.push(current & 127 | 128);
|
|
7356
|
-
current >>>= 7;
|
|
7357
|
-
}
|
|
7358
|
-
bytes.push(current);
|
|
7359
|
-
return bytes;
|
|
7360
|
-
}
|
|
7361
|
-
function base32Lower(bytes) {
|
|
7362
|
-
let bits = 0;
|
|
7363
|
-
let value = 0;
|
|
7364
|
-
let output = "";
|
|
7365
|
-
for (const byte of bytes) {
|
|
7366
|
-
value = value << 8 | byte;
|
|
7367
|
-
bits += 8;
|
|
7368
|
-
while (bits >= 5) {
|
|
7369
|
-
output += BASE32_ALPHABET[value >>> bits - 5 & 31];
|
|
7370
|
-
bits -= 5;
|
|
7371
|
-
}
|
|
7372
|
-
}
|
|
7373
|
-
if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
|
|
7374
|
-
return `b${output}`;
|
|
7375
|
-
}
|
|
7376
|
-
function stableStringify(value) {
|
|
7377
|
-
if (value === null || typeof value !== "object") return JSON.stringify(value);
|
|
7378
|
-
if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
|
|
7379
|
-
return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
|
|
7380
|
-
}
|
|
7381
|
-
function readPackageVersion(pkgPath, expectedName) {
|
|
7382
|
-
if (!existsSync(pkgPath)) return null;
|
|
7383
|
-
const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
|
|
7384
|
-
if (expectedName && parsed.name !== expectedName) return null;
|
|
7385
|
-
return typeof parsed.version === "string" ? parsed.version : null;
|
|
7386
|
-
}
|
|
7387
|
-
function resolveInstalledPackageVersion(packageName) {
|
|
7388
|
-
const candidates = [];
|
|
7389
|
-
try {
|
|
7390
|
-
candidates.push(path.dirname(require.resolve(packageName)));
|
|
7391
|
-
} catch {}
|
|
7392
|
-
let dir = PACKAGE_DIR;
|
|
7393
|
-
while (true) {
|
|
7394
|
-
candidates.push(path.join(dir, "node_modules", packageName));
|
|
7395
|
-
const parent = path.dirname(dir);
|
|
7396
|
-
if (parent === dir) break;
|
|
7397
|
-
dir = parent;
|
|
7398
|
-
}
|
|
7399
|
-
for (const start of candidates) {
|
|
7400
|
-
let current = start;
|
|
7401
|
-
while (true) {
|
|
7402
|
-
const version = readPackageVersion(path.join(current, "package.json"), packageName);
|
|
7403
|
-
if (version) return version;
|
|
7404
|
-
const parent = path.dirname(current);
|
|
7405
|
-
if (parent === current) break;
|
|
7406
|
-
current = parent;
|
|
7407
|
-
}
|
|
7408
|
-
}
|
|
7409
|
-
return null;
|
|
7410
|
-
}
|
|
7411
|
-
function resolvePiJudgeRecipeVersions() {
|
|
7412
|
-
return {
|
|
7413
|
-
pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
|
|
7414
|
-
piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
|
|
7415
|
-
sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
|
|
7416
|
-
};
|
|
7417
|
-
}
|
|
7418
|
-
function buildPiJudgeRecipeManifest(inputs) {
|
|
7419
|
-
return {
|
|
7420
|
-
kind: "pi-judge-recipe/v1",
|
|
7421
|
-
versions: {
|
|
7422
|
-
...resolvePiJudgeRecipeVersions(),
|
|
7423
|
-
...inputs.overrides
|
|
7424
|
-
},
|
|
7425
|
-
assets: {
|
|
7426
|
-
promptAsset: inputs.promptAsset ?? null,
|
|
7427
|
-
rubricAsset: inputs.rubricAsset ?? null,
|
|
7428
|
-
skillSourcePath: inputs.skillSourcePath ?? null
|
|
7429
|
-
},
|
|
7430
|
-
hashes: {
|
|
7431
|
-
judgePromptSha256: sha256Hex(inputs.judgePrompt),
|
|
7432
|
-
rubricSha256: sha256Hex(inputs.rubric),
|
|
7433
|
-
skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
|
|
7434
|
-
implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
|
|
7435
|
-
}
|
|
7436
|
-
};
|
|
7437
|
-
}
|
|
7438
|
-
function computePiJudgeRecipeCid(inputs) {
|
|
7439
|
-
const manifest = buildPiJudgeRecipeManifest(inputs);
|
|
7440
|
-
const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
|
|
7441
|
-
const digestBytes = createHash("sha256").update(manifestBytes).digest();
|
|
7442
|
-
return {
|
|
7443
|
-
cid: base32Lower(Uint8Array.from([
|
|
7444
|
-
...encodeVarint(CID_VERSION),
|
|
7445
|
-
...encodeVarint(RAW_CODEC),
|
|
7446
|
-
...encodeVarint(SHA2_256_CODE),
|
|
7447
|
-
...encodeVarint(digestBytes.length),
|
|
7448
|
-
...digestBytes
|
|
7449
|
-
])),
|
|
7450
|
-
manifest
|
|
7451
|
-
};
|
|
7452
|
-
}
|
|
7453
|
-
//#endregion
|
|
7454
7225
|
//#region src/moltnet/render-phase6.ts
|
|
7455
7226
|
function slugToTitle(value) {
|
|
7456
7227
|
return value.split(/[:/_-]+/).filter(Boolean).map((part) => part[0]?.toUpperCase() + part.slice(1)).join(" ");
|
|
@@ -7592,7 +7363,8 @@ function ensureConnected(config) {
|
|
|
7592
7363
|
if (!agent || !diaryId) throw new Error("MoltNet not connected");
|
|
7593
7364
|
return {
|
|
7594
7365
|
agent,
|
|
7595
|
-
diaryId
|
|
7366
|
+
diaryId,
|
|
7367
|
+
teamId: config.getTeamId() ?? ""
|
|
7596
7368
|
};
|
|
7597
7369
|
}
|
|
7598
7370
|
/**
|
|
@@ -7758,24 +7530,34 @@ function createMoltNetTools(config) {
|
|
|
7758
7530
|
};
|
|
7759
7531
|
}
|
|
7760
7532
|
});
|
|
7761
|
-
const
|
|
7762
|
-
name: "
|
|
7763
|
-
label: "
|
|
7764
|
-
description: "Create a
|
|
7533
|
+
const createJudgePackTask = defineTool({
|
|
7534
|
+
name: "moltnet_judge_pack_task_create",
|
|
7535
|
+
label: "Create Judge Pack Task",
|
|
7536
|
+
description: "Create a judge_pack task for a rendered pack. Returns a taskId that moltnet_rendered_pack_judge can claim and execute. The rubric is required — pass the structured rubric JSON from @moltnet/tasks Rubric schema.",
|
|
7765
7537
|
parameters: Type.Object({
|
|
7766
|
-
renderedPackId: Type.String({ description: "Rendered pack ID" }),
|
|
7767
|
-
|
|
7538
|
+
renderedPackId: Type.String({ description: "Rendered pack ID to judge" }),
|
|
7539
|
+
sourcePackId: Type.String({ description: "Source pack ID. Fetch it from the rendered pack if unknown." }),
|
|
7540
|
+
rubric: Type.Any({ description: "Structured rubric object (Rubric schema from @moltnet/tasks). Must have rubricId, version, criteria[]." }),
|
|
7541
|
+
diaryId: Type.Optional(Type.String({ description: "Diary ID to impose the task on. Defaults to the connected diary." }))
|
|
7768
7542
|
}),
|
|
7769
7543
|
async execute(_id, params) {
|
|
7770
|
-
const { agent } = ensureConnected(config);
|
|
7771
|
-
const
|
|
7772
|
-
|
|
7544
|
+
const { agent, diaryId: connectedDiaryId, teamId: connectedTeamId } = ensureConnected(config);
|
|
7545
|
+
const task = await agent.tasks.create({
|
|
7546
|
+
taskType: "judge_pack",
|
|
7547
|
+
input: {
|
|
7548
|
+
renderedPackId: params.renderedPackId,
|
|
7549
|
+
sourcePackId: params.sourcePackId,
|
|
7550
|
+
rubric: params.rubric
|
|
7551
|
+
},
|
|
7552
|
+
diaryId: params.diaryId ?? connectedDiaryId,
|
|
7553
|
+
teamId: connectedTeamId
|
|
7554
|
+
});
|
|
7773
7555
|
return {
|
|
7774
7556
|
content: [{
|
|
7775
7557
|
type: "text",
|
|
7776
7558
|
text: JSON.stringify({
|
|
7777
|
-
|
|
7778
|
-
|
|
7559
|
+
taskId: task.id,
|
|
7560
|
+
task
|
|
7779
7561
|
}, null, 2)
|
|
7780
7562
|
}],
|
|
7781
7563
|
details: {}
|
|
@@ -7785,87 +7567,79 @@ function createMoltNetTools(config) {
|
|
|
7785
7567
|
const judgeRenderedPack = defineTool({
|
|
7786
7568
|
name: "moltnet_rendered_pack_judge",
|
|
7787
7569
|
label: "Judge MoltNet Rendered Pack",
|
|
7788
|
-
description: "
|
|
7570
|
+
description: "Claim a judge_pack task, run the fidelity judge locally, complete the task with structured scores, and set verifiedTaskId on the rendered pack. Create the task first with moltnet_judge_pack_task_create.",
|
|
7789
7571
|
parameters: Type.Object({
|
|
7790
|
-
|
|
7791
|
-
|
|
7792
|
-
rubric: Type.Optional(Type.String({ description: "Custom rubric override (local mode only). Defaults to the built-in rubric when omitted." }))
|
|
7572
|
+
taskId: Type.String({ description: "judge_pack task ID from moltnet_judge_pack_task_create" }),
|
|
7573
|
+
rubricOverride: Type.Optional(Type.String({ description: "Freeform rubric string override for the LLM judge prompt. When omitted the task rubric preamble (or built-in default) is used." }))
|
|
7793
7574
|
}),
|
|
7794
7575
|
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
7795
7576
|
const { agent } = ensureConnected(config);
|
|
7796
7577
|
const model = ctx?.model;
|
|
7797
7578
|
if (!model) throw new Error("No active model in pi session — cannot run the fidelity judge.");
|
|
7798
|
-
|
|
7799
|
-
|
|
7800
|
-
|
|
7801
|
-
if (
|
|
7802
|
-
|
|
7803
|
-
|
|
7804
|
-
|
|
7805
|
-
|
|
7806
|
-
|
|
7807
|
-
}
|
|
7808
|
-
|
|
7809
|
-
if (!rendered.content?.trim()) throw new Error(`rendered pack ${params.renderedPackId} has empty content`);
|
|
7810
|
-
const sourcePack = await agent.packs.get(rendered.sourcePackId, { expand: "entries" });
|
|
7811
|
-
if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${rendered.sourcePackId} has no entries`);
|
|
7812
|
-
sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
|
|
7813
|
-
title: entry.entry.title,
|
|
7814
|
-
content: entry.entry.content
|
|
7815
|
-
})));
|
|
7816
|
-
renderedContent = rendered.content;
|
|
7817
|
-
rubric = params.rubric?.trim() ? params.rubric : DEFAULT_RUBRIC;
|
|
7818
|
-
}
|
|
7579
|
+
const claimed = await agent.tasks.claim(params.taskId);
|
|
7580
|
+
const input = claimed.task.input;
|
|
7581
|
+
const rendered = await agent.packs.getRendered(input.renderedPackId);
|
|
7582
|
+
if (!rendered.content?.trim()) throw new Error(`rendered pack ${input.renderedPackId} has empty content`);
|
|
7583
|
+
const sourcePack = await agent.packs.get(input.sourcePackId, { expand: "entries" });
|
|
7584
|
+
if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${input.sourcePackId} has no entries`);
|
|
7585
|
+
const sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
|
|
7586
|
+
title: entry.entry.title,
|
|
7587
|
+
content: entry.entry.content
|
|
7588
|
+
})));
|
|
7589
|
+
const rubric = params.rubricOverride?.trim() || input.rubric?.preamble?.trim() || DEFAULT_RUBRIC;
|
|
7819
7590
|
let scores;
|
|
7820
7591
|
try {
|
|
7821
7592
|
scores = await runFidelityJudge({
|
|
7822
7593
|
model,
|
|
7823
7594
|
sourceEntries: sourceEntriesMd,
|
|
7824
|
-
renderedContent,
|
|
7595
|
+
renderedContent: rendered.content,
|
|
7825
7596
|
rubric
|
|
7826
7597
|
});
|
|
7827
7598
|
} catch (err) {
|
|
7599
|
+
await agent.tasks.fail(params.taskId, claimed.attempt.attemptN, { error: {
|
|
7600
|
+
code: "judge_failed",
|
|
7601
|
+
message: err.message ?? String(err)
|
|
7602
|
+
} }).catch(() => {});
|
|
7828
7603
|
throw new Error(`judge failed: ${err.message ?? String(err)}`);
|
|
7829
7604
|
}
|
|
7830
|
-
|
|
7831
|
-
|
|
7832
|
-
|
|
7833
|
-
|
|
7834
|
-
|
|
7835
|
-
|
|
7836
|
-
|
|
7837
|
-
|
|
7838
|
-
|
|
7839
|
-
|
|
7605
|
+
const modelId = model.provider && model.id ? `${model.provider}:${model.id}` : model.id ?? "pi:unknown";
|
|
7606
|
+
const output = {
|
|
7607
|
+
scores: [
|
|
7608
|
+
{
|
|
7609
|
+
criterionId: "coverage",
|
|
7610
|
+
score: scores.coverage
|
|
7611
|
+
},
|
|
7612
|
+
{
|
|
7613
|
+
criterionId: "grounding",
|
|
7614
|
+
score: scores.grounding
|
|
7615
|
+
},
|
|
7616
|
+
{
|
|
7617
|
+
criterionId: "faithfulness",
|
|
7618
|
+
score: scores.faithfulness
|
|
7619
|
+
}
|
|
7620
|
+
],
|
|
7621
|
+
composite: scores.composite,
|
|
7622
|
+
verdict: scores.reasoning,
|
|
7623
|
+
judgeModel: modelId
|
|
7840
7624
|
};
|
|
7841
|
-
const
|
|
7842
|
-
|
|
7843
|
-
|
|
7844
|
-
|
|
7845
|
-
|
|
7846
|
-
|
|
7847
|
-
|
|
7848
|
-
|
|
7849
|
-
const submit = await agent.packs.submitVerification(params.renderedPackId, {
|
|
7850
|
-
nonce: params.nonce,
|
|
7851
|
-
coverage: scores.coverage,
|
|
7852
|
-
grounding: scores.grounding,
|
|
7853
|
-
faithfulness: scores.faithfulness,
|
|
7854
|
-
transcript: scores.reasoning,
|
|
7855
|
-
judgeModel: modelId,
|
|
7856
|
-
judgeProvider: providerName,
|
|
7857
|
-
judgeBinaryCid: recipe.cid
|
|
7625
|
+
const outputCid = await computeJsonCid(output);
|
|
7626
|
+
const completed = await agent.tasks.complete(params.taskId, claimed.attempt.attemptN, {
|
|
7627
|
+
output,
|
|
7628
|
+
outputCid,
|
|
7629
|
+
usage: {
|
|
7630
|
+
inputTokens: 0,
|
|
7631
|
+
outputTokens: 0
|
|
7632
|
+
}
|
|
7858
7633
|
});
|
|
7634
|
+
await agent.packs.updateRendered(input.renderedPackId, { verifiedTaskId: params.taskId });
|
|
7859
7635
|
return {
|
|
7860
7636
|
content: [{
|
|
7861
7637
|
type: "text",
|
|
7862
7638
|
text: JSON.stringify({
|
|
7863
|
-
|
|
7864
|
-
|
|
7639
|
+
renderedPackId: input.renderedPackId,
|
|
7640
|
+
taskId: params.taskId,
|
|
7865
7641
|
scores,
|
|
7866
|
-
|
|
7867
|
-
judgeRecipeCid: recipe.cid,
|
|
7868
|
-
judgeRecipeManifest: recipe.manifest
|
|
7642
|
+
task: completed
|
|
7869
7643
|
}, null, 2)
|
|
7870
7644
|
}],
|
|
7871
7645
|
details: {}
|
|
@@ -8081,7 +7855,7 @@ function createMoltNetTools(config) {
|
|
|
8081
7855
|
renderPack,
|
|
8082
7856
|
listRenderedPacks,
|
|
8083
7857
|
getRenderedPack,
|
|
8084
|
-
|
|
7858
|
+
createJudgePackTask,
|
|
8085
7859
|
judgeRenderedPack,
|
|
8086
7860
|
diaryTags,
|
|
8087
7861
|
listEntries,
|
|
@@ -8697,6 +8471,135 @@ function ensureRelativeWorktreePaths(gitconfig) {
|
|
|
8697
8471
|
return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
|
|
8698
8472
|
}
|
|
8699
8473
|
//#endregion
|
|
8474
|
+
//#region src/moltnet/judge-recipe-cid.ts
|
|
8475
|
+
var require = createRequire(import.meta.url);
|
|
8476
|
+
var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
|
|
8477
|
+
var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
|
|
8478
|
+
var SDK_PACKAGE_NAME = "@themoltnet/sdk";
|
|
8479
|
+
var CID_VERSION = 1;
|
|
8480
|
+
var RAW_CODEC = 85;
|
|
8481
|
+
var SHA2_256_CODE = 18;
|
|
8482
|
+
var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
|
|
8483
|
+
function findSelfPackageDir() {
|
|
8484
|
+
const start = path.dirname(fileURLToPath(import.meta.url));
|
|
8485
|
+
let dir = start;
|
|
8486
|
+
while (true) {
|
|
8487
|
+
const candidate = path.join(dir, "package.json");
|
|
8488
|
+
if (existsSync(candidate)) {
|
|
8489
|
+
if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
|
|
8490
|
+
}
|
|
8491
|
+
const parent = path.dirname(dir);
|
|
8492
|
+
if (parent === dir) return start;
|
|
8493
|
+
dir = parent;
|
|
8494
|
+
}
|
|
8495
|
+
}
|
|
8496
|
+
var PACKAGE_DIR = findSelfPackageDir();
|
|
8497
|
+
function sha256Hex(value) {
|
|
8498
|
+
return createHash("sha256").update(value, "utf8").digest("hex");
|
|
8499
|
+
}
|
|
8500
|
+
function encodeVarint(value) {
|
|
8501
|
+
const bytes = [];
|
|
8502
|
+
let current = value >>> 0;
|
|
8503
|
+
while (current >= 128) {
|
|
8504
|
+
bytes.push(current & 127 | 128);
|
|
8505
|
+
current >>>= 7;
|
|
8506
|
+
}
|
|
8507
|
+
bytes.push(current);
|
|
8508
|
+
return bytes;
|
|
8509
|
+
}
|
|
8510
|
+
function base32Lower(bytes) {
|
|
8511
|
+
let bits = 0;
|
|
8512
|
+
let value = 0;
|
|
8513
|
+
let output = "";
|
|
8514
|
+
for (const byte of bytes) {
|
|
8515
|
+
value = value << 8 | byte;
|
|
8516
|
+
bits += 8;
|
|
8517
|
+
while (bits >= 5) {
|
|
8518
|
+
output += BASE32_ALPHABET[value >>> bits - 5 & 31];
|
|
8519
|
+
bits -= 5;
|
|
8520
|
+
}
|
|
8521
|
+
}
|
|
8522
|
+
if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
|
|
8523
|
+
return `b${output}`;
|
|
8524
|
+
}
|
|
8525
|
+
function stableStringify(value) {
|
|
8526
|
+
if (value === null || typeof value !== "object") return JSON.stringify(value);
|
|
8527
|
+
if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
|
|
8528
|
+
return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
|
|
8529
|
+
}
|
|
8530
|
+
function readPackageVersion(pkgPath, expectedName) {
|
|
8531
|
+
if (!existsSync(pkgPath)) return null;
|
|
8532
|
+
const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
|
|
8533
|
+
if (expectedName && parsed.name !== expectedName) return null;
|
|
8534
|
+
return typeof parsed.version === "string" ? parsed.version : null;
|
|
8535
|
+
}
|
|
8536
|
+
function resolveInstalledPackageVersion(packageName) {
|
|
8537
|
+
const candidates = [];
|
|
8538
|
+
try {
|
|
8539
|
+
candidates.push(path.dirname(require.resolve(packageName)));
|
|
8540
|
+
} catch {}
|
|
8541
|
+
let dir = PACKAGE_DIR;
|
|
8542
|
+
while (true) {
|
|
8543
|
+
candidates.push(path.join(dir, "node_modules", packageName));
|
|
8544
|
+
const parent = path.dirname(dir);
|
|
8545
|
+
if (parent === dir) break;
|
|
8546
|
+
dir = parent;
|
|
8547
|
+
}
|
|
8548
|
+
for (const start of candidates) {
|
|
8549
|
+
let current = start;
|
|
8550
|
+
while (true) {
|
|
8551
|
+
const version = readPackageVersion(path.join(current, "package.json"), packageName);
|
|
8552
|
+
if (version) return version;
|
|
8553
|
+
const parent = path.dirname(current);
|
|
8554
|
+
if (parent === current) break;
|
|
8555
|
+
current = parent;
|
|
8556
|
+
}
|
|
8557
|
+
}
|
|
8558
|
+
return null;
|
|
8559
|
+
}
|
|
8560
|
+
function resolvePiJudgeRecipeVersions() {
|
|
8561
|
+
return {
|
|
8562
|
+
pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
|
|
8563
|
+
piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
|
|
8564
|
+
sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
|
|
8565
|
+
};
|
|
8566
|
+
}
|
|
8567
|
+
function buildPiJudgeRecipeManifest(inputs) {
|
|
8568
|
+
return {
|
|
8569
|
+
kind: "pi-judge-recipe/v1",
|
|
8570
|
+
versions: {
|
|
8571
|
+
...resolvePiJudgeRecipeVersions(),
|
|
8572
|
+
...inputs.overrides
|
|
8573
|
+
},
|
|
8574
|
+
assets: {
|
|
8575
|
+
promptAsset: inputs.promptAsset ?? null,
|
|
8576
|
+
rubricAsset: inputs.rubricAsset ?? null,
|
|
8577
|
+
skillSourcePath: inputs.skillSourcePath ?? null
|
|
8578
|
+
},
|
|
8579
|
+
hashes: {
|
|
8580
|
+
judgePromptSha256: sha256Hex(inputs.judgePrompt),
|
|
8581
|
+
rubricSha256: sha256Hex(inputs.rubric),
|
|
8582
|
+
skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
|
|
8583
|
+
implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
|
|
8584
|
+
}
|
|
8585
|
+
};
|
|
8586
|
+
}
|
|
8587
|
+
function computePiJudgeRecipeCid(inputs) {
|
|
8588
|
+
const manifest = buildPiJudgeRecipeManifest(inputs);
|
|
8589
|
+
const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
|
|
8590
|
+
const digestBytes = createHash("sha256").update(manifestBytes).digest();
|
|
8591
|
+
return {
|
|
8592
|
+
cid: base32Lower(Uint8Array.from([
|
|
8593
|
+
...encodeVarint(CID_VERSION),
|
|
8594
|
+
...encodeVarint(RAW_CODEC),
|
|
8595
|
+
...encodeVarint(SHA2_256_CODE),
|
|
8596
|
+
...encodeVarint(digestBytes.length),
|
|
8597
|
+
...digestBytes
|
|
8598
|
+
])),
|
|
8599
|
+
manifest
|
|
8600
|
+
};
|
|
8601
|
+
}
|
|
8602
|
+
//#endregion
|
|
8700
8603
|
//#region ../tasks/src/formats.ts
|
|
8701
8604
|
/**
|
|
8702
8605
|
* Register TypeBox string formats used across Task / TaskOutput / task-type
|
|
@@ -10001,6 +9904,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
10001
9904
|
sandboxConfig: opts.sandboxConfig
|
|
10002
9905
|
});
|
|
10003
9906
|
const diaryId = task.diaryId ?? "";
|
|
9907
|
+
const taskTeamId = task.teamId ?? "";
|
|
10004
9908
|
let reporterOpen = false;
|
|
10005
9909
|
let session = null;
|
|
10006
9910
|
const finalUsage = emptyUsage(opts.provider, opts.model);
|
|
@@ -10063,6 +9967,7 @@ async function executePiTask(claimedTask, reporter, opts) {
|
|
|
10063
9967
|
const moltnetTools = createMoltNetTools({
|
|
10064
9968
|
getAgent: () => moltnetAgent,
|
|
10065
9969
|
getDiaryId: () => diaryId,
|
|
9970
|
+
getTeamId: () => taskTeamId,
|
|
10066
9971
|
getSessionErrors: () => [],
|
|
10067
9972
|
clearSessionErrors: () => {},
|
|
10068
9973
|
getHostCwd: () => mountPath,
|
|
@@ -10281,6 +10186,7 @@ function moltnetExtension(pi) {
|
|
|
10281
10186
|
let worktreePath = null;
|
|
10282
10187
|
let moltnetAgent = null;
|
|
10283
10188
|
let diaryId = null;
|
|
10189
|
+
let teamId = null;
|
|
10284
10190
|
let hostExecBaseEnv = HOST_EXEC_DEFAULT_BASE_ENV;
|
|
10285
10191
|
async function ensureVm(ctx) {
|
|
10286
10192
|
if (vm) return vm;
|
|
@@ -10335,6 +10241,7 @@ function moltnetExtension(pi) {
|
|
|
10335
10241
|
activateAgentEnv(managed.credentials.agentEnv, mainRepo);
|
|
10336
10242
|
moltnetAgent = await connect({ configDir: managed.agentDir });
|
|
10337
10243
|
diaryId = managed.credentials.agentEnv.MOLTNET_DIARY_ID ?? null;
|
|
10244
|
+
teamId = managed.credentials.agentEnv.MOLTNET_TEAM_ID ?? null;
|
|
10338
10245
|
hostExecBaseEnv = new Set([...HOST_EXEC_DEFAULT_BASE_ENV, ...Object.keys(managed.credentials.agentEnv)]);
|
|
10339
10246
|
vm = managed.vm;
|
|
10340
10247
|
const label = worktreePath ? `${mountPath} → ${GUEST_WORKSPACE}` : `${localCwd} → ${GUEST_WORKSPACE}`;
|
|
@@ -10356,6 +10263,7 @@ function moltnetExtension(pi) {
|
|
|
10356
10263
|
vm = null;
|
|
10357
10264
|
vmStarting = null;
|
|
10358
10265
|
moltnetAgent = null;
|
|
10266
|
+
teamId = null;
|
|
10359
10267
|
}
|
|
10360
10268
|
});
|
|
10361
10269
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
@@ -10395,6 +10303,7 @@ function moltnetExtension(pi) {
|
|
|
10395
10303
|
const moltnetTools = createMoltNetTools({
|
|
10396
10304
|
getAgent: () => moltnetAgent,
|
|
10397
10305
|
getDiaryId: () => diaryId,
|
|
10306
|
+
getTeamId: () => teamId,
|
|
10398
10307
|
getSessionErrors: () => sessionErrors,
|
|
10399
10308
|
clearSessionErrors: () => {
|
|
10400
10309
|
sessionErrors.length = 0;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@themoltnet/pi-extension",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MoltNet pi extension — sandboxed tool execution in Gondolin VMs with MoltNet identity and persistent memory",
|
|
6
6
|
"license": "MIT",
|
|
@@ -30,8 +30,8 @@
|
|
|
30
30
|
"dependencies": {
|
|
31
31
|
"@earendil-works/gondolin": "^0.7.0",
|
|
32
32
|
"@sinclair/typebox": "^0.34.0",
|
|
33
|
-
"@themoltnet/
|
|
34
|
-
"@themoltnet/
|
|
33
|
+
"@themoltnet/agent-runtime": "0.2.1",
|
|
34
|
+
"@themoltnet/sdk": "0.95.0"
|
|
35
35
|
},
|
|
36
36
|
"peerDependencies": {
|
|
37
37
|
"@mariozechner/pi-coding-agent": ">=0.67.0",
|