@themoltnet/pi-extension 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,11 +7,10 @@ import { createHash } from "node:crypto";
7
7
  import crypto, { createHash as createHash$1 } from "crypto";
8
8
  import { readFile } from "node:fs/promises";
9
9
  import { homedir } from "node:os";
10
- import { Type, complete, getModel } from "@mariozechner/pi-ai";
10
+ import { Type, getModel } from "@mariozechner/pi-ai";
11
11
  import { RealFSProvider, ShadowProvider, VM, VmCheckpoint, createHttpHooks, createShadowPathPredicate, ensureImageSelector, loadGuestAssets } from "@earendil-works/gondolin";
12
12
  import { parseEnv } from "node:util";
13
- import { fileURLToPath } from "node:url";
14
- import { SpanStatusCode, context, trace } from "@opentelemetry/api";
13
+ import { SpanStatusCode, context, metrics, trace } from "@opentelemetry/api";
15
14
  import { FormatRegistry, Type as Type$1 } from "@sinclair/typebox";
16
15
  import { Value } from "@sinclair/typebox/value";
17
16
  //#region \0rolldown/runtime.js
@@ -3848,7 +3847,7 @@ var cidSymbol = Symbol.for("@ipld/js-cid/CID");
3848
3847
  * naturally prevents field delimiter collision.
3849
3848
  */
3850
3849
  /** SHA-256 multicodec code per multihash table */
3851
- var SHA2_256_CODE$1 = 18;
3850
+ var SHA2_256_CODE = 18;
3852
3851
  /**
3853
3852
  * Build the canonical JSON input for content hashing.
3854
3853
  *
@@ -3880,7 +3879,7 @@ function computeCanonicalHash(entryType, title, content, tags) {
3880
3879
  * Example output: "bafkreig..."
3881
3880
  */
3882
3881
  function computeContentCid(entryType, title, content, tags) {
3883
- const digest = create(SHA2_256_CODE$1, computeCanonicalHash(entryType, title, content, tags));
3882
+ const digest = create(SHA2_256_CODE, computeCanonicalHash(entryType, title, content, tags));
3884
3883
  return CID.createV1(85, digest).toString(base32);
3885
3884
  }
3886
3885
  var { p: P, n: N, Gx, Gy, a: _a, d: _d } = {
@@ -7135,159 +7134,6 @@ var registerSandboxCommand = (pi, state) => {
7135
7134
  });
7136
7135
  };
7137
7136
  //#endregion
7138
- //#region src/moltnet/judge/assets.ts
7139
- /** Default fidelity rubric — kept verbatim from the Go judge. */
7140
- var DEFAULT_RUBRIC = `Evaluate the rendered content against the source entries on three axes:
7141
-
7142
- COVERAGE (0.0-1.0):
7143
- - Identify each distinct topic/fact in the source entries
7144
- - Check if each is represented in the rendered content
7145
- - Score = (represented topics) / (total source topics)
7146
- - A topic can be restructured or summarized but must be present
7147
-
7148
- GROUNDING (0.0-1.0):
7149
- - Identify each distinct claim/fact in the rendered content
7150
- - Check if each is traceable to a specific source entry
7151
- - Score = (grounded claims) / (total rendered claims)
7152
- - Restructured content is fine if the underlying fact comes from a source
7153
-
7154
- FAITHFULNESS (0.0-1.0):
7155
- - For content that IS represented, check semantic accuracy
7156
- - Is the meaning preserved? Any distortions, inversions, or misquotes?
7157
- - Score = (accurate representations) / (total representations)
7158
- - Summarization is fine; misrepresentation is not
7159
- `;
7160
- /** Judge system prompt — kept verbatim from the Go judge signature. */
7161
- var JUDGE_SYSTEM_PROMPT = `You are a fidelity judge for rendered context packs. Your job is to evaluate
7162
- whether a rendered markdown document faithfully represents its source entries.
7163
-
7164
- Score each axis independently and precisely. Be critical — the purpose is to
7165
- catch content drift, hallucination, and cherry-picking.
7166
-
7167
- You will be given three inputs:
7168
-
7169
- 1. \`source_entries\` — the original source entries from the context pack, in
7170
- markdown format.
7171
- 2. \`rendered_content\` — the agent-rendered markdown derived from the source
7172
- entries.
7173
- 3. \`rubric\` — the fidelity scoring rubric with criteria definitions.
7174
-
7175
- Return a JSON object matching the requested schema with these fields:
7176
-
7177
- - \`coverage\` (number, 0.0–1.0): fraction of source entries represented in
7178
- rendered content. 1.0 means all source entries are covered.
7179
- - \`grounding\` (number, 0.0–1.0): fraction of rendered content traceable to
7180
- source entries. 1.0 means everything comes from sources.
7181
- - \`faithfulness\` (number, 0.0–1.0): semantic accuracy of represented content.
7182
- 1.0 means source content is accurately represented.
7183
- - \`reasoning\` (string): detailed step-by-step analysis explaining each score.
7184
-
7185
- Respond with ONLY a single JSON object. No prose before or after.
7186
- `;
7187
- //#endregion
7188
- //#region src/moltnet/judge/fidelity.ts
7189
- /**
7190
- * Pi-native port of the Go fidelity judge
7191
- * (libs/dspy-adapters/fidelity/fidelity.go).
7192
- *
7193
- * Same inputs (source_entries, rendered_content, rubric), same outputs
7194
- * (coverage, grounding, faithfulness, reasoning). Uses pi-ai `complete()`
7195
- * instead of dspy-go; no process-global state.
7196
- */
7197
- var JSON_FENCE_RE = /```(?:json)?\s*([\s\S]*?)```/i;
7198
- function extractJson(text) {
7199
- const fenceMatch = text.match(JSON_FENCE_RE);
7200
- if (fenceMatch && fenceMatch[1]) return fenceMatch[1].trim();
7201
- const firstBrace = text.indexOf("{");
7202
- const lastBrace = text.lastIndexOf("}");
7203
- if (firstBrace >= 0 && lastBrace > firstBrace) return text.slice(firstBrace, lastBrace + 1);
7204
- return text.trim();
7205
- }
7206
- function clamp01(value) {
7207
- const n = typeof value === "number" ? value : Number(value);
7208
- if (!Number.isFinite(n)) return 0;
7209
- if (n < 0) return 0;
7210
- if (n > 1) return 1;
7211
- return n;
7212
- }
7213
- function coerceString(value) {
7214
- if (typeof value === "string") return value;
7215
- if (value === null || value === void 0) return "";
7216
- if (typeof value === "number" || typeof value === "boolean") return String(value);
7217
- try {
7218
- return JSON.stringify(value);
7219
- } catch {
7220
- return "";
7221
- }
7222
- }
7223
- function parseScores(raw) {
7224
- const jsonText = extractJson(raw);
7225
- let parsed;
7226
- try {
7227
- parsed = JSON.parse(jsonText);
7228
- } catch (err) {
7229
- throw new Error(`judge returned an invalid structured response: ${err.message}\n---raw---\n${raw}`);
7230
- }
7231
- const coverage = clamp01(parsed.coverage);
7232
- const grounding = clamp01(parsed.grounding);
7233
- const faithfulness = clamp01(parsed.faithfulness);
7234
- const reasoning = coerceString(parsed.reasoning);
7235
- return {
7236
- coverage,
7237
- grounding,
7238
- faithfulness,
7239
- composite: (coverage + grounding + faithfulness) / 3,
7240
- reasoning
7241
- };
7242
- }
7243
- function buildUserMessage(sourceEntries, renderedContent, rubric) {
7244
- return [
7245
- "## Rubric",
7246
- rubric,
7247
- "",
7248
- "## Source entries",
7249
- sourceEntries,
7250
- "",
7251
- "## Rendered content",
7252
- renderedContent,
7253
- "",
7254
- "Produce the JSON object now."
7255
- ].join("\n");
7256
- }
7257
- /**
7258
- * Run the fidelity judge via pi-ai `complete()`. Mirrors `fidelity.Run` in
7259
- * libs/dspy-adapters/fidelity/fidelity.go.
7260
- */
7261
- async function runFidelityJudge(req, options = {}) {
7262
- const rubric = req.rubric?.trim() ? req.rubric : DEFAULT_RUBRIC;
7263
- const userPrompt = buildUserMessage(req.sourceEntries, req.renderedContent, rubric);
7264
- const message = await complete(req.model, {
7265
- systemPrompt: JUDGE_SYSTEM_PROMPT,
7266
- messages: [{
7267
- role: "user",
7268
- content: userPrompt,
7269
- timestamp: Date.now()
7270
- }]
7271
- }, options.signal ? { signal: options.signal } : void 0);
7272
- if (message.stopReason === "error" || message.stopReason === "aborted") throw new Error(`judge failed: ${message.errorMessage ?? message.stopReason}`);
7273
- const textContent = message.content.filter((c) => c.type === "text" && typeof c.text === "string").map((c) => c.text).join("\n").trim();
7274
- if (!textContent) throw new Error("judge returned empty response");
7275
- return parseScores(textContent);
7276
- }
7277
- /**
7278
- * Build a stable markdown blob of source entries for the judge prompt.
7279
- * Mirrors `buildSourceEntriesFromPack` / `buildSourceEntriesMarkdown` in the
7280
- * Go CLI so that local and proctored modes produce the same input shape.
7281
- */
7282
- function buildSourceEntriesMarkdown(entries) {
7283
- const parts = [];
7284
- for (const entry of entries) {
7285
- const title = entry.title?.trim() || "Untitled";
7286
- parts.push(`## ${title}\n${entry.content}\n`);
7287
- }
7288
- return parts.join("\n");
7289
- }
7290
- //#endregion
7291
7137
  //#region src/moltnet/render-phase6.ts
7292
7138
  function slugToTitle(value) {
7293
7139
  return value.split(/[:/_-]+/).filter(Boolean).map((part) => part[0]?.toUpperCase() + part.slice(1)).join(" ");
@@ -7434,6 +7280,21 @@ function ensureConnected(config) {
7434
7280
  };
7435
7281
  }
7436
7282
  /**
7283
+ * Expand the `taskFilter` shorthand on the diary list/search tools into
7284
+ * the matching `task:*` provenance tags emitted by `moltnet_create_entry`
7285
+ * during a task. Returning an array (possibly empty) lets callers spread
7286
+ * it into a larger `tags` AND-filter without conditionals.
7287
+ */
7288
+ function compileTaskFilterTags(filter) {
7289
+ if (!filter) return [];
7290
+ const tags = [];
7291
+ if (filter.taskId) tags.push(`task:id:${filter.taskId}`);
7292
+ if (filter.taskType) tags.push(`task:type:${filter.taskType}`);
7293
+ if (filter.correlationId) tags.push(`task:correlation:${filter.correlationId}`);
7294
+ if (typeof filter.attemptN === "number") tags.push(`task:attempt:${filter.attemptN}`);
7295
+ return tags;
7296
+ }
7297
+ /**
7437
7298
  * Create all MoltNet tool definitions, ready to pass to `pi.registerTool()`.
7438
7299
  */
7439
7300
  function createMoltNetTools(config) {
@@ -7596,122 +7457,6 @@ function createMoltNetTools(config) {
7596
7457
  };
7597
7458
  }
7598
7459
  });
7599
- const createJudgePackTask = defineTool({
7600
- name: "moltnet_judge_pack_task_create",
7601
- label: "Create Judge Pack Task",
7602
- description: "Create a judge_pack task for a rendered pack. Returns a taskId that moltnet_rendered_pack_judge can claim and execute. The rubric is required — pass the structured rubric JSON from @moltnet/tasks Rubric schema.",
7603
- parameters: Type.Object({
7604
- renderedPackId: Type.String({ description: "Rendered pack ID to judge" }),
7605
- sourcePackId: Type.String({ description: "Source pack ID. Fetch it from the rendered pack if unknown." }),
7606
- rubric: Type.Any({ description: "Structured rubric object (Rubric schema from @moltnet/tasks). Must have rubricId, version, criteria[]." }),
7607
- diaryId: Type.Optional(Type.String({ description: "Diary ID to impose the task on. Defaults to the connected diary." }))
7608
- }),
7609
- async execute(_id, params) {
7610
- const { agent, diaryId: connectedDiaryId, teamId: connectedTeamId } = ensureConnected(config);
7611
- const task = await agent.tasks.create({
7612
- taskType: "judge_pack",
7613
- input: {
7614
- renderedPackId: params.renderedPackId,
7615
- sourcePackId: params.sourcePackId,
7616
- rubric: params.rubric
7617
- },
7618
- diaryId: params.diaryId ?? connectedDiaryId,
7619
- teamId: connectedTeamId
7620
- });
7621
- return {
7622
- content: [{
7623
- type: "text",
7624
- text: JSON.stringify({
7625
- taskId: task.id,
7626
- task
7627
- }, null, 2)
7628
- }],
7629
- details: {}
7630
- };
7631
- }
7632
- });
7633
- const judgeRenderedPack = defineTool({
7634
- name: "moltnet_rendered_pack_judge",
7635
- label: "Judge MoltNet Rendered Pack",
7636
- description: "Claim a judge_pack task, run the fidelity judge locally, complete the task with structured scores, and set verifiedTaskId on the rendered pack. Create the task first with moltnet_judge_pack_task_create.",
7637
- parameters: Type.Object({
7638
- taskId: Type.String({ description: "judge_pack task ID from moltnet_judge_pack_task_create" }),
7639
- rubricOverride: Type.Optional(Type.String({ description: "Freeform rubric string override for the LLM judge prompt. When omitted the task rubric preamble (or built-in default) is used." }))
7640
- }),
7641
- async execute(_id, params, _signal, _onUpdate, ctx) {
7642
- const { agent } = ensureConnected(config);
7643
- const model = ctx?.model;
7644
- if (!model) throw new Error("No active model in pi session — cannot run the fidelity judge.");
7645
- const claimed = await agent.tasks.claim(params.taskId);
7646
- const input = claimed.task.input;
7647
- const rendered = await agent.packs.getRendered(input.renderedPackId);
7648
- if (!rendered.content?.trim()) throw new Error(`rendered pack ${input.renderedPackId} has empty content`);
7649
- const sourcePack = await agent.packs.get(input.sourcePackId, { expand: "entries" });
7650
- if (!sourcePack.entries || sourcePack.entries.length === 0) throw new Error(`source pack ${input.sourcePackId} has no entries`);
7651
- const sourceEntriesMd = buildSourceEntriesMarkdown(sourcePack.entries.map((entry) => ({
7652
- title: entry.entry.title,
7653
- content: entry.entry.content
7654
- })));
7655
- const rubric = params.rubricOverride?.trim() || input.rubric?.preamble?.trim() || DEFAULT_RUBRIC;
7656
- let scores;
7657
- try {
7658
- scores = await runFidelityJudge({
7659
- model,
7660
- sourceEntries: sourceEntriesMd,
7661
- renderedContent: rendered.content,
7662
- rubric
7663
- });
7664
- } catch (err) {
7665
- await agent.tasks.fail(params.taskId, claimed.attempt.attemptN, { error: {
7666
- code: "judge_failed",
7667
- message: err.message ?? String(err)
7668
- } }).catch(() => {});
7669
- throw new Error(`judge failed: ${err.message ?? String(err)}`);
7670
- }
7671
- const modelId = model.provider && model.id ? `${model.provider}:${model.id}` : model.id ?? "pi:unknown";
7672
- const output = {
7673
- scores: [
7674
- {
7675
- criterionId: "coverage",
7676
- score: scores.coverage
7677
- },
7678
- {
7679
- criterionId: "grounding",
7680
- score: scores.grounding
7681
- },
7682
- {
7683
- criterionId: "faithfulness",
7684
- score: scores.faithfulness
7685
- }
7686
- ],
7687
- composite: scores.composite,
7688
- verdict: scores.reasoning,
7689
- judgeModel: modelId
7690
- };
7691
- const outputCid = await computeJsonCid(output);
7692
- const completed = await agent.tasks.complete(params.taskId, claimed.attempt.attemptN, {
7693
- output,
7694
- outputCid,
7695
- usage: {
7696
- inputTokens: 0,
7697
- outputTokens: 0
7698
- }
7699
- });
7700
- await agent.packs.updateRendered(input.renderedPackId, { verifiedTaskId: params.taskId });
7701
- return {
7702
- content: [{
7703
- type: "text",
7704
- text: JSON.stringify({
7705
- renderedPackId: input.renderedPackId,
7706
- taskId: params.taskId,
7707
- scores,
7708
- task: completed
7709
- }, null, 2)
7710
- }],
7711
- details: {}
7712
- };
7713
- }
7714
- });
7715
7460
  const diaryTags = defineTool({
7716
7461
  name: "moltnet_diary_tags",
7717
7462
  label: "List MoltNet Diary Tags",
@@ -7747,12 +7492,32 @@ function createMoltNetTools(config) {
7747
7492
  const listEntries = defineTool({
7748
7493
  name: "moltnet_list_entries",
7749
7494
  label: "List MoltNet Diary Entries",
7750
- description: "List entries from the MoltNet diary. When `entryIds` is provided, batch-fetches those specific entries (max 50) and returns full fields including entryType, contentSignature, and contentHash for signature checks. Otherwise returns recent entries with a content preview.",
7495
+ description: "List entries from the MoltNet diary. When `entryIds` is provided, batch-fetches those specific entries (max 50) and returns full fields including entryType, contentSignature, and contentHash for signature checks. Otherwise returns recent entries with a content preview, filtered by any combination of tags (AND), excludeTags (NONE), entryType, and the taskFilter shorthand which expands into the right `task:*` tags.",
7751
7496
  parameters: Type.Object({
7752
7497
  limit: Type.Optional(Type.Number({ description: "Max entries to return (default 10)" })),
7753
- tag: Type.Optional(Type.String({ description: "Filter by tag (optional)" })),
7498
+ tags: Type.Optional(Type.Array(Type.String({
7499
+ minLength: 1,
7500
+ maxLength: 50
7501
+ }), {
7502
+ description: "Tags filter — entry must have ALL listed tags (AND). Max 20.",
7503
+ maxItems: 20
7504
+ })),
7505
+ excludeTags: Type.Optional(Type.Array(Type.String({
7506
+ minLength: 1,
7507
+ maxLength: 50
7508
+ }), {
7509
+ description: "Tags to exclude — entry must have NONE of these. Max 20.",
7510
+ maxItems: 20
7511
+ })),
7512
+ entryType: Type.Optional(Type.String({ description: "Filter by entry type (procedural, semantic, episodic, reflection, identity, soul)." })),
7513
+ taskFilter: Type.Optional(Type.Object({
7514
+ taskId: Type.Optional(Type.String()),
7515
+ taskType: Type.Optional(Type.String()),
7516
+ correlationId: Type.Optional(Type.String()),
7517
+ attemptN: Type.Optional(Type.Number())
7518
+ }, { description: "Shorthand: any combination compiles to the matching task:* tags (task:id:<id>, task:type:<type>, task:correlation:<id>, task:attempt:<n>) and is merged into the tags filter." })),
7754
7519
  entryIds: Type.Optional(Type.Array(Type.String(), {
7755
- description: "Batch-fetch specific entries by UUID (max 50). Overrides `limit` and `tag` for selection.",
7520
+ description: "Batch-fetch specific entries by UUID (max 50). Overrides every other filter.",
7756
7521
  maxItems: 50
7757
7522
  }))
7758
7523
  }),
@@ -7766,7 +7531,11 @@ function createMoltNetTools(config) {
7766
7531
  if (batchMode) query.ids = params.entryIds;
7767
7532
  else {
7768
7533
  query.limit = params.limit ?? 10;
7769
- if (params.tag) query.tag = params.tag;
7534
+ const expandedTags = compileTaskFilterTags(params.taskFilter);
7535
+ const allTags = [...params.tags ?? [], ...expandedTags];
7536
+ if (allTags.length) query.tags = allTags;
7537
+ if (params.excludeTags?.length) query.excludeTags = params.excludeTags;
7538
+ if (params.entryType) query.entryType = params.entryType;
7770
7539
  }
7771
7540
  const entries = await agent.entries.list(diaryId, query);
7772
7541
  return {
@@ -7822,17 +7591,46 @@ function createMoltNetTools(config) {
7822
7591
  const searchEntries = defineTool({
7823
7592
  name: "moltnet_search_entries",
7824
7593
  label: "Search MoltNet Diary Entries",
7825
- description: "Search diary entries by semantic query. Uses vector similarity to find relevant entries.",
7594
+ description: "Hybrid (semantic + lexical) search over diary entries. Optional tags / excludeTags / entryTypes filters AND with the query; the taskFilter shorthand expands into task:* provenance tags so `taskFilter: { taskType: \"fulfill_brief\" }` returns only entries from fulfill_brief attempts. Filters apply server-side before ranking.",
7826
7595
  parameters: Type.Object({
7827
7596
  query: Type.String({ description: "Natural language search query" }),
7828
- limit: Type.Optional(Type.Number({ description: "Max results (default 5)" }))
7597
+ limit: Type.Optional(Type.Number({ description: "Max results (default 5)" })),
7598
+ tags: Type.Optional(Type.Array(Type.String({
7599
+ minLength: 1,
7600
+ maxLength: 50
7601
+ }), {
7602
+ description: "Entry must have ALL listed tags (AND). Max 20.",
7603
+ maxItems: 20
7604
+ })),
7605
+ excludeTags: Type.Optional(Type.Array(Type.String({
7606
+ minLength: 1,
7607
+ maxLength: 50
7608
+ }), {
7609
+ description: "Entry must have NONE of these tags. Max 20.",
7610
+ maxItems: 20
7611
+ })),
7612
+ entryTypes: Type.Optional(Type.Array(Type.String(), {
7613
+ description: "Restrict to these entry types (procedural, semantic, episodic, reflection, identity, soul). Max 6.",
7614
+ maxItems: 6
7615
+ })),
7616
+ taskFilter: Type.Optional(Type.Object({
7617
+ taskId: Type.Optional(Type.String()),
7618
+ taskType: Type.Optional(Type.String()),
7619
+ correlationId: Type.Optional(Type.String()),
7620
+ attemptN: Type.Optional(Type.Number())
7621
+ }, { description: "Shorthand: any combination compiles to the matching task:* tags and is merged into the tags filter." }))
7829
7622
  }),
7830
7623
  async execute(_id, params) {
7831
7624
  const { agent, diaryId } = ensureConnected(config);
7625
+ const expandedTags = compileTaskFilterTags(params.taskFilter);
7626
+ const allTags = [...params.tags ?? [], ...expandedTags];
7832
7627
  const results = await agent.entries.search({
7833
7628
  diaryId,
7834
7629
  query: params.query,
7835
- limit: params.limit ?? 5
7630
+ limit: params.limit ?? 5,
7631
+ ...allTags.length ? { tags: allTags } : {},
7632
+ ...params.excludeTags?.length ? { excludeTags: params.excludeTags } : {},
7633
+ ...params.entryTypes?.length ? { entryTypes: params.entryTypes } : {}
7836
7634
  });
7837
7635
  return {
7838
7636
  content: [{
@@ -7852,7 +7650,7 @@ function createMoltNetTools(config) {
7852
7650
  const createEntry = defineTool({
7853
7651
  name: "moltnet_create_entry",
7854
7652
  label: "Create MoltNet Diary Entry",
7855
- description: "Create a new diary entry to record decisions, findings, incidents, or reflections. During an active task, the entry is forced into the task diary and tagged with task:<id>, task_type:<type>, task_attempt:<n>, and correlation:<id> when set; an explicit diaryId mismatching the task diary is rejected.",
7653
+ description: "Create a new diary entry to record decisions, findings, incidents, or reflections. During an active task, the entry is forced into the task diary and tagged with the task:* provenance namespace (task:id:<id>, task:type:<type>, task:attempt:<n>, plus task:correlation:<id> when set); an explicit diaryId mismatching the task diary is rejected.",
7856
7654
  parameters: Type.Object({
7857
7655
  title: Type.String({ description: "Entry title (concise, descriptive)" }),
7858
7656
  content: Type.String({ description: "Entry content (markdown)" }),
@@ -7869,10 +7667,10 @@ function createMoltNetTools(config) {
7869
7667
  if (params.diaryId && params.diaryId !== taskCtx.diaryId) throw new Error(`entries_create: diaryId "${params.diaryId}" does not match the active task diary "${taskCtx.diaryId}". Entries created during a task must land in the task diary.`);
7870
7668
  targetDiaryId = taskCtx.diaryId;
7871
7669
  autoTags = [
7872
- `task:${taskCtx.taskId}`,
7873
- `task_type:${taskCtx.taskType}`,
7874
- `task_attempt:${taskCtx.attemptN}`,
7875
- ...taskCtx.correlationId ? [`correlation:${taskCtx.correlationId}`] : []
7670
+ `task:id:${taskCtx.taskId}`,
7671
+ `task:type:${taskCtx.taskType}`,
7672
+ `task:attempt:${taskCtx.attemptN}`,
7673
+ ...taskCtx.correlationId ? [`task:correlation:${taskCtx.correlationId}`] : []
7876
7674
  ];
7877
7675
  } else targetDiaryId = params.diaryId ?? envDiaryId;
7878
7676
  const userTags = params.tags ?? [];
@@ -7973,8 +7771,6 @@ function createMoltNetTools(config) {
7973
7771
  renderPack,
7974
7772
  listRenderedPacks,
7975
7773
  getRenderedPack,
7976
- createJudgePackTask,
7977
- judgeRenderedPack,
7978
7774
  diaryTags,
7979
7775
  listEntries,
7980
7776
  getEntry,
@@ -8591,135 +8387,6 @@ function ensureRelativeWorktreePaths(gitconfig) {
8591
8387
  return `${gitconfig}${gitconfig.endsWith("\n") ? "" : "\n"}[worktree]\n\tuseRelativePaths = true\n`;
8592
8388
  }
8593
8389
  //#endregion
8594
- //#region src/moltnet/judge-recipe-cid.ts
8595
- var require$1 = createRequire(import.meta.url);
8596
- var SELF_PACKAGE_NAME = "@themoltnet/pi-extension";
8597
- var PI_PACKAGE_NAME = "@mariozechner/pi-coding-agent";
8598
- var SDK_PACKAGE_NAME = "@themoltnet/sdk";
8599
- var CID_VERSION = 1;
8600
- var RAW_CODEC = 85;
8601
- var SHA2_256_CODE = 18;
8602
- var BASE32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
8603
- function findSelfPackageDir() {
8604
- const start = path.dirname(fileURLToPath(import.meta.url));
8605
- let dir = start;
8606
- while (true) {
8607
- const candidate = path.join(dir, "package.json");
8608
- if (existsSync(candidate)) {
8609
- if (JSON.parse(readFileSync(candidate, "utf8")).name === SELF_PACKAGE_NAME) return dir;
8610
- }
8611
- const parent = path.dirname(dir);
8612
- if (parent === dir) return start;
8613
- dir = parent;
8614
- }
8615
- }
8616
- var PACKAGE_DIR = findSelfPackageDir();
8617
- function sha256Hex(value) {
8618
- return createHash("sha256").update(value, "utf8").digest("hex");
8619
- }
8620
- function encodeVarint(value) {
8621
- const bytes = [];
8622
- let current = value >>> 0;
8623
- while (current >= 128) {
8624
- bytes.push(current & 127 | 128);
8625
- current >>>= 7;
8626
- }
8627
- bytes.push(current);
8628
- return bytes;
8629
- }
8630
- function base32Lower(bytes) {
8631
- let bits = 0;
8632
- let value = 0;
8633
- let output = "";
8634
- for (const byte of bytes) {
8635
- value = value << 8 | byte;
8636
- bits += 8;
8637
- while (bits >= 5) {
8638
- output += BASE32_ALPHABET[value >>> bits - 5 & 31];
8639
- bits -= 5;
8640
- }
8641
- }
8642
- if (bits > 0) output += BASE32_ALPHABET[value << 5 - bits & 31];
8643
- return `b${output}`;
8644
- }
8645
- function stableStringify(value) {
8646
- if (value === null || typeof value !== "object") return JSON.stringify(value);
8647
- if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(",")}]`;
8648
- return `{${Object.entries(value).sort(([left], [right]) => left.localeCompare(right)).map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(",")}}`;
8649
- }
8650
- function readPackageVersion(pkgPath, expectedName) {
8651
- if (!existsSync(pkgPath)) return null;
8652
- const parsed = JSON.parse(readFileSync(pkgPath, "utf8"));
8653
- if (expectedName && parsed.name !== expectedName) return null;
8654
- return typeof parsed.version === "string" ? parsed.version : null;
8655
- }
8656
- function resolveInstalledPackageVersion(packageName) {
8657
- const candidates = [];
8658
- try {
8659
- candidates.push(path.dirname(require$1.resolve(packageName)));
8660
- } catch {}
8661
- let dir = PACKAGE_DIR;
8662
- while (true) {
8663
- candidates.push(path.join(dir, "node_modules", packageName));
8664
- const parent = path.dirname(dir);
8665
- if (parent === dir) break;
8666
- dir = parent;
8667
- }
8668
- for (const start of candidates) {
8669
- let current = start;
8670
- while (true) {
8671
- const version = readPackageVersion(path.join(current, "package.json"), packageName);
8672
- if (version) return version;
8673
- const parent = path.dirname(current);
8674
- if (parent === current) break;
8675
- current = parent;
8676
- }
8677
- }
8678
- return null;
8679
- }
8680
- function resolvePiJudgeRecipeVersions() {
8681
- return {
8682
- pi: resolveInstalledPackageVersion(PI_PACKAGE_NAME),
8683
- piExtension: readPackageVersion(path.join(PACKAGE_DIR, "package.json"), SELF_PACKAGE_NAME),
8684
- sdk: resolveInstalledPackageVersion(SDK_PACKAGE_NAME)
8685
- };
8686
- }
8687
- function buildPiJudgeRecipeManifest(inputs) {
8688
- return {
8689
- kind: "pi-judge-recipe/v1",
8690
- versions: {
8691
- ...resolvePiJudgeRecipeVersions(),
8692
- ...inputs.overrides
8693
- },
8694
- assets: {
8695
- promptAsset: inputs.promptAsset ?? null,
8696
- rubricAsset: inputs.rubricAsset ?? null,
8697
- skillSourcePath: inputs.skillSourcePath ?? null
8698
- },
8699
- hashes: {
8700
- judgePromptSha256: sha256Hex(inputs.judgePrompt),
8701
- rubricSha256: sha256Hex(inputs.rubric),
8702
- skillFragmentSha256: inputs.skillFragment ? sha256Hex(inputs.skillFragment) : null,
8703
- implementationSha256: inputs.implementationSource ? sha256Hex(inputs.implementationSource) : null
8704
- }
8705
- };
8706
- }
8707
- function computePiJudgeRecipeCid(inputs) {
8708
- const manifest = buildPiJudgeRecipeManifest(inputs);
8709
- const manifestBytes = Buffer.from(stableStringify(manifest), "utf8");
8710
- const digestBytes = createHash("sha256").update(manifestBytes).digest();
8711
- return {
8712
- cid: base32Lower(Uint8Array.from([
8713
- ...encodeVarint(CID_VERSION),
8714
- ...encodeVarint(RAW_CODEC),
8715
- ...encodeVarint(SHA2_256_CODE),
8716
- ...encodeVarint(digestBytes.length),
8717
- ...digestBytes
8718
- ])),
8719
- manifest
8720
- };
8721
- }
8722
- //#endregion
8723
8390
  //#region src/otel/index.ts
8724
8391
  var TRACER_NAME = "@themoltnet/pi-extension/otel";
8725
8392
  function stripReservedAttrs(attrs) {
@@ -9321,6 +8988,14 @@ function validateTaskOutput(taskType, output) {
9321
8988
  }];
9322
8989
  return schemaErrors("output", entry.outputSchema, output);
9323
8990
  }
8991
+ /**
8992
+ * Resolve the TypeBox output schema registered for `taskType`. Returns
8993
+ * `null` for unknown task types — callers (e.g. submit-tool factories)
8994
+ * decide how to surface that.
8995
+ */
8996
+ function getTaskOutputSchema(taskType) {
8997
+ return getTaskTypeEntry(taskType)?.outputSchema ?? null;
8998
+ }
9324
8999
  //#endregion
9325
9000
  //#region ../tasks/src/wire.ts
9326
9001
  /**
@@ -9549,6 +9224,98 @@ Type$1.Object({
9549
9224
  additionalProperties: false
9550
9225
  });
9551
9226
  //#endregion
9227
+ //#region ../agent-runtime/src/output-tools.ts
9228
+ /**
9229
+ * Submit-output tool contract.
9230
+ *
9231
+ * The runtime advertises a per-task-type "submit output" tool in every
9232
+ * prompt. The tool's name and schema must be the same wherever the
9233
+ * agent encounters it: in the system prompt the model reads, in the
9234
+ * executor that registers it, in any future executor that wires it
9235
+ * into a different coding-agent SDK.
9236
+ *
9237
+ * This module is the single source of truth for the (toolName,
9238
+ * description, parametersSchema) triple. It has no executor-specific
9239
+ * dependencies — `agent-runtime` is intentionally agnostic of the
9240
+ * concrete coding-agent runtime — so anything that wants to register
9241
+ * the tool (pi-extension today, a Codex-SDK adapter tomorrow, a local
9242
+ * MCP bridge if we ever go that route) can read the contract here and
9243
+ * wire it into its own tool API.
9244
+ *
9245
+ * Conventions captured here:
9246
+ *
9247
+ * - Tool name shape: `submit_<task_type>_output` (e.g.
9248
+ * `submit_fulfill_brief_output`). This is the string the model
9249
+ * sees in the prompt's "preferred path" instruction.
9250
+ * - Parameters schema: the task type's TypeBox `*Output` schema
9251
+ * **directly**, NOT wrapped in `{ output: <schema> }`. Tool args
9252
+ * ARE the payload, so the model gets field-level guidance at
9253
+ * planning time.
9254
+ * - Description text: shared across executors so the tool's
9255
+ * advertised purpose is identical regardless of who registers it.
9256
+ */
9257
+ /**
9258
+ * Build the submit-output contract for a task type. Returns `null` if
9259
+ * no output schema is registered for that type — callers (executors)
9260
+ * decide whether that's a hard error, a fallback to the parser-only
9261
+ * path, or anything else.
9262
+ */
9263
+ function getSubmitOutputContract(taskType) {
9264
+ const schema = getTaskOutputSchema(taskType);
9265
+ if (!schema) return null;
9266
+ return {
9267
+ toolName: submitOutputToolName(taskType),
9268
+ taskType,
9269
+ description: `Submit the structured output for this ${taskType} task. Call exactly once when done. The arguments below ARE the output payload — pass each top-level field of the task type's output schema directly. The runtime validates the args against the schema; mismatches return a tool error you can recover from in the same session. On a valid call the runtime captures the payload and ends the session — you do not need to repeat the JSON in your final assistant message.`,
9270
+ parametersSchema: schema
9271
+ };
9272
+ }
9273
+ /**
9274
+ * Plain-string name builder. Exposed separately so the prompt builder
9275
+ * can advertise the tool name even when the schema lookup is deferred
9276
+ * to the executor (the prompt is built before any tool registration
9277
+ * happens).
9278
+ */
9279
+ function submitOutputToolName(taskType) {
9280
+ return `submit_${taskType}_output`;
9281
+ }
9282
+ //#endregion
9283
+ //#region ../agent-runtime/src/prompts/final-output.ts
9284
+ function buildFinalOutputBlock(opts) {
9285
+ const { taskType, outputSchemaName, shapeSketch, extraNotes } = opts;
9286
+ const submitTool = submitOutputToolName(taskType);
9287
+ const lines = [
9288
+ "## Final output (read this carefully)",
9289
+ "",
9290
+ `Your VERY LAST action in this conversation MUST report the structured`,
9291
+ `output matching \`${outputSchemaName}\`. Two ways to do it, in order of`,
9292
+ `preference:`,
9293
+ "",
9294
+ `1. **Preferred — call \`${submitTool}\` exactly once** with the payload.`,
9295
+ ` The runtime captures the validated arguments and ends the session.`,
9296
+ ` If the tool is registered, prefer this path.`,
9297
+ `2. **Fallback** — if the submit tool is unavailable, your very last`,
9298
+ ` assistant message MUST be a single JSON object matching`,
9299
+ ` \`${outputSchemaName}\`. No prose before or after. No code fences.`,
9300
+ ` No "ok" or "done". The runtime parses the last balanced top-level`,
9301
+ ` JSON object as the output.`,
9302
+ "",
9303
+ `Failing to report structured output as the very last action means the`,
9304
+ `attempt is marked failed even if the underlying work succeeded.`,
9305
+ "",
9306
+ `Output shape:`,
9307
+ "",
9308
+ "```json",
9309
+ shapeSketch,
9310
+ "```"
9311
+ ];
9312
+ if (extraNotes?.length) {
9313
+ lines.push("");
9314
+ for (const note of extraNotes) lines.push(note);
9315
+ }
9316
+ return lines.join("\n");
9317
+ }
9318
+ //#endregion
9552
9319
  //#region ../agent-runtime/src/prompts/assess-brief.ts
9553
9320
  /**
9554
9321
  * Build the system prompt for an `assess_brief` judge attempt.
@@ -9606,6 +9373,20 @@ function buildAssessBriefPrompt(input, ctx) {
9606
9373
  " - `summary` set → use as orientation, not as ground truth.",
9607
9374
  "Adapt your investigation to whatever the output actually contains. Score conservatively when the producer's output is opaque or thin.",
9608
9375
  "",
9376
+ "### Querying the producer's diary entries",
9377
+ "",
9378
+ `Beyond the explicit \`diaryEntryIds[]\` from step 3, the producer's`,
9379
+ "attempts auto-tag every entry with the `task:*` provenance namespace.",
9380
+ "You can pull the full set without enumerating ids by passing the",
9381
+ "`taskFilter` shorthand to `moltnet_list_entries` or",
9382
+ "`moltnet_search_entries`:",
9383
+ "",
9384
+ `- All entries from the producer task: \`taskFilter: { taskId: "${input.targetTaskId}" }\`.`,
9385
+ "- Just the accepted attempt: add `attemptN: <acceptedAttemptN>`.",
9386
+ "- The producer plus any prior chain (when a correlationId was set):",
9387
+ " read it from the task you fetched in step 1 and pass",
9388
+ " `taskFilter: { correlationId: \"<id>\" }`.",
9389
+ "",
9609
9390
  preambleSection,
9610
9391
  "## Criteria",
9611
9392
  "",
@@ -9617,12 +9398,23 @@ function buildAssessBriefPrompt(input, ctx) {
9617
9398
  "- `boolean`: score exactly 0 or 1. `rationale` optional.",
9618
9399
  "- `deterministic_signature_check`: run `moltnet entry verify` on every diary entry returned by step 3 above AND `git verify-commit` on every commit. Score 1 iff ALL signatures are valid; otherwise 0. Populate `evidence.commitsVerified`, `evidence.commitsTotal`, `evidence.signatureFailures`.",
9619
9400
  "",
9620
- "### Final output",
9401
+ "Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before reporting structured output.",
9621
9402
  "",
9622
- "Emit a JSON object matching `AssessBriefOutput`:",
9623
- " { \"scores\": [{criterionId, score, rationale?, evidence?}], \"composite\", \"verdict\", \"judgeModel\"? }",
9624
- "`composite` = Σ(weight_i × score_i) recomputed. The runtime will reject a mismatch.",
9625
- "Write a signed diary entry (tags: \"judgment\", \"assess_brief\") capturing the rationale before emitting the JSON."
9403
+ buildFinalOutputBlock({
9404
+ taskType: "assess_brief",
9405
+ outputSchemaName: "AssessBriefOutput",
9406
+ shapeSketch: [
9407
+ "{",
9408
+ " \"scores\": [",
9409
+ " { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
9410
+ " ],",
9411
+ " \"composite\": <sum>,",
9412
+ " \"verdict\": \"<1-3 sentence overall>\",",
9413
+ " \"judgeModel\": \"<provider:model>\"",
9414
+ "}"
9415
+ ].join("\n"),
9416
+ extraNotes: ["`composite` = Σ(weight_i × score_i) recomputed. The runtime rejects a mismatch."]
9417
+ })
9626
9418
  ].filter(Boolean).join("\n");
9627
9419
  }
9628
9420
  //#endregion
@@ -9699,9 +9491,16 @@ function buildCuratePackPrompt(input, ctx) {
9699
9491
  "## Tools available (not a recipe — use what the situation calls for)",
9700
9492
  "",
9701
9493
  "- `moltnet_diary_tags` — tag inventory with counts. Cheap reconnaissance",
9702
- " when the prompt implies a scope but not a tag.",
9494
+ " when the prompt implies a scope but not a tag. Pass",
9495
+ " `prefix: \"task:\"` to enumerate task-provenance tags only",
9496
+ " (`task:type:*`, `task:correlation:*`, etc.).",
9703
9497
  "- `moltnet_search_entries` — hybrid semantic + lexical search.",
9704
- "- `moltnet_list_entries` tag-filtered listing.",
9498
+ " Filters AND with the query: pass `tags`, `excludeTags`,",
9499
+ " `entryTypes`, or the `taskFilter` shorthand to narrow before",
9500
+ " ranking. Example: `taskFilter: { taskType: \"fulfill_brief\" }`",
9501
+ " returns only entries from fulfill_brief attempts.",
9502
+ "- `moltnet_list_entries` — multi-tag (AND) listing with optional",
9503
+ " `excludeTags`, `entryType`, and the same `taskFilter` shorthand.",
9705
9504
  "- `moltnet_get_entry` — full entry read, for disambiguation.",
9706
9505
  "- `moltnet_pack_create` — terminal call that persists the pack.",
9707
9506
  "",
@@ -9747,31 +9546,30 @@ function buildCuratePackPrompt(input, ctx) {
9747
9546
  "",
9748
9547
  "## Hard constraints",
9749
9548
  "",
9750
- "- Do NOT call `moltnet_pack_render` or `moltnet_rendered_pack_judge` ",
9751
- " those belong to the next sessions.",
9549
+ "- Do NOT call `moltnet_pack_render` that belongs to the next session.",
9752
9550
  "- Do NOT write diary entries unless curation surfaces a genuine",
9753
9551
  " incident worth recording. The curation reasoning lives in the task",
9754
9552
  " output, not in the diary.",
9755
9553
  "- Respect hard include/exclude filters literally.",
9756
9554
  "",
9757
- "## Final output",
9758
- "",
9759
- "Write to stdout a JSON object matching `CuratePackOutput`:",
9760
- "```",
9761
- "{",
9762
- " \"packId\": \"<uuid>\",",
9763
- " \"packCid\": \"<cid>\",",
9764
- " \"entries\": [",
9765
- " { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
9766
- " ],",
9767
- " \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
9768
- " \"checkpoints\": [",
9769
- " { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
9770
- " ],",
9771
- " \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\"",
9772
- "}",
9773
- "```",
9774
- "The runtime parses this. Failing to emit it is a task failure."
9555
+ buildFinalOutputBlock({
9556
+ taskType: "curate_pack",
9557
+ outputSchemaName: "CuratePackOutput",
9558
+ shapeSketch: [
9559
+ "{",
9560
+ " \"packId\": \"<uuid>\",",
9561
+ " \"packCid\": \"<cid>\",",
9562
+ " \"entries\": [",
9563
+ " { \"entryId\": \"<uuid>\", \"rank\": 1, \"rationale\": \"<why>\" }",
9564
+ " ],",
9565
+ " \"recipeParams\": { \"recipe\": \"...\", \"prompt\": \"...\", ... },",
9566
+ " \"checkpoints\": [",
9567
+ " { \"phase\": \"recon\", \"candidateIds\": [...], \"droppedIds\": [...], \"notes\": \"...\" }",
9568
+ " ],",
9569
+ " \"summary\": \"<2-4 sentences: what you looked for, how you narrowed, what defines the final set>\"",
9570
+ "}"
9571
+ ].join("\n")
9572
+ })
9775
9573
  ].filter((l) => l !== null).join("\n");
9776
9574
  }
9777
9575
  //#endregion
@@ -9829,11 +9627,19 @@ function buildFulfillBriefPrompt(input, ctx) {
9829
9627
  " `MoltNet-Diary: <id>` (per the runtime instructor).",
9830
9628
  "6. Push the branch and open a PR.",
9831
9629
  "",
9832
- "### Final output",
9833
- "",
9834
- "When done, write to stdout a JSON object with shape matching `FulfillBriefOutput`:",
9835
- " { \"branch\", \"commits\": [{sha, message, diaryEntryId}], \"pullRequestUrl\", \"diaryEntryIds\", \"summary\" }",
9836
- "The runtime parses this as the structured task output. Failing to emit it is a failure."
9630
+ buildFinalOutputBlock({
9631
+ taskType: "fulfill_brief",
9632
+ outputSchemaName: "FulfillBriefOutput",
9633
+ shapeSketch: [
9634
+ "{",
9635
+ " \"branch\": \"<branch-name>\",",
9636
+ " \"commits\": [{ \"sha\": \"...\", \"message\": \"...\", \"diaryEntryId\": \"...\" }],",
9637
+ " \"pullRequestUrl\": \"<url-or-null>\",",
9638
+ " \"diaryEntryIds\": [\"...\"],",
9639
+ " \"summary\": \"<1-3 sentence recap>\"",
9640
+ "}"
9641
+ ].join("\n")
9642
+ })
9837
9643
  ].filter(Boolean).join("\n");
9838
9644
  }
9839
9645
  //#endregion
@@ -9915,23 +9721,29 @@ function buildJudgePackPrompt(input, ctx) {
9915
9721
  " may leak guidance that biases judgment.",
9916
9722
  "- Keep the session focused on scoring; no speculative exploration.",
9917
9723
  "",
9918
- "## Final output",
9919
- "",
9920
- "Write to stdout a JSON object matching `JudgePackOutput`:",
9921
- "```",
9922
- "{",
9923
- " \"scores\": [{\"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {...}}],",
9924
- " \"composite\": <sum-of-weighted-scores>,",
9925
- " \"verdict\": \"<1-3 sentence overall>\",",
9926
- " \"judgeModel\": \"<provider:model>\",",
9927
- " \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
9928
- "}",
9929
- "```",
9930
- "Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
9931
- "`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is optional",
9932
- "and absence is the correct representation when unavailable.",
9933
9724
  `Write a signed diary entry (tags: \`judgment\`, \`judge_pack\`, \`rubric:${rubric.rubricId}\`) capturing the rationale before`,
9934
- "emitting the JSON."
9725
+ "reporting structured output.",
9726
+ "",
9727
+ buildFinalOutputBlock({
9728
+ taskType: "judge_pack",
9729
+ outputSchemaName: "JudgePackOutput",
9730
+ shapeSketch: [
9731
+ "{",
9732
+ " \"scores\": [",
9733
+ " { \"criterionId\": \"...\", \"score\": 0.0, \"rationale\": \"...\", \"evidence\": {} }",
9734
+ " ],",
9735
+ " \"composite\": <sum-of-weighted-scores>,",
9736
+ " \"verdict\": \"<1-3 sentence overall>\",",
9737
+ " \"judgeModel\": \"<provider:model>\",",
9738
+ " \"rendererBinaryCid\": \"<cid-string-only-if-available>\"",
9739
+ "}"
9740
+ ].join("\n"),
9741
+ extraNotes: [
9742
+ "Omit `rendererBinaryCid` entirely when no binary CID is exposed by",
9743
+ "`moltnet_rendered_pack_get`. Do NOT emit `null` — the field is",
9744
+ "optional and absence is the correct representation when unavailable."
9745
+ ]
9746
+ })
9935
9747
  ].filter((l) => l !== null).join("\n");
9936
9748
  }
9937
9749
  //#endregion
@@ -9972,24 +9784,23 @@ function buildRenderPackPrompt(input, ctx) {
9972
9784
  "## Constraints",
9973
9785
  "",
9974
9786
  "- Do NOT modify the source pack or its entries.",
9975
- "- Do NOT call `moltnet_rendered_pack_judge`.",
9976
9787
  "- Do NOT write diary entries unless a genuine incident occurs",
9977
9788
  " (rendering failure, invariant violation).",
9978
9789
  "",
9979
- "## Final output",
9980
- "",
9981
- "Write to stdout a JSON object matching `RenderPackOutput`:",
9982
- "```",
9983
- "{",
9984
- " \"renderedPackId\": \"<uuid-or-null>\",",
9985
- " \"renderedCid\": \"<cid>\",",
9986
- " \"renderMethod\": \"<label>\",",
9987
- " \"byteSize\": <int>,",
9988
- " \"entriesRendered\": <int>,",
9989
- " \"summary\": \"<1-3 sentence recap>\"",
9990
- "}",
9991
- "```",
9992
- "Failing to emit it is a task failure."
9790
+ buildFinalOutputBlock({
9791
+ taskType: "render_pack",
9792
+ outputSchemaName: "RenderPackOutput",
9793
+ shapeSketch: [
9794
+ "{",
9795
+ " \"renderedPackId\": \"<uuid-or-null>\",",
9796
+ " \"renderedCid\": \"<cid>\",",
9797
+ " \"renderMethod\": \"<label>\",",
9798
+ " \"byteSize\": <int>,",
9799
+ " \"entriesRendered\": <int>,",
9800
+ " \"summary\": \"<1-3 sentence recap>\"",
9801
+ "}"
9802
+ ].join("\n")
9803
+ })
9993
9804
  ].join("\n");
9994
9805
  }
9995
9806
  //#endregion
@@ -12020,7 +11831,7 @@ var require_transport = /* @__PURE__ */ __commonJSMin(((exports, module) => {
12020
11831
  var { existsSync: existsSync$1 } = __require("node:fs");
12021
11832
  var getCallers = require_caller();
12022
11833
  var { join: join$1, isAbsolute, sep } = __require("node:path");
12023
- var { fileURLToPath: fileURLToPath$1 } = __require("node:url");
11834
+ var { fileURLToPath } = __require("node:url");
12024
11835
  var sleep = require_atomic_sleep();
12025
11836
  var onExit = require_on_exit_leak_free();
12026
11837
  var ThreadStream = require_thread_stream();
@@ -12076,7 +11887,7 @@ var require_transport = /* @__PURE__ */ __commonJSMin(((exports, module) => {
12076
11887
  if (!unquoted) return false;
12077
11888
  let path = unquoted;
12078
11889
  if (path.startsWith("file://")) try {
12079
- path = fileURLToPath$1(path);
11890
+ path = fileURLToPath(path);
12080
11891
  } catch {
12081
11892
  return false;
12082
11893
  }
@@ -13567,9 +13378,13 @@ function buildRuntimeInstructor(ctx) {
13567
13378
  `- During this task, every diary entry MUST land in \`${ctx.diaryId}\``,
13568
13379
  " (the task diary). The MCP `moltnet_create_entry` tool enforces this",
13569
13380
  " and rejects mismatched explicit `diaryId` parameters.",
13570
- `- Provenance tags \`task:${ctx.taskId}\`, \`task_type:${ctx.taskType}\`,`,
13571
- ` and \`task_attempt:${ctx.attemptN}\`${ctx.correlationId ? `, plus \`correlation:${ctx.correlationId}\`` : ""} are auto-injected on every entry.`,
13572
- " You may add additional tags; you cannot remove the auto-tags.",
13381
+ `- Provenance tags \`task:id:${ctx.taskId}\`, \`task:type:${ctx.taskType}\`,`,
13382
+ ` and \`task:attempt:${ctx.attemptN}\`${ctx.correlationId ? `, plus \`task:correlation:${ctx.correlationId}\`` : ""} are auto-injected on every entry.`,
13383
+ " These share the `task:` namespace so `moltnet_diary_tags` with",
13384
+ " `prefix: \"task:\"` lists every task-scoped tag, and the",
13385
+ " `taskFilter` shorthand on `moltnet_list_entries` /",
13386
+ " `moltnet_search_entries` expands into them. You may add additional",
13387
+ " tags but you cannot remove the auto-injected ones.",
13573
13388
  "",
13574
13389
  "## Accountable commits",
13575
13390
  "",
@@ -13598,42 +13413,78 @@ function buildRuntimeInstructor(ctx) {
13598
13413
  }
13599
13414
  //#endregion
13600
13415
  //#region src/runtime/task-output.ts
13601
- async function parseStructuredTaskOutput(assistantText, taskType) {
13416
+ var METER_NAME = "@themoltnet/pi-extension/task-output";
13417
+ var parseResultCounter = null;
13418
+ function getParseResultCounter() {
13419
+ if (parseResultCounter) return parseResultCounter;
13420
+ parseResultCounter = metrics.getMeter(METER_NAME).createCounter("agent_runtime.task_output.parse_result", {
13421
+ description: "Outcome of structured task-output capture, labelled by task_type, model, and code (success | output_missing | output_validation_failed | unknown_task_type | output_cid_compute_failed | captured_via_tool).",
13422
+ unit: "1"
13423
+ });
13424
+ return parseResultCounter;
13425
+ }
13426
+ /**
13427
+ * Record one parse-result observation. Exposed so the executor can also
13428
+ * record the `captured_via_tool` outcome from the submit-tool path
13429
+ * without bouncing through the parser. Labels: `task_type`, `model`, `code`.
13430
+ */
13431
+ function recordTaskOutputParseResult(args) {
13432
+ getParseResultCounter().add(1, {
13433
+ task_type: args.taskType,
13434
+ model: args.model ?? "unknown",
13435
+ code: args.code
13436
+ });
13437
+ }
13438
+ async function parseStructuredTaskOutput(assistantText, taskType, opts = {}) {
13439
+ const record = (code) => recordTaskOutputParseResult({
13440
+ taskType,
13441
+ model: opts.model,
13442
+ code
13443
+ });
13602
13444
  const extracted = extractJsonObject(assistantText);
13603
- if (!extracted) return {
13604
- output: null,
13605
- outputCid: null,
13606
- error: {
13607
- code: "output_missing",
13608
- message: "Agent did not emit a parseable JSON object as its final message."
13609
- }
13610
- };
13445
+ if (!extracted) {
13446
+ record("output_missing");
13447
+ return {
13448
+ output: null,
13449
+ outputCid: null,
13450
+ error: {
13451
+ code: "output_missing",
13452
+ message: "Agent did not emit a parseable JSON object as its final message."
13453
+ }
13454
+ };
13455
+ }
13611
13456
  const errors = validateTaskOutput(taskType, extracted);
13612
13457
  if (errors.length > 0) {
13613
13458
  const details = errors.slice(0, 3).map((error) => `${error.field}: ${error.message}`);
13614
13459
  const [firstError] = errors;
13460
+ const code = firstError?.field === "taskType" ? "unknown_task_type" : "output_validation_failed";
13461
+ record(code);
13615
13462
  return {
13616
13463
  output: null,
13617
13464
  outputCid: null,
13618
13465
  error: {
13619
- code: firstError?.field === "taskType" ? "unknown_task_type" : "output_validation_failed",
13466
+ code,
13620
13467
  message: `Output failed schema validation: ${details.join("; ")}`
13621
13468
  }
13622
13469
  };
13623
13470
  }
13624
13471
  try {
13472
+ const outputCid = await computeJsonCid(extracted);
13473
+ record("success");
13625
13474
  return {
13626
13475
  output: extracted,
13627
- outputCid: await computeJsonCid(extracted),
13476
+ outputCid,
13628
13477
  error: null
13629
13478
  };
13630
13479
  } catch (error) {
13480
+ const message = error instanceof Error ? error.message : String(error);
13481
+ record("output_cid_compute_failed");
13631
13482
  return {
13632
13483
  output: null,
13633
13484
  outputCid: null,
13634
13485
  error: {
13635
13486
  code: "output_cid_compute_failed",
13636
- message: `Validated output could not be canonicalized: ${error instanceof Error ? error.message : String(error)}`
13487
+ message: `Validated output could not be canonicalized: ${message}`
13637
13488
  }
13638
13489
  };
13639
13490
  }
@@ -13689,6 +13540,99 @@ function extractJsonObject(text) {
13689
13540
  return null;
13690
13541
  }
13691
13542
  //#endregion
13543
+ //#region src/runtime/submit-output-tool.ts
13544
+ /**
13545
+ * Sentinel thrown when the requested task type has no registered output
13546
+ * schema. The executor recognises this specific error class and falls
13547
+ * back to the parser path; any other error from `createSubmitOutputTool`
13548
+ * is unexpected and must propagate.
13549
+ */
13550
+ var UnknownTaskTypeForSubmitToolError = class extends Error {
13551
+ constructor(taskType) {
13552
+ super(`createSubmitOutputTool: no output schema registered for task type "${taskType}"`);
13553
+ this.taskType = taskType;
13554
+ this.name = "UnknownTaskTypeForSubmitToolError";
13555
+ }
13556
+ };
13557
+ function createSubmitOutputTool(taskType, opts = {}) {
13558
+ const contract = getSubmitOutputContract(taskType);
13559
+ if (!contract) throw new UnknownTaskTypeForSubmitToolError(taskType);
13560
+ const schema = contract.parametersSchema;
13561
+ let captured = null;
13562
+ let callCount = 0;
13563
+ return {
13564
+ tool: defineTool({
13565
+ name: contract.toolName,
13566
+ label: `Submit ${taskType} output`,
13567
+ description: contract.description,
13568
+ parameters: schema,
13569
+ async execute(_id, params) {
13570
+ const errors = [...Value.Errors(schema, params)];
13571
+ if (errors.length > 0) {
13572
+ const detailMsg = errors.slice(0, 3).map((err) => `${err.path || "<root>"}: ${err.message}`).join("; ");
13573
+ const details = {
13574
+ captured: false,
13575
+ callCount,
13576
+ error: "output_validation_failed"
13577
+ };
13578
+ recordTaskOutputParseResult({
13579
+ taskType,
13580
+ model: opts.model,
13581
+ code: "output_validation_failed"
13582
+ });
13583
+ return {
13584
+ content: [{
13585
+ type: "text",
13586
+ text: `Output failed schema validation: ${detailMsg}. Re-call this tool with a corrected output.`
13587
+ }],
13588
+ details,
13589
+ isError: true
13590
+ };
13591
+ }
13592
+ captured = params;
13593
+ callCount += 1;
13594
+ return {
13595
+ content: [{
13596
+ type: "text",
13597
+ text: "Output captured. The runtime now has the validated payload; no further action is needed for output reporting."
13598
+ }],
13599
+ details: {
13600
+ captured: true,
13601
+ callCount,
13602
+ error: null
13603
+ },
13604
+ terminate: true
13605
+ };
13606
+ }
13607
+ }),
13608
+ getCaptured: () => captured,
13609
+ getCallCount: () => callCount
13610
+ };
13611
+ }
13612
+ /**
13613
+ * Build the submit-tool wiring for one task attempt. Returns a handle
13614
+ * (or `null` if no submit-tool should be registered) plus the
13615
+ * `customTools`-shaped array ready to spread into the session config.
13616
+ *
13617
+ * The catch is **narrowed** to `UnknownTaskTypeForSubmitToolError` —
13618
+ * exporters/dependency-API drift would otherwise be silently degraded
13619
+ * to parser-only behaviour, which reintroduces the failure mode this
13620
+ * change is fixing. Any other error from the factory propagates.
13621
+ */
13622
+ function resolveSubmitTools(taskType, opts = {}) {
13623
+ let handle;
13624
+ try {
13625
+ handle = createSubmitOutputTool(taskType, opts);
13626
+ } catch (err) {
13627
+ if (err instanceof UnknownTaskTypeForSubmitToolError) handle = null;
13628
+ else throw err;
13629
+ }
13630
+ return {
13631
+ handle,
13632
+ tools: handle ? [handle.tool] : []
13633
+ };
13634
+ }
13635
+ //#endregion
13692
13636
  //#region src/runtime/execute-pi-task.ts
13693
13637
  /**
13694
13638
  * executePiTask — run a single Task attempt using pi-coding-agent inside a
@@ -13834,6 +13778,8 @@ async function executePiTask(claimedTask, reporter, opts) {
13834
13778
  createEditToolDefinition(mountPath, { operations: createGondolinEditOps(managed.vm, mountPath) }),
13835
13779
  createBashToolDefinition(mountPath, { operations: createGondolinBashOps(managed.vm, mountPath) })
13836
13780
  ];
13781
+ const { handle: submitToolHandle, tools: submitToolDefs } = resolveSubmitTools(task.taskType, { model: opts.model });
13782
+ const submitTools = submitToolDefs;
13837
13783
  try {
13838
13784
  const moltnetAgent = await connect({ configDir: managed.agentDir });
13839
13785
  const moltnetTools = createMoltNetTools({
@@ -13885,7 +13831,11 @@ async function executePiTask(claimedTask, reporter, opts) {
13885
13831
  agentDir: piAuthDir,
13886
13832
  cwd: mountPath,
13887
13833
  model: modelHandle,
13888
- customTools: [...gondolinCustomTools, ...moltnetTools],
13834
+ customTools: [
13835
+ ...gondolinCustomTools,
13836
+ ...moltnetTools,
13837
+ ...submitTools
13838
+ ],
13889
13839
  sessionManager: SessionManager.inMemory(),
13890
13840
  resourceLoader
13891
13841
  })).session;
@@ -13962,14 +13912,43 @@ async function executePiTask(claimedTask, reporter, opts) {
13962
13912
  let parsedOutputCid = null;
13963
13913
  let parseError = null;
13964
13914
  if (!runError && !llmAbort && !cancelled) {
13965
- const parsed = await parseStructuredTaskOutput(assistantText, task.taskType);
13966
- parsedOutput = parsed.output;
13967
- parsedOutputCid = parsed.outputCid;
13968
- parseError = parsed.error;
13969
- if (parseError) await emit("error", {
13970
- message: parseError.message,
13971
- phase: "output_validation"
13972
- });
13915
+ const captured = submitToolHandle?.getCaptured() ?? null;
13916
+ if (captured) try {
13917
+ parsedOutput = captured;
13918
+ parsedOutputCid = await computeJsonCid(captured);
13919
+ recordTaskOutputParseResult({
13920
+ taskType: task.taskType,
13921
+ model: opts.model,
13922
+ code: "captured_via_tool"
13923
+ });
13924
+ } catch (err) {
13925
+ const message = err instanceof Error ? err.message : String(err);
13926
+ parsedOutput = null;
13927
+ parsedOutputCid = null;
13928
+ parseError = {
13929
+ code: "output_cid_compute_failed",
13930
+ message: `Captured submit-tool output could not be canonicalized: ${message}`
13931
+ };
13932
+ recordTaskOutputParseResult({
13933
+ taskType: task.taskType,
13934
+ model: opts.model,
13935
+ code: "output_cid_compute_failed"
13936
+ });
13937
+ await emit("error", {
13938
+ message: parseError.message,
13939
+ phase: "output_validation"
13940
+ });
13941
+ }
13942
+ else {
13943
+ const parsed = await parseStructuredTaskOutput(assistantText, task.taskType, { model: opts.model });
13944
+ parsedOutput = parsed.output;
13945
+ parsedOutputCid = parsed.outputCid;
13946
+ parseError = parsed.error;
13947
+ if (parseError) await emit("error", {
13948
+ message: parseError.message,
13949
+ phase: "output_validation"
13950
+ });
13951
+ }
13973
13952
  }
13974
13953
  if (cancelled) return {
13975
13954
  taskId: task.id,
@@ -14365,4 +14344,4 @@ function moltnetExtension(pi) {
14365
14344
  registerMoltnetReflectCommand(pi, state);
14366
14345
  }
14367
14346
  //#endregion
14368
- export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, buildPiJudgeRecipeManifest, computePiJudgeRecipeCid, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resolvePiJudgeRecipeVersions, resumeVm, toGuestPath };
14347
+ export { HOST_EXEC_DEFAULT_BASE_ENV, activateAgentEnv, createGondolinBashOps, createGondolinEditOps, createGondolinReadOps, createGondolinWriteOps, createMoltNetTools, createPiOtelExtension, createPiTaskExecutor, moltnetExtension as default, ensureSnapshot, executePiTask, findMainWorktree, loadCredentials, resumeVm, toGuestPath };