@forwardimpact/libeval 0.1.49 → 0.1.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-eval.js CHANGED
@@ -34,6 +34,29 @@ const LEAD_OPTIONS = {
34
34
  },
35
35
  };
36
36
 
37
// Shared task-input flags, spread into the `options` of each command that
// accepts a task. Three of them name the task source: --task-file (path),
// --task-text (inline), and --task-event (path to native GitHub event JSON
// composed into a task via libeval/src/events/github.js). Exactly one of
// those three is required. The fourth flag, --task-amend, is not a source:
// it carries additional text appended to the task.
const TASK_INPUT_OPTIONS = {
  "task-file": {
    type: "string",
    description: "Path to a markdown task file",
  },
  "task-text": {
    type: "string",
    description: "Inline task text (alternative to --task-file)",
  },
  "task-event": {
    type: "string",
    description:
      "Path to a native GitHub event payload JSON, composed into the task via libeval/src/events/github.js (reads $GITHUB_EVENT_NAME)",
  },
  "task-amend": {
    type: "string",
    description: "Additional text appended to the task",
  },
};
59
+
37
60
  const definition = {
38
61
  name: "fit-eval",
39
62
  version: VERSION,
@@ -45,18 +68,7 @@ const definition = {
45
68
  args: "",
46
69
  description: "Run a single agent autonomously on a defined task",
47
70
  options: {
48
- "task-file": {
49
- type: "string",
50
- description: "Path to a markdown task file",
51
- },
52
- "task-text": {
53
- type: "string",
54
- description: "Inline task text (alternative to --task-file)",
55
- },
56
- "task-amend": {
57
- type: "string",
58
- description: "Additional text appended to the task",
59
- },
71
+ ...TASK_INPUT_OPTIONS,
60
72
  "agent-model": {
61
73
  type: "string",
62
74
  description:
@@ -92,18 +104,7 @@ const definition = {
92
104
  description:
93
105
  "Run a supervisor–agent relay — typical shape for agent-as-judge evaluations",
94
106
  options: {
95
- "task-file": {
96
- type: "string",
97
- description: "Path to a markdown task file",
98
- },
99
- "task-text": {
100
- type: "string",
101
- description: "Inline task text (alternative to --task-file)",
102
- },
103
- "task-amend": {
104
- type: "string",
105
- description: "Additional text appended to the task",
106
- },
107
+ ...TASK_INPUT_OPTIONS,
107
108
  "agent-model": {
108
109
  type: "string",
109
110
  description:
@@ -146,18 +147,7 @@ const definition = {
146
147
  description:
147
148
  "Run a facilitator with N participants — typical shape for multi-agent collaboration",
148
149
  options: {
149
- "task-file": {
150
- type: "string",
151
- description: "Path to a markdown task file",
152
- },
153
- "task-text": {
154
- type: "string",
155
- description: "Inline task text (alternative to --task-file)",
156
- },
157
- "task-amend": {
158
- type: "string",
159
- description: "Additional text appended to the task",
160
- },
150
+ ...TASK_INPUT_OPTIONS,
161
151
  "agent-model": {
162
152
  type: "string",
163
153
  description: "Claude model for agents (default: claude-opus-4-7[1m])",
@@ -192,18 +182,7 @@ const definition = {
192
182
  description:
193
183
  "Run an async, suspendable discussion — Chair + N participants + bridge callback",
194
184
  options: {
195
- "task-file": {
196
- type: "string",
197
- description: "Path to a markdown task file",
198
- },
199
- "task-text": {
200
- type: "string",
201
- description: "Inline task text (alternative to --task-file)",
202
- },
203
- "task-amend": {
204
- type: "string",
205
- description: "Additional text appended to the task",
206
- },
185
+ ...TASK_INPUT_OPTIONS,
207
186
  "agent-model": {
208
187
  type: "string",
209
188
  description: "Claude model for agents (default: claude-opus-4-7[1m])",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.49",
3
+ "version": "0.1.50",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -62,7 +62,7 @@
62
62
  "zod": "^4.4.3"
63
63
  },
64
64
  "devDependencies": {
65
- "@forwardimpact/libharness": "^0.1.14"
65
+ "@forwardimpact/libmock": "^0.1.0"
66
66
  },
67
67
  "engines": {
68
68
  "bun": ">=1.2.0",
@@ -62,7 +62,9 @@ export class AgentRunner {
62
62
  const abortController = new AbortController();
63
63
  this.currentAbortController = abortController;
64
64
  const effectiveTask = this.taskAmend
65
- ? `${task}\n\n${this.taskAmend}`
65
+ ? task
66
+ ? `${task}\n\n${this.taskAmend}`
67
+ : this.taskAmend
66
68
  : task;
67
69
  try {
68
70
  const iterator = this.query({
@@ -0,0 +1,87 @@
1
+ /**
2
+ * NpmInstaller — runs `bun install` in the family root when a package.json
3
+ * is present, then copies the resulting `node_modules/` into the staging
4
+ * directory so WorkdirManager can seed each per-task CWD.
5
+ *
6
+ * Symmetric to ApmInstaller: constructor injection of `spawn` for testability,
7
+ * factory function, and a free-function shorthand.
8
+ */
9
+
10
+ import { spawn as nodeSpawn } from "node:child_process";
11
+ import { access, cp } from "node:fs/promises";
12
+ import { join } from "node:path";
13
+
14
/**
 * Run `bun install` in the family root and stage node_modules/ for per-task CWDs.
 *
 * Note that, despite the class name, the command spawned is `bun install`.
 */
export class NpmInstaller {
  /**
   * @param {object} [deps]
   * @param {typeof nodeSpawn} [deps.spawn] - Spawn seam (defaults to
   *   `node:child_process` spawn). Tests inject a fake to avoid shelling out.
   */
  constructor({ spawn } = {}) {
    this.spawn = spawn ?? nodeSpawn;
  }

  /**
   * Install the family's dependencies and copy the resulting `node_modules/`
   * into `stagingDir`. Does nothing when the family root has no package.json.
   *
   * @param {import("./task-family.js").TaskFamily} family - Only `rootPath`
   *   is read.
   * @param {string} stagingDir - The staging directory (created by ApmInstaller).
   * @returns {Promise<void>}
   * @throws {Error} When bun cannot be spawned, exits non-zero, is killed by a
   *   signal, or finishes without producing `node_modules/`.
   */
  async install(family, stagingDir) {
    const pkgJson = join(family.rootPath, "package.json");
    const hasPkg = await access(pkgJson).then(
      () => true,
      () => false,
    );
    if (!hasPkg) return;

    await this.#runBunInstall(family.rootPath);

    const sourceModules = join(family.rootPath, "node_modules");
    try {
      await access(sourceModules);
    } catch (cause) {
      throw new Error(
        `bun install did not produce node_modules/ at ${sourceModules}; check the family's package.json`,
        { cause },
      );
    }

    await cp(sourceModules, join(stagingDir, "node_modules"), {
      recursive: true,
    });
  }

  /**
   * Spawn `bun install` in `cwd` and settle once the child closes.
   *
   * @param {string} cwd - Working directory for the child process.
   * @returns {Promise<void>} Resolves on exit code 0; rejects otherwise with
   *   the collected stderr appended to the message.
   */
  #runBunInstall(cwd) {
    return new Promise((resolve, reject) => {
      const child = this.spawn("bun", ["install"], {
        cwd,
        stdio: ["ignore", "pipe", "pipe"],
      });
      let stderr = "";
      // stdout is piped but unused; a no-op listener keeps it flowing.
      child.stdout.on("data", () => {});
      child.stderr.on("data", (chunk) => {
        stderr += chunk.toString();
      });
      child.on("error", (e) => {
        reject(new Error(`failed to spawn bun: ${e.message}`, { cause: e }));
      });
      child.on("close", (code, signal) => {
        if (code === 0) {
          resolve();
          return;
        }
        // A child terminated by a signal closes with code === null; report
        // the signal instead of the uninformative "exited null".
        const outcome =
          code === null && signal
            ? `was killed by ${signal}`
            : `exited ${code}`;
        reject(new Error(`bun install ${outcome}: ${stderr}`));
      });
    });
  }
}
74
+
75
/**
 * Factory function — builds an NpmInstaller.
 * @param {object} [deps] - Passed through unchanged to the NpmInstaller
 *   constructor.
 * @returns {NpmInstaller}
 */
export function createNpmInstaller(deps) {
  const installer = new NpmInstaller(deps);
  return installer;
}
79
+
80
/**
 * One-shot convenience wrapper: constructs an NpmInstaller with no injected
 * dependencies and runs `install` on it.
 * @param {import("./task-family.js").TaskFamily} family
 * @param {string} stagingDir
 * @returns {Promise<void>} Whatever `NpmInstaller#install` returns.
 */
export function installNpm(family, stagingDir) {
  const installer = new NpmInstaller();
  return installer.install(family, stagingDir);
}
@@ -22,6 +22,7 @@ import { join, resolve as resolvePath } from "node:path";
22
22
  import { DEFAULT_ENV_ALLOWLIST, createRedactor } from "../redaction.js";
23
23
  import { createSupervisor } from "../supervisor.js";
24
24
  import { installApm as defaultInstallApm } from "./apm-installer.js";
25
+ import { installNpm as defaultInstallNpm } from "./npm-installer.js";
25
26
  import { runJudge } from "./judge.js";
26
27
  import { validateResultRecord } from "./result.js";
27
28
  import { runScoring } from "./scorer.js";
@@ -68,6 +69,8 @@ export class BenchmarkRunner {
68
69
  * Same contract as `installApm(family, outputDir)`. Lets tests inject a
69
70
  * fake `apm` spawn (or skip the install entirely) so the suite never
70
71
  * shells out to a real `apm` binary. Internal testing only.
72
+ * @param {Function} [opts.installNpm] - Test seam: replaces `installNpm`.
73
+ * Same contract as `installNpm(family, stagingDir)`. Internal testing only.
71
74
  */
72
75
  constructor({
73
76
  family,
@@ -86,6 +89,7 @@ export class BenchmarkRunner {
86
89
  runScoring: runScoringHook,
87
90
  runJudge: runJudgeHook,
88
91
  installApm: installApmHook,
92
+ installNpm: installNpmHook,
89
93
  }) {
90
94
  if (!family) throw new Error("family is required");
91
95
  if (!Number.isInteger(runs) || runs < 1)
@@ -111,6 +115,7 @@ export class BenchmarkRunner {
111
115
  this._runScoringHook = runScoringHook ?? runScoring;
112
116
  this._runJudgeHook = runJudgeHook ?? runJudge;
113
117
  this._installApmHook = installApmHook ?? defaultInstallApm;
118
+ this._installNpmHook = installNpmHook ?? defaultInstallNpm;
114
119
  }
115
120
 
116
121
  /**
@@ -126,6 +131,7 @@ export class BenchmarkRunner {
126
131
  await mkdir(this.output, { recursive: true });
127
132
  const { stagingDir, skillSetHash, judgeProfilesDir } =
128
133
  await this._installApmHook(family, this.output);
134
+ await this._installNpmHook(family, stagingDir);
129
135
 
130
136
  const tasks = family.tasks();
131
137
  if (this.profiles.judge) {
@@ -70,6 +70,11 @@ export class WorkdirManager {
70
70
  await cp(join(this.stagingDir, ".claude"), join(cwd, ".claude"), {
71
71
  recursive: true,
72
72
  });
73
+ await cp(join(this.stagingDir, "node_modules"), join(cwd, "node_modules"), {
74
+ recursive: true,
75
+ }).catch((e) => {
76
+ if (e.code !== "ENOENT") throw e;
77
+ });
73
78
 
74
79
  const envDirs = [
75
80
  ...(this.familyRootPath ? [this.familyRootPath] : []),
@@ -17,6 +17,14 @@ export async function runBenchmarkRunCommand(values, _args) {
17
17
  const opts = parseRunOptions(values);
18
18
  const config = await createConfig("script", "benchmark");
19
19
  process.env.ANTHROPIC_API_KEY = await config.anthropicToken();
20
+
21
+ // The Claude Agent SDK spawns a `claude` subprocess that inherits
22
+ // process.env. NODE_EXTRA_CA_CERTS causes undici (the HTTP client
23
+ // inside that subprocess) to fail with UND_ERR_INVALID_ARG on
24
+ // Node 22+, aborting every API call after 10 retries. Strip it
25
+ // before the SDK loads so the subprocess gets a clean environment.
26
+ delete process.env.NODE_EXTRA_CA_CERTS;
27
+
20
28
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
21
29
  const runner = createBenchmarkRunner({ ...opts, query });
22
30
 
@@ -1,8 +1,9 @@
1
- import { readFileSync, createWriteStream } from "node:fs";
1
+ import { createWriteStream } from "node:fs";
2
2
  import { resolve } from "node:path";
3
3
  import { createDiscusser } from "../discusser.js";
4
4
  import { createRedactor } from "../redaction.js";
5
5
  import { createTeeWriter } from "../tee-writer.js";
6
+ import { resolveTaskContent } from "./task-input.js";
6
7
 
7
8
  function parseAgentProfiles(raw, cwd, maxTurns) {
8
9
  if (!raw) return [];
@@ -18,17 +19,8 @@ function parseAgentProfiles(raw, cwd, maxTurns) {
18
19
  * @param {object} values - Parsed option values
19
20
  * @returns {object}
20
21
  */
21
- // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: CLI option validation
22
22
  export function parseDiscussOptions(values) {
23
- const taskFile = values["task-file"];
24
- const taskText = values["task-text"];
25
- if (taskFile && taskText)
26
- throw new Error("--task-file and --task-text are mutually exclusive");
27
- if (!taskFile && !taskText)
28
- throw new Error("--task-file or --task-text is required");
29
-
30
- const taskAmend = values["task-amend"] ?? undefined;
31
- const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
23
+ const { task: taskContent, amend: taskAmend } = resolveTaskContent(values);
32
24
 
33
25
  const profilesRaw = values["agent-profiles"];
34
26
  const agentCwd = resolve(values["agent-cwd"] ?? ".");
@@ -1,8 +1,9 @@
1
- import { readFileSync, createWriteStream } from "node:fs";
1
+ import { createWriteStream } from "node:fs";
2
2
  import { resolve } from "node:path";
3
3
  import { createFacilitator } from "../facilitator.js";
4
4
  import { createRedactor } from "../redaction.js";
5
5
  import { createTeeWriter } from "../tee-writer.js";
6
+ import { resolveTaskContent } from "./task-input.js";
6
7
 
7
8
  /**
8
9
  * Parse comma-separated agent profile names into structured configs.
@@ -25,15 +26,7 @@ function parseAgentProfiles(raw, cwd, maxTurns) {
25
26
  * @returns {object} Parsed options
26
27
  */
27
28
  export function parseFacilitateOptions(values) {
28
- const taskFile = values["task-file"];
29
- const taskText = values["task-text"];
30
- if (taskFile && taskText)
31
- throw new Error("--task-file and --task-text are mutually exclusive");
32
- if (!taskFile && !taskText)
33
- throw new Error("--task-file or --task-text is required");
34
-
35
- const taskAmend = values["task-amend"] ?? undefined;
36
- const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
29
+ const { task: taskContent, amend: taskAmend } = resolveTaskContent(values);
37
30
 
38
31
  const profilesRaw = values["agent-profiles"];
39
32
  if (!profilesRaw) throw new Error("--agent-profiles is required");
@@ -1,4 +1,4 @@
1
- import { readFileSync, createWriteStream } from "node:fs";
1
+ import { createWriteStream } from "node:fs";
2
2
  import { Writable } from "node:stream";
3
3
  import { resolve } from "node:path";
4
4
  import { createAgentRunner } from "../agent-runner.js";
@@ -6,6 +6,7 @@ import { composeProfilePrompt } from "../profile-prompt.js";
6
6
  import { createRedactor } from "../redaction.js";
7
7
  import { createTeeWriter } from "../tee-writer.js";
8
8
  import { SequenceCounter } from "../sequence-counter.js";
9
+ import { resolveTaskContent } from "./task-input.js";
9
10
  import { createServiceConfig } from "@forwardimpact/libconfig";
10
11
 
11
12
  /**
@@ -14,16 +15,8 @@ import { createServiceConfig } from "@forwardimpact/libconfig";
14
15
  * @returns {{ taskContent: string, cwd: string, model: string, maxTurns: number, outputPath: string|undefined, agentProfile: string|undefined, allowedTools: string[] }}
15
16
  */
16
17
  function parseRunOptions(values) {
17
- const taskFile = values["task-file"];
18
- const taskText = values["task-text"];
19
- if (taskFile && taskText)
20
- throw new Error("--task-file and --task-text are mutually exclusive");
21
- if (!taskFile && !taskText)
22
- throw new Error("--task-file or --task-text is required");
23
-
18
+ const { task: taskContent, amend: taskAmend } = resolveTaskContent(values);
24
19
  const maxTurnsRaw = values["max-turns"] ?? "50";
25
- const taskAmend = values["task-amend"] ?? undefined;
26
- const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
27
20
 
28
21
  return {
29
22
  taskContent,
@@ -1,9 +1,10 @@
1
- import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
1
+ import { createWriteStream, mkdtempSync } from "node:fs";
2
2
  import { resolve, join } from "node:path";
3
3
  import { tmpdir } from "node:os";
4
4
  import { createSupervisor } from "../supervisor.js";
5
5
  import { createRedactor } from "../redaction.js";
6
6
  import { createTeeWriter } from "../tee-writer.js";
7
+ import { resolveTaskContent } from "./task-input.js";
7
8
  import { createServiceConfig } from "@forwardimpact/libconfig";
8
9
 
9
10
  /**
@@ -11,20 +12,10 @@ import { createServiceConfig } from "@forwardimpact/libconfig";
11
12
  * @param {object} values - Parsed option values from cli.parse()
12
13
  * @returns {object}
13
14
  */
14
- // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: CLI option validation
15
15
  export function parseSuperviseOptions(values) {
16
- const taskFile = values["task-file"];
17
- const taskText = values["task-text"];
18
- if (taskFile && taskText)
19
- throw new Error("--task-file and --task-text are mutually exclusive");
20
- if (!taskFile && !taskText)
21
- throw new Error("--task-file or --task-text is required");
22
-
16
+ const { task: taskContent, amend: taskAmend } = resolveTaskContent(values);
23
17
  const supervisorAllowedToolsRaw = values["supervisor-allowed-tools"];
24
18
 
25
- const taskAmend = values["task-amend"] ?? undefined;
26
- const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
27
-
28
19
  return {
29
20
  taskContent,
30
21
  taskAmend,
@@ -0,0 +1,49 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { composeTaskFromGitHubEvent } from "../events/github.js";
3
+
4
/**
 * Resolve `--task-file` / `--task-text` / `--task-event` into the task pair the
 * runner consumes. Exactly one of the three must be set.
 *
 * - `--task-file`: the file is read as UTF-8 and used as the task.
 * - `--task-text`: the inline text is the task.
 * - `--task-event`: the file is parsed as a GitHub event payload and handed,
 *   together with `$GITHUB_EVENT_NAME`, to `composeTaskFromGitHubEvent`, which
 *   supplies both the task and a default amendment.
 *
 * `--task-amend` is passed through as the amendment in every mode. For
 * `--task-event` it takes precedence over the amendment composed from the
 * payload, which is only used when the flag is absent.
 *
 * @param {object} values - Parsed option values from cli.parse()
 * @returns {{ task: string, amend: string | undefined }}
 * @throws {Error} When zero or several task sources are given, when
 *   `--task-event` is used without `GITHUB_EVENT_NAME`, or when the event file
 *   is not a JSON object.
 */
export function resolveTaskContent(values) {
  const taskFile = values["task-file"];
  const taskText = values["task-text"];
  const taskEvent = values["task-event"];

  const provided = [taskFile, taskText, taskEvent].filter(Boolean).length;
  if (provided === 0) {
    throw new Error(
      "one of --task-file, --task-text, --task-event is required",
    );
  }
  if (provided > 1) {
    throw new Error(
      "--task-file, --task-text, --task-event are mutually exclusive",
    );
  }

  const amendFlag = values["task-amend"] ?? undefined;

  if (taskFile) {
    return { task: readFileSync(taskFile, "utf8"), amend: amendFlag };
  }
  if (taskText) {
    return { task: taskText, amend: amendFlag };
  }

  const eventName = process.env.GITHUB_EVENT_NAME;
  if (!eventName) {
    throw new Error("--task-event requires GITHUB_EVENT_NAME to be set");
  }

  const raw = readFileSync(taskEvent, "utf8");
  let payload;
  try {
    payload = JSON.parse(raw);
  } catch (cause) {
    // Name the offending file; a bare SyntaxError gives the user no context.
    throw new Error(
      `--task-event: ${taskEvent} is not valid JSON: ${cause.message}`,
      { cause },
    );
  }
  // `null` is valid JSON but would crash the composer with an opaque
  // property-access TypeError, so reject it here with the file name.
  if (payload === null || typeof payload !== "object") {
    throw new Error(`--task-event: ${taskEvent} must contain a JSON object`);
  }

  const composed = composeTaskFromGitHubEvent(payload, eventName);
  return { task: composed.task, amend: amendFlag ?? composed.amend };
}
@@ -0,0 +1,133 @@
1
+ /**
2
+ * GitHub event → task-prompt composition. Replaces ~70 lines of shell in
3
+ * kata-dispatch.yml's `Compose task text` step. Each branch in the dispatch
4
+ * function corresponds to one (event_name, action) the agent workflows react
5
+ * to; the rendered string is identical to what the shell `case` block
6
+ * produced, so existing facilitator behaviour is preserved.
7
+ *
8
+ * Templates live as named `export const` declarations at the top of the file,
9
+ * mirroring `SUPERVISOR_SYSTEM_PROMPT` / `JUDGE_SYSTEM_PROMPT` / etc., so a
10
+ * reader scanning libeval source can find the exact string that an agent
11
+ * receives. Substitutions use `${KEY}` so the literal placeholders are
12
+ * grep-discoverable.
13
+ */
14
+
15
// Task templates. These are ordinary quoted strings, not template literals:
// every `${KEY}` below is literal text that `render` substitutes at runtime
// with the matching field from `extractCommonFields`.

// Used for event "issues", action "opened".
export const TASK_TEMPLATE_ISSUE_OPENED =
  'New issue: "${ISSUE_TITLE}" (#${NUMBER}) by @${AUTHOR} (type: ${AUTHOR_TYPE}). Issue URL: ${URL}.';

// Used for event "issues", action "labeled".
export const TASK_TEMPLATE_ISSUE_LABELED =
  'Label "${LABEL}" was added to issue "${ISSUE_TITLE}" (#${NUMBER}). Issue URL: ${URL}.';

// Used for events "pull_request" / "pull_request_target", action "labeled".
export const TASK_TEMPLATE_PR_LABELED =
  'Label "${LABEL}" was added to PR "${PR_TITLE}" (#${NUMBER}). PR URL: ${URL}.';

// Used for events "pull_request" / "pull_request_target", action "closed".
export const TASK_TEMPLATE_PR_MERGED =
  'PR "${PR_TITLE}" (#${NUMBER}) merged. PR URL: ${URL}.';

// Used for event "issue_comment", action "created", when the payload's issue
// has no `pull_request` property.
export const TASK_TEMPLATE_ISSUE_COMMENT_ON_ISSUE =
  'New comment on issue "${ISSUE_TITLE}" (#${NUMBER}) by @${AUTHOR} (type: ${AUTHOR_TYPE}). Comment URL: ${URL}.';

// Used for event "issue_comment", action "created", when the payload's issue
// carries a `pull_request` property (i.e. the comment is on a PR).
export const TASK_TEMPLATE_ISSUE_COMMENT_ON_PR =
  "New comment on PR #${NUMBER} by @${AUTHOR} (type: ${AUTHOR_TYPE}). Comment URL: ${URL}.";

// Used for event "pull_request_review", action "submitted".
export const TASK_TEMPLATE_REVIEW_SUBMITTED =
  'Review submitted on PR "${PR_TITLE}" (#${NUMBER}) by @${AUTHOR} (type: ${AUTHOR_TYPE}). Review URL: ${URL}.';
35
+
36
/**
 * Substitute `${KEY}` placeholders in `template` with values from `fields`.
 *
 * The template is scanned once, so text inserted for one placeholder is never
 * re-scanned: a value that itself contains `${URL}` stays literal. Values are
 * inserted through a replacer function, so `$&`, `$1`, `$$` and similar
 * replacement patterns inside a value (e.g. an issue title) are copied
 * verbatim rather than interpreted.
 *
 * @param {string} template - Text containing `${KEY}` placeholders.
 * @param {Record<string, unknown>} fields - Placeholder values; `null` and
 *   `undefined` render as the empty string.
 * @returns {string} The template with every placeholder that has an own
 *   property in `fields` replaced; unknown placeholders are left untouched.
 */
function render(template, fields) {
  return template.replace(/\$\{([^{}]+)\}/g, (placeholder, key) =>
    Object.hasOwn(fields, key) ? String(fields[key] ?? "") : placeholder,
  );
}
43
+
44
/**
 * Collect the placeholder values shared by all task templates from an event
 * payload. Every field is a string; fields with no source in the payload fall
 * back to "" (or "User" for AUTHOR_TYPE).
 *
 * @param {object} payload - Native GitHub event payload.
 * @returns {{ NUMBER: string, ISSUE_TITLE: string, PR_TITLE: string, LABEL: string, AUTHOR: string, AUTHOR_TYPE: string, URL: string }}
 */
function extractCommonFields(payload) {
  // Lookup order for author and URL: comment, review, issue, pull request.
  // Each field walks the list independently, so a comment without a `user`
  // still contributes its `html_url`.
  const candidates = [
    payload.comment,
    payload.review,
    payload.issue,
    payload.pull_request,
  ];
  const firstPresent = (read, fallback) => {
    for (const candidate of candidates) {
      const found = read(candidate);
      if (found != null) return found;
    }
    return fallback;
  };

  const number = payload.issue?.number ?? payload.pull_request?.number ?? "";

  return {
    NUMBER: String(number),
    ISSUE_TITLE: payload.issue?.title ?? "",
    PR_TITLE: payload.pull_request?.title ?? "",
    LABEL: payload.label?.name ?? "",
    AUTHOR: firstPresent((source) => source?.user?.login, ""),
    AUTHOR_TYPE: firstPresent((source) => source?.user?.type, "User"),
    URL: firstPresent((source) => source?.html_url, ""),
  };
}
70
+
71
// Static `(event_name, action)` → template lookup, keyed as
// "<event_name>:<action>". Every entry is a chooser function that receives the
// payload and returns a template string; only "issue_comment:created" actually
// inspects the payload (issue vs PR). A key missing from the table makes
// `pickTemplate` return null, which `composeTaskFromGitHubEvent` turns into a
// thrown error.
//
// NOTE(review): the "closed" entries map to the "merged" template and nothing
// here checks `payload.pull_request.merged`. Presumably the calling workflow
// only forwards merged PRs — confirm, otherwise a PR closed without merging is
// described as merged.
const TEMPLATE_DISPATCH = {
  "issues:opened": () => TASK_TEMPLATE_ISSUE_OPENED,
  "issues:labeled": () => TASK_TEMPLATE_ISSUE_LABELED,
  "pull_request:closed": () => TASK_TEMPLATE_PR_MERGED,
  "pull_request:labeled": () => TASK_TEMPLATE_PR_LABELED,
  "pull_request_target:closed": () => TASK_TEMPLATE_PR_MERGED,
  "pull_request_target:labeled": () => TASK_TEMPLATE_PR_LABELED,
  "pull_request_review:submitted": () => TASK_TEMPLATE_REVIEW_SUBMITTED,
  // On issue_comment events the issue object carries a `pull_request`
  // property when the comment was made on a PR.
  "issue_comment:created": (payload) =>
    payload.issue?.pull_request != null
      ? TASK_TEMPLATE_ISSUE_COMMENT_ON_PR
      : TASK_TEMPLATE_ISSUE_COMMENT_ON_ISSUE,
};
87
+
88
/**
 * Look up the template for an event in `TEMPLATE_DISPATCH`.
 *
 * @param {object} payload - Native event payload; `payload.action` forms the
 *   second half of the lookup key and the payload is passed to the chooser.
 * @param {string} eventName - First half of the lookup key.
 * @returns {string | null} The chosen template, or null when the table has no
 *   entry for this (event_name, action) pair.
 */
function pickTemplate(payload, eventName) {
  const key = `${eventName}:${payload.action}`;
  const chooser = TEMPLATE_DISPATCH[key];
  if (!chooser) {
    return null;
  }
  return chooser(payload);
}
92
+
93
/**
 * Compose the task a libeval lead receives from a native GitHub event payload.
 * Returns `{ task, amend }`: `task` is the template-rendered context for real
 * events (or empty string for `workflow_dispatch`); `amend` is read from
 * `payload.inputs?.prompt` (empty string when absent) so an ad-hoc dispatcher
 * can layer instructions on top without the workflow wiring `--task-amend`
 * separately.
 *
 * Throws on unknown (event_name, action) combos so a typo doesn't silently
 * ship a misleading prompt.
 *
 * @param {object} payload - Native event payload (shape mirrors
 *   `$GITHUB_EVENT_PATH` JSON written by the runner).
 * @param {string} eventName - Value of `$GITHUB_EVENT_NAME` for the run.
 * @returns {{ task: string, amend: string }}
 * @throws {Error} When `eventName` is empty, when a `workflow_dispatch`
 *   payload has no `inputs.prompt`, or when no template matches.
 * @throws {TypeError} When `payload` is null or undefined.
 */
export function composeTaskFromGitHubEvent(payload, eventName) {
  if (!eventName) {
    throw new Error("composeTaskFromGitHubEvent: eventName is required");
  }
  // A null/undefined payload (e.g. an event file containing `null`) would
  // otherwise fail below with an unexplained property-access TypeError.
  if (payload == null) {
    throw new TypeError(
      "composeTaskFromGitHubEvent: payload must be an object",
    );
  }

  const amend = payload.inputs?.prompt ?? "";

  // workflow_dispatch has no event context to render: the prompt is the
  // whole task, so it is mandatory here and optional everywhere else.
  if (eventName === "workflow_dispatch") {
    if (!amend) {
      throw new Error(
        "composeTaskFromGitHubEvent: workflow_dispatch payload must include inputs.prompt",
      );
    }
    return { task: "", amend };
  }

  const template = pickTemplate(payload, eventName);
  if (!template) {
    throw new Error(
      `composeTaskFromGitHubEvent: no template for event_name="${eventName}" action="${payload.action}"`,
    );
  }
  return { task: render(template, extractCommonFields(payload)), amend };
}
package/src/index.js CHANGED
@@ -50,6 +50,16 @@ export {
50
50
  DISCUSS_AGENT_SYSTEM_PROMPT,
51
51
  } from "./discuss-tools.js";
52
52
  export { Judge, createJudge, JUDGE_SYSTEM_PROMPT } from "./judge.js";
53
+ export {
54
+ composeTaskFromGitHubEvent,
55
+ TASK_TEMPLATE_ISSUE_OPENED,
56
+ TASK_TEMPLATE_ISSUE_LABELED,
57
+ TASK_TEMPLATE_PR_LABELED,
58
+ TASK_TEMPLATE_PR_MERGED,
59
+ TASK_TEMPLATE_ISSUE_COMMENT_ON_ISSUE,
60
+ TASK_TEMPLATE_ISSUE_COMMENT_ON_PR,
61
+ TASK_TEMPLATE_REVIEW_SUBMITTED,
62
+ } from "./events/github.js";
53
63
  export {
54
64
  Redactor,
55
65
  createRedactor,
@@ -94,7 +94,11 @@ export class OrchestrationLoop {
94
94
  */
95
95
  async run(task) {
96
96
  this.emitOrchestratorEvent({ type: "session_start" });
97
- const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
97
+ const initialTask = this.taskAmend
98
+ ? task
99
+ ? `${task}\n\n${this.taskAmend}`
100
+ : this.taskAmend
101
+ : task;
98
102
 
99
103
  let firstError = null;
100
104
  const abort = (err) => {