@forwardimpact/libeval 0.1.31 → 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -0
- package/bin/fit-benchmark.js +167 -0
- package/package.json +5 -3
- package/src/agent-runner.js +7 -1
- package/src/benchmark/apm-installer.js +39 -0
- package/src/benchmark/judge.js +146 -0
- package/src/benchmark/report.js +161 -0
- package/src/benchmark/result.js +108 -0
- package/src/benchmark/runner.js +396 -0
- package/src/benchmark/scorer.js +138 -0
- package/src/benchmark/task-family.js +259 -0
- package/src/benchmark/workdir.js +248 -0
- package/src/commands/benchmark-report.js +39 -0
- package/src/commands/benchmark-run.js +53 -0
- package/src/commands/benchmark-score.js +68 -0
- package/src/commands/facilitate.js +7 -0
- package/src/commands/run.js +9 -3
- package/src/commands/supervise.js +7 -0
- package/src/facilitator.js +35 -21
- package/src/index.js +9 -0
- package/src/judge.js +211 -0
- package/src/orchestration-toolkit.js +25 -0
- package/src/redaction.js +163 -0
- package/src/supervisor.js +29 -17
package/src/supervisor.js
CHANGED
|
@@ -74,10 +74,13 @@ export class Supervisor {
|
|
|
74
74
|
ctx,
|
|
75
75
|
messageBus,
|
|
76
76
|
taskAmend,
|
|
77
|
+
redactor,
|
|
77
78
|
}) {
|
|
78
79
|
if (!agentRunner) throw new Error("agentRunner is required");
|
|
79
80
|
if (!supervisorRunner) throw new Error("supervisorRunner is required");
|
|
80
81
|
if (!output) throw new Error("output is required");
|
|
82
|
+
if (!redactor) throw new Error("redactor is required");
|
|
83
|
+
this.redactor = redactor;
|
|
81
84
|
this.agentRunner = agentRunner;
|
|
82
85
|
this.supervisorRunner = supervisorRunner;
|
|
83
86
|
this.output = output;
|
|
@@ -406,7 +409,7 @@ export class Supervisor {
|
|
|
406
409
|
seq: this.counter.next(),
|
|
407
410
|
event,
|
|
408
411
|
};
|
|
409
|
-
this.output.write(JSON.stringify(tagged) + "\n");
|
|
412
|
+
this.output.write(JSON.stringify(this.redactor.redactValue(tagged)) + "\n");
|
|
410
413
|
}
|
|
411
414
|
|
|
412
415
|
/**
|
|
@@ -429,11 +432,13 @@ export class Supervisor {
|
|
|
429
432
|
*/
|
|
430
433
|
emitOrchestratorEvent(event) {
|
|
431
434
|
this.output.write(
|
|
432
|
-
JSON.stringify(
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
435
|
+
JSON.stringify(
|
|
436
|
+
this.redactor.redactValue({
|
|
437
|
+
source: "orchestrator",
|
|
438
|
+
seq: this.counter.next(),
|
|
439
|
+
event,
|
|
440
|
+
}),
|
|
441
|
+
) + "\n",
|
|
437
442
|
);
|
|
438
443
|
}
|
|
439
444
|
|
|
@@ -443,17 +448,19 @@ export class Supervisor {
|
|
|
443
448
|
*/
|
|
444
449
|
emitSummary(result) {
|
|
445
450
|
this.output.write(
|
|
446
|
-
JSON.stringify(
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
451
|
+
JSON.stringify(
|
|
452
|
+
this.redactor.redactValue({
|
|
453
|
+
source: "orchestrator",
|
|
454
|
+
seq: this.counter.next(),
|
|
455
|
+
event: {
|
|
456
|
+
type: "summary",
|
|
457
|
+
success: result.success,
|
|
458
|
+
...(result.verdict && { verdict: result.verdict }),
|
|
459
|
+
turns: result.turns,
|
|
460
|
+
...(result.summary && { summary: result.summary }),
|
|
461
|
+
},
|
|
462
|
+
}),
|
|
463
|
+
) + "\n",
|
|
457
464
|
);
|
|
458
465
|
}
|
|
459
466
|
}
|
|
@@ -498,7 +505,9 @@ export function createSupervisor({
|
|
|
498
505
|
profilesDir,
|
|
499
506
|
taskAmend,
|
|
500
507
|
agentMcpServers,
|
|
508
|
+
redactor,
|
|
501
509
|
}) {
|
|
510
|
+
if (!redactor) throw new Error("redactor is required");
|
|
502
511
|
const resolvedProfilesDir =
|
|
503
512
|
profilesDir ?? resolve(supervisorCwd, ".claude/agents");
|
|
504
513
|
const systemPromptFor = (profile, trailer) => {
|
|
@@ -538,6 +547,7 @@ export function createSupervisor({
|
|
|
538
547
|
settingSources: ["project"],
|
|
539
548
|
systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
|
|
540
549
|
mcpServers: { orchestration: agentServer, ...agentMcpServers },
|
|
550
|
+
redactor,
|
|
541
551
|
});
|
|
542
552
|
|
|
543
553
|
const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
|
|
@@ -564,6 +574,7 @@ export function createSupervisor({
|
|
|
564
574
|
settingSources: ["project"],
|
|
565
575
|
systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
|
|
566
576
|
mcpServers: { orchestration: supervisorServer },
|
|
577
|
+
redactor,
|
|
567
578
|
});
|
|
568
579
|
|
|
569
580
|
supervisor = new Supervisor({
|
|
@@ -574,6 +585,7 @@ export function createSupervisor({
|
|
|
574
585
|
ctx,
|
|
575
586
|
messageBus,
|
|
576
587
|
taskAmend,
|
|
588
|
+
redactor,
|
|
577
589
|
});
|
|
578
590
|
return supervisor;
|
|
579
591
|
}
|