@alis-build/harness-eval 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +187 -30
  2. package/dist/adapters/claude-code/index.d.ts +2 -2
  3. package/dist/adapters/claude-code/index.js +2 -1
  4. package/dist/adapters/codex/index.d.ts +68 -0
  5. package/dist/adapters/codex/index.js +3 -0
  6. package/dist/{claude-code-DZ4Vkgp6.js → claude-code-C_7hxC8z.js} +3 -245
  7. package/dist/claude-code-C_7hxC8z.js.map +1 -0
  8. package/dist/cli/bin.js +131 -151
  9. package/dist/cli/bin.js.map +1 -1
  10. package/dist/codex-0cHO2te9.js +496 -0
  11. package/dist/codex-0cHO2te9.js.map +1 -0
  12. package/dist/config/loader.d.ts +2 -2
  13. package/dist/config/loader.js +2 -2
  14. package/dist/{index-V22PrR0p.d.ts → index-C56AEDUr.d.ts} +2 -2
  15. package/dist/index.d.ts +134 -6
  16. package/dist/index.js +6 -5
  17. package/dist/index.js.map +1 -1
  18. package/dist/{loader-DcI0KfRX.js → loader-CiBm4Kf6.js} +491 -209
  19. package/dist/loader-CiBm4Kf6.js.map +1 -0
  20. package/dist/loader-CrmzNwkq.d.ts +107 -0
  21. package/dist/{projections-BcX7w-f6.js → reporter-BKCJZRYr.js} +1475 -729
  22. package/dist/reporter-BKCJZRYr.js.map +1 -0
  23. package/dist/runner/suite.d.ts +1 -1
  24. package/dist/runner/suite.js +1 -1
  25. package/dist/{suite-Dlzl-HI0.js → suite-C3-8EjUW.js} +558 -4
  26. package/dist/suite-C3-8EjUW.js.map +1 -0
  27. package/dist/{suite-DPJMIEbu.d.ts → suite-qyOGre2g.d.ts} +2 -2
  28. package/dist/types-Bac8_Ixb.js +246 -0
  29. package/dist/types-Bac8_Ixb.js.map +1 -0
  30. package/dist/{types-CD3TwOtZ.d.ts → types-CLt4Yygc.d.ts} +2 -2
  31. package/dist/{types-B9H4IZtA.d.ts → types-D0HR2WnP.d.ts} +9 -2
  32. package/dist/types-DFMpv_HJ.d.ts +77 -0
  33. package/package.json +11 -2
  34. package/schemas/eval-run-envelope.schema.json +193 -183
  35. package/dist/claude-code-DZ4Vkgp6.js.map +0 -1
  36. package/dist/loader-C9yQHUPC.d.ts +0 -50
  37. package/dist/loader-DcI0KfRX.js.map +0 -1
  38. package/dist/projections-BcX7w-f6.js.map +0 -1
  39. package/dist/suite-Dlzl-HI0.js.map +0 -1
@@ -1,5 +1,11 @@
1
- import { t as claudeCodeAdapter } from "./claude-code-DZ4Vkgp6.js";
1
+ import { n as TrajectoryBuilder, t as AdapterError } from "./types-Bac8_Ixb.js";
2
+ import { t as claudeCodeAdapter } from "./claude-code-C_7hxC8z.js";
2
3
  import { n as parseCardinality, t as describeCardinality } from "./cardinality-DlE44e-4.js";
4
+ import { t as codexAdapter } from "./codex-0cHO2te9.js";
5
+ import { spawn } from "node:child_process";
6
+ import { mkdtemp, rm } from "node:fs/promises";
7
+ import { tmpdir } from "node:os";
8
+ import { join } from "node:path";
3
9
  //#region src/assertions/patterns.ts
4
10
  /**
5
11
  * Test whether a fully-qualified tool name matches a pattern.
@@ -454,12 +460,536 @@ function evaluateAll(view, assertions) {
454
460
  return assertions.map((a) => evaluate(view, a));
455
461
  }
456
462
  //#endregion
463
+ //#region src/adapters/gemini-cli/exit-codes.ts
/**
 * Known Gemini CLI exit codes for headless / stream-json runs.
 *
 * @see spec P-7 — preserve codes in diagnostics and surface human-readable labels.
 */
/**
 * Documented Gemini CLI exit codes for headless harness runs (spec P-7).
 *
 * Frozen so this shared lookup table cannot be mutated at runtime.
 */
const GEMINI_CLI_EXIT_CODES = Object.freeze({
  /** Normal completion. */
  SUCCESS: 0,
  /** Unhandled CLI or runtime failure. */
  ERROR: 1,
  /** Invalid prompt, flags, or stdin (exit 42). */
  INPUT_ERROR: 42,
  /** Agent exceeded configured turn budget (exit 53). */
  TURN_LIMIT: 53,
});
/**
 * Return a short description for a non-zero Gemini CLI exit code.
 *
 * Used to populate {@link AdapterDiagnostics.exitCodeDescription} so reports
 * surface human-readable failure reasons without re-parsing stderr.
 *
 * @param {number | null | undefined} exitCode Exit code reported for the
 *   process; `null`/`undefined` means no exit code is available.
 * @returns {string | undefined} A label for non-zero codes, or `undefined`
 *   when there is no code or the process exited successfully.
 */
function describeGeminiCliExitCode(exitCode) {
  // `== null` deliberately matches both null and undefined, so a missing
  // code never renders as "exited with code undefined".
  if (exitCode == null || exitCode === GEMINI_CLI_EXIT_CODES.SUCCESS) return undefined;
  switch (exitCode) {
    case GEMINI_CLI_EXIT_CODES.ERROR:
      return "Gemini CLI exited with a general error (code 1)";
    case GEMINI_CLI_EXIT_CODES.INPUT_ERROR:
      return "Gemini CLI input error (code 42)";
    case GEMINI_CLI_EXIT_CODES.TURN_LIMIT:
      return "Gemini CLI turn limit exceeded (code 53)";
    default:
      return `Gemini CLI exited with code ${exitCode}`;
  }
}
495
+ //#endregion
496
+ //#region src/adapters/gemini-cli/map-events.ts
/**
 * Stateful mapper — tracks session id, delta text, and pending tool calls.
 *
 * Each parsed Gemini CLI JSON object is translated into zero or more stream
 * events (`system`/init, `assistant`, `user` tool results, `result`). A
 * synthetic init event is emitted the first time any other event needs one
 * and no `init` has been seen yet.
 */
class GeminiCliEventMapper {
  sessionId = "gemini-session";
  model = "";
  sawInit = false;
  /** Tool ids announced by `tool_use` whose `tool_result` has not arrived yet. */
  startedTools = new Set();
  /** Assistant text accumulated from `delta` messages, not yet emitted. */
  assistantDeltaBuffer = "";
  turnCount = 0;

  /**
   * Map one parsed Gemini JSON object to zero or more stream events.
   *
   * @param {unknown} event Value produced by `JSON.parse` for one output line.
   * @returns {object[]} Mapped events; empty for unknown, untyped, or
   *   non-object input.
   */
  map(event) {
    // A line such as `null` is valid JSON but not an event; ignore it rather
    // than failing on the property access below.
    if (event === null || typeof event !== "object") return [];
    const { type } = event;
    if (!type) return [];
    switch (type) {
      case "init":
        return [this.buildInit(event.session_id ?? this.sessionId, event.model ?? "")];
      case "message":
        return this.mapMessage(event);
      case "tool_use":
        return this.mapToolUse(event);
      case "tool_result":
        return this.mapToolResult(event);
      case "result":
        return this.mapResult(event);
      default:
        // "error" events and unrecognised types produce no stream events.
        return [];
    }
  }

  /** Record the session id/model and build the `system`/`init` event. */
  buildInit(sessionId, model) {
    this.sessionId = sessionId;
    this.model = model;
    this.sawInit = true;
    return {
      type: "system",
      subtype: "init",
      session_id: sessionId,
      cwd: "",
      model,
      tools: [],
      mcp_servers: [],
    };
  }

  /** Return a one-element array with a synthetic init event if none was seen yet. */
  ensureInit() {
    if (this.sawInit) return [];
    return [this.buildInit(this.sessionId, this.model)];
  }

  /**
   * Map a `message` event. User messages are dropped; delta chunks are
   * buffered; a non-delta message emits the buffered text plus its own content.
   */
  mapMessage(event) {
    if (event.role === "user") return [];
    const chunk = event.content ?? "";
    if (event.delta) {
      this.assistantDeltaBuffer += chunk;
      return this.ensureInit();
    }
    const text = this.assistantDeltaBuffer + chunk;
    this.assistantDeltaBuffer = "";
    const events = this.ensureInit();
    if (text) {
      events.push(this.#assistantEvent(`msg_${this.turnCount}`, { type: "text", text }, "end_turn"));
    }
    return events;
  }

  /** Map a `tool_use` event, flushing any buffered assistant text first. */
  mapToolUse(event) {
    // Events without a tool id get a random placeholder id; only real ids are
    // tracked for pairing with a later tool_result.
    const toolId = event.tool_id ?? `tool_${Math.random().toString(36).slice(2)}`;
    if (event.tool_id) this.startedTools.add(event.tool_id);
    const input = event.parameters ?? {};
    const name = resolveGeminiToolName(event.tool_name ?? "unknown", input);
    return [
      ...this.flushDeltaBuffer(),
      ...this.ensureInit(),
      this.#assistantEvent(
        `assistant_${toolId}`,
        { type: "tool_use", id: toolId, name, input },
        "tool_use",
      ),
    ];
  }

  /**
   * Map a `tool_result` event. When the result references a tool id that was
   * never announced, a placeholder `tool_use` named "unknown" is emitted first.
   */
  mapToolResult(event) {
    const toolId = event.tool_id ?? "";
    const events = [...this.ensureInit()];
    if (toolId && !this.startedTools.has(toolId)) {
      events.push(
        this.#assistantEvent(
          `assistant_${toolId}`,
          { type: "tool_use", id: toolId, name: "unknown", input: {} },
          "tool_use",
        ),
      );
    } else if (toolId) {
      this.startedTools.delete(toolId);
    }
    const isError = event.status === "error" || event.error != null;
    const content = event.output ?? event.error?.message ?? "";
    events.push({
      type: "user",
      session_id: this.sessionId,
      message: {
        role: "user",
        content: [{
          type: "tool_result",
          tool_use_id: toolId,
          content,
          is_error: isError,
        }],
      },
    });
    return events;
  }

  /** Map the terminal `result` event; increments the turn counter first. */
  mapResult(event) {
    this.turnCount++;
    const isError = event.status === "error";
    return [...this.flushDeltaBuffer(), {
      type: "result",
      subtype: isError ? "error" : "success",
      session_id: this.sessionId,
      is_error: isError,
      result: event.error?.message ?? "",
      usage: mapUsage(event.stats),
      total_cost_usd: event.stats?.total_cost_usd ?? 0,
      duration_ms: event.stats?.duration_ms ?? 0,
      num_turns: this.turnCount,
    }];
  }

  /** Emit buffered delta text as one assistant message before tool/result events. */
  flushDeltaBuffer() {
    if (!this.assistantDeltaBuffer) return [];
    const text = this.assistantDeltaBuffer;
    this.assistantDeltaBuffer = "";
    return [
      ...this.ensureInit(),
      this.#assistantEvent(`msg_delta_${this.turnCount}`, { type: "text", text }, "end_turn"),
    ];
  }

  /** Build an `assistant` event carrying a single content block. */
  #assistantEvent(messageId, contentBlock, stopReason) {
    return {
      type: "assistant",
      session_id: this.sessionId,
      message: {
        id: messageId,
        type: "message",
        role: "assistant",
        content: [contentBlock],
        stop_reason: stopReason,
      },
    };
  }
}
/**
 * Resolve harness tool name from Gemini tool_name + parameters.
 *
 * MCP tools use `mcp__<server>__<tool>`; built-in Gemini tools keep native names.
 *
 * Resolution order:
 *  1. names already in `mcp__…` form are returned untouched;
 *  2. non-empty string `server` and `tool` parameters yield `mcp__<server>__<tool>`;
 *  3. `mcp_<server>_<tool>` names are split at their LAST underscore;
 *  4. anything else is returned as-is.
 *
 * @param {string} toolName Tool name as reported by Gemini.
 * @param {object} parameters Tool-call parameters.
 * @returns {string} The harness-facing tool name.
 */
function resolveGeminiToolName(toolName, parameters) {
  if (toolName.startsWith("mcp__")) return toolName;

  const { server, tool } = parameters;
  const hasServer = typeof server === "string" && server !== "";
  const hasTool = typeof tool === "string" && tool !== "";
  if (hasServer && hasTool) return `mcp__${server}__${tool}`;

  const singlePrefix = "mcp_";
  if (!toolName.startsWith(singlePrefix)) return toolName;

  const qualified = toolName.slice(singlePrefix.length);
  const cut = qualified.lastIndexOf("_");
  const toolPart = qualified.slice(cut + 1);
  // Both a server part (cut > 0) and a tool part are required to rewrite.
  if (cut <= 0 || toolPart === "") return toolName;
  return `mcp__${qualified.slice(0, cut)}__${toolPart}`;
}
/**
 * Convert Gemini `stats` into a usage record.
 *
 * @param {{ input_tokens?: number, output_tokens?: number } | null | undefined} stats
 * @returns {{ input_tokens: number, output_tokens: number }} Token counts,
 *   with 0 substituted for any count that is missing or nullish.
 */
function mapUsage(stats) {
  const inputTokens = stats?.input_tokens ?? 0;
  const outputTokens = stats?.output_tokens ?? 0;
  return {
    input_tokens: inputTokens,
    output_tokens: outputTokens,
  };
}
687
+ //#endregion
688
+ //#region src/adapters/gemini-cli/parse-json.ts
/**
 * Parse Gemini JSONL stdout into parsed event objects.
 *
 * Chunks are accumulated and split on "\n"; each non-blank, trimmed line is
 * yielded as the result of {@link tryParseLine}. Text left over after the
 * stream ends (no trailing newline) is parsed as a final line.
 *
 * @param stream Readable text source; must provide `setEncoding` and be
 *   async-iterable.
 * @yields {{ ok: true, event: unknown, rawLine: string } | { ok: false, error: Error, rawLine: string }}
 */
async function* parseGeminiCliJson(stream) {
  stream.setEncoding("utf8");
  let pending = "";
  for await (const chunk of stream) {
    const pieces = (pending + chunk).split("\n");
    // The last piece has no terminating newline yet; carry it forward.
    pending = pieces.pop();
    for (const piece of pieces) {
      const line = piece.trim();
      if (line.length > 0) yield tryParseLine(line);
    }
  }
  const trailing = pending.trim();
  if (trailing.length > 0) yield tryParseLine(trailing);
}
/**
 * Parse one line as JSON without throwing.
 *
 * @param {string} line Trimmed, non-empty line of output.
 * @returns A success record holding the parsed value, or a failure record
 *   holding the parse error; both carry the original line as `rawLine`.
 */
function tryParseLine(line) {
  let event;
  try {
    event = JSON.parse(line);
  } catch (err) {
    const error = err instanceof Error ? err : new Error(String(err));
    return { ok: false, error, rawLine: line };
  }
  return { ok: true, event, rawLine: line };
}
721
+ //#endregion
722
+ //#region src/adapters/gemini-cli/flags.ts
/**
 * Append `flag <value>` to `args` once per entry in `values`.
 *
 * A single string is treated as a one-element list; without that, iterating
 * it would emit the flag once per character. Entries are stringified because
 * argv elements must be strings.
 *
 * @param {string[]} args Argument vector, mutated in place.
 * @param {string} flag Flag name, e.g. `--extensions`.
 * @param {Iterable<unknown> | string | null | undefined} values Values to
 *   emit; any falsy value emits nothing.
 */
function pushRepeatableFlag(args, flag, values) {
  if (!values) return;
  const entries = typeof values === "string" ? [values] : values;
  for (const value of entries) args.push(flag, String(value));
}
/**
 * Append an optional flag to `args`.
 *
 * - `undefined` / `null`: nothing is appended (an unset key must not become
 *   the literal argument "null");
 * - boolean: the bare flag is appended only when `true`;
 * - anything else: `flag` followed by the stringified value.
 *
 * @param {string[]} args Argument vector, mutated in place.
 * @param {string} flag Flag name, e.g. `--model`.
 * @param {unknown} value Configured value for the flag.
 */
function pushOptionalFlag(args, flag, value) {
  if (value == null) return;
  if (typeof value === "boolean") {
    if (value) args.push(flag);
    return;
  }
  args.push(flag, String(value));
}
/**
 * Append shared Gemini CLI flags (excluding prompt and output format).
 *
 * Both the harness argv builder and the judge argv builder call this helper,
 * so the two paths stay aligned on approval mode, sandbox, MCP allowlists,
 * and trust settings. `--approval-mode` falls back to "yolo" when unset.
 *
 * @param {string[]} args Argument vector, mutated in place.
 * @param {object} config Adapter config supplying the flag values.
 */
function appendGeminiCliFlags(args, config) {
  const {
    approvalMode,
    model,
    sandbox,
    skipTrust,
    includeDirectories,
    allowedMcpServerNames,
    extensions,
    debug,
  } = config;

  // Emission order is kept stable: scalar flags, repeatable flags, --debug.
  const scalarFlags = [
    ["--approval-mode", approvalMode ?? "yolo"],
    ["--model", model],
    ["--sandbox", sandbox],
    ["--skip-trust", skipTrust],
  ];
  for (const [flag, value] of scalarFlags) pushOptionalFlag(args, flag, value);

  const repeatableFlags = [
    ["--include-directories", includeDirectories],
    ["--allowed-mcp-server-names", allowedMcpServerNames],
    ["--extensions", extensions],
  ];
  for (const [flag, values] of repeatableFlags) pushRepeatableFlag(args, flag, values);

  pushOptionalFlag(args, "--debug", debug);
}
/**
 * Build argv for `gemini -p "<prompt>" --output-format stream-json …`.
 *
 * The prompt is passed as the value of `-p` at the front of the argument
 * list, followed by the output format and then the shared flags. `skipTrust`
 * defaults to true so CI and ephemeral workspaces do not block on interactive
 * folder-trust prompts.
 *
 * @param {object} config Adapter config; `config.prompt` is the prompt text.
 * @returns {string[]} Argument vector for the Gemini CLI binary.
 */
function buildArgs(config) {
  const effectiveConfig = {
    ...config,
    skipTrust: config.skipTrust ?? true,
  };
  const args = ["-p", config.prompt, "--output-format", "stream-json"];
  appendGeminiCliFlags(args, effectiveConfig);
  return args;
}
/**
 * Build argv for `gemini -p "<prompt>" --output-format json …` (judge).
 *
 * Requests the `json` output format — a single JSON object rather than
 * NDJSON. Per the original note, the judge grader may read it from stdout or
 * recover it from stderr when Gemini prints warnings first.
 *
 * @param {string} prompt Judge prompt text.
 * @param {object} [config] Optional flag overrides; `approvalMode` defaults
 *   to "yolo" and `skipTrust` to true.
 * @returns {string[]} Argument vector for the Gemini CLI binary.
 */
function buildJudgeArgs(prompt, config = {}) {
  const judgeConfig = {
    ...config,
    approvalMode: config.approvalMode ?? "yolo",
    skipTrust: config.skipTrust ?? true,
  };
  const args = ["-p", prompt, "--output-format", "json"];
  appendGeminiCliFlags(args, judgeConfig);
  return args;
}
791
+ //#endregion
792
+ //#region src/adapters/gemini-cli/process.ts
/**
 * Process management for the Gemini CLI adapter.
 *
 * Spawns `gemini -p … --output-format stream-json`, handles timeout/abort,
 * and optional config-directory isolation.
 */
/** Run timeout applied when the config sets no `timeoutMs`: five minutes, in ms. */
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;
/** Grace period between SIGTERM and SIGKILL on timeout or abort. */
const KILL_GRACE_MS = 5000;
/** Env var Gemini CLI uses for config directory isolation. */
const GEMINI_CONFIG_DIR_ENV = "GEMINI_CONFIG_DIR";
/**
 * Resolve config dir for isolated runs. Exported for unit tests.
 *
 * @param {{ isolateConfig?: unknown }} config Adapter config.
 * @param {string | null | undefined} tempDir Candidate temporary directory.
 * @returns {string | undefined} `tempDir` when `config.isolateConfig` is
 *   strictly `true` and `tempDir` is truthy; otherwise `undefined`.
 */
function resolveGeminiConfigDir(config, tempDir) {
  const isolationRequested = config.isolateConfig === true;
  return isolationRequested && tempDir ? tempDir : undefined;
}
/**
 * Build subprocess env with optional `GEMINI_CONFIG_DIR` isolation.
 *
 * Shared by harness spawn ({@link spawnGeminiCli}) and the Gemini CLI judge
 * ({@link runGeminiCliGrader}) so both paths use the same config-dir semantics.
 *
 * When `config.isolateConfig` is strictly `true`, a fresh temporary directory
 * is created and exported through the config-dir env var. The returned
 * `cleanup` removes that directory and deliberately ignores removal errors.
 *
 * @param {object} config Adapter config (`isolateConfig`, `env`).
 * @param {object} [baseEnv] Base environment; `config.env` entries override it.
 * @returns {Promise<{ env: object, cleanup: () => Promise<void> }>}
 */
async function prepareGeminiCliEnv(config, baseEnv = process.env) {
  let tempConfigDir = null;
  if (config.isolateConfig === true) {
    tempConfigDir = await mkdtemp(join(tmpdir(), "harness-eval-gemini-"));
  }

  const env = { ...baseEnv, ...config.env };
  const configDir = resolveGeminiConfigDir(config, tempConfigDir);
  if (configDir) env[GEMINI_CONFIG_DIR_ENV] = configDir;

  async function cleanup() {
    if (tempConfigDir) {
      try {
        await rm(tempConfigDir, { recursive: true, force: true });
      } catch {
        // Best effort: a leftover temp dir must not fail the run.
      }
    }
  }

  return { env, cleanup };
}
/**
 * Spawn `gemini -p … --output-format stream-json` with optional config-dir isolation.
 *
 * Timeout and abort both send SIGTERM to the process group, then SIGKILL after
 * {@link KILL_GRACE_MS} if the group is still alive. A signal that is already
 * aborted when this function is called terminates the child immediately.
 *
 * @param {object} config Adapter config (`binary`, `cwd`, `timeoutMs`,
 *   `signal`, plus everything the argv and env builders read).
 * @returns {Promise<{
 *   stdout: object,
 *   done: Promise<{ exitCode: number | null, signal: string | null }>,
 *   stderrCollected: Promise<string>,
 *   timedOut: () => boolean,
 *   cleanup: () => Promise<void>
 * }>} Handles for the running process; `cleanup` releases the env resources.
 * @throws Whatever `spawn()` throws synchronously; the isolated config
 *   directory is released before the error is rethrown.
 */
async function spawnGeminiCli(config) {
  const binary = config.binary ?? "gemini";
  const args = buildArgs(config);
  const { env, cleanup: envCleanup } = await prepareGeminiCliEnv(config);

  let child;
  try {
    child = spawn(binary, args, {
      cwd: config.cwd ?? process.cwd(),
      env,
      stdio: ["ignore", "pipe", "pipe"],
      // Own process group, so killTree can signal the whole tree.
      detached: true,
    });
  } catch (err) {
    // The caller never receives `cleanup` on this path, so release here.
    await envCleanup();
    throw err;
  }

  let timedOut = false;
  let killEscalation = null;
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;

  const scheduleKillEscalation = () => {
    if (killEscalation) clearTimeout(killEscalation);
    killEscalation = setTimeout(() => killTree(child, "SIGKILL"), KILL_GRACE_MS);
  };
  const terminate = () => {
    killTree(child, "SIGTERM");
    scheduleKillEscalation();
  };
  const timeoutTimer = setTimeout(() => {
    timedOut = true;
    terminate();
  }, timeoutMs);
  const onAbort = () => terminate();
  config.signal?.addEventListener("abort", onAbort, { once: true });

  const stderrChunks = [];
  const stderrStream = child.stderr;
  const stderrCollected = new Promise((resolve) => {
    const finalize = () => resolve(stderrChunks.join(""));
    if (!stderrStream) {
      // No stderr pipe: settle now instead of leaving the promise pending.
      finalize();
      return;
    }
    stderrStream.setEncoding("utf8");
    stderrStream.on("data", (chunk) => {
      stderrChunks.push(chunk);
    });
    stderrStream.on("end", finalize);
    stderrStream.on("error", finalize);
    // Covers a stream that is torn down without emitting "end".
    stderrStream.on("close", finalize);
  });

  const done = new Promise((resolve) => {
    let settled = false;
    const finalize = (exitCode, signal) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeoutTimer);
      if (killEscalation) clearTimeout(killEscalation);
      config.signal?.removeEventListener("abort", onAbort);
      resolve({ exitCode, signal });
    };
    child.on("close", (code, signal) => finalize(code, signal));
    child.on("error", () => finalize(null, null));
  });

  // An already-aborted signal never dispatches "abort" again, so honour it
  // explicitly once the exit handlers above are in place.
  if (config.signal?.aborted) onAbort();

  return {
    stdout: child.stdout,
    done,
    stderrCollected,
    timedOut: () => timedOut,
    cleanup: envCleanup,
  };
}
/**
 * Signal a child process and its process group.
 *
 * Tries the whole group first (negative pid); if that throws, falls back to
 * signalling only the child. Failures of the fallback are ignored. Does
 * nothing when the child has no pid.
 *
 * @param {{ pid?: number, kill: (signal: any) => unknown }} child
 * @param {string | number} signal Signal to deliver.
 */
function killTree(child, signal) {
  const { pid } = child;
  if (pid === undefined) return;
  try {
    process.kill(-pid, signal);
    return;
  } catch {
    // Group kill failed; fall through to the direct kill below.
  }
  try {
    child.kill(signal);
  } catch {
    // Nothing left to signal; ignore.
  }
}
918
+ //#endregion
919
+ //#region src/adapters/gemini-cli/index.ts
/**
 * Gemini CLI adapter — public API.
 */
/**
 * Run Gemini CLI in headless stream-json mode and return a trajectory.
 *
 * Maps NDJSON events through {@link GeminiCliEventMapper}, records parse
 * errors without aborting, and attaches {@link AdapterDiagnostics.exitCodeDescription}
 * for known non-zero exit codes (spec P-7).
 *
 * @param {object} config Adapter config forwarded to the process spawner.
 * @returns {Promise<{ view: object, diagnostics: object, rawEvents: object[] }>}
 *   The built trajectory view, run diagnostics, and the mapped events in order.
 * @throws {AdapterError} When the trajectory builder cannot build a view; the
 *   error carries the diagnostics, and its message includes the exit-code
 *   description and up to 400 characters of stderr when available.
 */
async function runGeminiCli(config) {
  const startTs = Date.now();
  const spawned = await spawnGeminiCli(config);
  const builder = new TrajectoryBuilder();
  const mapper = new GeminiCliEventMapper();
  const rawEvents = [];
  const parseErrors = [];
  try {
    for await (const parsed of parseGeminiCliJson(spawned.stdout)) {
      if (parsed.ok) {
        for (const event of mapper.map(parsed.event)) {
          builder.consume(event);
          rawEvents.push(event);
        }
      } else {
        // Malformed lines are recorded for diagnostics, never fatal.
        parseErrors.push({
          line: parsed.rawLine,
          error: parsed.error.message,
        });
      }
    }

    const [exit, stderr] = await Promise.all([spawned.done, spawned.stderrCollected]);
    const exitCodeDescription = describeGeminiCliExitCode(exit.exitCode);
    const diagnostics = {
      exitCode: exit.exitCode,
      exitCodeDescription,
      signal: exit.signal,
      stderr,
      parseErrors,
      timedOut: spawned.timedOut(),
      durationMs: Date.now() - startTs,
    };

    try {
      return {
        view: builder.build(),
        diagnostics,
        rawEvents,
      };
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      const codeHint = exitCodeDescription ? ` (${exitCodeDescription})` : "";
      const trimmedStderr = stderr.trim();
      const stderrHint = trimmedStderr.length > 0 ? ` stderr: ${trimmedStderr.slice(0, 400)}` : "";
      throw new AdapterError(`harness produced no usable trajectory: ${reason}${codeHint}${stderrHint}`, diagnostics);
    }
  } finally {
    // Always release env resources, on success and on failure.
    await spawned.cleanup();
  }
}
/** Registered {@link HarnessAdapter} for Gemini CLI headless runs. */
const geminiCliAdapter = {
  id: "gemini-cli",
  run: runGeminiCli,
};
984
+ //#endregion
457
985
  //#region src/adapters/registry.ts
/** Harness adapters keyed by adapter id. */
const ADAPTERS = {};
/** Store `adapter` under `id`, replacing any adapter already stored there. */
function registerBuiltIn(id, adapter) {
  ADAPTERS[id] = adapter;
}
// Built-in adapters, registered in a fixed order.
const BUILT_IN_ADAPTERS = [
  ["claude-code", claudeCodeAdapter],
  ["codex", codexAdapter],
  ["gemini-cli", geminiCliAdapter],
];
for (const [id, adapter] of BUILT_IN_ADAPTERS) registerBuiltIn(id, adapter);
463
993
  /**
464
994
  * Register a harness adapter by id.
465
995
  *
@@ -505,12 +1035,36 @@ function toClaudeCodeConfig(layers, prompt) {
505
1035
  merged.prompt = prompt;
506
1036
  return merged;
507
1037
  }
/**
 * Merge generic suite config layers into a flat {@link CodexAdapterConfig}.
 *
 * Layers are applied in order, so later layers win. Within one layer the
 * generic keys are applied first and the keys nested under `codex` second, so
 * a layer's `codex` block overrides that same layer's generic keys. The
 * `codex` key itself is not copied. Null or undefined layers are skipped.
 *
 * @param {Array<object | null | undefined>} layers Config layers, lowest
 *   precedence first.
 * @param {string} prompt Prompt text; always overrides any `prompt` key from
 *   the layers.
 * @returns {object} Flat merged config including `prompt`.
 */
function toCodexConfig(layers, prompt) {
  const merged = {};
  for (const layer of layers) {
    // Destructuring a nullish layer would throw; an absent layer is a no-op.
    if (layer == null) continue;
    const { codex, ...generic } = layer;
    Object.assign(merged, generic);
    if (codex && typeof codex === "object") Object.assign(merged, codex);
  }
  merged.prompt = prompt;
  return merged;
}
/**
 * Merge generic suite config layers into a flat {@link GeminiCliAdapterConfig}.
 *
 * Layers are applied in order, so later layers win. Within one layer the
 * generic keys are applied first and the keys nested under `geminiCli`
 * second, so a layer's `geminiCli` block overrides that same layer's generic
 * keys. The `geminiCli` key itself is not copied. Null or undefined layers
 * are skipped.
 *
 * @param {Array<object | null | undefined>} layers Config layers, lowest
 *   precedence first.
 * @param {string} prompt Prompt text; always overrides any `prompt` key from
 *   the layers.
 * @returns {object} Flat merged config including `prompt`.
 */
function toGeminiCliConfig(layers, prompt) {
  const merged = {};
  for (const layer of layers) {
    // Destructuring a nullish layer would throw; an absent layer is a no-op.
    if (layer == null) continue;
    const { geminiCli, ...generic } = layer;
    Object.assign(merged, generic);
    if (geminiCli && typeof geminiCli === "object") Object.assign(merged, geminiCli);
  }
  merged.prompt = prompt;
  return merged;
}
508
1060
  /**
509
1061
  * Resolve merged suite layers into the flat config shape expected by the
510
- * selected harness adapter.
1062
+ * selected harness adapter (`claude-code`, `codex`, or `gemini-cli`).
511
1063
  */
512
1064
  function resolveRunConfig(adapterId, layers, prompt) {
513
1065
  if (adapterId === "claude-code" || adapterId === "claude-code") return toClaudeCodeConfig(layers, prompt);
1066
+ if (adapterId === "codex") return toCodexConfig(layers, prompt);
1067
+ if (adapterId === "gemini-cli") return toGeminiCliConfig(layers, prompt);
514
1068
  const merged = {};
515
1069
  for (const layer of layers) Object.assign(merged, layer);
516
1070
  merged.prompt = prompt;
@@ -733,6 +1287,6 @@ async function runSuite(suite, options = {}) {
733
1287
  return report;
734
1288
  }
735
1289
  //#endregion
736
- export { aggregateCell as a, runRepetition as c, getDefaultAdapter as d, listAdapters as f, evaluateAll as h, DEFAULT_THRESHOLD as i, DEFAULT_ADAPTER_ID as l, evaluate as m, createLimit as n, getRepetitions as o, registerAdapter as p, DEFAULT_REPETITIONS as r, mergeConfig as s, runSuite as t, getAdapter as u };
1290
+ export { evaluateAll as _, aggregateCell as a, runRepetition as c, getDefaultAdapter as d, listAdapters as f, evaluate as g, buildJudgeArgs as h, DEFAULT_THRESHOLD as i, DEFAULT_ADAPTER_ID as l, prepareGeminiCliEnv as m, createLimit as n, getRepetitions as o, registerAdapter as p, DEFAULT_REPETITIONS as r, mergeConfig as s, runSuite as t, getAdapter as u };
737
1291
 
738
- //# sourceMappingURL=suite-Dlzl-HI0.js.map
1292
+ //# sourceMappingURL=suite-C3-8EjUW.js.map