@alis-build/harness-eval 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +187 -30
  2. package/dist/adapters/claude-code/index.d.ts +2 -2
  3. package/dist/adapters/claude-code/index.js +2 -1
  4. package/dist/adapters/codex/index.d.ts +68 -0
  5. package/dist/adapters/codex/index.js +3 -0
  6. package/dist/{claude-code-DZ4Vkgp6.js → claude-code-C_7hxC8z.js} +3 -245
  7. package/dist/claude-code-C_7hxC8z.js.map +1 -0
  8. package/dist/cli/bin.js +131 -151
  9. package/dist/cli/bin.js.map +1 -1
  10. package/dist/codex-0cHO2te9.js +496 -0
  11. package/dist/codex-0cHO2te9.js.map +1 -0
  12. package/dist/config/loader.d.ts +2 -2
  13. package/dist/config/loader.js +2 -2
  14. package/dist/{index-V22PrR0p.d.ts → index-C56AEDUr.d.ts} +2 -2
  15. package/dist/index.d.ts +134 -6
  16. package/dist/index.js +6 -5
  17. package/dist/index.js.map +1 -1
  18. package/dist/{loader-DcI0KfRX.js → loader-CiBm4Kf6.js} +491 -209
  19. package/dist/loader-CiBm4Kf6.js.map +1 -0
  20. package/dist/loader-CrmzNwkq.d.ts +107 -0
  21. package/dist/{projections-BcX7w-f6.js → reporter-BKCJZRYr.js} +1475 -729
  22. package/dist/reporter-BKCJZRYr.js.map +1 -0
  23. package/dist/runner/suite.d.ts +1 -1
  24. package/dist/runner/suite.js +1 -1
  25. package/dist/{suite-Dlzl-HI0.js → suite-C3-8EjUW.js} +558 -4
  26. package/dist/suite-C3-8EjUW.js.map +1 -0
  27. package/dist/{suite-DPJMIEbu.d.ts → suite-qyOGre2g.d.ts} +2 -2
  28. package/dist/types-Bac8_Ixb.js +246 -0
  29. package/dist/types-Bac8_Ixb.js.map +1 -0
  30. package/dist/{types-CD3TwOtZ.d.ts → types-CLt4Yygc.d.ts} +2 -2
  31. package/dist/{types-B9H4IZtA.d.ts → types-D0HR2WnP.d.ts} +9 -2
  32. package/dist/types-DFMpv_HJ.d.ts +77 -0
  33. package/package.json +11 -2
  34. package/schemas/eval-run-envelope.schema.json +193 -183
  35. package/dist/claude-code-DZ4Vkgp6.js.map +0 -1
  36. package/dist/loader-C9yQHUPC.d.ts +0 -50
  37. package/dist/loader-DcI0KfRX.js.map +0 -1
  38. package/dist/projections-BcX7w-f6.js.map +0 -1
  39. package/dist/suite-Dlzl-HI0.js.map +0 -1
package/dist/types-DFMpv_HJ.d.ts ADDED
@@ -0,0 +1,77 @@
1
+ import { i as BaseAdapterConfig, r as AdapterResult, x as StreamEvent } from "./types-D0HR2WnP.js";
2
+
3
+ //#region src/adapters/codex/types.d.ts
4
+ /** Codex sandbox policies (`codex exec --sandbox`). */
5
+ type CodexSandbox = "read-only" | "workspace-write" | "danger-full-access";
6
+ /** Codex approval modes (`--ask-for-approval`). */
7
+ type CodexAskForApproval = "untrusted" | "on-request" | "never";
8
+ /** Codex-specific options (nested under `codex` in YAML). */
9
+ interface CodexOptions {
10
+ binary?: string;
11
+ model?: string;
12
+ profile?: string;
13
+ sandbox?: CodexSandbox;
14
+ addDirs?: string[];
15
+ /** Inline `-c key=value` overrides (repeatable on CLI). */
16
+ configOverrides?: string[];
17
+ askForApproval?: CodexAskForApproval;
18
+ dangerouslyBypassApprovalsAndSandbox?: boolean;
19
+ dangerouslyBypassHookTrust?: boolean;
20
+ ephemeral?: boolean;
21
+ ignoreUserConfig?: boolean;
22
+ skipGitRepoCheck?: boolean;
23
+ outputSchema?: string;
24
+ outputLastMessage?: string;
25
+ /**
26
+ * When true (default), harness runs auto-generate a temp `--output-last-message`
27
+ * path and read it back as `finalResponse` if JSONL has no assistant_message.
28
+ */
29
+ captureLastMessage?: boolean;
30
+ /**
31
+ * When true, each run uses a fresh temp `$CODEX_HOME` for isolation.
32
+ * Default false — inherit caller's ~/.codex config and auth.
33
+ */
34
+ isolateConfig?: boolean;
35
+ }
36
+ /** Configuration for a single Codex harness run. */
37
+ interface CodexAdapterConfig extends BaseAdapterConfig, CodexOptions {}
38
+ /** Codex run result — includes mapped stream events for debugging. */
39
+ interface CodexAdapterResult extends AdapterResult {
40
+ rawEvents: StreamEvent[];
41
+ }
42
+ /** Raw Codex `--json` thread event (partial — tolerate unknown fields). */
43
+ interface CodexJsonEvent {
44
+ type?: string;
45
+ thread_id?: string;
46
+ usage?: CodexUsage;
47
+ item?: CodexItem;
48
+ message?: string;
49
+ }
50
+ /** Token usage on a Codex turn or thread event. */
51
+ interface CodexUsage {
52
+ input_tokens?: number;
53
+ cached_input_tokens?: number;
54
+ output_tokens?: number;
55
+ reasoning_output_tokens?: number;
56
+ }
57
+ /** One item in a Codex thread (tool call, command, or assistant message). */
58
+ interface CodexItem {
59
+ id?: string;
60
+ type?: string;
61
+ item_type?: string;
62
+ server?: string;
63
+ tool?: string;
64
+ arguments?: unknown;
65
+ command?: string;
66
+ exit_code?: number;
67
+ aggregated_output?: string;
68
+ text?: string;
69
+ result?: unknown;
70
+ error?: {
71
+ message?: string;
72
+ } | null;
73
+ status?: string;
74
+ }
75
+ //#endregion
76
+ export { CodexOptions as i, CodexAdapterResult as n, CodexJsonEvent as r, CodexAdapterConfig as t };
77
+ //# sourceMappingURL=types-DFMpv_HJ.d.ts.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@alis-build/harness-eval",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "Harness-level eval framework for measuring AI coding agent tool-selection behavior",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -27,6 +27,14 @@
27
27
  "types": "./dist/adapters/claude-code/index.d.ts",
28
28
  "import": "./dist/adapters/claude-code/index.js"
29
29
  },
30
+ "./adapters/codex": {
31
+ "types": "./dist/adapters/codex/index.d.ts",
32
+ "import": "./dist/adapters/codex/index.js"
33
+ },
34
+ "./adapters/gemini-cli": {
35
+ "types": "./dist/adapters/gemini-cli/index.d.ts",
36
+ "import": "./dist/adapters/gemini-cli/index.js"
37
+ },
30
38
  "./runner": {
31
39
  "types": "./dist/runner/suite.d.ts",
32
40
  "import": "./dist/runner/suite.js"
@@ -45,6 +53,7 @@
45
53
  "scripts": {
46
54
  "generate-schemas": "tsx src/schemas/generate.ts",
47
55
  "build": "pnpm run generate-schemas && tsdown",
56
+ "prebuild": "pnpm run typecheck",
48
57
  "postbuild": "node scripts/link-local-bin.mjs",
49
58
  "prepack": "pnpm run build",
50
59
  "prepublishOnly": "pnpm run build",
@@ -72,5 +81,5 @@
72
81
  "publishConfig": {
73
82
  "access": "public"
74
83
  },
75
- "packageManager": "pnpm@11.3.0"
84
+ "packageManager": "pnpm@11.8.0"
76
85
  }