@wingman-ai/gateway 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/README.md +14 -0
  2. package/dist/agent/config/mcpClientManager.cjs +104 -1
  3. package/dist/agent/config/mcpClientManager.d.ts +30 -0
  4. package/dist/agent/config/mcpClientManager.js +104 -1
  5. package/dist/agent/config/modelFactory.cjs +10 -0
  6. package/dist/agent/config/modelFactory.js +10 -0
  7. package/dist/agent/config/xaiImageModel.cjs +242 -0
  8. package/dist/agent/config/xaiImageModel.d.ts +33 -0
  9. package/dist/agent/config/xaiImageModel.js +202 -0
  10. package/dist/agent/tests/mcpClientManager.test.cjs +116 -0
  11. package/dist/agent/tests/mcpClientManager.test.js +117 -1
  12. package/dist/agent/tests/mcpResourceTools.test.cjs +101 -0
  13. package/dist/agent/tests/mcpResourceTools.test.d.ts +1 -0
  14. package/dist/agent/tests/mcpResourceTools.test.js +95 -0
  15. package/dist/agent/tests/modelFactory.test.cjs +16 -2
  16. package/dist/agent/tests/modelFactory.test.js +16 -2
  17. package/dist/agent/tests/xaiImageModel.test.cjs +194 -0
  18. package/dist/agent/tests/xaiImageModel.test.d.ts +1 -0
  19. package/dist/agent/tests/xaiImageModel.test.js +188 -0
  20. package/dist/agent/tools/mcp_resources.cjs +111 -0
  21. package/dist/agent/tools/mcp_resources.d.ts +3 -0
  22. package/dist/agent/tools/mcp_resources.js +77 -0
  23. package/dist/bench/adapters/commandAdapter.cjs +93 -0
  24. package/dist/bench/adapters/commandAdapter.d.ts +6 -0
  25. package/dist/bench/adapters/commandAdapter.js +59 -0
  26. package/dist/bench/adapters/helpers.cjs +170 -0
  27. package/dist/bench/adapters/helpers.d.ts +7 -0
  28. package/dist/bench/adapters/helpers.js +133 -0
  29. package/dist/bench/adapters/index.cjs +41 -0
  30. package/dist/bench/adapters/index.d.ts +2 -0
  31. package/dist/bench/adapters/index.js +7 -0
  32. package/dist/bench/adapters/wingmanCliAdapter.cjs +100 -0
  33. package/dist/bench/adapters/wingmanCliAdapter.d.ts +6 -0
  34. package/dist/bench/adapters/wingmanCliAdapter.js +66 -0
  35. package/dist/bench/cleanup.cjs +122 -0
  36. package/dist/bench/cleanup.d.ts +9 -0
  37. package/dist/bench/cleanup.js +85 -0
  38. package/dist/bench/config.cjs +190 -0
  39. package/dist/bench/config.d.ts +2 -0
  40. package/dist/bench/config.js +156 -0
  41. package/dist/bench/index.cjs +43 -0
  42. package/dist/bench/index.d.ts +3 -0
  43. package/dist/bench/index.js +3 -0
  44. package/dist/bench/official.cjs +616 -0
  45. package/dist/bench/official.d.ts +80 -0
  46. package/dist/bench/official.js +546 -0
  47. package/dist/bench/officialCli.cjs +204 -0
  48. package/dist/bench/officialCli.d.ts +5 -0
  49. package/dist/bench/officialCli.js +170 -0
  50. package/dist/bench/process.cjs +78 -0
  51. package/dist/bench/process.d.ts +14 -0
  52. package/dist/bench/process.js +44 -0
  53. package/dist/bench/runner.cjs +237 -0
  54. package/dist/bench/runner.d.ts +7 -0
  55. package/dist/bench/runner.js +197 -0
  56. package/dist/bench/scoring.cjs +171 -0
  57. package/dist/bench/scoring.d.ts +9 -0
  58. package/dist/bench/scoring.js +137 -0
  59. package/dist/bench/types.cjs +18 -0
  60. package/dist/bench/types.d.ts +200 -0
  61. package/dist/bench/types.js +0 -0
  62. package/dist/bench/validator.cjs +92 -0
  63. package/dist/bench/validator.d.ts +2 -0
  64. package/dist/bench/validator.js +58 -0
  65. package/dist/cli/config/schema.cjs +36 -1
  66. package/dist/cli/config/schema.d.ts +46 -0
  67. package/dist/cli/config/schema.js +36 -1
  68. package/dist/cli/config/warnings.cjs +119 -51
  69. package/dist/cli/config/warnings.js +119 -51
  70. package/dist/cli/core/agentInvoker.cjs +9 -2
  71. package/dist/cli/core/agentInvoker.d.ts +1 -0
  72. package/dist/cli/core/agentInvoker.js +9 -2
  73. package/dist/cli/core/imagePersistence.cjs +17 -1
  74. package/dist/cli/core/imagePersistence.d.ts +2 -0
  75. package/dist/cli/core/imagePersistence.js +13 -3
  76. package/dist/cli/core/sessionManager.cjs +2 -0
  77. package/dist/cli/core/sessionManager.js +3 -1
  78. package/dist/cli/types.d.ts +18 -0
  79. package/dist/gateway/adapters/teams.cjs +419 -0
  80. package/dist/gateway/adapters/teams.d.ts +47 -0
  81. package/dist/gateway/adapters/teams.js +361 -0
  82. package/dist/gateway/http/sms.cjs +286 -0
  83. package/dist/gateway/http/sms.d.ts +4 -0
  84. package/dist/gateway/http/sms.js +249 -0
  85. package/dist/gateway/server.cjs +54 -3
  86. package/dist/gateway/server.d.ts +2 -0
  87. package/dist/gateway/server.js +54 -3
  88. package/dist/gateway/sms/commands.cjs +116 -0
  89. package/dist/gateway/sms/commands.d.ts +15 -0
  90. package/dist/gateway/sms/commands.js +79 -0
  91. package/dist/gateway/sms/control.cjs +118 -0
  92. package/dist/gateway/sms/control.d.ts +18 -0
  93. package/dist/gateway/sms/control.js +84 -0
  94. package/dist/gateway/sms/policyStore.cjs +198 -0
  95. package/dist/gateway/sms/policyStore.d.ts +37 -0
  96. package/dist/gateway/sms/policyStore.js +161 -0
  97. package/dist/providers/registry.cjs +1 -0
  98. package/dist/providers/registry.js +1 -0
  99. package/dist/tests/cli-config-warnings.test.cjs +41 -0
  100. package/dist/tests/cli-config-warnings.test.js +41 -0
  101. package/dist/tests/cli-init.test.cjs +32 -26
  102. package/dist/tests/cli-init.test.js +32 -26
  103. package/dist/tests/gateway-http-security.test.cjs +21 -0
  104. package/dist/tests/gateway-http-security.test.js +21 -0
  105. package/dist/tests/gateway-origin-policy.test.cjs +22 -0
  106. package/dist/tests/gateway-origin-policy.test.js +22 -0
  107. package/dist/tests/gateway.test.cjs +57 -0
  108. package/dist/tests/gateway.test.js +57 -0
  109. package/dist/tests/imagePersistence.test.cjs +26 -0
  110. package/dist/tests/imagePersistence.test.js +27 -1
  111. package/dist/tests/run-terminal-bench-official-script.test.cjs +61 -0
  112. package/dist/tests/run-terminal-bench-official-script.test.d.ts +1 -0
  113. package/dist/tests/run-terminal-bench-official-script.test.js +55 -0
  114. package/dist/tests/sessions-api.test.cjs +69 -1
  115. package/dist/tests/sessions-api.test.js +70 -2
  116. package/dist/tests/sms-api.test.cjs +183 -0
  117. package/dist/tests/sms-api.test.d.ts +1 -0
  118. package/dist/tests/sms-api.test.js +177 -0
  119. package/dist/tests/sms-commands.test.cjs +90 -0
  120. package/dist/tests/sms-commands.test.d.ts +1 -0
  121. package/dist/tests/sms-commands.test.js +84 -0
  122. package/dist/tests/sms-policy-store.test.cjs +69 -0
  123. package/dist/tests/sms-policy-store.test.d.ts +1 -0
  124. package/dist/tests/sms-policy-store.test.js +63 -0
  125. package/dist/tests/teams-adapter.test.cjs +58 -0
  126. package/dist/tests/teams-adapter.test.d.ts +1 -0
  127. package/dist/tests/teams-adapter.test.js +52 -0
  128. package/dist/tests/terminal-bench-adapters-helpers.test.cjs +64 -0
  129. package/dist/tests/terminal-bench-adapters-helpers.test.d.ts +1 -0
  130. package/dist/tests/terminal-bench-adapters-helpers.test.js +58 -0
  131. package/dist/tests/terminal-bench-cleanup.test.cjs +93 -0
  132. package/dist/tests/terminal-bench-cleanup.test.d.ts +1 -0
  133. package/dist/tests/terminal-bench-cleanup.test.js +87 -0
  134. package/dist/tests/terminal-bench-config.test.cjs +62 -0
  135. package/dist/tests/terminal-bench-config.test.d.ts +1 -0
  136. package/dist/tests/terminal-bench-config.test.js +56 -0
  137. package/dist/tests/terminal-bench-official.test.cjs +194 -0
  138. package/dist/tests/terminal-bench-official.test.d.ts +1 -0
  139. package/dist/tests/terminal-bench-official.test.js +188 -0
  140. package/dist/tests/terminal-bench-runner.test.cjs +82 -0
  141. package/dist/tests/terminal-bench-runner.test.d.ts +1 -0
  142. package/dist/tests/terminal-bench-runner.test.js +76 -0
  143. package/dist/tests/terminal-bench-scoring.test.cjs +128 -0
  144. package/dist/tests/terminal-bench-scoring.test.d.ts +1 -0
  145. package/dist/tests/terminal-bench-scoring.test.js +122 -0
  146. package/dist/tools/mcp-fal-ai.cjs +1 -1
  147. package/dist/tools/mcp-fal-ai.js +1 -1
  148. package/dist/webui/assets/index-Cyg_Hs57.css +11 -0
  149. package/dist/webui/assets/{index-BMekSELC.js → index-DZXLLjaA.js} +109 -109
  150. package/dist/webui/index.html +2 -2
  151. package/package.json +11 -2
  152. package/templates/agents/game-dev/agent.md +122 -63
  153. package/templates/agents/game-dev/art-director.md +106 -0
  154. package/templates/agents/game-dev/game-designer.md +87 -0
  155. package/templates/agents/game-dev/scene-engineer.md +474 -0
  156. package/dist/webui/assets/index-Cwkg4DKj.css +0 -11
  157. package/templates/agents/game-dev/art-generation.md +0 -38
  158. package/templates/agents/game-dev/asset-refinement.md +0 -17
  159. package/templates/agents/game-dev/planning-idea.md +0 -17
  160. package/templates/agents/game-dev/ui-specialist.md +0 -17
@@ -0,0 +1,58 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { detectAssistantFailureMessage, parseWingmanJsonOutput } from "../bench/adapters/helpers.js";
3
+ describe("parseWingmanJsonOutput", ()=>{
4
+ it("extracts assistant text from ai-role messages", ()=>{
5
+ const output = JSON.stringify({
6
+ type: "agent-complete",
7
+ result: {
8
+ messages: [
9
+ {
10
+ role: "user",
11
+ content: "ignore"
12
+ },
13
+ {
14
+ role: "ai",
15
+ content: "hello from ai role"
16
+ }
17
+ ]
18
+ }
19
+ });
20
+ const parsed = parseWingmanJsonOutput(output);
21
+ expect(parsed.assistantText).toBe("hello from ai role");
22
+ });
23
+ it("extracts assistant text from serialized AIMessage payloads", ()=>{
24
+ const output = JSON.stringify({
25
+ type: "agent-complete",
26
+ result: {
27
+ messages: [
28
+ {
29
+ id: [
30
+ "langchain_core",
31
+ "messages",
32
+ "AIMessage"
33
+ ],
34
+ kwargs: {
35
+ content: [
36
+ {
37
+ type: "text",
38
+ text: "hello from kwargs content"
39
+ }
40
+ ]
41
+ }
42
+ }
43
+ ]
44
+ }
45
+ });
46
+ const parsed = parseWingmanJsonOutput(output);
47
+ expect(parsed.assistantText).toBe("hello from kwargs content");
48
+ });
49
+ });
50
+ describe("detectAssistantFailureMessage", ()=>{
51
+ it("detects provider bad request failures", ()=>{
52
+ const error = detectAssistantFailureMessage("Model call failed after 3 attempts with BadRequestError: 400 status code (no body)");
53
+ expect(error).toContain("BadRequestError");
54
+ });
55
+ it("ignores normal assistant content", ()=>{
56
+ expect(detectAssistantFailureMessage('{"state_analysis":"ok","commands":[],"is_task_complete":false}')).toBeUndefined();
57
+ });
58
+ });
@@ -0,0 +1,93 @@
1
+ "use strict";
2
+ var __webpack_exports__ = {};
3
+ const external_node_fs_namespaceObject = require("node:fs");
4
+ const promises_namespaceObject = require("node:fs/promises");
5
+ const external_node_os_namespaceObject = require("node:os");
6
+ const external_node_path_namespaceObject = require("node:path");
7
+ const external_vitest_namespaceObject = require("vitest");
8
+ const cleanup_cjs_namespaceObject = require("../bench/cleanup.cjs");
9
+ const tempDirs = [];
10
+ async function pathExists(path) {
11
+ try {
12
+ await (0, promises_namespaceObject.access)(path);
13
+ return true;
14
+ } catch {
15
+ return false;
16
+ }
17
+ }
18
+ (0, external_vitest_namespaceObject.describe)("terminal bench cleanup", ()=>{
19
+ (0, external_vitest_namespaceObject.afterEach)(async ()=>{
20
+ for (const dir of tempDirs)await (0, promises_namespaceObject.rm)(dir, {
21
+ recursive: true,
22
+ force: true
23
+ });
24
+ tempDirs.length = 0;
25
+ });
26
+ (0, external_vitest_namespaceObject.it)("targets generated bench artifacts only", async ()=>{
27
+ const root = await (0, promises_namespaceObject.mkdtemp)((0, external_node_path_namespaceObject.join)((0, external_node_os_namespaceObject.tmpdir)(), "wingman-bench-cleanup-"));
28
+ tempDirs.push(root);
29
+ const jobsRun = (0, external_node_path_namespaceObject.join)(root, "jobs", "2026-01-01__00-00-00");
30
+ const officialRun = (0, external_node_path_namespaceObject.join)(root, "bench", "results", "official", "2026-01-01__00-00-00");
31
+ const wrapperRun = (0, external_node_path_namespaceObject.join)(root, "bench", "results", "official-wrapper", "2026-01-01T00-00-00-000Z");
32
+ const pycacheDir = (0, external_node_path_namespaceObject.join)(root, "bench", "harbor_agents", "__pycache__");
33
+ const configPath = (0, external_node_path_namespaceObject.join)(root, "bench", "config.tb2-wingman.json");
34
+ (0, external_node_fs_namespaceObject.mkdirSync)(jobsRun, {
35
+ recursive: true
36
+ });
37
+ (0, external_node_fs_namespaceObject.mkdirSync)(officialRun, {
38
+ recursive: true
39
+ });
40
+ (0, external_node_fs_namespaceObject.mkdirSync)(wrapperRun, {
41
+ recursive: true
42
+ });
43
+ (0, external_node_fs_namespaceObject.mkdirSync)(pycacheDir, {
44
+ recursive: true
45
+ });
46
+ (0, external_node_fs_namespaceObject.writeFileSync)((0, external_node_path_namespaceObject.join)(jobsRun, "result.json"), "{}");
47
+ (0, external_node_fs_namespaceObject.writeFileSync)((0, external_node_path_namespaceObject.join)(officialRun, "result.json"), "{}");
48
+ (0, external_node_fs_namespaceObject.writeFileSync)((0, external_node_path_namespaceObject.join)(wrapperRun, "summary.json"), "{}");
49
+ (0, external_node_fs_namespaceObject.writeFileSync)((0, external_node_path_namespaceObject.join)(pycacheDir, "cache.pyc"), "x");
50
+ (0, external_node_fs_namespaceObject.writeFileSync)(configPath, "{}");
51
+ const targets = await (0, cleanup_cjs_namespaceObject.getBenchCleanupTargets)(root);
52
+ (0, external_vitest_namespaceObject.expect)(targets).toContain(jobsRun);
53
+ (0, external_vitest_namespaceObject.expect)(targets).toContain(officialRun);
54
+ (0, external_vitest_namespaceObject.expect)(targets).toContain(wrapperRun);
55
+ (0, external_vitest_namespaceObject.expect)(targets).toContain(pycacheDir);
56
+ (0, external_vitest_namespaceObject.expect)(targets).not.toContain(configPath);
57
+ });
58
+ (0, external_vitest_namespaceObject.it)("removes generated artifacts and keeps config files", async ()=>{
59
+ const root = await (0, promises_namespaceObject.mkdtemp)((0, external_node_path_namespaceObject.join)((0, external_node_os_namespaceObject.tmpdir)(), "wingman-bench-cleanup-"));
60
+ tempDirs.push(root);
61
+ const jobsRun = (0, external_node_path_namespaceObject.join)(root, "jobs", "2026-01-01__00-00-00");
62
+ const officialRun = (0, external_node_path_namespaceObject.join)(root, "bench", "results", "official", "2026-01-01__00-00-00");
63
+ const wrapperRun = (0, external_node_path_namespaceObject.join)(root, "bench", "results", "official-wrapper", "2026-01-01T00-00-00-000Z");
64
+ const pycacheDir = (0, external_node_path_namespaceObject.join)(root, "bench", "harbor_agents", "__pycache__");
65
+ const configPath = (0, external_node_path_namespaceObject.join)(root, "bench", "config.tb2-wingman.json");
66
+ (0, external_node_fs_namespaceObject.mkdirSync)(jobsRun, {
67
+ recursive: true
68
+ });
69
+ (0, external_node_fs_namespaceObject.mkdirSync)(officialRun, {
70
+ recursive: true
71
+ });
72
+ (0, external_node_fs_namespaceObject.mkdirSync)(wrapperRun, {
73
+ recursive: true
74
+ });
75
+ (0, external_node_fs_namespaceObject.mkdirSync)(pycacheDir, {
76
+ recursive: true
77
+ });
78
+ (0, external_node_fs_namespaceObject.writeFileSync)(configPath, '{"dataset":"terminal-bench@2.0"}');
79
+ const result = await (0, cleanup_cjs_namespaceObject.cleanBenchArtifacts)(root);
80
+ (0, external_vitest_namespaceObject.expect)(result.missingPaths).toHaveLength(0);
81
+ (0, external_vitest_namespaceObject.expect)(result.removedPaths.length).toBeGreaterThanOrEqual(4);
82
+ (0, external_vitest_namespaceObject.expect)(await pathExists(jobsRun)).toBe(false);
83
+ (0, external_vitest_namespaceObject.expect)(await pathExists(officialRun)).toBe(false);
84
+ (0, external_vitest_namespaceObject.expect)(await pathExists(wrapperRun)).toBe(false);
85
+ (0, external_vitest_namespaceObject.expect)(await pathExists(pycacheDir)).toBe(false);
86
+ (0, external_vitest_namespaceObject.expect)(await pathExists(configPath)).toBe(true);
87
+ (0, external_vitest_namespaceObject.expect)(await (0, promises_namespaceObject.readFile)(configPath, "utf-8")).toContain("terminal-bench@2.0");
88
+ });
89
+ });
90
+ for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
91
+ Object.defineProperty(exports, '__esModule', {
92
+ value: true
93
+ });
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,87 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import { access, mkdtemp, readFile, rm } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { afterEach, describe, expect, it } from "vitest";
6
+ import { cleanBenchArtifacts, getBenchCleanupTargets } from "../bench/cleanup.js";
7
+ const tempDirs = [];
8
+ async function pathExists(path) {
9
+ try {
10
+ await access(path);
11
+ return true;
12
+ } catch {
13
+ return false;
14
+ }
15
+ }
16
+ describe("terminal bench cleanup", ()=>{
17
+ afterEach(async ()=>{
18
+ for (const dir of tempDirs)await rm(dir, {
19
+ recursive: true,
20
+ force: true
21
+ });
22
+ tempDirs.length = 0;
23
+ });
24
+ it("targets generated bench artifacts only", async ()=>{
25
+ const root = await mkdtemp(join(tmpdir(), "wingman-bench-cleanup-"));
26
+ tempDirs.push(root);
27
+ const jobsRun = join(root, "jobs", "2026-01-01__00-00-00");
28
+ const officialRun = join(root, "bench", "results", "official", "2026-01-01__00-00-00");
29
+ const wrapperRun = join(root, "bench", "results", "official-wrapper", "2026-01-01T00-00-00-000Z");
30
+ const pycacheDir = join(root, "bench", "harbor_agents", "__pycache__");
31
+ const configPath = join(root, "bench", "config.tb2-wingman.json");
32
+ mkdirSync(jobsRun, {
33
+ recursive: true
34
+ });
35
+ mkdirSync(officialRun, {
36
+ recursive: true
37
+ });
38
+ mkdirSync(wrapperRun, {
39
+ recursive: true
40
+ });
41
+ mkdirSync(pycacheDir, {
42
+ recursive: true
43
+ });
44
+ writeFileSync(join(jobsRun, "result.json"), "{}");
45
+ writeFileSync(join(officialRun, "result.json"), "{}");
46
+ writeFileSync(join(wrapperRun, "summary.json"), "{}");
47
+ writeFileSync(join(pycacheDir, "cache.pyc"), "x");
48
+ writeFileSync(configPath, "{}");
49
+ const targets = await getBenchCleanupTargets(root);
50
+ expect(targets).toContain(jobsRun);
51
+ expect(targets).toContain(officialRun);
52
+ expect(targets).toContain(wrapperRun);
53
+ expect(targets).toContain(pycacheDir);
54
+ expect(targets).not.toContain(configPath);
55
+ });
56
+ it("removes generated artifacts and keeps config files", async ()=>{
57
+ const root = await mkdtemp(join(tmpdir(), "wingman-bench-cleanup-"));
58
+ tempDirs.push(root);
59
+ const jobsRun = join(root, "jobs", "2026-01-01__00-00-00");
60
+ const officialRun = join(root, "bench", "results", "official", "2026-01-01__00-00-00");
61
+ const wrapperRun = join(root, "bench", "results", "official-wrapper", "2026-01-01T00-00-00-000Z");
62
+ const pycacheDir = join(root, "bench", "harbor_agents", "__pycache__");
63
+ const configPath = join(root, "bench", "config.tb2-wingman.json");
64
+ mkdirSync(jobsRun, {
65
+ recursive: true
66
+ });
67
+ mkdirSync(officialRun, {
68
+ recursive: true
69
+ });
70
+ mkdirSync(wrapperRun, {
71
+ recursive: true
72
+ });
73
+ mkdirSync(pycacheDir, {
74
+ recursive: true
75
+ });
76
+ writeFileSync(configPath, '{"dataset":"terminal-bench@2.0"}');
77
+ const result = await cleanBenchArtifacts(root);
78
+ expect(result.missingPaths).toHaveLength(0);
79
+ expect(result.removedPaths.length).toBeGreaterThanOrEqual(4);
80
+ expect(await pathExists(jobsRun)).toBe(false);
81
+ expect(await pathExists(officialRun)).toBe(false);
82
+ expect(await pathExists(wrapperRun)).toBe(false);
83
+ expect(await pathExists(pycacheDir)).toBe(false);
84
+ expect(await pathExists(configPath)).toBe(true);
85
+ expect(await readFile(configPath, "utf-8")).toContain("terminal-bench@2.0");
86
+ });
87
+ });
@@ -0,0 +1,62 @@
1
+ "use strict";
2
+ var __webpack_exports__ = {};
3
+ const external_node_fs_namespaceObject = require("node:fs");
4
+ const external_node_os_namespaceObject = require("node:os");
5
+ const external_node_path_namespaceObject = require("node:path");
6
+ const external_vitest_namespaceObject = require("vitest");
7
+ const config_cjs_namespaceObject = require("../bench/config.cjs");
8
+ (0, external_vitest_namespaceObject.describe)("terminal bench config", ()=>{
9
+ const workdirs = [];
10
+ (0, external_vitest_namespaceObject.afterEach)(()=>{
11
+ for (const workdir of workdirs)(0, external_node_fs_namespaceObject.rmSync)(workdir, {
12
+ recursive: true,
13
+ force: true
14
+ });
15
+ workdirs.length = 0;
16
+ });
17
+ (0, external_vitest_namespaceObject.it)("loads config/tasks with defaults and resolves relative paths", async ()=>{
18
+ const workdir = (0, external_node_fs_namespaceObject.mkdtempSync)((0, external_node_path_namespaceObject.join)((0, external_node_os_namespaceObject.tmpdir)(), "wingman-bench-config-"));
19
+ workdirs.push(workdir);
20
+ const taskFilePath = (0, external_node_path_namespaceObject.join)(workdir, "tasks.json");
21
+ (0, external_node_fs_namespaceObject.writeFileSync)(taskFilePath, JSON.stringify({
22
+ tasks: [
23
+ {
24
+ id: "t1",
25
+ prompt: "hello",
26
+ validator: {
27
+ type: "assistant_contains",
28
+ includes: "ok"
29
+ }
30
+ }
31
+ ]
32
+ }, null, 2));
33
+ const configPath = (0, external_node_path_namespaceObject.join)(workdir, "config.json");
34
+ (0, external_node_fs_namespaceObject.writeFileSync)(configPath, JSON.stringify({
35
+ taskFile: "./tasks.json",
36
+ adapter: {
37
+ type: "command",
38
+ command: {
39
+ command: "echo",
40
+ args: [
41
+ "ok"
42
+ ]
43
+ }
44
+ }
45
+ }, null, 2));
46
+ const config = await (0, config_cjs_namespaceObject.loadTerminalBenchConfig)(configPath);
47
+ (0, external_vitest_namespaceObject.expect)(config.version).toBe(1);
48
+ (0, external_vitest_namespaceObject.expect)(config.taskFilePath).toBe(taskFilePath);
49
+ (0, external_vitest_namespaceObject.expect)(config.resultsDir).toBe((0, external_node_path_namespaceObject.join)(workdir, "bench/results"));
50
+ (0, external_vitest_namespaceObject.expect)(config.run.defaultTimeoutMs).toBe(300000);
51
+ (0, external_vitest_namespaceObject.expect)(config.tasks).toHaveLength(1);
52
+ (0, external_vitest_namespaceObject.expect)(config.tasks[0].validator.type).toBe("assistant_contains");
53
+ if ("assistant_contains" !== config.tasks[0].validator.type) throw new Error("Unexpected validator type");
54
+ (0, external_vitest_namespaceObject.expect)(config.tasks[0].validator.includes).toEqual([
55
+ "ok"
56
+ ]);
57
+ });
58
+ });
59
+ for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
60
+ Object.defineProperty(exports, '__esModule', {
61
+ value: true
62
+ });
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,56 @@
1
+ import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
4
+ import { afterEach, describe, expect, it } from "vitest";
5
+ import { loadTerminalBenchConfig } from "../bench/config.js";
6
+ describe("terminal bench config", ()=>{
7
+ const workdirs = [];
8
+ afterEach(()=>{
9
+ for (const workdir of workdirs)rmSync(workdir, {
10
+ recursive: true,
11
+ force: true
12
+ });
13
+ workdirs.length = 0;
14
+ });
15
+ it("loads config/tasks with defaults and resolves relative paths", async ()=>{
16
+ const workdir = mkdtempSync(join(tmpdir(), "wingman-bench-config-"));
17
+ workdirs.push(workdir);
18
+ const taskFilePath = join(workdir, "tasks.json");
19
+ writeFileSync(taskFilePath, JSON.stringify({
20
+ tasks: [
21
+ {
22
+ id: "t1",
23
+ prompt: "hello",
24
+ validator: {
25
+ type: "assistant_contains",
26
+ includes: "ok"
27
+ }
28
+ }
29
+ ]
30
+ }, null, 2));
31
+ const configPath = join(workdir, "config.json");
32
+ writeFileSync(configPath, JSON.stringify({
33
+ taskFile: "./tasks.json",
34
+ adapter: {
35
+ type: "command",
36
+ command: {
37
+ command: "echo",
38
+ args: [
39
+ "ok"
40
+ ]
41
+ }
42
+ }
43
+ }, null, 2));
44
+ const config = await loadTerminalBenchConfig(configPath);
45
+ expect(config.version).toBe(1);
46
+ expect(config.taskFilePath).toBe(taskFilePath);
47
+ expect(config.resultsDir).toBe(join(workdir, "bench/results"));
48
+ expect(config.run.defaultTimeoutMs).toBe(300000);
49
+ expect(config.tasks).toHaveLength(1);
50
+ expect(config.tasks[0].validator.type).toBe("assistant_contains");
51
+ if ("assistant_contains" !== config.tasks[0].validator.type) throw new Error("Unexpected validator type");
52
+ expect(config.tasks[0].validator.includes).toEqual([
53
+ "ok"
54
+ ]);
55
+ });
56
+ });
@@ -0,0 +1,194 @@
1
+ "use strict";
2
+ var __webpack_exports__ = {};
3
+ const external_vitest_namespaceObject = require("vitest");
4
+ const official_cjs_namespaceObject = require("../bench/official.cjs");
5
+ (0, external_vitest_namespaceObject.describe)("terminal bench official runner (harbor tb2)", ()=>{
6
+ (0, external_vitest_namespaceObject.it)("builds harbor args with overrides", ()=>{
7
+ const args = (0, official_cjs_namespaceObject.buildHarborRunArgs)({
8
+ dataset: "terminal-bench@2.0",
9
+ taskNames: [
10
+ "a",
11
+ "b"
12
+ ],
13
+ agent: "oracle",
14
+ nConcurrent: 1,
15
+ nAttempts: 1
16
+ }, {
17
+ taskNames: [
18
+ "single"
19
+ ],
20
+ agent: "codex",
21
+ nConcurrent: 2,
22
+ nAttempts: 3,
23
+ nTasks: 2,
24
+ model: "openai/gpt-4.1-mini",
25
+ agentKwargs: {
26
+ foo: "bar"
27
+ }
28
+ });
29
+ (0, external_vitest_namespaceObject.expect)(args).toEqual([
30
+ "run",
31
+ "--dataset",
32
+ "terminal-bench@2.0",
33
+ "--agent",
34
+ "codex",
35
+ "--model",
36
+ "openai/gpt-4.1-mini",
37
+ "--n-concurrent",
38
+ "2",
39
+ "--n-attempts",
40
+ "3",
41
+ "--n-tasks",
42
+ "2",
43
+ "--agent-kwarg",
44
+ "foo=bar",
45
+ "--task-name",
46
+ "single"
47
+ ]);
48
+ });
49
+ (0, external_vitest_namespaceObject.it)("builds harbor args with explicit registry url", ()=>{
50
+ const args = (0, official_cjs_namespaceObject.buildHarborRunArgs)({
51
+ dataset: "terminal-bench@2.0",
52
+ registryUrl: "https://raw.githubusercontent.com/laude-institute/harbor/main/registry.json?source=wingman",
53
+ agent: "oracle"
54
+ }, {});
55
+ (0, external_vitest_namespaceObject.expect)(args).toEqual([
56
+ "run",
57
+ "--dataset",
58
+ "terminal-bench@2.0",
59
+ "--registry-url",
60
+ "https://raw.githubusercontent.com/laude-institute/harbor/main/registry.json?source=wingman",
61
+ "--agent",
62
+ "oracle"
63
+ ]);
64
+ });
65
+ (0, external_vitest_namespaceObject.it)("builds harbor args without task names when running all dataset tasks", ()=>{
66
+ const args = (0, official_cjs_namespaceObject.buildHarborRunArgs)({
67
+ dataset: "terminal-bench@2.0",
68
+ agent: "oracle",
69
+ nConcurrent: 1
70
+ }, {
71
+ taskNames: []
72
+ });
73
+ (0, external_vitest_namespaceObject.expect)(args).toEqual([
74
+ "run",
75
+ "--dataset",
76
+ "terminal-bench@2.0",
77
+ "--agent",
78
+ "oracle",
79
+ "--n-concurrent",
80
+ "1"
81
+ ]);
82
+ });
83
+ (0, external_vitest_namespaceObject.it)("builds harbor args with custom import-path agent", ()=>{
84
+ const args = (0, official_cjs_namespaceObject.buildHarborRunArgs)({
85
+ dataset: "terminal-bench@2.0",
86
+ taskNames: [
87
+ "hello-world"
88
+ ],
89
+ agent: "oracle",
90
+ agentImportPath: "my_pkg.my_agent:MyAgent",
91
+ agentKwargs: {
92
+ wingman_agent: "coding",
93
+ model_name: "should-not-pass"
94
+ },
95
+ nConcurrent: 1
96
+ }, {
97
+ agentKwargs: {
98
+ wingman_cli_path: "./bin/wingman"
99
+ }
100
+ });
101
+ (0, external_vitest_namespaceObject.expect)(args).toEqual([
102
+ "run",
103
+ "--dataset",
104
+ "terminal-bench@2.0",
105
+ "--agent-import-path",
106
+ "my_pkg.my_agent:MyAgent",
107
+ "--n-concurrent",
108
+ "1",
109
+ "--agent-kwarg",
110
+ "wingman_agent=coding",
111
+ "--agent-kwarg",
112
+ "wingman_cli_path=./bin/wingman",
113
+ "--task-name",
114
+ "hello-world"
115
+ ]);
116
+ });
117
+ (0, external_vitest_namespaceObject.it)("parses resolved/unresolved/accuracy and pass@k", ()=>{
118
+ const parsed = (0, official_cjs_namespaceObject.parseHarborRunOutput)(`
119
+ │ Resolved Trials │ 1 │
120
+ │ Unresolved Trials │ 1 │
121
+ │ Accuracy │ 50.00% │
122
+ │ Pass@1 │ 50.00% │
123
+ Results saved to /tmp/harbor/runs/run-1
124
+ `);
125
+ (0, external_vitest_namespaceObject.expect)(parsed.resolvedTrials).toBe(1);
126
+ (0, external_vitest_namespaceObject.expect)(parsed.unresolvedTrials).toBe(1);
127
+ (0, external_vitest_namespaceObject.expect)(parsed.accuracyPercent).toBe(50);
128
+ (0, external_vitest_namespaceObject.expect)(parsed.passAtK["1"]).toBe(50);
129
+ (0, external_vitest_namespaceObject.expect)(parsed.runOutputPath).toBe("/tmp/harbor/runs/run-1");
130
+ });
131
+ (0, external_vitest_namespaceObject.it)("builds a docker shim script and path for podman fallback", ()=>{
132
+ const script = (0, official_cjs_namespaceObject.createDockerShimScript)("/usr/local/bin/podman");
133
+ (0, external_vitest_namespaceObject.expect)(script).toContain("TARGET_BINARY='/usr/local/bin/podman'");
134
+ (0, external_vitest_namespaceObject.expect)(script).toContain("exec podman-compose");
135
+ (0, external_vitest_namespaceObject.expect)(script).toContain("exec podman cp");
136
+ (0, external_vitest_namespaceObject.expect)(script).toContain("exec podman exec");
137
+ (0, external_vitest_namespaceObject.expect)(script).toContain("label=com.docker.compose.project");
138
+ (0, external_vitest_namespaceObject.expect)(script).toContain("--project-directory");
139
+ (0, external_vitest_namespaceObject.expect)(script.startsWith("#!/bin/bash")).toBe(true);
140
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.buildRuntimePathEnv)("/tmp/runtime-bin", "/usr/bin")).toBe("/tmp/runtime-bin:/usr/bin");
141
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.buildPythonPathEnv)("/tmp/repo", "/usr/lib/python")).toBe("/tmp/repo:/usr/lib/python");
142
+ });
143
+ (0, external_vitest_namespaceObject.it)("extracts a concise harbor error message", ()=>{
144
+ const message = (0, official_cjs_namespaceObject.extractHarborErrorMessage)(`
145
+ Traceback...
146
+ ValueError: No tasks found matching pattern: jq-data-processing
147
+ `);
148
+ (0, external_vitest_namespaceObject.expect)(message).toBe("ValueError: No tasks found matching pattern: jq-data-processing");
149
+ });
150
+ (0, external_vitest_namespaceObject.it)("extracts a specific dataset resolution error over generic fallback", ()=>{
151
+ const message = (0, official_cjs_namespaceObject.extractHarborErrorMessage)(`
152
+ Traceback...
153
+ ValueError: Error getting dataset terminal-bench@2.0
154
+ ValueError: Either datasets or tasks must be provided.
155
+ `);
156
+ (0, external_vitest_namespaceObject.expect)(message).toBe("ValueError: Error getting dataset terminal-bench@2.0");
157
+ });
158
+ (0, external_vitest_namespaceObject.it)("rewrites generic empty-task selection error", ()=>{
159
+ const message = (0, official_cjs_namespaceObject.normalizeHarborFailureMessage)({
160
+ rawMessage: "ValueError: Either datasets or tasks must be provided.",
161
+ args: [
162
+ "run",
163
+ "--dataset",
164
+ "terminal-bench@2.0",
165
+ "--task-name",
166
+ "heterogeneous-dates"
167
+ ],
168
+ dataset: "terminal-bench@2.0"
169
+ });
170
+ (0, external_vitest_namespaceObject.expect)(message).toBe('No tasks matched "heterogeneous-dates" in dataset "terminal-bench@2.0". Verify task ids for Terminal-Bench 2.0.');
171
+ });
172
+ (0, external_vitest_namespaceObject.it)("normalizes podman docker host candidates", ()=>{
173
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.parseDockerHostCandidate)("unix:///tmp/podman.sock")).toBe("unix:///tmp/podman.sock");
174
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.parseDockerHostCandidate)("/tmp/podman.sock")).toBe("unix:///tmp/podman.sock");
175
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.parseDockerHostCandidate)("'unix:///tmp/podman.sock'")).toBe("unix:///tmp/podman.sock");
176
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.parseDockerHostCandidate)("<nil>")).toBeUndefined();
177
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.parseDockerHostCandidate)(void 0)).toBeUndefined();
178
+ });
179
+ (0, external_vitest_namespaceObject.it)("detects missing compose provider errors", ()=>{
180
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.isMissingComposeProviderError)(`
181
+ Error: looking up compose provider failed
182
+ * exec: "podman-compose": executable file not found in $PATH
183
+ `)).toBe(true);
184
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.isMissingComposeProviderError)("some other error")).toBe(false);
185
+ });
186
+ (0, external_vitest_namespaceObject.it)("detects podman-backed docker version output", ()=>{
187
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.isPodmanBackedDockerVersionOutput)("Emulate Docker CLI using podman")).toBe(true);
188
+ (0, external_vitest_namespaceObject.expect)((0, official_cjs_namespaceObject.isPodmanBackedDockerVersionOutput)("Docker version 27.0.0")).toBe(false);
189
+ });
190
+ });
191
+ for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
192
+ Object.defineProperty(exports, '__esModule', {
193
+ value: true
194
+ });
@@ -0,0 +1 @@
1
+ export {};