@wingman-ai/gateway 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/README.md +14 -0
  2. package/dist/agent/config/mcpClientManager.cjs +104 -1
  3. package/dist/agent/config/mcpClientManager.d.ts +30 -0
  4. package/dist/agent/config/mcpClientManager.js +104 -1
  5. package/dist/agent/config/modelFactory.cjs +10 -0
  6. package/dist/agent/config/modelFactory.js +10 -0
  7. package/dist/agent/config/xaiImageModel.cjs +242 -0
  8. package/dist/agent/config/xaiImageModel.d.ts +33 -0
  9. package/dist/agent/config/xaiImageModel.js +202 -0
  10. package/dist/agent/tests/mcpClientManager.test.cjs +116 -0
  11. package/dist/agent/tests/mcpClientManager.test.js +117 -1
  12. package/dist/agent/tests/mcpResourceTools.test.cjs +101 -0
  13. package/dist/agent/tests/mcpResourceTools.test.d.ts +1 -0
  14. package/dist/agent/tests/mcpResourceTools.test.js +95 -0
  15. package/dist/agent/tests/modelFactory.test.cjs +16 -2
  16. package/dist/agent/tests/modelFactory.test.js +16 -2
  17. package/dist/agent/tests/xaiImageModel.test.cjs +194 -0
  18. package/dist/agent/tests/xaiImageModel.test.d.ts +1 -0
  19. package/dist/agent/tests/xaiImageModel.test.js +188 -0
  20. package/dist/agent/tools/mcp_resources.cjs +111 -0
  21. package/dist/agent/tools/mcp_resources.d.ts +3 -0
  22. package/dist/agent/tools/mcp_resources.js +77 -0
  23. package/dist/bench/adapters/commandAdapter.cjs +93 -0
  24. package/dist/bench/adapters/commandAdapter.d.ts +6 -0
  25. package/dist/bench/adapters/commandAdapter.js +59 -0
  26. package/dist/bench/adapters/helpers.cjs +170 -0
  27. package/dist/bench/adapters/helpers.d.ts +7 -0
  28. package/dist/bench/adapters/helpers.js +133 -0
  29. package/dist/bench/adapters/index.cjs +41 -0
  30. package/dist/bench/adapters/index.d.ts +2 -0
  31. package/dist/bench/adapters/index.js +7 -0
  32. package/dist/bench/adapters/wingmanCliAdapter.cjs +100 -0
  33. package/dist/bench/adapters/wingmanCliAdapter.d.ts +6 -0
  34. package/dist/bench/adapters/wingmanCliAdapter.js +66 -0
  35. package/dist/bench/cleanup.cjs +122 -0
  36. package/dist/bench/cleanup.d.ts +9 -0
  37. package/dist/bench/cleanup.js +85 -0
  38. package/dist/bench/config.cjs +190 -0
  39. package/dist/bench/config.d.ts +2 -0
  40. package/dist/bench/config.js +156 -0
  41. package/dist/bench/index.cjs +43 -0
  42. package/dist/bench/index.d.ts +3 -0
  43. package/dist/bench/index.js +3 -0
  44. package/dist/bench/official.cjs +616 -0
  45. package/dist/bench/official.d.ts +80 -0
  46. package/dist/bench/official.js +546 -0
  47. package/dist/bench/officialCli.cjs +204 -0
  48. package/dist/bench/officialCli.d.ts +5 -0
  49. package/dist/bench/officialCli.js +170 -0
  50. package/dist/bench/process.cjs +78 -0
  51. package/dist/bench/process.d.ts +14 -0
  52. package/dist/bench/process.js +44 -0
  53. package/dist/bench/runner.cjs +237 -0
  54. package/dist/bench/runner.d.ts +7 -0
  55. package/dist/bench/runner.js +197 -0
  56. package/dist/bench/scoring.cjs +171 -0
  57. package/dist/bench/scoring.d.ts +9 -0
  58. package/dist/bench/scoring.js +137 -0
  59. package/dist/bench/types.cjs +18 -0
  60. package/dist/bench/types.d.ts +200 -0
  61. package/dist/bench/types.js +0 -0
  62. package/dist/bench/validator.cjs +92 -0
  63. package/dist/bench/validator.d.ts +2 -0
  64. package/dist/bench/validator.js +58 -0
  65. package/dist/cli/config/schema.cjs +36 -1
  66. package/dist/cli/config/schema.d.ts +46 -0
  67. package/dist/cli/config/schema.js +36 -1
  68. package/dist/cli/config/warnings.cjs +119 -51
  69. package/dist/cli/config/warnings.js +119 -51
  70. package/dist/cli/core/agentInvoker.cjs +9 -2
  71. package/dist/cli/core/agentInvoker.d.ts +1 -0
  72. package/dist/cli/core/agentInvoker.js +9 -2
  73. package/dist/cli/core/imagePersistence.cjs +17 -1
  74. package/dist/cli/core/imagePersistence.d.ts +2 -0
  75. package/dist/cli/core/imagePersistence.js +13 -3
  76. package/dist/cli/core/sessionManager.cjs +2 -0
  77. package/dist/cli/core/sessionManager.js +3 -1
  78. package/dist/cli/types.d.ts +18 -0
  79. package/dist/gateway/adapters/teams.cjs +419 -0
  80. package/dist/gateway/adapters/teams.d.ts +47 -0
  81. package/dist/gateway/adapters/teams.js +361 -0
  82. package/dist/gateway/http/sms.cjs +286 -0
  83. package/dist/gateway/http/sms.d.ts +4 -0
  84. package/dist/gateway/http/sms.js +249 -0
  85. package/dist/gateway/server.cjs +54 -3
  86. package/dist/gateway/server.d.ts +2 -0
  87. package/dist/gateway/server.js +54 -3
  88. package/dist/gateway/sms/commands.cjs +116 -0
  89. package/dist/gateway/sms/commands.d.ts +15 -0
  90. package/dist/gateway/sms/commands.js +79 -0
  91. package/dist/gateway/sms/control.cjs +118 -0
  92. package/dist/gateway/sms/control.d.ts +18 -0
  93. package/dist/gateway/sms/control.js +84 -0
  94. package/dist/gateway/sms/policyStore.cjs +198 -0
  95. package/dist/gateway/sms/policyStore.d.ts +37 -0
  96. package/dist/gateway/sms/policyStore.js +161 -0
  97. package/dist/providers/registry.cjs +1 -0
  98. package/dist/providers/registry.js +1 -0
  99. package/dist/tests/cli-config-warnings.test.cjs +41 -0
  100. package/dist/tests/cli-config-warnings.test.js +41 -0
  101. package/dist/tests/cli-init.test.cjs +32 -26
  102. package/dist/tests/cli-init.test.js +32 -26
  103. package/dist/tests/gateway-http-security.test.cjs +21 -0
  104. package/dist/tests/gateway-http-security.test.js +21 -0
  105. package/dist/tests/gateway-origin-policy.test.cjs +22 -0
  106. package/dist/tests/gateway-origin-policy.test.js +22 -0
  107. package/dist/tests/gateway.test.cjs +57 -0
  108. package/dist/tests/gateway.test.js +57 -0
  109. package/dist/tests/imagePersistence.test.cjs +26 -0
  110. package/dist/tests/imagePersistence.test.js +27 -1
  111. package/dist/tests/run-terminal-bench-official-script.test.cjs +61 -0
  112. package/dist/tests/run-terminal-bench-official-script.test.d.ts +1 -0
  113. package/dist/tests/run-terminal-bench-official-script.test.js +55 -0
  114. package/dist/tests/sessions-api.test.cjs +69 -1
  115. package/dist/tests/sessions-api.test.js +70 -2
  116. package/dist/tests/sms-api.test.cjs +183 -0
  117. package/dist/tests/sms-api.test.d.ts +1 -0
  118. package/dist/tests/sms-api.test.js +177 -0
  119. package/dist/tests/sms-commands.test.cjs +90 -0
  120. package/dist/tests/sms-commands.test.d.ts +1 -0
  121. package/dist/tests/sms-commands.test.js +84 -0
  122. package/dist/tests/sms-policy-store.test.cjs +69 -0
  123. package/dist/tests/sms-policy-store.test.d.ts +1 -0
  124. package/dist/tests/sms-policy-store.test.js +63 -0
  125. package/dist/tests/teams-adapter.test.cjs +58 -0
  126. package/dist/tests/teams-adapter.test.d.ts +1 -0
  127. package/dist/tests/teams-adapter.test.js +52 -0
  128. package/dist/tests/terminal-bench-adapters-helpers.test.cjs +64 -0
  129. package/dist/tests/terminal-bench-adapters-helpers.test.d.ts +1 -0
  130. package/dist/tests/terminal-bench-adapters-helpers.test.js +58 -0
  131. package/dist/tests/terminal-bench-cleanup.test.cjs +93 -0
  132. package/dist/tests/terminal-bench-cleanup.test.d.ts +1 -0
  133. package/dist/tests/terminal-bench-cleanup.test.js +87 -0
  134. package/dist/tests/terminal-bench-config.test.cjs +62 -0
  135. package/dist/tests/terminal-bench-config.test.d.ts +1 -0
  136. package/dist/tests/terminal-bench-config.test.js +56 -0
  137. package/dist/tests/terminal-bench-official.test.cjs +194 -0
  138. package/dist/tests/terminal-bench-official.test.d.ts +1 -0
  139. package/dist/tests/terminal-bench-official.test.js +188 -0
  140. package/dist/tests/terminal-bench-runner.test.cjs +82 -0
  141. package/dist/tests/terminal-bench-runner.test.d.ts +1 -0
  142. package/dist/tests/terminal-bench-runner.test.js +76 -0
  143. package/dist/tests/terminal-bench-scoring.test.cjs +128 -0
  144. package/dist/tests/terminal-bench-scoring.test.d.ts +1 -0
  145. package/dist/tests/terminal-bench-scoring.test.js +122 -0
  146. package/dist/tools/mcp-fal-ai.cjs +1 -1
  147. package/dist/tools/mcp-fal-ai.js +1 -1
  148. package/dist/webui/assets/index-Cyg_Hs57.css +11 -0
  149. package/dist/webui/assets/{index-BMekSELC.js → index-DZXLLjaA.js} +109 -109
  150. package/dist/webui/index.html +2 -2
  151. package/package.json +11 -2
  152. package/templates/agents/game-dev/agent.md +122 -63
  153. package/templates/agents/game-dev/art-director.md +106 -0
  154. package/templates/agents/game-dev/game-designer.md +87 -0
  155. package/templates/agents/game-dev/scene-engineer.md +474 -0
  156. package/dist/webui/assets/index-Cwkg4DKj.css +0 -11
  157. package/templates/agents/game-dev/art-generation.md +0 -38
  158. package/templates/agents/game-dev/asset-refinement.md +0 -17
  159. package/templates/agents/game-dev/planning-idea.md +0 -17
  160. package/templates/agents/game-dev/ui-specialist.md +0 -17
@@ -0,0 +1,3 @@
1
+ import { loadTerminalBenchConfig } from "./config.js";
2
+ import { runTerminalBench, runTerminalBenchWithConfig } from "./runner.js";
3
+ export { loadTerminalBenchConfig, runTerminalBench, runTerminalBenchWithConfig };
@@ -0,0 +1,616 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ runOfficialTerminalBench: ()=>runOfficialTerminalBench,
28
+ createDockerShimScript: ()=>createDockerShimScript,
29
+ extractTaskNamesFromArgs: ()=>extractTaskNamesFromArgs,
30
+ buildRuntimePathEnv: ()=>buildRuntimePathEnv,
31
+ extractHarborErrorMessage: ()=>extractHarborErrorMessage,
32
+ parseHarborRunOutput: ()=>parseHarborRunOutput,
33
+ normalizeHarborFailureMessage: ()=>normalizeHarborFailureMessage,
34
+ buildHarborRunArgs: ()=>buildHarborRunArgs,
35
+ isPodmanBackedDockerVersionOutput: ()=>isPodmanBackedDockerVersionOutput,
36
+ buildPythonPathEnv: ()=>buildPythonPathEnv,
37
+ parseDockerHostCandidate: ()=>parseDockerHostCandidate,
38
+ loadOfficialBenchConfig: ()=>loadOfficialBenchConfig,
39
+ isMissingComposeProviderError: ()=>isMissingComposeProviderError
40
+ });
41
+ const promises_namespaceObject = require("node:fs/promises");
42
+ const external_node_path_namespaceObject = require("node:path");
43
+ const external_zod_namespaceObject = require("zod");
44
+ const external_process_cjs_namespaceObject = require("./process.cjs");
45
/**
 * Zod schema for the official Terminal-Bench wrapper config file.
 *
 * Only `dataset` is required; every other key is optional. String fields
 * other than `model` and the `extraArgs` entries must be non-empty, and the
 * numeric fields must be positive integers.
 */
const officialConfigSchema = (()=>{
    const { z } = external_zod_namespaceObject;
    // Small factories so each field below reads as its constraint.
    const nonEmptyString = ()=>z.string().min(1);
    const positiveInteger = ()=>z.number().int().positive();
    return z.object({
        dataset: nonEmptyString(),
        taskNames: z.array(nonEmptyString()).optional(),
        registryUrl: nonEmptyString().optional(),
        registryPath: nonEmptyString().optional(),
        agent: nonEmptyString().optional(),
        agentImportPath: nonEmptyString().optional(),
        agentKwargs: z.record(z.string(), z.string()).optional(),
        model: z.string().optional(),
        nConcurrent: positiveInteger().optional(),
        nAttempts: positiveInteger().optional(),
        nTasks: positiveInteger().optional(),
        timeoutMs: positiveInteger().optional(),
        extraArgs: z.array(z.string()).optional()
    });
})();
60
/**
 * Collects every task name selected in a Harbor argument list.
 *
 * Both spellings are recognised: `--task-name <value>` (two tokens) and
 * `--task-name=<value>` (one token).
 *
 * @param {string[]} args - Harbor CLI arguments.
 * @returns {string[]} Task names in the order they appear.
 */
function extractTaskNamesFromArgs(args) {
    const FLAG = "--task-name";
    const INLINE_PREFIX = `${FLAG}=`;
    const collected = [];
    let index = 0;
    while (index < args.length) {
        const current = args[index];
        const following = args[index + 1];
        if (current === FLAG && following) {
            // Two-token form: consume the flag together with its value.
            collected.push(following);
            index += 2;
            continue;
        }
        if (current.startsWith(INLINE_PREFIX)) collected.push(current.slice(INLINE_PREFIX.length));
        index += 1;
    }
    return collected;
}
73
/**
 * Rewrites Harbor's generic "Either datasets or tasks must be provided"
 * error into a message naming the task ids that were requested.
 *
 * Any other message, or the generic one when no `--task-name` was passed,
 * is returned unchanged.
 *
 * @param {{ rawMessage: (string|undefined), args: string[], dataset: string }} params
 * @returns {(string|undefined)} The rewritten or original message.
 */
function normalizeHarborFailureMessage(params) {
    const EMPTY_SELECTION_ERROR = "ValueError: Either datasets or tasks must be provided.";
    const { rawMessage, args, dataset } = params;
    if (rawMessage !== EMPTY_SELECTION_ERROR) {
        return rawMessage;
    }
    const requestedTasks = extractTaskNamesFromArgs(args);
    if (requestedTasks.length === 0) {
        return rawMessage;
    }
    const quotedTasks = requestedTasks.map((taskName)=>`"${taskName}"`).join(", ");
    return `No tasks matched ${quotedTasks} in dataset "${dataset}". Verify task ids for Terminal-Bench 2.0.`;
}
80
/**
 * Removes ANSI CSI escape sequences (ESC `[` … letter) from a string.
 *
 * @param {string} value - Text that may contain terminal escape codes.
 * @returns {string} The text with those sequences removed.
 */
function stripAnsi(value) {
    const ansiSequence = /\u001B\[[0-9;]*[A-Za-z]/g;
    return value.replaceAll(ansiSequence, "");
}
83
/**
 * Extracts the number at the end of a metrics-table line.
 *
 * The number may be followed by an optional `%` and an optional trailing
 * table border (`│` or `|`).
 *
 * @param {string} line - One line of Harbor output.
 * @returns {(number|undefined)} The parsed value, or `undefined` when the
 *   line does not end in a finite number.
 */
function parseMetricNumber(line) {
    const trailingNumber = /(-?\d+(?:\.\d+)?)(?:\s*%?)\s*[│|]?\s*$/.exec(line);
    if (null === trailingNumber) return undefined;
    const parsed = Number.parseFloat(trailingNumber[1]);
    if (!Number.isFinite(parsed)) return undefined;
    return parsed;
}
89
/**
 * Reports whether command output contains one of the known "compose
 * provider is missing" messages. Matching is case-insensitive and ignores
 * ANSI escape codes.
 *
 * @param {string} output - Combined stdout/stderr text.
 * @returns {boolean} `true` when a known signature is present.
 */
function isMissingComposeProviderError(output) {
    const haystack = stripAnsi(output).toLowerCase();
    const signatures = [
        "looking up compose provider failed",
        'exec: "docker-compose": executable file not found',
        'exec: "podman-compose": executable file not found'
    ];
    return signatures.some((signature)=>haystack.includes(signature));
}
93
/**
 * Reports whether `docker --version` output indicates the `docker` command
 * is actually backed by Podman. Matching is case-insensitive and ignores
 * ANSI escape codes.
 *
 * @param {string} output - Combined stdout/stderr of the version probe.
 * @returns {boolean} `true` when a Podman marker is present.
 */
function isPodmanBackedDockerVersionOutput(output) {
    const haystack = stripAnsi(output).toLowerCase();
    const podmanMarkers = [
        "podman",
        "emulate docker cli using podman"
    ];
    return podmanMarkers.some((marker)=>haystack.includes(marker));
}
97
/**
 * Scrapes summary metrics and the results location out of Harbor's
 * console output.
 *
 * Each non-empty line is classified by the first label it contains, in
 * this order: "Resolved Trials", "Unresolved Trials", "Accuracy",
 * "Pass@<k>", and finally a "results written to / results saved to /
 * output written to <path>" line. When a label repeats, the last
 * occurrence wins.
 *
 * @param {string} output - Combined stdout/stderr of the Harbor run.
 * @returns {{ resolvedTrials: (number|undefined), unresolvedTrials: (number|undefined), accuracyPercent: (number|undefined), passAtK: Object<string, number>, runOutputPath: (string|undefined) }}
 */
function parseHarborRunOutput(output) {
    const OUTPUT_PATH_PATTERNS = [
        /results written to\s+(.+)$/i,
        /results saved to\s+(.+)$/i,
        /output written to\s+(.+)$/i
    ];
    const summary = {
        resolvedTrials: undefined,
        unresolvedTrials: undefined,
        accuracyPercent: undefined,
        passAtK: {},
        runOutputPath: undefined
    };
    const meaningfulLines = stripAnsi(output).split(/\r?\n/).map((rawLine)=>rawLine.trim()).filter((line)=>line.length > 0);
    for (const line of meaningfulLines){
        if (line.includes("Resolved Trials")) {
            summary.resolvedTrials = parseMetricNumber(line);
        } else if (line.includes("Unresolved Trials")) {
            summary.unresolvedTrials = parseMetricNumber(line);
        } else if (line.includes("Accuracy")) {
            summary.accuracyPercent = parseMetricNumber(line);
        } else {
            const passLabel = /Pass@(\d+)/i.exec(line);
            if (passLabel) {
                // A Pass@k line is consumed even when no value can be parsed from it.
                const metric = parseMetricNumber(line);
                if (undefined !== metric) summary.passAtK[passLabel[1]] = metric;
            } else {
                const located = OUTPUT_PATH_PATTERNS.map((pattern)=>pattern.exec(line)).find(Boolean);
                if (located?.[1]) summary.runOutputPath = located[1].trim();
            }
        }
    }
    return summary;
}
137
/**
 * Picks the most useful single-line error out of Harbor's output.
 *
 * Priority, highest first:
 *   1. the last "ValueError: Error getting dataset…" occurrence,
 *   2. the last "ConnectError:…" occurrence,
 *   3. a fixed DNS/network hint when a name-resolution failure is present,
 *   4. the last line starting with "ValueError:",
 *   5. the last line containing "<Something>Error:",
 *   6. the last non-empty line.
 *
 * @param {string} stderr - Harbor output (ANSI escape codes are ignored).
 * @returns {(string|undefined)} The chosen message, or `undefined` when
 *   the output has no non-empty line.
 */
function extractHarborErrorMessage(stderr) {
    const normalized = stripAnsi(stderr);
    const datasetErrors = normalized.match(/ValueError: Error getting dataset[^\n]*/g);
    if (datasetErrors) return datasetErrors.at(-1);
    const connectErrors = normalized.match(/ConnectError:[^\n]*/g);
    if (connectErrors) return connectErrors.at(-1);
    // Resolver messages differ in capitalisation across platforms
    // (glibc emits "Temporary failure in name resolution"), so compare
    // case-insensitively.
    const lowered = normalized.toLowerCase();
    if (lowered.includes("nodename nor servname provided") || lowered.includes("temporary failure in name resolution")) return "Harbor registry lookup failed due DNS/network error. Verify internet access or pass --registry-url/--registry-path.";
    const lines = normalized.split(/\r?\n/).map((line)=>line.trim()).filter(Boolean);
    if (0 === lines.length) return;
    for(let i = lines.length - 1; i >= 0; i -= 1)if (lines[i].startsWith("ValueError:")) return lines[i];
    for(let i = lines.length - 1; i >= 0; i -= 1)if (/\w+Error:/.test(lines[i])) return lines[i];
    return lines.at(-1);
}
148
/**
 * Builds the argument list for `harbor run` from the config file plus
 * per-invocation overrides.
 *
 * Overrides win over config values. `agentKwargs` from both sources are
 * merged, with overrides winning per key. When an agent import path is in
 * effect it replaces `--agent`, and the `model_name` kwarg is not
 * forwarded. `extraArgs` come from the config only and are appended last.
 *
 * @param {object} config - Parsed official bench config (`dataset` required).
 * @param {object} [overrides] - Optional overrides; a nullish value is
 *   treated as "no overrides" instead of throwing.
 * @returns {string[]} Arguments to pass to the Harbor binary.
 */
function buildHarborRunArgs(config, overrides) {
    const effectiveOverrides = overrides ?? {};
    const dataset = effectiveOverrides.dataset || config.dataset;
    const taskNames = effectiveOverrides.taskNames ?? config.taskNames ?? [];
    const registryUrl = effectiveOverrides.registryUrl || config.registryUrl;
    const registryPath = effectiveOverrides.registryPath || config.registryPath;
    const agent = effectiveOverrides.agent || config.agent || "oracle";
    const agentImportPath = effectiveOverrides.agentImportPath || config.agentImportPath;
    const agentKwargs = {
        ...config.agentKwargs || {},
        ...effectiveOverrides.agentKwargs || {}
    };
    const model = effectiveOverrides.model || config.model;
    const nConcurrent = effectiveOverrides.nConcurrent || config.nConcurrent;
    const nAttempts = effectiveOverrides.nAttempts || config.nAttempts;
    const nTasks = effectiveOverrides.nTasks || config.nTasks;
    const args = [
        "run",
        "--dataset",
        dataset
    ];
    if (registryUrl) args.push("--registry-url", registryUrl);
    if (registryPath) args.push("--registry-path", registryPath);
    // An import path selects the agent, so `--agent` is only used without one.
    if (agentImportPath) args.push("--agent-import-path", agentImportPath);
    else args.push("--agent", agent);
    if (model) args.push("--model", model);
    if (nConcurrent) args.push("--n-concurrent", String(nConcurrent));
    if (nAttempts) args.push("--n-attempts", String(nAttempts));
    if (nTasks) args.push("--n-tasks", String(nTasks));
    for (const [key, value] of Object.entries(agentKwargs)){
        // `model_name` is not forwarded when an agent import path is in effect.
        if (agentImportPath && "model_name" === key) continue;
        args.push("--agent-kwarg", `${key}=${value}`);
    }
    for (const taskName of taskNames)args.push("--task-name", taskName);
    if (config.extraArgs && config.extraArgs.length > 0) args.push(...config.extraArgs);
    return args;
}
181
/**
 * Reads, parses and validates the official bench config file.
 *
 * The file is read through `node:fs/promises` rather than the `Bun`
 * global, so this CommonJS build does not require the `Bun` global to exist.
 *
 * @param {string} configPath - Path to the JSON config; resolved against
 *   the current working directory.
 * @returns {Promise<object>} The config as validated by `officialConfigSchema`.
 * @throws {Error} When the file cannot be read, is not valid JSON, or
 *   fails schema validation.
 */
async function loadOfficialBenchConfig(configPath) {
    const path = (0, external_node_path_namespaceObject.resolve)(configPath);
    const rawText = await (0, promises_namespaceObject.readFile)(path, "utf-8");
    // JSON.parse rejects a leading byte-order mark, so drop it if present.
    const text = rawText.replace(/^\uFEFF/, "");
    return officialConfigSchema.parse(JSON.parse(text));
}
186
/**
 * Resolves the absolute path of a binary that must be installed.
 *
 * The lookup runs `command -v <name>` through `sh -lc` with a 5 second
 * timeout and takes the last line of stdout as the path.
 *
 * @param {string} name - Binary name to look up on PATH.
 * @returns {Promise<string>} The resolved path.
 * @throws {Error} When the lookup exits non-zero (message names the
 *   requested binary) or prints no path.
 */
async function resolveRequiredBinary(name) {
    const check = await (0, external_process_cjs_namespaceObject.runCommand)("sh", [
        "-lc",
        `command -v ${name}`
    ], {
        cwd: process.cwd(),
        timeoutMs: 5000
    });
    if (0 !== check.exitCode) {
        // Name the binary that was actually requested; the Harbor wording is
        // kept verbatim for the "harbor" lookup.
        const installHint = "harbor" === name ? "Install Harbor CLI" : `Install ${name}`;
        throw new Error(`${name} is not installed or not on PATH. ${installHint} and verify with \`${name} --help\`.`);
    }
    const resolvedPath = check.stdout.trim().split(/\r?\n/).at(-1)?.trim();
    if (!resolvedPath) throw new Error(`Unable to resolve ${name} binary path.`);
    return resolvedPath;
}
199
/**
 * Looks up an optional binary on PATH.
 *
 * Runs `command -v <name>` through `sh -lc` with a 5 second timeout and
 * takes the last line of stdout as the path.
 *
 * @param {string} name - Binary name to look up.
 * @returns {Promise<(string|null)>} The resolved path, or `null` when the
 *   lookup exits non-zero or prints nothing.
 */
async function resolveBinary(name) {
    const { runCommand } = external_process_cjs_namespaceObject;
    const lookupOptions = {
        cwd: process.cwd(),
        timeoutMs: 5000
    };
    const { exitCode, stdout } = await runCommand("sh", [
        "-lc",
        `command -v ${name}`
    ], lookupOptions);
    if (exitCode !== 0) {
        return null;
    }
    // A login shell may print extra lines first; the path is the last one.
    const outputLines = stdout.trim().split(/\r?\n/);
    const candidate = outputLines[outputLines.length - 1].trim();
    return candidate === "" ? null : candidate;
}
211
/**
 * Wraps a value in single quotes for a POSIX shell.
 *
 * Each embedded single quote is written as `'"'"'`: close the quoted
 * string, emit a double-quoted `'`, then reopen the single-quoted string.
 *
 * @param {string} value - Raw text to quote.
 * @returns {string} The single-quoted shell word.
 */
function shellQuote(value) {
    const ESCAPED_SINGLE_QUOTE = "'\"'\"'";
    const escaped = value.split("'").join(ESCAPED_SINGLE_QUOTE);
    return "'" + escaped + "'";
}
214
+ function createDockerShimScript(targetBinary) {
215
+ return `#!/bin/bash
216
+ set -e
217
+ TARGET_BINARY=${shellQuote(targetBinary)}
218
+
219
+ if [[ "$1" == "compose" ]] && command -v podman-compose >/dev/null 2>&1; then
220
+ shift
221
+ PROJECT_DIR=""
222
+ PROJECT_NAME=""
223
+ COMPOSE_FILES=()
224
+ TRANSLATED_ARGS=()
225
+
226
+ while [[ $# -gt 0 ]]; do
227
+ case "$1" in
228
+ -p)
229
+ PROJECT_NAME="$2"
230
+ TRANSLATED_ARGS+=("$1" "$2")
231
+ shift 2
232
+ ;;
233
+ -p=*)
234
+ PROJECT_NAME="\${1#*=}"
235
+ TRANSLATED_ARGS+=("$1")
236
+ shift
237
+ ;;
238
+ -f)
239
+ COMPOSE_FILES+=("$2")
240
+ TRANSLATED_ARGS+=("$1" "$2")
241
+ shift 2
242
+ ;;
243
+ -f=*)
244
+ COMPOSE_FILES+=("\${1#*=}")
245
+ TRANSLATED_ARGS+=("$1")
246
+ shift
247
+ ;;
248
+ --project-directory)
249
+ PROJECT_DIR="$2"
250
+ shift 2
251
+ ;;
252
+ --project-directory=*)
253
+ PROJECT_DIR="\${1#*=}"
254
+ shift
255
+ ;;
256
+ *)
257
+ TRANSLATED_ARGS+=("$1")
258
+ shift
259
+ ;;
260
+ esac
261
+ done
262
+
263
+ if [[ -n "$PROJECT_DIR" ]]; then
264
+ cd "$PROJECT_DIR"
265
+ fi
266
+
267
+ resolve_container_id() {
268
+ local service="$1"
269
+ local container_id=""
270
+
271
+ if [[ -n "$PROJECT_NAME" ]]; then
272
+ container_id=$(podman ps -a \
273
+ --filter "label=com.docker.compose.project=$PROJECT_NAME" \
274
+ --filter "label=com.docker.compose.service=$service" \
275
+ --format "{{.ID}}" | head -n 1 || true)
276
+ fi
277
+
278
+ if [[ -z "$container_id" && -n "$PROJECT_NAME" ]]; then
279
+ local c1="\${PROJECT_NAME}_\${service}_1"
280
+ local c2="\${PROJECT_NAME}-\${service}-1"
281
+ if podman container exists "$c1" >/dev/null 2>&1; then
282
+ container_id="$c1"
283
+ elif podman container exists "$c2" >/dev/null 2>&1; then
284
+ container_id="$c2"
285
+ fi
286
+ fi
287
+
288
+ echo "$container_id"
289
+ }
290
+
291
+ translate_cp_endpoint() {
292
+ local endpoint="$1"
293
+ if [[ "$endpoint" == *:* ]]; then
294
+ local service="\${endpoint%%:*}"
295
+ local inner_path="\${endpoint#*:}"
296
+ local container_id
297
+ container_id=$(resolve_container_id "$service")
298
+ if [[ -z "$container_id" ]]; then
299
+ echo "docker shim: unable to resolve container for service '$service' (project '$PROJECT_NAME')" >&2
300
+ exit 2
301
+ fi
302
+ echo "$container_id:$inner_path"
303
+ return
304
+ fi
305
+ echo "$endpoint"
306
+ }
307
+
308
+ # podman-compose does not implement compose cp, so map it directly to podman cp.
309
+ if [[ "\${#TRANSLATED_ARGS[@]}" -gt 0 ]]; then
310
+ for i in "\${!TRANSLATED_ARGS[@]}"; do
311
+ if [[ "\${TRANSLATED_ARGS[$i]}" == "cp" ]]; then
312
+ cp_index="$i"
313
+ src_idx=$((cp_index + 1))
314
+ dst_idx=$((cp_index + 2))
315
+ if [[ -z "\${TRANSLATED_ARGS[$src_idx]:-}" || -z "\${TRANSLATED_ARGS[$dst_idx]:-}" ]]; then
316
+ echo "docker shim: compose cp requires source and destination" >&2
317
+ exit 2
318
+ fi
319
+ src=""
320
+ dst=""
321
+ src=$(translate_cp_endpoint "\${TRANSLATED_ARGS[$src_idx]}")
322
+ dst=$(translate_cp_endpoint "\${TRANSLATED_ARGS[$dst_idx]}")
323
+ exec podman cp "$src" "$dst"
324
+ fi
325
+
326
+ if [[ "\${TRANSLATED_ARGS[$i]}" == "exec" ]]; then
327
+ exec_idx="$i"
328
+ j=$((exec_idx + 1))
329
+ PODMAN_EXEC_ARGS=()
330
+
331
+ while [[ $j -lt \${#TRANSLATED_ARGS[@]} ]]; do
332
+ tok="\${TRANSLATED_ARGS[$j]}"
333
+ case "$tok" in
334
+ -it|-ti|-i|-t|--interactive|--tty)
335
+ # Skip compose tty/interactive flags to avoid non-tty failures.
336
+ j=$((j + 1))
337
+ ;;
338
+ -w|--workdir|-e|--env)
339
+ if [[ $((j + 1)) -ge \${#TRANSLATED_ARGS[@]} ]]; then
340
+ echo "docker shim: missing value for $tok in compose exec" >&2
341
+ exit 2
342
+ fi
343
+ PODMAN_EXEC_ARGS+=("$tok" "\${TRANSLATED_ARGS[$((j + 1))]}")
344
+ j=$((j + 2))
345
+ ;;
346
+ -w=*|--workdir=*|-e=*|--env=*)
347
+ PODMAN_EXEC_ARGS+=("$tok")
348
+ j=$((j + 1))
349
+ ;;
350
+ --)
351
+ j=$((j + 1))
352
+ break
353
+ ;;
354
+ -*)
355
+ PODMAN_EXEC_ARGS+=("$tok")
356
+ j=$((j + 1))
357
+ ;;
358
+ *)
359
+ service="$tok"
360
+ j=$((j + 1))
361
+ break
362
+ ;;
363
+ esac
364
+ done
365
+
366
+ if [[ -z "\${service:-}" ]]; then
367
+ echo "docker shim: compose exec missing service name" >&2
368
+ exit 2
369
+ fi
370
+
371
+ container_id=$(resolve_container_id "$service")
372
+ if [[ -z "$container_id" ]]; then
373
+ echo "docker shim: unable to resolve container for service '$service' (project '$PROJECT_NAME')" >&2
374
+ exit 2
375
+ fi
376
+
377
+ REMAINDER=("\${TRANSLATED_ARGS[@]:$j}")
378
+ if [[ \${#REMAINDER[@]} -eq 0 ]]; then
379
+ echo "docker shim: compose exec missing command" >&2
380
+ exit 2
381
+ fi
382
+ exec podman exec "\${PODMAN_EXEC_ARGS[@]}" "$container_id" "\${REMAINDER[@]}"
383
+ fi
384
+ done
385
+ fi
386
+
387
+ exec podman-compose "\${TRANSLATED_ARGS[@]}"
388
+ fi
389
+
390
+ exec "$TARGET_BINARY" "$@"
391
+ `;
392
+ }
393
/**
 * Builds a PATH value with the shim directory placed first.
 *
 * @param {string} shimDir - Directory holding the generated `docker` shim.
 * @param {string} [basePath] - Existing PATH; defaults to the process PATH
 *   (or an empty string when it is unset).
 * @returns {string} `shimDir` alone when the base is empty, otherwise
 *   `shimDir` and the base joined with `:`.
 */
function buildRuntimePathEnv(shimDir, basePath = process.env.PATH || "") {
    if (!basePath) {
        return shimDir;
    }
    return [
        shimDir,
        basePath
    ].join(":");
}
396
/**
 * Builds a PYTHONPATH value with `pathToAdd` placed first.
 *
 * @param {string} pathToAdd - Directory to prepend.
 * @param {string} [basePythonPath] - Existing PYTHONPATH; defaults to the
 *   process value (or an empty string when it is unset).
 * @returns {string} `pathToAdd` alone when the base is empty, otherwise
 *   both joined with the platform path delimiter.
 */
function buildPythonPathEnv(pathToAdd, basePythonPath = process.env.PYTHONPATH || "") {
    if (!basePythonPath) {
        return pathToAdd;
    }
    const { delimiter } = external_node_path_namespaceObject;
    return [
        pathToAdd,
        basePythonPath
    ].join(delimiter);
}
399
/**
 * Returns the last line of `value` that still has content after trimming.
 *
 * @param {string} value - Multi-line text (LF or CRLF line endings).
 * @returns {(string|undefined)} The trimmed line, or `undefined` when every
 *   line is blank.
 */
function parseLastNonEmptyLine(value) {
    const rows = value.split(/\r?\n/);
    // Walk backwards so the scan stops at the first line with content.
    for(let idx = rows.length - 1; idx >= 0; idx -= 1){
        const candidate = rows[idx].trim();
        if (candidate) return candidate;
    }
    return undefined;
}
403
/**
 * Normalises a Docker host value.
 *
 * Surrounding whitespace and one leading/trailing quote character are
 * removed. A value that already has a `scheme://` prefix is kept as is; an
 * absolute filesystem path becomes a `unix://` URL; anything else is
 * returned unchanged.
 *
 * @param {string} value - Raw host value or socket path.
 * @returns {string} The normalised value.
 */
function normalizeDockerHost(value) {
    const unquoted = value.trim().replace(/^['"]|['"]$/g, "");
    const hasScheme = /^[a-zA-Z]+:\/\//.test(unquoted);
    if (!hasScheme && unquoted.startsWith("/")) {
        return `unix://${unquoted}`;
    }
    return unquoted;
}
409
/**
 * Turns raw command output or an environment value into a usable Docker
 * host, filtering out placeholder strings that mean "no value".
 *
 * @param {(string|undefined|null)} value - Candidate host or socket path.
 * @returns {(string|undefined)} The normalised host, or `undefined` when
 *   the input is empty or normalises to `""`, `null`, `<nil>`,
 *   `<no value>` or `[]`.
 */
function parseDockerHostCandidate(value) {
    if (!value) return undefined;
    const EMPTY_PLACEHOLDERS = [
        "",
        "null",
        "<nil>",
        "<no value>",
        "[]"
    ];
    const host = normalizeDockerHost(value);
    return EMPTY_PLACEHOLDERS.includes(host) ? undefined : host;
}
415
/**
 * Determines the DOCKER_HOST value to use with Podman.
 *
 * Sources are tried in order and the first usable value wins:
 *   1. the existing `DOCKER_HOST` environment variable,
 *   2. `podman machine inspect` (socket path),
 *   3. `podman machine env` (exported `DOCKER_HOST=`),
 *   4. `podman info` (remote socket path).
 * Each podman probe runs with a 5 second timeout, and a non-zero exit
 * falls through to the next source.
 *
 * @returns {Promise<(string|undefined)>} The host, or `undefined` when no
 *   source yields one.
 */
async function resolvePodmanDockerHost() {
    const fromEnvironment = parseDockerHostCandidate(process.env.DOCKER_HOST);
    if (fromEnvironment) return fromEnvironment;
    const { runCommand } = external_process_cjs_namespaceObject;
    const podman = (podmanArgs)=>runCommand("podman", podmanArgs, {
            cwd: process.cwd(),
            timeoutMs: 5000
        });
    const machineInspect = await podman([
        "machine",
        "inspect",
        "--format",
        "{{.ConnectionInfo.PodmanSocket.Path}}"
    ]);
    if (machineInspect.exitCode === 0) {
        const inspectedSocket = parseDockerHostCandidate(parseLastNonEmptyLine(machineInspect.stdout));
        if (inspectedSocket) return inspectedSocket;
    }
    const machineEnv = await podman([
        "machine",
        "env"
    ]);
    if (machineEnv.exitCode === 0) {
        // Accepts the assignment with or without matching surrounding quotes.
        const exported = /DOCKER_HOST=(['"]?)([^'"\n]+)\1/.exec(machineEnv.stdout);
        const exportedHost = parseDockerHostCandidate(exported?.[2]);
        if (exportedHost) return exportedHost;
    }
    const info = await podman([
        "info",
        "--format",
        "{{.Host.RemoteSocket.Path}}"
    ]);
    return info.exitCode === 0 ? parseDockerHostCandidate(parseLastNonEmptyLine(info.stdout)) : undefined;
}
453
/**
 * Chooses the container runtime for the benchmark run.
 *
 * - A `docker` binary that is not Podman in disguise (or one that is, when
 *   no separate `podman` binary can be found) selects plain Docker.
 * - Otherwise Podman is used: a `docker` shim script is written to
 *   `<wrapperOutputDir>/runtime-bin/docker`, made executable, and put first
 *   on PATH; DOCKER_HOST is added when it can be resolved.
 *
 * @param {string} wrapperOutputDir - Directory in which the shim is created.
 * @returns {Promise<{ containerRuntime: string, env?: Object<string, string> }>}
 * @throws {Error} When neither docker nor podman is on PATH.
 */
async function resolveContainerRuntime(wrapperOutputDir) {
    const { runCommand } = external_process_cjs_namespaceObject;
    const { join } = external_node_path_namespaceObject;
    const { mkdir, writeFile, chmod } = promises_namespaceObject;
    const dockerBinary = await resolveBinary("docker");
    const podmanBinary = await resolveBinary("podman");
    if (dockerBinary) {
        const versionProbe = await runCommand(dockerBinary, [
            "--version"
        ], {
            cwd: process.cwd(),
            timeoutMs: 5000
        });
        const versionText = `${versionProbe.stdout}\n${versionProbe.stderr}`;
        const isPodmanInDisguise = dockerBinary.toLowerCase().includes("podman") || isPodmanBackedDockerVersionOutput(versionText);
        if (!(isPodmanInDisguise && podmanBinary)) {
            return {
                containerRuntime: "docker"
            };
        }
    } else if (!podmanBinary) {
        throw new Error("Neither docker nor podman is installed or on PATH. Install Docker Desktop or Podman, then retry.");
    }
    // From here on Podman is the runtime: install the docker shim once.
    const shimDir = join(wrapperOutputDir, "runtime-bin");
    const shimPath = join(shimDir, "docker");
    await mkdir(shimDir, {
        recursive: true
    });
    await writeFile(shimPath, createDockerShimScript(podmanBinary), "utf-8");
    await chmod(shimPath, 0o755);
    const env = {
        PATH: buildRuntimePathEnv(shimDir)
    };
    const dockerHost = await resolvePodmanDockerHost();
    if (dockerHost) env.DOCKER_HOST = dockerHost;
    return {
        containerRuntime: "podman",
        env
    };
}
503
/**
 * Verifies that `docker compose version` works when Podman is the runtime.
 *
 * Nothing is checked for other runtimes. The probe runs with the runtime's
 * environment and a 10 second timeout. Only a recognised "compose provider
 * is missing" failure raises; any other non-zero exit is ignored.
 *
 * @param {{ containerRuntime: string, env?: Object<string, string> }} runtime
 * @returns {Promise<void>}
 * @throws {Error} When the compose provider is missing.
 */
async function ensureComposeAvailableForPodman(runtime) {
    if (runtime.containerRuntime !== "podman") return;
    const { runCommand } = external_process_cjs_namespaceObject;
    const { exitCode, stdout, stderr } = await runCommand("docker", [
        "compose",
        "version"
    ], {
        cwd: process.cwd(),
        timeoutMs: 10000,
        env: runtime.env
    });
    if (exitCode === 0) return;
    if (!isMissingComposeProviderError(`${stdout}\n${stderr}`)) return;
    throw new Error("Podman compose provider is missing. Install `podman-compose` (e.g. `uv tool install podman-compose`) or `docker-compose`, then verify with `docker compose version`.");
}
517
/**
 * Creates a run identifier from the current time: the ISO-8601 timestamp
 * with every `:` and `.` replaced by `-`.
 *
 * @returns {string} For example `2024-01-02T03-04-05-678Z`.
 */
function createRunId() {
    const isoStamp = new Date().toISOString();
    return isoStamp.split(/[:.]/).join("-");
}
520
/**
 * Runs the official Terminal-Bench suite through the Harbor CLI and
 * records the outcome.
 *
 * Steps: load the config, locate the `harbor` binary, create
 * `bench/results/official-wrapper/<runId>` under the current working
 * directory, pick the container runtime (checking compose support when
 * Podman is used), run Harbor, parse its output, and write
 * `harbor.stdout.log`, `harbor.stderr.log` and `summary.json` into the
 * run directory.
 *
 * @param {{ configPath: string, overrides: object }} options - Config file
 *   path and per-invocation overrides.
 * @returns {Promise<object>} The summary that was also written to
 *   `summary.json`.
 */
async function runOfficialTerminalBench(options) {
    const { join } = external_node_path_namespaceObject;
    const { mkdir, writeFile } = promises_namespaceObject;
    const { overrides } = options;
    const config = await loadOfficialBenchConfig(options.configPath);
    const selectedAgentImportPath = overrides.agentImportPath || config.agentImportPath;
    const harborBinary = await resolveRequiredBinary("harbor");
    const wrapperOutputDir = join(process.cwd(), "bench", "results", "official-wrapper", createRunId());
    await mkdir(wrapperOutputDir, {
        recursive: true
    });
    const runtime = await resolveContainerRuntime(wrapperOutputDir);
    await ensureComposeAvailableForPodman(runtime);
    const args = buildHarborRunArgs(config, overrides);
    const timeoutMs = overrides.timeoutMs || config.timeoutMs || 3600000;
    const runEnv = {
        ...runtime.env || {}
    };
    // A custom agent import path puts the working directory on PYTHONPATH.
    if (selectedAgentImportPath) runEnv.PYTHONPATH = buildPythonPathEnv(process.cwd(), runEnv.PYTHONPATH);
    const effectiveRuntime = {
        ...runtime,
        env: runEnv
    };
    const result = await (0, external_process_cjs_namespaceObject.runCommand)(harborBinary, args, {
        cwd: process.cwd(),
        timeoutMs,
        env: effectiveRuntime.env
    });
    const parsed = parseHarborRunOutput(`${result.stdout}\n${result.stderr}`);
    const artifacts = {
        rawStdoutPath: join(wrapperOutputDir, "harbor.stdout.log"),
        rawStderrPath: join(wrapperOutputDir, "harbor.stderr.log"),
        summaryPath: join(wrapperOutputDir, "summary.json")
    };
    // An error message is only extracted for a failed run; it is then
    // passed through normalizeHarborFailureMessage.
    const rawErrorMessage = result.exitCode !== 0 ? extractHarborErrorMessage(`${result.stderr}\n${result.stdout}`) : undefined;
    const summary = {
        timestamp: new Date().toISOString(),
        command: {
            binary: harborBinary,
            args
        },
        runtime: effectiveRuntime,
        exitCode: result.exitCode,
        timedOut: result.timedOut,
        durationMs: result.durationMs,
        errorMessage: normalizeHarborFailureMessage({
            rawMessage: rawErrorMessage,
            args,
            dataset: overrides.dataset || config.dataset
        }),
        metrics: {
            resolvedTrials: parsed.resolvedTrials,
            unresolvedTrials: parsed.unresolvedTrials,
            accuracyPercent: parsed.accuracyPercent,
            passAtK: parsed.passAtK
        },
        runOutputPath: parsed.runOutputPath,
        artifacts
    };
    await Promise.all([
        writeFile(artifacts.rawStdoutPath, result.stdout, "utf-8"),
        writeFile(artifacts.rawStderrPath, result.stderr, "utf-8"),
        writeFile(artifacts.summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf-8")
    ]);
    return summary;
}
586
+ exports.buildHarborRunArgs = __webpack_exports__.buildHarborRunArgs;
587
+ exports.buildPythonPathEnv = __webpack_exports__.buildPythonPathEnv;
588
+ exports.buildRuntimePathEnv = __webpack_exports__.buildRuntimePathEnv;
589
+ exports.createDockerShimScript = __webpack_exports__.createDockerShimScript;
590
+ exports.extractHarborErrorMessage = __webpack_exports__.extractHarborErrorMessage;
591
+ exports.extractTaskNamesFromArgs = __webpack_exports__.extractTaskNamesFromArgs;
592
+ exports.isMissingComposeProviderError = __webpack_exports__.isMissingComposeProviderError;
593
+ exports.isPodmanBackedDockerVersionOutput = __webpack_exports__.isPodmanBackedDockerVersionOutput;
594
+ exports.loadOfficialBenchConfig = __webpack_exports__.loadOfficialBenchConfig;
595
+ exports.normalizeHarborFailureMessage = __webpack_exports__.normalizeHarborFailureMessage;
596
+ exports.parseDockerHostCandidate = __webpack_exports__.parseDockerHostCandidate;
597
+ exports.parseHarborRunOutput = __webpack_exports__.parseHarborRunOutput;
598
+ exports.runOfficialTerminalBench = __webpack_exports__.runOfficialTerminalBench;
599
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
600
+ "buildHarborRunArgs",
601
+ "buildPythonPathEnv",
602
+ "buildRuntimePathEnv",
603
+ "createDockerShimScript",
604
+ "extractHarborErrorMessage",
605
+ "extractTaskNamesFromArgs",
606
+ "isMissingComposeProviderError",
607
+ "isPodmanBackedDockerVersionOutput",
608
+ "loadOfficialBenchConfig",
609
+ "normalizeHarborFailureMessage",
610
+ "parseDockerHostCandidate",
611
+ "parseHarborRunOutput",
612
+ "runOfficialTerminalBench"
613
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
614
+ Object.defineProperty(exports, '__esModule', {
615
+ value: true
616
+ });