@wingman-ai/gateway 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/README.md +14 -0
  2. package/dist/agent/config/mcpClientManager.cjs +104 -1
  3. package/dist/agent/config/mcpClientManager.d.ts +30 -0
  4. package/dist/agent/config/mcpClientManager.js +104 -1
  5. package/dist/agent/config/modelFactory.cjs +10 -0
  6. package/dist/agent/config/modelFactory.js +10 -0
  7. package/dist/agent/config/xaiImageModel.cjs +242 -0
  8. package/dist/agent/config/xaiImageModel.d.ts +33 -0
  9. package/dist/agent/config/xaiImageModel.js +202 -0
  10. package/dist/agent/tests/mcpClientManager.test.cjs +116 -0
  11. package/dist/agent/tests/mcpClientManager.test.js +117 -1
  12. package/dist/agent/tests/mcpResourceTools.test.cjs +101 -0
  13. package/dist/agent/tests/mcpResourceTools.test.d.ts +1 -0
  14. package/dist/agent/tests/mcpResourceTools.test.js +95 -0
  15. package/dist/agent/tests/modelFactory.test.cjs +16 -2
  16. package/dist/agent/tests/modelFactory.test.js +16 -2
  17. package/dist/agent/tests/xaiImageModel.test.cjs +194 -0
  18. package/dist/agent/tests/xaiImageModel.test.d.ts +1 -0
  19. package/dist/agent/tests/xaiImageModel.test.js +188 -0
  20. package/dist/agent/tools/mcp_resources.cjs +111 -0
  21. package/dist/agent/tools/mcp_resources.d.ts +3 -0
  22. package/dist/agent/tools/mcp_resources.js +77 -0
  23. package/dist/bench/adapters/commandAdapter.cjs +93 -0
  24. package/dist/bench/adapters/commandAdapter.d.ts +6 -0
  25. package/dist/bench/adapters/commandAdapter.js +59 -0
  26. package/dist/bench/adapters/helpers.cjs +170 -0
  27. package/dist/bench/adapters/helpers.d.ts +7 -0
  28. package/dist/bench/adapters/helpers.js +133 -0
  29. package/dist/bench/adapters/index.cjs +41 -0
  30. package/dist/bench/adapters/index.d.ts +2 -0
  31. package/dist/bench/adapters/index.js +7 -0
  32. package/dist/bench/adapters/wingmanCliAdapter.cjs +100 -0
  33. package/dist/bench/adapters/wingmanCliAdapter.d.ts +6 -0
  34. package/dist/bench/adapters/wingmanCliAdapter.js +66 -0
  35. package/dist/bench/cleanup.cjs +122 -0
  36. package/dist/bench/cleanup.d.ts +9 -0
  37. package/dist/bench/cleanup.js +85 -0
  38. package/dist/bench/config.cjs +190 -0
  39. package/dist/bench/config.d.ts +2 -0
  40. package/dist/bench/config.js +156 -0
  41. package/dist/bench/index.cjs +43 -0
  42. package/dist/bench/index.d.ts +3 -0
  43. package/dist/bench/index.js +3 -0
  44. package/dist/bench/official.cjs +616 -0
  45. package/dist/bench/official.d.ts +80 -0
  46. package/dist/bench/official.js +546 -0
  47. package/dist/bench/officialCli.cjs +204 -0
  48. package/dist/bench/officialCli.d.ts +5 -0
  49. package/dist/bench/officialCli.js +170 -0
  50. package/dist/bench/process.cjs +78 -0
  51. package/dist/bench/process.d.ts +14 -0
  52. package/dist/bench/process.js +44 -0
  53. package/dist/bench/runner.cjs +237 -0
  54. package/dist/bench/runner.d.ts +7 -0
  55. package/dist/bench/runner.js +197 -0
  56. package/dist/bench/scoring.cjs +171 -0
  57. package/dist/bench/scoring.d.ts +9 -0
  58. package/dist/bench/scoring.js +137 -0
  59. package/dist/bench/types.cjs +18 -0
  60. package/dist/bench/types.d.ts +200 -0
  61. package/dist/bench/types.js +0 -0
  62. package/dist/bench/validator.cjs +92 -0
  63. package/dist/bench/validator.d.ts +2 -0
  64. package/dist/bench/validator.js +58 -0
  65. package/dist/cli/config/schema.cjs +36 -1
  66. package/dist/cli/config/schema.d.ts +46 -0
  67. package/dist/cli/config/schema.js +36 -1
  68. package/dist/cli/config/warnings.cjs +119 -51
  69. package/dist/cli/config/warnings.js +119 -51
  70. package/dist/cli/core/agentInvoker.cjs +9 -2
  71. package/dist/cli/core/agentInvoker.d.ts +1 -0
  72. package/dist/cli/core/agentInvoker.js +9 -2
  73. package/dist/cli/core/imagePersistence.cjs +17 -1
  74. package/dist/cli/core/imagePersistence.d.ts +2 -0
  75. package/dist/cli/core/imagePersistence.js +13 -3
  76. package/dist/cli/core/sessionManager.cjs +2 -0
  77. package/dist/cli/core/sessionManager.js +3 -1
  78. package/dist/cli/types.d.ts +18 -0
  79. package/dist/gateway/adapters/teams.cjs +419 -0
  80. package/dist/gateway/adapters/teams.d.ts +47 -0
  81. package/dist/gateway/adapters/teams.js +361 -0
  82. package/dist/gateway/http/sms.cjs +286 -0
  83. package/dist/gateway/http/sms.d.ts +4 -0
  84. package/dist/gateway/http/sms.js +249 -0
  85. package/dist/gateway/server.cjs +54 -3
  86. package/dist/gateway/server.d.ts +2 -0
  87. package/dist/gateway/server.js +54 -3
  88. package/dist/gateway/sms/commands.cjs +116 -0
  89. package/dist/gateway/sms/commands.d.ts +15 -0
  90. package/dist/gateway/sms/commands.js +79 -0
  91. package/dist/gateway/sms/control.cjs +118 -0
  92. package/dist/gateway/sms/control.d.ts +18 -0
  93. package/dist/gateway/sms/control.js +84 -0
  94. package/dist/gateway/sms/policyStore.cjs +198 -0
  95. package/dist/gateway/sms/policyStore.d.ts +37 -0
  96. package/dist/gateway/sms/policyStore.js +161 -0
  97. package/dist/providers/registry.cjs +1 -0
  98. package/dist/providers/registry.js +1 -0
  99. package/dist/tests/cli-config-warnings.test.cjs +41 -0
  100. package/dist/tests/cli-config-warnings.test.js +41 -0
  101. package/dist/tests/cli-init.test.cjs +32 -26
  102. package/dist/tests/cli-init.test.js +32 -26
  103. package/dist/tests/gateway-http-security.test.cjs +21 -0
  104. package/dist/tests/gateway-http-security.test.js +21 -0
  105. package/dist/tests/gateway-origin-policy.test.cjs +22 -0
  106. package/dist/tests/gateway-origin-policy.test.js +22 -0
  107. package/dist/tests/gateway.test.cjs +57 -0
  108. package/dist/tests/gateway.test.js +57 -0
  109. package/dist/tests/imagePersistence.test.cjs +26 -0
  110. package/dist/tests/imagePersistence.test.js +27 -1
  111. package/dist/tests/run-terminal-bench-official-script.test.cjs +61 -0
  112. package/dist/tests/run-terminal-bench-official-script.test.d.ts +1 -0
  113. package/dist/tests/run-terminal-bench-official-script.test.js +55 -0
  114. package/dist/tests/sessions-api.test.cjs +69 -1
  115. package/dist/tests/sessions-api.test.js +70 -2
  116. package/dist/tests/sms-api.test.cjs +183 -0
  117. package/dist/tests/sms-api.test.d.ts +1 -0
  118. package/dist/tests/sms-api.test.js +177 -0
  119. package/dist/tests/sms-commands.test.cjs +90 -0
  120. package/dist/tests/sms-commands.test.d.ts +1 -0
  121. package/dist/tests/sms-commands.test.js +84 -0
  122. package/dist/tests/sms-policy-store.test.cjs +69 -0
  123. package/dist/tests/sms-policy-store.test.d.ts +1 -0
  124. package/dist/tests/sms-policy-store.test.js +63 -0
  125. package/dist/tests/teams-adapter.test.cjs +58 -0
  126. package/dist/tests/teams-adapter.test.d.ts +1 -0
  127. package/dist/tests/teams-adapter.test.js +52 -0
  128. package/dist/tests/terminal-bench-adapters-helpers.test.cjs +64 -0
  129. package/dist/tests/terminal-bench-adapters-helpers.test.d.ts +1 -0
  130. package/dist/tests/terminal-bench-adapters-helpers.test.js +58 -0
  131. package/dist/tests/terminal-bench-cleanup.test.cjs +93 -0
  132. package/dist/tests/terminal-bench-cleanup.test.d.ts +1 -0
  133. package/dist/tests/terminal-bench-cleanup.test.js +87 -0
  134. package/dist/tests/terminal-bench-config.test.cjs +62 -0
  135. package/dist/tests/terminal-bench-config.test.d.ts +1 -0
  136. package/dist/tests/terminal-bench-config.test.js +56 -0
  137. package/dist/tests/terminal-bench-official.test.cjs +194 -0
  138. package/dist/tests/terminal-bench-official.test.d.ts +1 -0
  139. package/dist/tests/terminal-bench-official.test.js +188 -0
  140. package/dist/tests/terminal-bench-runner.test.cjs +82 -0
  141. package/dist/tests/terminal-bench-runner.test.d.ts +1 -0
  142. package/dist/tests/terminal-bench-runner.test.js +76 -0
  143. package/dist/tests/terminal-bench-scoring.test.cjs +128 -0
  144. package/dist/tests/terminal-bench-scoring.test.d.ts +1 -0
  145. package/dist/tests/terminal-bench-scoring.test.js +122 -0
  146. package/dist/tools/mcp-fal-ai.cjs +1 -1
  147. package/dist/tools/mcp-fal-ai.js +1 -1
  148. package/dist/webui/assets/index-Cyg_Hs57.css +11 -0
  149. package/dist/webui/assets/{index-BMekSELC.js → index-DZXLLjaA.js} +109 -109
  150. package/dist/webui/index.html +2 -2
  151. package/package.json +11 -2
  152. package/templates/agents/game-dev/agent.md +110 -63
  153. package/templates/agents/game-dev/art-director.md +106 -0
  154. package/templates/agents/game-dev/game-designer.md +87 -0
  155. package/templates/agents/game-dev/scene-engineer.md +474 -0
  156. package/dist/webui/assets/index-Cwkg4DKj.css +0 -11
  157. package/templates/agents/game-dev/art-generation.md +0 -38
  158. package/templates/agents/game-dev/asset-refinement.md +0 -17
  159. package/templates/agents/game-dev/planning-idea.md +0 -17
  160. package/templates/agents/game-dev/ui-specialist.md +0 -17
@@ -0,0 +1,66 @@
1
+ import { existsSync } from "node:fs";
2
+ import { isAbsolute, resolve } from "node:path";
3
+ import { runCommand } from "../process.js";
4
+ import { parseWingmanJsonOutput } from "./helpers.js";
5
/**
 * Transpiler-emitted helper that stores `value` under `key` on `obj`.
 *
 * If `key` is already reachable on `obj` (own or inherited), the property is
 * (re)defined as an own, enumerable, configurable, writable data property.
 * Otherwise a plain assignment is performed.
 *
 * @param {object} obj - Target object; mutated in place.
 * @param {PropertyKey} key - Name of the property to set.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj` that was passed in.
 */
function _define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    Object.defineProperty(obj, key, {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    });
    return obj;
}
15
/**
 * Picks the agent name for a task.
 *
 * A truthy `context.task.adapterOverrides.agent` wins; otherwise the adapter's
 * configured `config.agent` is returned.
 *
 * @param {object} config - Adapter configuration; `agent` is read.
 * @param {object} context - Invocation context; `task.adapterOverrides` is read.
 * @returns {*} The override agent when truthy, else `config.agent`.
 */
function resolveAgent(config, context) {
    const overrides = context.task.adapterOverrides;
    const overrideAgent = overrides?.agent;
    if (overrideAgent) {
        return overrideAgent;
    }
    return config.agent;
}
18
/**
 * Builds the CLI argument list for one task invocation.
 *
 * The list is: the fixed `agent --local --output=json --agent <name>` prefix,
 * then the adapter-level `config.extraArgs`, then the task-level
 * `adapterOverrides.extraArgs`, and finally the task prompt.
 *
 * @param {object} config - Adapter configuration; `extraArgs` and (via resolveAgent) `agent` are read.
 * @param {object} context - Invocation context; `task.prompt` and `task.adapterOverrides` are read.
 * @returns {Array} Arguments in the order described above.
 */
function resolveArgs(config, context) {
    const { task } = context;
    const taskExtraArgs = task.adapterOverrides?.extraArgs || [];
    const adapterExtraArgs = config.extraArgs || [];
    return [
        "agent",
        "--local",
        "--output=json",
        "--agent",
        resolveAgent(config, context),
        ...adapterExtraArgs,
        ...taskExtraArgs,
        task.prompt
    ];
}
35
/**
 * Adapter that runs a task by spawning the Wingman CLI entry script and
 * parsing its JSON output.
 */
class WingmanCliAdapter {
    /**
     * @param {object} config - Adapter configuration; `cliPath`, `env`, `agent` and `extraArgs` are read later.
     */
    constructor(config){
        _define_property(this, "config", void 0);
        this.config = config;
    }

    /**
     * Runs the CLI once for the given task context.
     *
     * @param {object} context - Supplies `task`, `workingDirectory` and `timeoutMs`.
     * @returns {Promise<object>} Process result fields plus the parsed assistant text,
     *   error message and token usage.
     */
    async invoke(context) {
        const configuredCommand = this.config.cliPath || "./bin/wingman";
        // Relative CLI paths are anchored at the current process's working directory.
        let cliEntryPath = configuredCommand;
        if (!isAbsolute(configuredCommand)) {
            cliEntryPath = resolve(process.cwd(), configuredCommand);
        }
        const args = resolveArgs(this.config, context);
        // Reuse the running runtime binary when it exists on disk; otherwise fall back to "bun".
        let runtimeCommand = "bun";
        if (existsSync(process.execPath)) {
            runtimeCommand = process.execPath;
        }
        const runOptions = {
            cwd: context.workingDirectory,
            timeoutMs: context.timeoutMs,
            env: this.config.env
        };
        const execution = await runCommand(runtimeCommand, [cliEntryPath, ...args], runOptions);
        const parsed = parseWingmanJsonOutput(execution.stdout);
        // Prefer the parsed error; otherwise report trimmed stderr only for non-zero exits.
        let errorMessage = parsed.errorMessage;
        if (!errorMessage) {
            errorMessage = 0 === execution.exitCode ? void 0 : execution.stderr.trim();
        }
        const { exitCode, timedOut, durationMs, stdout, stderr } = execution;
        return {
            exitCode,
            timedOut,
            durationMs,
            stdout,
            stderr,
            assistantText: parsed.assistantText,
            errorMessage,
            tokens: parsed.tokenUsage
        };
    }
}
66
+ export { WingmanCliAdapter };
@@ -0,0 +1,122 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ cleanBenchArtifacts: ()=>cleanBenchArtifacts,
28
+ getBenchCleanupTargets: ()=>getBenchCleanupTargets
29
+ });
30
+ const promises_namespaceObject = require("node:fs/promises");
31
+ const external_node_path_namespaceObject = require("node:path");
32
/**
 * Lists the direct children of a directory as joined paths.
 *
 * Any failure (for example the directory not existing) yields an empty list.
 *
 * @param {string} dirPath - Directory to list.
 * @returns {Promise<string[]>} `dirPath` joined with each entry name, or `[]` on error.
 */
async function listImmediateChildren(dirPath) {
    const { readdir } = promises_namespaceObject;
    const { join } = external_node_path_namespaceObject;
    try {
        const dirents = await readdir(dirPath, {
            withFileTypes: true,
            encoding: "utf8"
        });
        const childPaths = [];
        for (const dirent of dirents) {
            childPaths.push(join(dirPath, dirent.name));
        }
        return childPaths;
    } catch {
        return [];
    }
}
43
/**
 * Walks a directory tree breadth-first and collects every `__pycache__` directory.
 *
 * Directories that cannot be read are skipped. A `__pycache__` directory is
 * recorded but never descended into.
 *
 * @param {string} rootPath - Directory at which the walk starts.
 * @returns {Promise<string[]>} Paths of the discovered `__pycache__` directories, in visit order.
 */
async function collectPycacheDirs(rootPath) {
    const discovered = [];
    const pending = [
        rootPath
    ];
    // A cursor replaces queue.shift(): same FIFO visit order, but no re-indexing
    // of the remaining queue on every dequeue.
    for(let cursor = 0; cursor < pending.length; cursor += 1){
        const current = pending[cursor];
        if (!current) continue;
        let entries;
        try {
            entries = await (0, promises_namespaceObject.readdir)(current, {
                withFileTypes: true,
                encoding: "utf8"
            });
        } catch {
            // Unreadable or vanished directory: best-effort walk, move on.
            continue;
        }
        for (const entry of entries){
            if (!entry.isDirectory()) continue;
            const entryName = String(entry.name);
            const child = (0, external_node_path_namespaceObject.join)(current, entryName);
            if ("__pycache__" === entryName) {
                discovered.push(child);
                continue;
            }
            pending.push(child);
        }
    }
    return discovered;
}
73
/**
 * Computes the de-duplicated list of benchmark artifact paths under a workspace.
 *
 * Targets are, in order: the children of `jobs`, of `bench/results/official`,
 * of `bench/results/official-wrapper`, and every `__pycache__` directory found
 * under `bench`.
 *
 * @param {string} workspaceRoot - Workspace directory; resolved to an absolute path first.
 * @returns {Promise<string[]>} Unique target paths in the order listed above.
 */
async function getBenchCleanupTargets(workspaceRoot) {
    const { join, resolve } = external_node_path_namespaceObject;
    const root = resolve(workspaceRoot);
    const benchDir = join(root, "bench");
    const resultsDir = join(benchDir, "results");
    // The four scans are independent, so they run concurrently.
    const listings = await Promise.all([
        listImmediateChildren(join(root, "jobs")),
        listImmediateChildren(join(resultsDir, "official")),
        listImmediateChildren(join(resultsDir, "official-wrapper")),
        collectPycacheDirs(benchDir)
    ]);
    const uniqueTargets = new Set();
    for (const listing of listings) {
        for (const target of listing) {
            uniqueTargets.add(target);
        }
    }
    return [...uniqueTargets];
}
92
/**
 * Removes (or, in a dry run, only reports) every benchmark cleanup target.
 *
 * Each target is handled independently: a target whose removal throws for any
 * reason is recorded in `missingPaths` and processing continues.
 *
 * @param {string} workspaceRoot - Workspace whose artifacts are cleaned.
 * @param {{dryRun?: boolean}} [options] - With `dryRun` truthy nothing is deleted.
 * @returns {Promise<{removedPaths: string[], missingPaths: string[]}>} Targets split by outcome.
 */
async function cleanBenchArtifacts(workspaceRoot, options = {}) {
    const { rm } = promises_namespaceObject;
    const targets = await getBenchCleanupTargets(workspaceRoot);
    const removedPaths = [];
    const missingPaths = [];
    for (const target of targets) {
        let succeeded;
        try {
            if (!options.dryRun) {
                // force: false makes rm reject when the target no longer exists.
                await rm(target, {
                    recursive: true,
                    force: false
                });
            }
            succeeded = true;
        } catch {
            succeeded = false;
        }
        if (succeeded) {
            removedPaths.push(target);
        } else {
            missingPaths.push(target);
        }
    }
    return {
        removedPaths,
        missingPaths
    };
}
114
+ exports.cleanBenchArtifacts = __webpack_exports__.cleanBenchArtifacts;
115
+ exports.getBenchCleanupTargets = __webpack_exports__.getBenchCleanupTargets;
116
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
117
+ "cleanBenchArtifacts",
118
+ "getBenchCleanupTargets"
119
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
120
+ Object.defineProperty(exports, '__esModule', {
121
+ value: true
122
+ });
@@ -0,0 +1,9 @@
1
+ export interface BenchCleanupResult { // Outcome reported by cleanBenchArtifacts.
2
+ removedPaths: string[]; // Targets that were removed; in a dry run, every target (nothing is actually deleted).
3
+ missingPaths: string[]; // Targets whose removal threw; the implementation records any error here, not only "not found".
4
+ }
5
+ export interface BenchCleanupOptions { // Optional switches for cleanBenchArtifacts.
6
+ dryRun?: boolean; // When truthy, targets are reported as removed without deleting anything.
7
+ }
8
+ export declare function getBenchCleanupTargets(workspaceRoot: string): Promise<string[]>; // Resolves to the de-duplicated artifact paths under workspaceRoot that a cleanup would act on.
9
+ export declare function cleanBenchArtifacts(workspaceRoot: string, options?: BenchCleanupOptions): Promise<BenchCleanupResult>; // Deletes (or with dryRun only lists) the cleanup targets and reports the per-target outcome.
@@ -0,0 +1,85 @@
1
+ import { readdir, rm } from "node:fs/promises";
2
+ import { join, resolve } from "node:path";
3
/**
 * Lists the direct children of a directory as joined paths.
 *
 * Any failure (for example the directory not existing) yields an empty list.
 *
 * @param {string} dirPath - Directory to list.
 * @returns {Promise<string[]>} `dirPath` joined with each entry name, or `[]` on error.
 */
async function listImmediateChildren(dirPath) {
    try {
        const dirents = await readdir(dirPath, {
            withFileTypes: true,
            encoding: "utf8"
        });
        const childPaths = [];
        for (const dirent of dirents) {
            childPaths.push(join(dirPath, dirent.name));
        }
        return childPaths;
    } catch {
        return [];
    }
}
14
/**
 * Walks a directory tree breadth-first and collects every `__pycache__` directory.
 *
 * Directories that cannot be read are skipped. A `__pycache__` directory is
 * recorded but never descended into.
 *
 * @param {string} rootPath - Directory at which the walk starts.
 * @returns {Promise<string[]>} Paths of the discovered `__pycache__` directories, in visit order.
 */
async function collectPycacheDirs(rootPath) {
    const discovered = [];
    const pending = [
        rootPath
    ];
    // A cursor replaces queue.shift(): same FIFO visit order, but no re-indexing
    // of the remaining queue on every dequeue.
    for(let cursor = 0; cursor < pending.length; cursor += 1){
        const current = pending[cursor];
        if (!current) continue;
        let entries;
        try {
            entries = await readdir(current, {
                withFileTypes: true,
                encoding: "utf8"
            });
        } catch {
            // Unreadable or vanished directory: best-effort walk, move on.
            continue;
        }
        for (const entry of entries){
            if (!entry.isDirectory()) continue;
            const entryName = String(entry.name);
            const child = join(current, entryName);
            if ("__pycache__" === entryName) {
                discovered.push(child);
                continue;
            }
            pending.push(child);
        }
    }
    return discovered;
}
44
/**
 * Computes the de-duplicated list of benchmark artifact paths under a workspace.
 *
 * Targets are, in order: the children of `jobs`, of `bench/results/official`,
 * of `bench/results/official-wrapper`, and every `__pycache__` directory found
 * under `bench`.
 *
 * @param {string} workspaceRoot - Workspace directory; resolved to an absolute path first.
 * @returns {Promise<string[]>} Unique target paths in the order listed above.
 */
async function getBenchCleanupTargets(workspaceRoot) {
    const root = resolve(workspaceRoot);
    const benchDir = join(root, "bench");
    const resultsDir = join(benchDir, "results");
    // The four scans are independent, so they run concurrently.
    const listings = await Promise.all([
        listImmediateChildren(join(root, "jobs")),
        listImmediateChildren(join(resultsDir, "official")),
        listImmediateChildren(join(resultsDir, "official-wrapper")),
        collectPycacheDirs(benchDir)
    ]);
    const uniqueTargets = new Set();
    for (const listing of listings) {
        for (const target of listing) {
            uniqueTargets.add(target);
        }
    }
    return [...uniqueTargets];
}
63
/**
 * Removes (or, in a dry run, only reports) every benchmark cleanup target.
 *
 * Each target is handled independently: a target whose removal throws for any
 * reason is recorded in `missingPaths` and processing continues.
 *
 * @param {string} workspaceRoot - Workspace whose artifacts are cleaned.
 * @param {{dryRun?: boolean}} [options] - With `dryRun` truthy nothing is deleted.
 * @returns {Promise<{removedPaths: string[], missingPaths: string[]}>} Targets split by outcome.
 */
async function cleanBenchArtifacts(workspaceRoot, options = {}) {
    const targets = await getBenchCleanupTargets(workspaceRoot);
    const removedPaths = [];
    const missingPaths = [];
    for (const target of targets) {
        let succeeded;
        try {
            if (!options.dryRun) {
                // force: false makes rm reject when the target no longer exists.
                await rm(target, {
                    recursive: true,
                    force: false
                });
            }
            succeeded = true;
        } catch {
            succeeded = false;
        }
        if (succeeded) {
            removedPaths.push(target);
        } else {
            missingPaths.push(target);
        }
    }
    return {
        removedPaths,
        missingPaths
    };
}
85
+ export { cleanBenchArtifacts, getBenchCleanupTargets };
@@ -0,0 +1,190 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ loadTerminalBenchConfig: ()=>loadTerminalBenchConfig
28
+ });
29
+ const promises_namespaceObject = require("node:fs/promises");
30
+ const external_node_path_namespaceObject = require("node:path");
31
+ const external_zod_namespaceObject = require("zod");
32
+ const commandSchema = external_zod_namespaceObject.z.object({ // One external command; reused by task setup, the "command" validator and the "command" adapter.
33
+ command: external_zod_namespaceObject.z.string().min(1), // required, non-empty
34
+ args: external_zod_namespaceObject.z.array(external_zod_namespaceObject.z.string()).optional(),
35
+ shell: external_zod_namespaceObject.z.boolean().optional(),
36
+ env: external_zod_namespaceObject.z.record(external_zod_namespaceObject.z.string(), external_zod_namespaceObject.z.string()).optional(), // string-to-string map
37
+ allowFailure: external_zod_namespaceObject.z.boolean().optional()
38
+ });
39
+ const includesSchema = external_zod_namespaceObject.z.union([ // Accepts a single string or an array of strings.
40
+ external_zod_namespaceObject.z.string(),
41
+ external_zod_namespaceObject.z.array(external_zod_namespaceObject.z.string())
42
+ ]).transform((value)=>"string" == typeof value ? [ // Normalizes the parsed value so it is always an array.
43
+ value
44
+ ] : value);
45
+ const validatorSchema = external_zod_namespaceObject.z.union([ // A task validator is one of three shapes, told apart by its literal `type`.
46
+ external_zod_namespaceObject.z.object({
47
+ type: external_zod_namespaceObject.z.literal("command"),
48
+ command: commandSchema,
49
+ expectedExitCode: external_zod_namespaceObject.z.number().int().optional() // integer when present
50
+ }),
51
+ external_zod_namespaceObject.z.object({
52
+ type: external_zod_namespaceObject.z.literal("assistant_contains"),
53
+ includes: includesSchema // string or string[]; parsed to string[]
54
+ }),
55
+ external_zod_namespaceObject.z.object({
56
+ type: external_zod_namespaceObject.z.literal("file_contains"),
57
+ path: external_zod_namespaceObject.z.string().min(1), // required, non-empty
58
+ includes: includesSchema // string or string[]; parsed to string[]
59
+ })
60
+ ]);
61
+ const taskSchema = external_zod_namespaceObject.z.object({ // One benchmark task entry from the task file.
62
+ id: external_zod_namespaceObject.z.string().min(1), // required, non-empty
63
+ description: external_zod_namespaceObject.z.string().optional(),
64
+ prompt: external_zod_namespaceObject.z.string(), // required; no minimum length is enforced
65
+ workingDirectory: external_zod_namespaceObject.z.string().optional(),
66
+ timeoutMs: external_zod_namespaceObject.z.number().int().positive().optional(), // positive integer when present
67
+ setup: external_zod_namespaceObject.z.array(commandSchema).optional(),
68
+ validator: validatorSchema, // required
69
+ metadata: external_zod_namespaceObject.z.record(external_zod_namespaceObject.z.string(), external_zod_namespaceObject.z.string()).optional(),
70
+ adapterOverrides: external_zod_namespaceObject.z.object({ // optional per-task overrides
71
+ agent: external_zod_namespaceObject.z.string().optional(),
72
+ extraArgs: external_zod_namespaceObject.z.array(external_zod_namespaceObject.z.string()).optional()
73
+ }).optional()
74
+ });
75
+ const taskFileSchema = external_zod_namespaceObject.z.object({ // Top-level shape of the task file.
76
+ tasks: external_zod_namespaceObject.z.array(taskSchema).min(1) // at least one task is required
77
+ });
78
+ const configSchema = external_zod_namespaceObject.z.object({ // Top-level shape of the benchmark config file; only taskFile and adapter are required.
79
+ version: external_zod_namespaceObject.z.literal(1).optional(), // only the literal 1 is accepted when present
80
+ taskFile: external_zod_namespaceObject.z.string().min(1), // required, non-empty
81
+ resultsDir: external_zod_namespaceObject.z.string().optional(),
82
+ run: external_zod_namespaceObject.z.object({
83
+ defaultTimeoutMs: external_zod_namespaceObject.z.number().int().positive().optional(), // positive integer when present
84
+ continueOnFailure: external_zod_namespaceObject.z.boolean().optional()
85
+ }).optional(),
86
+ adapter: external_zod_namespaceObject.z.union([ // required; one of two shapes told apart by the literal `type`
87
+ external_zod_namespaceObject.z.object({
88
+ type: external_zod_namespaceObject.z.literal("wingman-cli"),
89
+ agent: external_zod_namespaceObject.z.string().min(1), // required, non-empty
90
+ cliPath: external_zod_namespaceObject.z.string().optional(),
91
+ extraArgs: external_zod_namespaceObject.z.array(external_zod_namespaceObject.z.string()).optional(),
92
+ env: external_zod_namespaceObject.z.record(external_zod_namespaceObject.z.string(), external_zod_namespaceObject.z.string()).optional()
93
+ }),
94
+ external_zod_namespaceObject.z.object({
95
+ type: external_zod_namespaceObject.z.literal("command"),
96
+ command: commandSchema
97
+ })
98
+ ]),
99
+ scoring: external_zod_namespaceObject.z.object({ // optional; every nested number below is optional too
100
+ weights: external_zod_namespaceObject.z.object({ // each weight must be >= 0 when present
101
+ passRate: external_zod_namespaceObject.z.number().nonnegative().optional(),
102
+ reliability: external_zod_namespaceObject.z.number().nonnegative().optional(),
103
+ duration: external_zod_namespaceObject.z.number().nonnegative().optional(),
104
+ cost: external_zod_namespaceObject.z.number().nonnegative().optional()
105
+ }).optional(),
106
+ budgets: external_zod_namespaceObject.z.object({
107
+ targetAvgDurationMs: external_zod_namespaceObject.z.number().positive().optional(), // must be > 0 when present
108
+ targetCostPerTaskUsd: external_zod_namespaceObject.z.number().nonnegative().optional()
109
+ }).optional(),
110
+ pricing: external_zod_namespaceObject.z.object({
111
+ inputPer1kTokensUsd: external_zod_namespaceObject.z.number().nonnegative().optional(),
112
+ outputPer1kTokensUsd: external_zod_namespaceObject.z.number().nonnegative().optional()
113
+ }).optional()
114
+ }).optional(),
115
+ qualityGate: external_zod_namespaceObject.z.object({
116
+ enabled: external_zod_namespaceObject.z.boolean().optional(),
117
+ baselineFile: external_zod_namespaceObject.z.string().optional(),
118
+ minPassRateDelta: external_zod_namespaceObject.z.number().optional(), // any number, including negative values
119
+ maxCostIncreaseRatio: external_zod_namespaceObject.z.number().nonnegative().optional(),
120
+ maxAvgDurationIncreaseRatio: external_zod_namespaceObject.z.number().nonnegative().optional()
121
+ }).optional(),
122
+ metadata: external_zod_namespaceObject.z.record(external_zod_namespaceObject.z.string(), external_zod_namespaceObject.z.string()).optional() // string-to-string map
123
+ });
124
/**
 * Drops blank `includes` entries from text-matching validators.
 *
 * For `assistant_contains` and `file_contains` validators a shallow copy is
 * returned whose `includes` keeps only entries that are non-empty after
 * trimming. Any other validator is returned as the same object.
 *
 * @param {object} validator - Parsed validator; `type` and (for text validators) `includes` are read.
 * @returns {object} A normalized copy, or `validator` itself when no normalization applies.
 */
function normalizeValidator(validator) {
    switch (validator.type) {
        case "assistant_contains":
        case "file_contains": {
            const includes = validator.includes.filter((entry) => entry.trim().length > 0);
            return {
                ...validator,
                includes
            };
        }
        default:
            return validator;
    }
}
135
/**
 * Loads a benchmark config file and its task file, validates both, and returns
 * the resolved configuration with defaults filled in.
 *
 * `taskFile`, `resultsDir` and `qualityGate.baselineFile` are resolved against
 * the directory that contains the config file.
 *
 * @param {string} configPath - Path to the JSON config file.
 * @returns {Promise<object>} The resolved configuration.
 * @throws If a file cannot be read, is not valid JSON, or fails schema validation.
 */
async function loadTerminalBenchConfig(configPath) {
    const { readFile } = promises_namespaceObject;
    const { dirname, resolve } = external_node_path_namespaceObject;

    const resolvedConfigPath = resolve(configPath);
    const rawConfig = await readFile(resolvedConfigPath, "utf-8");
    const parsedConfig = configSchema.parse(JSON.parse(rawConfig));
    const configDir = dirname(resolvedConfigPath);
    const fromConfigDir = (relativePath) => resolve(configDir, relativePath);

    const taskFilePath = fromConfigDir(parsedConfig.taskFile);
    const rawTasks = await readFile(taskFilePath, "utf-8");
    const { tasks: declaredTasks } = taskFileSchema.parse(JSON.parse(rawTasks));
    const tasks = declaredTasks.map((task) => ({
        ...task,
        validator: normalizeValidator(task.validator)
    }));

    const { run, scoring, qualityGate } = parsedConfig;
    const weights = scoring?.weights;
    const budgets = scoring?.budgets;
    const pricing = scoring?.pricing;

    return {
        version: 1,
        configPath: resolvedConfigPath,
        taskFilePath,
        resultsDir: fromConfigDir(parsedConfig.resultsDir || "bench/results"),
        run: {
            defaultTimeoutMs: run?.defaultTimeoutMs || 300000,
            continueOnFailure: run?.continueOnFailure ?? true
        },
        adapter: parsedConfig.adapter,
        tasks,
        scoring: {
            weights: {
                passRate: weights?.passRate ?? 0.7,
                reliability: weights?.reliability ?? 0.15,
                duration: weights?.duration ?? 0.1,
                cost: weights?.cost ?? 0.05
            },
            // Budgets have no defaults: absent values stay undefined.
            budgets: {
                targetAvgDurationMs: budgets?.targetAvgDurationMs,
                targetCostPerTaskUsd: budgets?.targetCostPerTaskUsd
            },
            pricing: {
                inputPer1kTokensUsd: pricing?.inputPer1kTokensUsd ?? 0,
                outputPer1kTokensUsd: pricing?.outputPer1kTokensUsd ?? 0
            }
        },
        qualityGate: {
            enabled: qualityGate?.enabled ?? false,
            baselineFile: qualityGate?.baselineFile ? fromConfigDir(qualityGate.baselineFile) : void 0,
            minPassRateDelta: qualityGate?.minPassRateDelta ?? -0.03,
            maxCostIncreaseRatio: qualityGate?.maxCostIncreaseRatio ?? 0.15,
            maxAvgDurationIncreaseRatio: qualityGate?.maxAvgDurationIncreaseRatio ?? 0.2
        },
        metadata: parsedConfig.metadata || {}
    };
}
184
+ exports.loadTerminalBenchConfig = __webpack_exports__.loadTerminalBenchConfig;
185
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
186
+ "loadTerminalBenchConfig"
187
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
188
+ Object.defineProperty(exports, '__esModule', {
189
+ value: true
190
+ });
@@ -0,0 +1,2 @@
1
+ import type { TerminalBenchResolvedConfig } from "./types.js";
2
+ export declare function loadTerminalBenchConfig(configPath: string): Promise<TerminalBenchResolvedConfig>; // Reads and validates the config at configPath plus the task file it names, and resolves to the config with defaults applied.
@@ -0,0 +1,156 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { dirname, resolve } from "node:path";
3
+ import { z } from "zod";
4
+ const commandSchema = z.object({ // One external command; reused by task setup, the "command" validator and the "command" adapter.
5
+ command: z.string().min(1), // required, non-empty
6
+ args: z.array(z.string()).optional(),
7
+ shell: z.boolean().optional(),
8
+ env: z.record(z.string(), z.string()).optional(), // string-to-string map
9
+ allowFailure: z.boolean().optional()
10
+ });
11
+ const includesSchema = z.union([ // Accepts a single string or an array of strings.
12
+ z.string(),
13
+ z.array(z.string())
14
+ ]).transform((value)=>"string" == typeof value ? [ // Normalizes the parsed value so it is always an array.
15
+ value
16
+ ] : value);
17
+ const validatorSchema = z.union([ // A task validator is one of three shapes, told apart by its literal `type`.
18
+ z.object({
19
+ type: z.literal("command"),
20
+ command: commandSchema,
21
+ expectedExitCode: z.number().int().optional() // integer when present
22
+ }),
23
+ z.object({
24
+ type: z.literal("assistant_contains"),
25
+ includes: includesSchema // string or string[]; parsed to string[]
26
+ }),
27
+ z.object({
28
+ type: z.literal("file_contains"),
29
+ path: z.string().min(1), // required, non-empty
30
+ includes: includesSchema // string or string[]; parsed to string[]
31
+ })
32
+ ]);
33
+ const taskSchema = z.object({ // One benchmark task entry from the task file.
34
+ id: z.string().min(1), // required, non-empty
35
+ description: z.string().optional(),
36
+ prompt: z.string(), // required; no minimum length is enforced
37
+ workingDirectory: z.string().optional(),
38
+ timeoutMs: z.number().int().positive().optional(), // positive integer when present
39
+ setup: z.array(commandSchema).optional(),
40
+ validator: validatorSchema, // required
41
+ metadata: z.record(z.string(), z.string()).optional(),
42
+ adapterOverrides: z.object({ // optional per-task overrides
43
+ agent: z.string().optional(),
44
+ extraArgs: z.array(z.string()).optional()
45
+ }).optional()
46
+ });
47
+ const taskFileSchema = z.object({ // Top-level shape of the task file.
48
+ tasks: z.array(taskSchema).min(1) // at least one task is required
49
+ });
50
+ const configSchema = z.object({ // Top-level shape of the benchmark config file; only taskFile and adapter are required.
51
+ version: z.literal(1).optional(), // only the literal 1 is accepted when present
52
+ taskFile: z.string().min(1), // required, non-empty
53
+ resultsDir: z.string().optional(),
54
+ run: z.object({
55
+ defaultTimeoutMs: z.number().int().positive().optional(), // positive integer when present
56
+ continueOnFailure: z.boolean().optional()
57
+ }).optional(),
58
+ adapter: z.union([ // required; one of two shapes told apart by the literal `type`
59
+ z.object({
60
+ type: z.literal("wingman-cli"),
61
+ agent: z.string().min(1), // required, non-empty
62
+ cliPath: z.string().optional(),
63
+ extraArgs: z.array(z.string()).optional(),
64
+ env: z.record(z.string(), z.string()).optional()
65
+ }),
66
+ z.object({
67
+ type: z.literal("command"),
68
+ command: commandSchema
69
+ })
70
+ ]),
71
+ scoring: z.object({ // optional; every nested number below is optional too
72
+ weights: z.object({ // each weight must be >= 0 when present
73
+ passRate: z.number().nonnegative().optional(),
74
+ reliability: z.number().nonnegative().optional(),
75
+ duration: z.number().nonnegative().optional(),
76
+ cost: z.number().nonnegative().optional()
77
+ }).optional(),
78
+ budgets: z.object({
79
+ targetAvgDurationMs: z.number().positive().optional(), // must be > 0 when present
80
+ targetCostPerTaskUsd: z.number().nonnegative().optional()
81
+ }).optional(),
82
+ pricing: z.object({
83
+ inputPer1kTokensUsd: z.number().nonnegative().optional(),
84
+ outputPer1kTokensUsd: z.number().nonnegative().optional()
85
+ }).optional()
86
+ }).optional(),
87
+ qualityGate: z.object({
88
+ enabled: z.boolean().optional(),
89
+ baselineFile: z.string().optional(),
90
+ minPassRateDelta: z.number().optional(), // any number, including negative values
91
+ maxCostIncreaseRatio: z.number().nonnegative().optional(),
92
+ maxAvgDurationIncreaseRatio: z.number().nonnegative().optional()
93
+ }).optional(),
94
+ metadata: z.record(z.string(), z.string()).optional() // string-to-string map
95
+ });
96
/**
 * Drops blank `includes` entries from text-matching validators.
 *
 * For `assistant_contains` and `file_contains` validators a shallow copy is
 * returned whose `includes` keeps only entries that are non-empty after
 * trimming. Any other validator is returned as the same object.
 *
 * @param {object} validator - Parsed validator; `type` and (for text validators) `includes` are read.
 * @returns {object} A normalized copy, or `validator` itself when no normalization applies.
 */
function normalizeValidator(validator) {
    switch (validator.type) {
        case "assistant_contains":
        case "file_contains": {
            const includes = validator.includes.filter((entry) => entry.trim().length > 0);
            return {
                ...validator,
                includes
            };
        }
        default:
            return validator;
    }
}
107
/**
 * Loads a benchmark config file and its task file, validates both, and returns
 * the resolved configuration with defaults filled in.
 *
 * `taskFile`, `resultsDir` and `qualityGate.baselineFile` are resolved against
 * the directory that contains the config file.
 *
 * @param {string} configPath - Path to the JSON config file.
 * @returns {Promise<object>} The resolved configuration.
 * @throws If a file cannot be read, is not valid JSON, or fails schema validation.
 */
async function loadTerminalBenchConfig(configPath) {
    const resolvedConfigPath = resolve(configPath);
    const rawConfig = await readFile(resolvedConfigPath, "utf-8");
    const parsedConfig = configSchema.parse(JSON.parse(rawConfig));
    const configDir = dirname(resolvedConfigPath);
    const fromConfigDir = (relativePath) => resolve(configDir, relativePath);

    const taskFilePath = fromConfigDir(parsedConfig.taskFile);
    const rawTasks = await readFile(taskFilePath, "utf-8");
    const { tasks: declaredTasks } = taskFileSchema.parse(JSON.parse(rawTasks));
    const tasks = declaredTasks.map((task) => ({
        ...task,
        validator: normalizeValidator(task.validator)
    }));

    const { run, scoring, qualityGate } = parsedConfig;
    const weights = scoring?.weights;
    const budgets = scoring?.budgets;
    const pricing = scoring?.pricing;

    return {
        version: 1,
        configPath: resolvedConfigPath,
        taskFilePath,
        resultsDir: fromConfigDir(parsedConfig.resultsDir || "bench/results"),
        run: {
            defaultTimeoutMs: run?.defaultTimeoutMs || 300000,
            continueOnFailure: run?.continueOnFailure ?? true
        },
        adapter: parsedConfig.adapter,
        tasks,
        scoring: {
            weights: {
                passRate: weights?.passRate ?? 0.7,
                reliability: weights?.reliability ?? 0.15,
                duration: weights?.duration ?? 0.1,
                cost: weights?.cost ?? 0.05
            },
            // Budgets have no defaults: absent values stay undefined.
            budgets: {
                targetAvgDurationMs: budgets?.targetAvgDurationMs,
                targetCostPerTaskUsd: budgets?.targetCostPerTaskUsd
            },
            pricing: {
                inputPer1kTokensUsd: pricing?.inputPer1kTokensUsd ?? 0,
                outputPer1kTokensUsd: pricing?.outputPer1kTokensUsd ?? 0
            }
        },
        qualityGate: {
            enabled: qualityGate?.enabled ?? false,
            baselineFile: qualityGate?.baselineFile ? fromConfigDir(qualityGate.baselineFile) : void 0,
            minPassRateDelta: qualityGate?.minPassRateDelta ?? -0.03,
            maxCostIncreaseRatio: qualityGate?.maxCostIncreaseRatio ?? 0.15,
            maxAvgDurationIncreaseRatio: qualityGate?.maxAvgDurationIncreaseRatio ?? 0.2
        },
        metadata: parsedConfig.metadata || {}
    };
}
156
+ export { loadTerminalBenchConfig };
@@ -0,0 +1,43 @@
1
+ "use strict"; // CommonJS re-export module; the helper names suggest bundler-generated (webpack/rspack-style) output.
2
+ var __webpack_require__ = {}; // Plain object that the runtime helpers (.d, .o, .r) are attached to.
3
+ (()=>{ // IIFE whose only effect is to install __webpack_require__.d.
4
+ __webpack_require__.d = (exports1, definition)=>{ // d(exports1, definition): expose entries of `definition` on `exports1` as getter properties.
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, { // Skips inherited keys of `definition` and keys `exports1` already owns.
6
+ enumerable: true, // Enumerable, so the key is visited by for...in over `exports1`.
7
+ get: definition[key] // The definition value is installed as the getter itself, so it runs on every property read.
8
+ });
9
+ };
10
+ })();
11
+ (()=>{ // IIFE whose only effect is to install __webpack_require__.o.
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop); // o(obj, prop): own-property test; calling via Object.prototype works even if `obj` has no hasOwnProperty of its own.
13
+ })();
14
+ (()=>{ // IIFE whose only effect is to install __webpack_require__.r.
15
+ __webpack_require__.r = (exports1)=>{ // r(exports1): tag `exports1` as an ES-module-style namespace object.
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, { // String comparison: "u" sorts after every typeof result except "undefined", so this is true only when Symbol exists.
17
+ value: 'Module' // Used by Object.prototype.toString, which then reports "[object Module]".
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', { // Defined via defineProperty with defaults, so the flag is non-enumerable and non-writable.
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {}; // Staging object for this module's exports before they are copied onto CommonJS `exports`.
25
+ __webpack_require__.r(__webpack_exports__); // Adds the __esModule flag (and the Symbol.toStringTag tag when available).
26
+ __webpack_require__.d(__webpack_exports__, { // Registers three getters; each reads from a required module only when the property is accessed.
27
+ runTerminalBench: ()=>external_runner_cjs_namespaceObject.runTerminalBench,
28
+ runTerminalBenchWithConfig: ()=>external_runner_cjs_namespaceObject.runTerminalBenchWithConfig,
29
+ loadTerminalBenchConfig: ()=>external_config_cjs_namespaceObject.loadTerminalBenchConfig
30
+ });
31
+ const external_config_cjs_namespaceObject = require("./config.cjs"); // Declared after the getters above; safe because no getter is invoked before this point.
32
+ const external_runner_cjs_namespaceObject = require("./runner.cjs");
33
+ exports.loadTerminalBenchConfig = __webpack_exports__.loadTerminalBenchConfig; // Each read invokes the matching getter once and stores the resulting value on `exports`.
34
+ exports.runTerminalBench = __webpack_exports__.runTerminalBench;
35
+ exports.runTerminalBenchWithConfig = __webpack_exports__.runTerminalBenchWithConfig;
36
+ for(var __rspack_i in __webpack_exports__)if (-1 === [ // Copies any enumerable key that is NOT in the list below.
37
+ "loadTerminalBenchConfig",
38
+ "runTerminalBench",
39
+ "runTerminalBenchWithConfig"
40
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i]; // In this file the only enumerable keys are the three listed ones, so nothing extra is copied.
41
+ Object.defineProperty(exports, '__esModule', { // Marks the CommonJS `exports` object itself with the (non-enumerable) __esModule flag.
42
+ value: true
43
+ });
@@ -0,0 +1,3 @@
1
+ export { loadTerminalBenchConfig } from "./config.js";
2
+ export { runTerminalBench, runTerminalBenchWithConfig } from "./runner.js";
3
+ export type { TaskRunResult, TerminalBenchResolvedConfig, TerminalBenchSummary, } from "./types.js";