@wingman-ai/gateway 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/README.md +14 -0
  2. package/dist/agent/config/mcpClientManager.cjs +104 -1
  3. package/dist/agent/config/mcpClientManager.d.ts +30 -0
  4. package/dist/agent/config/mcpClientManager.js +104 -1
  5. package/dist/agent/config/modelFactory.cjs +10 -0
  6. package/dist/agent/config/modelFactory.js +10 -0
  7. package/dist/agent/config/xaiImageModel.cjs +242 -0
  8. package/dist/agent/config/xaiImageModel.d.ts +33 -0
  9. package/dist/agent/config/xaiImageModel.js +202 -0
  10. package/dist/agent/tests/mcpClientManager.test.cjs +116 -0
  11. package/dist/agent/tests/mcpClientManager.test.js +117 -1
  12. package/dist/agent/tests/mcpResourceTools.test.cjs +101 -0
  13. package/dist/agent/tests/mcpResourceTools.test.d.ts +1 -0
  14. package/dist/agent/tests/mcpResourceTools.test.js +95 -0
  15. package/dist/agent/tests/modelFactory.test.cjs +16 -2
  16. package/dist/agent/tests/modelFactory.test.js +16 -2
  17. package/dist/agent/tests/xaiImageModel.test.cjs +194 -0
  18. package/dist/agent/tests/xaiImageModel.test.d.ts +1 -0
  19. package/dist/agent/tests/xaiImageModel.test.js +188 -0
  20. package/dist/agent/tools/mcp_resources.cjs +111 -0
  21. package/dist/agent/tools/mcp_resources.d.ts +3 -0
  22. package/dist/agent/tools/mcp_resources.js +77 -0
  23. package/dist/bench/adapters/commandAdapter.cjs +93 -0
  24. package/dist/bench/adapters/commandAdapter.d.ts +6 -0
  25. package/dist/bench/adapters/commandAdapter.js +59 -0
  26. package/dist/bench/adapters/helpers.cjs +170 -0
  27. package/dist/bench/adapters/helpers.d.ts +7 -0
  28. package/dist/bench/adapters/helpers.js +133 -0
  29. package/dist/bench/adapters/index.cjs +41 -0
  30. package/dist/bench/adapters/index.d.ts +2 -0
  31. package/dist/bench/adapters/index.js +7 -0
  32. package/dist/bench/adapters/wingmanCliAdapter.cjs +100 -0
  33. package/dist/bench/adapters/wingmanCliAdapter.d.ts +6 -0
  34. package/dist/bench/adapters/wingmanCliAdapter.js +66 -0
  35. package/dist/bench/cleanup.cjs +122 -0
  36. package/dist/bench/cleanup.d.ts +9 -0
  37. package/dist/bench/cleanup.js +85 -0
  38. package/dist/bench/config.cjs +190 -0
  39. package/dist/bench/config.d.ts +2 -0
  40. package/dist/bench/config.js +156 -0
  41. package/dist/bench/index.cjs +43 -0
  42. package/dist/bench/index.d.ts +3 -0
  43. package/dist/bench/index.js +3 -0
  44. package/dist/bench/official.cjs +616 -0
  45. package/dist/bench/official.d.ts +80 -0
  46. package/dist/bench/official.js +546 -0
  47. package/dist/bench/officialCli.cjs +204 -0
  48. package/dist/bench/officialCli.d.ts +5 -0
  49. package/dist/bench/officialCli.js +170 -0
  50. package/dist/bench/process.cjs +78 -0
  51. package/dist/bench/process.d.ts +14 -0
  52. package/dist/bench/process.js +44 -0
  53. package/dist/bench/runner.cjs +237 -0
  54. package/dist/bench/runner.d.ts +7 -0
  55. package/dist/bench/runner.js +197 -0
  56. package/dist/bench/scoring.cjs +171 -0
  57. package/dist/bench/scoring.d.ts +9 -0
  58. package/dist/bench/scoring.js +137 -0
  59. package/dist/bench/types.cjs +18 -0
  60. package/dist/bench/types.d.ts +200 -0
  61. package/dist/bench/types.js +0 -0
  62. package/dist/bench/validator.cjs +92 -0
  63. package/dist/bench/validator.d.ts +2 -0
  64. package/dist/bench/validator.js +58 -0
  65. package/dist/cli/config/schema.cjs +36 -1
  66. package/dist/cli/config/schema.d.ts +46 -0
  67. package/dist/cli/config/schema.js +36 -1
  68. package/dist/cli/config/warnings.cjs +119 -51
  69. package/dist/cli/config/warnings.js +119 -51
  70. package/dist/cli/core/agentInvoker.cjs +9 -2
  71. package/dist/cli/core/agentInvoker.d.ts +1 -0
  72. package/dist/cli/core/agentInvoker.js +9 -2
  73. package/dist/cli/core/imagePersistence.cjs +17 -1
  74. package/dist/cli/core/imagePersistence.d.ts +2 -0
  75. package/dist/cli/core/imagePersistence.js +13 -3
  76. package/dist/cli/core/sessionManager.cjs +2 -0
  77. package/dist/cli/core/sessionManager.js +3 -1
  78. package/dist/cli/types.d.ts +18 -0
  79. package/dist/gateway/adapters/teams.cjs +419 -0
  80. package/dist/gateway/adapters/teams.d.ts +47 -0
  81. package/dist/gateway/adapters/teams.js +361 -0
  82. package/dist/gateway/http/sms.cjs +286 -0
  83. package/dist/gateway/http/sms.d.ts +4 -0
  84. package/dist/gateway/http/sms.js +249 -0
  85. package/dist/gateway/server.cjs +54 -3
  86. package/dist/gateway/server.d.ts +2 -0
  87. package/dist/gateway/server.js +54 -3
  88. package/dist/gateway/sms/commands.cjs +116 -0
  89. package/dist/gateway/sms/commands.d.ts +15 -0
  90. package/dist/gateway/sms/commands.js +79 -0
  91. package/dist/gateway/sms/control.cjs +118 -0
  92. package/dist/gateway/sms/control.d.ts +18 -0
  93. package/dist/gateway/sms/control.js +84 -0
  94. package/dist/gateway/sms/policyStore.cjs +198 -0
  95. package/dist/gateway/sms/policyStore.d.ts +37 -0
  96. package/dist/gateway/sms/policyStore.js +161 -0
  97. package/dist/providers/registry.cjs +1 -0
  98. package/dist/providers/registry.js +1 -0
  99. package/dist/tests/cli-config-warnings.test.cjs +41 -0
  100. package/dist/tests/cli-config-warnings.test.js +41 -0
  101. package/dist/tests/cli-init.test.cjs +32 -26
  102. package/dist/tests/cli-init.test.js +32 -26
  103. package/dist/tests/gateway-http-security.test.cjs +21 -0
  104. package/dist/tests/gateway-http-security.test.js +21 -0
  105. package/dist/tests/gateway-origin-policy.test.cjs +22 -0
  106. package/dist/tests/gateway-origin-policy.test.js +22 -0
  107. package/dist/tests/gateway.test.cjs +57 -0
  108. package/dist/tests/gateway.test.js +57 -0
  109. package/dist/tests/imagePersistence.test.cjs +26 -0
  110. package/dist/tests/imagePersistence.test.js +27 -1
  111. package/dist/tests/run-terminal-bench-official-script.test.cjs +61 -0
  112. package/dist/tests/run-terminal-bench-official-script.test.d.ts +1 -0
  113. package/dist/tests/run-terminal-bench-official-script.test.js +55 -0
  114. package/dist/tests/sessions-api.test.cjs +69 -1
  115. package/dist/tests/sessions-api.test.js +70 -2
  116. package/dist/tests/sms-api.test.cjs +183 -0
  117. package/dist/tests/sms-api.test.d.ts +1 -0
  118. package/dist/tests/sms-api.test.js +177 -0
  119. package/dist/tests/sms-commands.test.cjs +90 -0
  120. package/dist/tests/sms-commands.test.d.ts +1 -0
  121. package/dist/tests/sms-commands.test.js +84 -0
  122. package/dist/tests/sms-policy-store.test.cjs +69 -0
  123. package/dist/tests/sms-policy-store.test.d.ts +1 -0
  124. package/dist/tests/sms-policy-store.test.js +63 -0
  125. package/dist/tests/teams-adapter.test.cjs +58 -0
  126. package/dist/tests/teams-adapter.test.d.ts +1 -0
  127. package/dist/tests/teams-adapter.test.js +52 -0
  128. package/dist/tests/terminal-bench-adapters-helpers.test.cjs +64 -0
  129. package/dist/tests/terminal-bench-adapters-helpers.test.d.ts +1 -0
  130. package/dist/tests/terminal-bench-adapters-helpers.test.js +58 -0
  131. package/dist/tests/terminal-bench-cleanup.test.cjs +93 -0
  132. package/dist/tests/terminal-bench-cleanup.test.d.ts +1 -0
  133. package/dist/tests/terminal-bench-cleanup.test.js +87 -0
  134. package/dist/tests/terminal-bench-config.test.cjs +62 -0
  135. package/dist/tests/terminal-bench-config.test.d.ts +1 -0
  136. package/dist/tests/terminal-bench-config.test.js +56 -0
  137. package/dist/tests/terminal-bench-official.test.cjs +194 -0
  138. package/dist/tests/terminal-bench-official.test.d.ts +1 -0
  139. package/dist/tests/terminal-bench-official.test.js +188 -0
  140. package/dist/tests/terminal-bench-runner.test.cjs +82 -0
  141. package/dist/tests/terminal-bench-runner.test.d.ts +1 -0
  142. package/dist/tests/terminal-bench-runner.test.js +76 -0
  143. package/dist/tests/terminal-bench-scoring.test.cjs +128 -0
  144. package/dist/tests/terminal-bench-scoring.test.d.ts +1 -0
  145. package/dist/tests/terminal-bench-scoring.test.js +122 -0
  146. package/dist/tools/mcp-fal-ai.cjs +1 -1
  147. package/dist/tools/mcp-fal-ai.js +1 -1
  148. package/dist/webui/assets/index-Cyg_Hs57.css +11 -0
  149. package/dist/webui/assets/{index-BMekSELC.js → index-DZXLLjaA.js} +109 -109
  150. package/dist/webui/index.html +2 -2
  151. package/package.json +11 -2
  152. package/templates/agents/game-dev/agent.md +110 -63
  153. package/templates/agents/game-dev/art-director.md +106 -0
  154. package/templates/agents/game-dev/game-designer.md +87 -0
  155. package/templates/agents/game-dev/scene-engineer.md +474 -0
  156. package/dist/webui/assets/index-Cwkg4DKj.css +0 -11
  157. package/templates/agents/game-dev/art-generation.md +0 -38
  158. package/templates/agents/game-dev/asset-refinement.md +0 -17
  159. package/templates/agents/game-dev/planning-idea.md +0 -17
  160. package/templates/agents/game-dev/ui-specialist.md +0 -17
@@ -0,0 +1,137 @@
1
+ import { readFile } from "node:fs/promises";
2
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values - Numbers to average.
 * @returns {number} The mean, or 0 when the list is empty.
 */
function average(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    const sum = values.reduce((running, current) => running + current, 0);
    return sum / count;
}
6
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is not reordered.
 *
 * @param {number[]} values - Samples to rank.
 * @param {number} p - Percentile, expressed on a 0-100 scale.
 * @returns {number} The sample at the requested rank, or 0 for an empty list.
 */
function percentile(values, p) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    const ascending = Array.from(values).sort((left, right) => left - right);
    const rank = Math.ceil((p / 100) * count) - 1;
    // Keep the rank inside the array so p <= 0 and p > 100 still map to a sample.
    const boundedRank = Math.max(0, Math.min(rank, count - 1));
    return ascending[boundedRank];
}
14
/**
 * Restricts a number to the inclusive range [min, max].
 *
 * @param {number} value - Number to restrict.
 * @param {number} [min=0] - Lower bound.
 * @param {number} [max=1] - Upper bound.
 * @returns {number} `value` limited to the range.
 */
function clamp(value, min = 0, max = 1) {
    const cappedAbove = Math.min(max, value);
    return Math.max(min, cappedAbove);
}
17
/**
 * Converts token counts into a USD cost using the per-1k-token prices
 * found under `config.scoring.pricing`.
 *
 * @param {number} inputTokens - Number of input tokens.
 * @param {number} outputTokens - Number of output tokens.
 * @param {object} config - Config exposing `scoring.pricing.inputPer1kTokensUsd`
 *   and `scoring.pricing.outputPer1kTokensUsd`.
 * @returns {number} Input cost plus output cost.
 */
function computeCostUsd(inputTokens, outputTokens, config) {
    const { pricing } = config.scoring;
    const inputCost = (inputTokens / 1000) * pricing.inputPer1kTokensUsd;
    const outputCost = (outputTokens / 1000) * pricing.outputPer1kTokensUsd;
    return inputCost + outputCost;
}
22
/**
 * Weighted average of the four component scores.
 *
 * @param {object} input - Component scores (`passRate`, `reliability`,
 *   `duration`, `cost`) plus a `weights` object with the same four keys.
 * @returns {number} Weighted sum divided by the weight total, or 0 when the
 *   weight total is zero or negative.
 */
function normalizeWeightedScore(input) {
    const { weights } = input;
    const weightTotal = weights.passRate + weights.reliability + weights.duration + weights.cost;
    if (weightTotal <= 0) {
        return 0;
    }
    const weightedSum =
        input.passRate * weights.passRate +
        input.reliability * weights.reliability +
        input.duration * weights.duration +
        input.cost * weights.cost;
    return weightedSum / weightTotal;
}
28
/**
 * Evaluates the quality gate by comparing a run summary with a stored baseline.
 *
 * The gate is skipped (reported as passed) when it is disabled or no baseline
 * file is configured. Otherwise the baseline JSON is loaded and three checks run:
 * pass-rate delta, total-cost increase ratio and average-duration increase ratio.
 * The cost and duration checks only run when the baseline value is greater than 0.
 *
 * A baseline that cannot be read or parsed, or that has no numeric
 * `metrics.passRate`, fails the gate with an explanatory message. It does not
 * throw, and it does not pass silently on a NaN comparison.
 *
 * @param {object} summary - Run summary; reads `metrics.passRate`,
 *   `metrics.totalCostUsd` and `metrics.avgDurationMs`.
 * @param {object} config - Config; reads `qualityGate.enabled`, `baselineFile`,
 *   `minPassRateDelta`, `maxCostIncreaseRatio` and `maxAvgDurationIncreaseRatio`.
 * @returns {Promise<{passed: boolean, messages: string[]}>} Gate verdict and the
 *   reasons for any failure.
 */
async function compareToBaseline(summary, config) {
    const gate = config.qualityGate;
    if (!gate.enabled || !gate.baselineFile) {
        return {
            passed: true,
            messages: []
        };
    }

    let baseline;
    try {
        baseline = JSON.parse(await readFile(gate.baselineFile, "utf-8"));
    } catch (error) {
        return {
            passed: false,
            messages: [
                `Unable to read baseline file ${gate.baselineFile}: ${error instanceof Error ? error.message : String(error)}`
            ]
        };
    }

    // Valid JSON is not necessarily a valid baseline (e.g. `null` or `{}`).
    // Without this guard the pass-rate delta becomes NaN and the check below
    // can never fail, or the property access throws outright.
    const baselineMetrics = baseline?.metrics;
    if (!Number.isFinite(baselineMetrics?.passRate)) {
        return {
            passed: false,
            messages: [
                `Baseline file ${gate.baselineFile} does not contain a numeric metrics.passRate.`
            ]
        };
    }

    const messages = [];
    let passed = true;

    const passRateDelta = summary.metrics.passRate - baselineMetrics.passRate;
    if (passRateDelta < gate.minPassRateDelta) {
        passed = false;
        messages.push(`Pass rate delta ${passRateDelta.toFixed(4)} is below threshold ${gate.minPassRateDelta.toFixed(4)}.`);
    }

    // A ratio is only meaningful against a positive baseline value.
    if (baselineMetrics.totalCostUsd > 0) {
        const costIncreaseRatio = (summary.metrics.totalCostUsd - baselineMetrics.totalCostUsd) / baselineMetrics.totalCostUsd;
        if (costIncreaseRatio > gate.maxCostIncreaseRatio) {
            passed = false;
            messages.push(`Cost increase ratio ${(100 * costIncreaseRatio).toFixed(2)}% exceeded threshold ${(100 * gate.maxCostIncreaseRatio).toFixed(2)}%.`);
        }
    }

    if (baselineMetrics.avgDurationMs > 0) {
        const durationIncreaseRatio = (summary.metrics.avgDurationMs - baselineMetrics.avgDurationMs) / baselineMetrics.avgDurationMs;
        if (durationIncreaseRatio > gate.maxAvgDurationIncreaseRatio) {
            passed = false;
            messages.push(`Average duration increase ratio ${(100 * durationIncreaseRatio).toFixed(2)}% exceeded threshold ${(100 * gate.maxAvgDurationIncreaseRatio).toFixed(2)}%.`);
        }
    }

    return {
        passed,
        messages
    };
}
70
/**
 * Aggregates per-task results into a run summary and applies the quality gate.
 *
 * Reads `input.tasks`, `input.config`, `input.runId`, `input.startedAt`,
 * `input.endedAt` and `input.resultsDir`. Metrics cover pass/timeout rates,
 * durations (total, average, p95), token totals, cost figures and an overall
 * weighted score built from the pass-rate, reliability, duration and cost scores.
 *
 * @param {object} input - Run data and resolved config.
 * @returns {Promise<object>} The summary, with `qualityGate.passed` and
 *   `qualityGate.messages` filled in from `compareToBaseline`.
 */
async function buildTerminalBenchSummary(input) {
    const { tasks, config } = input;
    const { budgets, weights } = config.scoring;

    // Small local helpers so each metric below reads as a single expression.
    const countWhere = (predicate) => tasks.filter(predicate).length;
    const sumOf = (pick) => tasks.reduce((total, task) => total + pick(task), 0);
    const perTask = (amount) => (totalTasks > 0 ? amount / totalTasks : 0);

    const totalTasks = tasks.length;
    const passedTasks = countWhere((task) => "passed" === task.status);
    const failedTasks = totalTasks - passedTasks;
    const passRate = perTask(passedTasks);
    const timeoutRate = totalTasks > 0 ? countWhere((task) => task.adapter.timedOut) / totalTasks : 0;

    const durationValues = tasks.map((task) => task.durationMs);
    const totalDurationMs = durationValues.reduce((total, value) => total + value, 0);
    const avgDurationMs = average(durationValues);
    const p95DurationMs = percentile(durationValues, 95);

    const totalInputTokens = sumOf((task) => task.adapter.tokens.inputTokens);
    const totalOutputTokens = sumOf((task) => task.adapter.tokens.outputTokens);
    const totalTokens = sumOf((task) => task.adapter.tokens.totalTokens);

    const totalCostUsd = computeCostUsd(totalInputTokens, totalOutputTokens, config);
    const avgCostPerTaskUsd = perTask(totalCostUsd);
    // With no passing task the whole run cost is reported as the cost per pass.
    const costPerPassUsd = passedTasks > 0 ? totalCostUsd / passedTasks : totalCostUsd;

    // A budget score is 100 when no budget is set; otherwise budget/actual capped at 1, times 100.
    // `floor` keeps the divisor away from zero.
    const budgetScore = (budget, actual, floor) => {
        if (!budget) {
            return 100;
        }
        return 100 * clamp(budget / Math.max(actual, floor));
    };
    const durationScore = budgetScore(budgets.targetAvgDurationMs, avgDurationMs, 1);
    const costScore = budgetScore(budgets.targetCostPerTaskUsd, avgCostPerTaskUsd, Number.EPSILON);
    const reliabilityScore = (1 - timeoutRate) * 100;
    const passRateScore = 100 * passRate;

    const overallScore = normalizeWeightedScore({
        passRate: passRateScore,
        reliability: reliabilityScore,
        duration: durationScore,
        cost: costScore,
        weights
    });

    const metrics = {
        totalTasks,
        passedTasks,
        failedTasks,
        passRate,
        timeoutRate,
        totalDurationMs,
        avgDurationMs,
        p95DurationMs,
        totalInputTokens,
        totalOutputTokens,
        totalTokens,
        totalCostUsd,
        avgCostPerTaskUsd,
        costPerPassUsd,
        overallScore
    };

    // The gate starts out as passed; compareToBaseline decides the final verdict.
    const summary = {
        runId: input.runId,
        startedAt: input.startedAt,
        endedAt: input.endedAt,
        configPath: config.configPath,
        taskFilePath: config.taskFilePath,
        resultsDir: input.resultsDir,
        metrics,
        qualityGate: {
            enabled: config.qualityGate.enabled,
            passed: true,
            messages: [],
            baselineFile: config.qualityGate.baselineFile
        },
        tasks,
        metadata: config.metadata
    };

    const verdict = await compareToBaseline(summary, config);
    summary.qualityGate.passed = verdict.passed;
    summary.qualityGate.messages = verdict.messages;
    return summary;
}
137
+ export { buildTerminalBenchSummary };
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.r = (exports1)=>{
5
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
6
+ value: 'Module'
7
+ });
8
+ Object.defineProperty(exports1, '__esModule', {
9
+ value: true
10
+ });
11
+ };
12
+ })();
13
+ var __webpack_exports__ = {};
14
+ __webpack_require__.r(__webpack_exports__);
15
+ for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
16
+ Object.defineProperty(exports, '__esModule', {
17
+ value: true
18
+ });
@@ -0,0 +1,200 @@
1
+ export type BenchCommand = {
2
+ command: string;
3
+ args?: string[];
4
+ shell?: boolean;
5
+ env?: Record<string, string>;
6
+ allowFailure?: boolean;
7
+ };
8
+ export type BenchValidator = {
9
+ type: "command";
10
+ command: BenchCommand;
11
+ expectedExitCode?: number;
12
+ } | {
13
+ type: "assistant_contains";
14
+ includes: string[];
15
+ } | {
16
+ type: "file_contains";
17
+ path: string;
18
+ includes: string[];
19
+ };
20
+ export interface TerminalBenchTask {
21
+ id: string;
22
+ description?: string;
23
+ prompt: string;
24
+ workingDirectory?: string;
25
+ timeoutMs?: number;
26
+ setup?: BenchCommand[];
27
+ validator: BenchValidator;
28
+ metadata?: Record<string, string>;
29
+ adapterOverrides?: {
30
+ agent?: string;
31
+ extraArgs?: string[];
32
+ };
33
+ }
34
+ export interface TerminalBenchTaskFile {
35
+ tasks: TerminalBenchTask[];
36
+ }
37
+ export type WingmanCliAdapterConfig = {
38
+ type: "wingman-cli";
39
+ agent: string;
40
+ cliPath?: string;
41
+ extraArgs?: string[];
42
+ env?: Record<string, string>;
43
+ };
44
+ export type CommandAdapterConfig = {
45
+ type: "command";
46
+ command: BenchCommand;
47
+ };
48
+ export type TerminalBenchAdapterConfig = WingmanCliAdapterConfig | CommandAdapterConfig;
49
+ export interface TerminalBenchConfigFile {
50
+ version?: 1;
51
+ taskFile: string;
52
+ resultsDir?: string;
53
+ run?: {
54
+ defaultTimeoutMs?: number;
55
+ continueOnFailure?: boolean;
56
+ };
57
+ adapter: TerminalBenchAdapterConfig;
58
+ scoring?: {
59
+ weights?: {
60
+ passRate?: number;
61
+ reliability?: number;
62
+ duration?: number;
63
+ cost?: number;
64
+ };
65
+ budgets?: {
66
+ targetAvgDurationMs?: number;
67
+ targetCostPerTaskUsd?: number;
68
+ };
69
+ pricing?: {
70
+ inputPer1kTokensUsd?: number;
71
+ outputPer1kTokensUsd?: number;
72
+ };
73
+ };
74
+ qualityGate?: {
75
+ enabled?: boolean;
76
+ baselineFile?: string;
77
+ minPassRateDelta?: number;
78
+ maxCostIncreaseRatio?: number;
79
+ maxAvgDurationIncreaseRatio?: number;
80
+ };
81
+ metadata?: Record<string, string>;
82
+ }
83
+ export interface TerminalBenchResolvedConfig {
84
+ version: 1;
85
+ configPath: string;
86
+ taskFilePath: string;
87
+ resultsDir: string;
88
+ run: {
89
+ defaultTimeoutMs: number;
90
+ continueOnFailure: boolean;
91
+ };
92
+ adapter: TerminalBenchAdapterConfig;
93
+ tasks: TerminalBenchTask[];
94
+ scoring: {
95
+ weights: {
96
+ passRate: number;
97
+ reliability: number;
98
+ duration: number;
99
+ cost: number;
100
+ };
101
+ budgets: {
102
+ targetAvgDurationMs?: number;
103
+ targetCostPerTaskUsd?: number;
104
+ };
105
+ pricing: {
106
+ inputPer1kTokensUsd: number;
107
+ outputPer1kTokensUsd: number;
108
+ };
109
+ };
110
+ qualityGate: {
111
+ enabled: boolean;
112
+ baselineFile?: string;
113
+ minPassRateDelta: number;
114
+ maxCostIncreaseRatio: number;
115
+ maxAvgDurationIncreaseRatio: number;
116
+ };
117
+ metadata: Record<string, string>;
118
+ }
119
+ export interface AdapterTokenUsage {
120
+ inputTokens: number;
121
+ outputTokens: number;
122
+ totalTokens: number;
123
+ }
124
+ export interface AdapterInvocationResult {
125
+ exitCode: number;
126
+ timedOut: boolean;
127
+ durationMs: number;
128
+ stdout: string;
129
+ stderr: string;
130
+ assistantText: string;
131
+ errorMessage?: string;
132
+ tokens: AdapterTokenUsage;
133
+ }
134
+ export interface TaskRunContext {
135
+ task: TerminalBenchTask;
136
+ workingDirectory: string;
137
+ timeoutMs: number;
138
+ }
139
+ export interface TerminalBenchAdapter {
140
+ invoke(context: TaskRunContext): Promise<AdapterInvocationResult>;
141
+ }
142
+ export interface TaskValidatorResult {
143
+ passed: boolean;
144
+ details: string;
145
+ }
146
+ export interface TaskRunResult {
147
+ taskId: string;
148
+ description?: string;
149
+ workingDirectory: string;
150
+ prompt: string;
151
+ status: "passed" | "failed";
152
+ startedAt: string;
153
+ endedAt: string;
154
+ durationMs: number;
155
+ adapter: AdapterInvocationResult;
156
+ validator: TaskValidatorResult;
157
+ setup: {
158
+ runCount: number;
159
+ failed?: string;
160
+ };
161
+ artifacts: {
162
+ stdoutFile: string;
163
+ stderrFile: string;
164
+ assistantFile: string;
165
+ recordFile: string;
166
+ };
167
+ }
168
+ export interface TerminalBenchSummary {
169
+ runId: string;
170
+ startedAt: string;
171
+ endedAt: string;
172
+ configPath: string;
173
+ taskFilePath: string;
174
+ resultsDir: string;
175
+ metrics: {
176
+ totalTasks: number;
177
+ passedTasks: number;
178
+ failedTasks: number;
179
+ passRate: number;
180
+ timeoutRate: number;
181
+ totalDurationMs: number;
182
+ avgDurationMs: number;
183
+ p95DurationMs: number;
184
+ totalInputTokens: number;
185
+ totalOutputTokens: number;
186
+ totalTokens: number;
187
+ totalCostUsd: number;
188
+ avgCostPerTaskUsd: number;
189
+ costPerPassUsd: number;
190
+ overallScore: number;
191
+ };
192
+ qualityGate: {
193
+ enabled: boolean;
194
+ passed: boolean;
195
+ messages: string[];
196
+ baselineFile?: string;
197
+ };
198
+ tasks: TaskRunResult[];
199
+ metadata: Record<string, string>;
200
+ }
File without changes
@@ -0,0 +1,92 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ runTaskValidator: ()=>runTaskValidator
28
+ });
29
+ const promises_namespaceObject = require("node:fs/promises");
30
+ const external_node_path_namespaceObject = require("node:path");
31
+ const external_process_cjs_namespaceObject = require("./process.cjs");
32
/**
 * Lists the required entries that `haystack` does not contain.
 *
 * @param {string} haystack - Text (or other value with `includes`) to search.
 * @param {Iterable<string>} needles - Entries that must all be present.
 * @returns {string[]} Missing entries in their original order; empty when all are found.
 */
function includesAll(haystack, needles) {
    return Array.from(needles).filter((needle) => !haystack.includes(needle));
}
37
/**
 * Runs the validator configured on `context.task` and reports the outcome.
 *
 * - `assistant_contains`: every string in `validator.includes` must appear in
 *   `adapterResult.assistantText`.
 * - `file_contains`: the file at `validator.path` (resolved against
 *   `context.workingDirectory`) must contain every string in `validator.includes`.
 * - anything else is treated as a command validator: the command is run and its
 *   exit code compared with `validator.expectedExitCode` (0 when unset).
 *
 * @param {object} context - Task run context (`task`, `workingDirectory`, `timeoutMs`).
 * @param {object} adapterResult - Adapter output; only `assistantText` is read here.
 * @returns {Promise<{passed: boolean, details: string}>} Verdict with a description.
 */
async function runTaskValidator(context, adapterResult) {
    const { validator } = context.task;
    // Required strings that are absent from `text`, in their original order.
    const findMissing = (text, required) => Array.from(required).filter((entry) => !text.includes(entry));

    switch (validator.type) {
        case "assistant_contains": {
            const absent = findMissing(adapterResult.assistantText, validator.includes);
            if (absent.length > 0) {
                return {
                    passed: false,
                    details: `Missing assistant substrings: ${absent.join(", ")}`
                };
            }
            return {
                passed: true,
                details: "Assistant response contains all required strings."
            };
        }
        case "file_contains": {
            const filePath = (0, external_node_path_namespaceObject.resolve)(context.workingDirectory, validator.path);
            try {
                const content = await (0, promises_namespaceObject.readFile)(filePath, "utf-8");
                const absent = findMissing(content, validator.includes);
                if (absent.length > 0) {
                    return {
                        passed: false,
                        details: `File ${validator.path} missing: ${absent.join(", ")}`
                    };
                }
                return {
                    passed: true,
                    details: `File ${validator.path} contains all required strings.`
                };
            } catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                return {
                    passed: false,
                    details: `Unable to read ${validator.path}: ${reason}`
                };
            }
        }
        default: {
            const spec = validator.command;
            const run = await (0, external_process_cjs_namespaceObject.runCommand)(spec.command, spec.args || [], {
                cwd: context.workingDirectory,
                timeoutMs: context.timeoutMs,
                shell: spec.shell,
                env: spec.env
            });
            const expectedExitCode = validator.expectedExitCode ?? 0;
            if (run.exitCode !== expectedExitCode) {
                return {
                    passed: false,
                    details: `Validator command exit code ${run.exitCode} did not match ${expectedExitCode}. stderr: ${run.stderr.trim()}`
                };
            }
            return {
                passed: true,
                details: `Validator command exit code matched ${expectedExitCode}.`
            };
        }
    }
}
86
+ exports.runTaskValidator = __webpack_exports__.runTaskValidator;
87
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
88
+ "runTaskValidator"
89
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
90
+ Object.defineProperty(exports, '__esModule', {
91
+ value: true
92
+ });
@@ -0,0 +1,2 @@
1
+ import type { AdapterInvocationResult, TaskRunContext, TaskValidatorResult } from "./types.js";
2
+ export declare function runTaskValidator(context: TaskRunContext, adapterResult: AdapterInvocationResult): Promise<TaskValidatorResult>;
@@ -0,0 +1,58 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { resolve } from "node:path";
3
+ import { runCommand } from "./process.js";
4
/**
 * Lists the required entries that `haystack` does not contain.
 *
 * @param {string} haystack - Text (or other value with `includes`) to search.
 * @param {Iterable<string>} needles - Entries that must all be present.
 * @returns {string[]} Missing entries in their original order; empty when all are found.
 */
function includesAll(haystack, needles) {
    return Array.from(needles).filter((needle) => !haystack.includes(needle));
}
9
/**
 * Runs the validator configured on `context.task` and reports the outcome.
 *
 * - `assistant_contains`: every string in `validator.includes` must appear in
 *   `adapterResult.assistantText`.
 * - `file_contains`: the file at `validator.path` (resolved against
 *   `context.workingDirectory`) must contain every string in `validator.includes`.
 * - anything else is treated as a command validator: the command is run and its
 *   exit code compared with `validator.expectedExitCode` (0 when unset).
 *
 * @param {object} context - Task run context (`task`, `workingDirectory`, `timeoutMs`).
 * @param {object} adapterResult - Adapter output; only `assistantText` is read here.
 * @returns {Promise<{passed: boolean, details: string}>} Verdict with a description.
 */
async function runTaskValidator(context, adapterResult) {
    const { validator } = context.task;
    // Required strings that are absent from `text`, in their original order.
    const findMissing = (text, required) => Array.from(required).filter((entry) => !text.includes(entry));

    switch (validator.type) {
        case "assistant_contains": {
            const absent = findMissing(adapterResult.assistantText, validator.includes);
            if (absent.length > 0) {
                return {
                    passed: false,
                    details: `Missing assistant substrings: ${absent.join(", ")}`
                };
            }
            return {
                passed: true,
                details: "Assistant response contains all required strings."
            };
        }
        case "file_contains": {
            const filePath = resolve(context.workingDirectory, validator.path);
            try {
                const content = await readFile(filePath, "utf-8");
                const absent = findMissing(content, validator.includes);
                if (absent.length > 0) {
                    return {
                        passed: false,
                        details: `File ${validator.path} missing: ${absent.join(", ")}`
                    };
                }
                return {
                    passed: true,
                    details: `File ${validator.path} contains all required strings.`
                };
            } catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                return {
                    passed: false,
                    details: `Unable to read ${validator.path}: ${reason}`
                };
            }
        }
        default: {
            const spec = validator.command;
            const run = await runCommand(spec.command, spec.args || [], {
                cwd: context.workingDirectory,
                timeoutMs: context.timeoutMs,
                shell: spec.shell,
                env: spec.env
            });
            const expectedExitCode = validator.expectedExitCode ?? 0;
            if (run.exitCode !== expectedExitCode) {
                return {
                    passed: false,
                    details: `Validator command exit code ${run.exitCode} did not match ${expectedExitCode}. stderr: ${run.stderr.trim()}`
                };
            }
            return {
                passed: true,
                details: `Validator command exit code matched ${expectedExitCode}.`
            };
        }
    }
}
58
+ export { runTaskValidator };
@@ -225,8 +225,43 @@ const DiscordAdapterSchema = external_zod_namespaceObject.object({
225
225
  sessionCommand: "!session",
226
226
  responseChunkSize: 1900
227
227
  });
228
+ const TeamsAdapterSchema = external_zod_namespaceObject.object({
229
+ enabled: external_zod_namespaceObject.boolean().default(false),
230
+ appId: external_zod_namespaceObject.string().optional(),
231
+ appPassword: external_zod_namespaceObject.string().optional(),
232
+ appType: external_zod_namespaceObject["enum"]([
233
+ "MultiTenant",
234
+ "SingleTenant",
235
+ "UserAssignedMsi",
236
+ "UserAssignedMSI"
237
+ ]).default("MultiTenant"),
238
+ tenantId: external_zod_namespaceObject.string().optional(),
239
+ endpointPath: external_zod_namespaceObject.string().default("/api/adapters/teams/messages"),
240
+ mentionOnly: external_zod_namespaceObject.boolean().default(true),
241
+ allowBots: external_zod_namespaceObject.boolean().default(false),
242
+ allowedTeamIds: external_zod_namespaceObject.array(external_zod_namespaceObject.string()).default([]),
243
+ allowedChannelIds: external_zod_namespaceObject.array(external_zod_namespaceObject.string()).default([]),
244
+ channelSessions: external_zod_namespaceObject.record(external_zod_namespaceObject.string(), external_zod_namespaceObject.string()).default({}),
245
+ sessionCommand: external_zod_namespaceObject.string().default("!session"),
246
+ gatewayUrl: external_zod_namespaceObject.string().optional(),
247
+ gatewayToken: external_zod_namespaceObject.string().optional(),
248
+ gatewayPassword: external_zod_namespaceObject.string().optional(),
249
+ responseChunkSize: external_zod_namespaceObject.number().min(500).max(5000).default(3500)
250
+ }).default({
251
+ enabled: false,
252
+ appType: "MultiTenant",
253
+ endpointPath: "/api/adapters/teams/messages",
254
+ mentionOnly: true,
255
+ allowBots: false,
256
+ allowedTeamIds: [],
257
+ allowedChannelIds: [],
258
+ channelSessions: {},
259
+ sessionCommand: "!session",
260
+ responseChunkSize: 3500
261
+ });
228
262
  const GatewayAdaptersSchema = external_zod_namespaceObject.object({
229
- discord: DiscordAdapterSchema.optional()
263
+ discord: DiscordAdapterSchema.optional(),
264
+ teams: TeamsAdapterSchema.optional()
230
265
  }).default({});
231
266
  const GatewayMcpProxySchema = external_zod_namespaceObject.object({
232
267
  enabled: external_zod_namespaceObject.boolean().optional().default(false).describe("Enable MCP stdio proxy wrapper for gateway agent execution"),
@@ -154,6 +154,29 @@ export declare const GatewayConfigSchema: z.ZodDefault<z.ZodObject<{
154
154
  gatewayPassword: z.ZodOptional<z.ZodString>;
155
155
  responseChunkSize: z.ZodDefault<z.ZodNumber>;
156
156
  }, z.core.$strip>>>;
157
+ teams: z.ZodOptional<z.ZodDefault<z.ZodObject<{
158
+ enabled: z.ZodDefault<z.ZodBoolean>;
159
+ appId: z.ZodOptional<z.ZodString>;
160
+ appPassword: z.ZodOptional<z.ZodString>;
161
+ appType: z.ZodDefault<z.ZodEnum<{
162
+ MultiTenant: "MultiTenant";
163
+ SingleTenant: "SingleTenant";
164
+ UserAssignedMsi: "UserAssignedMsi";
165
+ UserAssignedMSI: "UserAssignedMSI";
166
+ }>>;
167
+ tenantId: z.ZodOptional<z.ZodString>;
168
+ endpointPath: z.ZodDefault<z.ZodString>;
169
+ mentionOnly: z.ZodDefault<z.ZodBoolean>;
170
+ allowBots: z.ZodDefault<z.ZodBoolean>;
171
+ allowedTeamIds: z.ZodDefault<z.ZodArray<z.ZodString>>;
172
+ allowedChannelIds: z.ZodDefault<z.ZodArray<z.ZodString>>;
173
+ channelSessions: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodString>>;
174
+ sessionCommand: z.ZodDefault<z.ZodString>;
175
+ gatewayUrl: z.ZodOptional<z.ZodString>;
176
+ gatewayToken: z.ZodOptional<z.ZodString>;
177
+ gatewayPassword: z.ZodOptional<z.ZodString>;
178
+ responseChunkSize: z.ZodDefault<z.ZodNumber>;
179
+ }, z.core.$strip>>>;
157
180
  }, z.core.$strip>>>>;
158
181
  }, z.core.$strip>>;
159
182
  export type GatewayConfig = z.infer<typeof GatewayConfigSchema>;
@@ -409,6 +432,29 @@ export declare const WingmanConfigSchema: z.ZodObject<{
409
432
  gatewayPassword: z.ZodOptional<z.ZodString>;
410
433
  responseChunkSize: z.ZodDefault<z.ZodNumber>;
411
434
  }, z.core.$strip>>>;
435
+ teams: z.ZodOptional<z.ZodDefault<z.ZodObject<{
436
+ enabled: z.ZodDefault<z.ZodBoolean>;
437
+ appId: z.ZodOptional<z.ZodString>;
438
+ appPassword: z.ZodOptional<z.ZodString>;
439
+ appType: z.ZodDefault<z.ZodEnum<{
440
+ MultiTenant: "MultiTenant";
441
+ SingleTenant: "SingleTenant";
442
+ UserAssignedMsi: "UserAssignedMsi";
443
+ UserAssignedMSI: "UserAssignedMSI";
444
+ }>>;
445
+ tenantId: z.ZodOptional<z.ZodString>;
446
+ endpointPath: z.ZodDefault<z.ZodString>;
447
+ mentionOnly: z.ZodDefault<z.ZodBoolean>;
448
+ allowBots: z.ZodDefault<z.ZodBoolean>;
449
+ allowedTeamIds: z.ZodDefault<z.ZodArray<z.ZodString>>;
450
+ allowedChannelIds: z.ZodDefault<z.ZodArray<z.ZodString>>;
451
+ channelSessions: z.ZodDefault<z.ZodRecord<z.ZodString, z.ZodString>>;
452
+ sessionCommand: z.ZodDefault<z.ZodString>;
453
+ gatewayUrl: z.ZodOptional<z.ZodString>;
454
+ gatewayToken: z.ZodOptional<z.ZodString>;
455
+ gatewayPassword: z.ZodOptional<z.ZodString>;
456
+ responseChunkSize: z.ZodDefault<z.ZodNumber>;
457
+ }, z.core.$strip>>>;
412
458
  }, z.core.$strip>>>>;
413
459
  }, z.core.$strip>>>>;
414
460
  agents: z.ZodDefault<z.ZodOptional<z.ZodDefault<z.ZodObject<{
@@ -185,8 +185,43 @@ const DiscordAdapterSchema = object({
185
185
  sessionCommand: "!session",
186
186
  responseChunkSize: 1900
187
187
  });
188
+ const TeamsAdapterSchema = object({
189
+ enabled: external_zod_boolean().default(false),
190
+ appId: string().optional(),
191
+ appPassword: string().optional(),
192
+ appType: external_zod_enum([
193
+ "MultiTenant",
194
+ "SingleTenant",
195
+ "UserAssignedMsi",
196
+ "UserAssignedMSI"
197
+ ]).default("MultiTenant"),
198
+ tenantId: string().optional(),
199
+ endpointPath: string().default("/api/adapters/teams/messages"),
200
+ mentionOnly: external_zod_boolean().default(true),
201
+ allowBots: external_zod_boolean().default(false),
202
+ allowedTeamIds: array(string()).default([]),
203
+ allowedChannelIds: array(string()).default([]),
204
+ channelSessions: record(string(), string()).default({}),
205
+ sessionCommand: string().default("!session"),
206
+ gatewayUrl: string().optional(),
207
+ gatewayToken: string().optional(),
208
+ gatewayPassword: string().optional(),
209
+ responseChunkSize: number().min(500).max(5000).default(3500)
210
+ }).default({
211
+ enabled: false,
212
+ appType: "MultiTenant",
213
+ endpointPath: "/api/adapters/teams/messages",
214
+ mentionOnly: true,
215
+ allowBots: false,
216
+ allowedTeamIds: [],
217
+ allowedChannelIds: [],
218
+ channelSessions: {},
219
+ sessionCommand: "!session",
220
+ responseChunkSize: 3500
221
+ });
188
222
  const GatewayAdaptersSchema = object({
189
- discord: DiscordAdapterSchema.optional()
223
+ discord: DiscordAdapterSchema.optional(),
224
+ teams: TeamsAdapterSchema.optional()
190
225
  }).default({});
191
226
  const GatewayMcpProxySchema = object({
192
227
  enabled: external_zod_boolean().optional().default(false).describe("Enable MCP stdio proxy wrapper for gateway agent execution"),