@salesforce/plugin-agent 1.30.11 → 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -116,7 +116,7 @@ EXAMPLES
116
116
  $ sf agent activate --api-name Resort_Manager --target-org my-org
117
117
  ```
118
118
 
119
- _See code: [src/commands/agent/activate.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/activate.ts)_
119
+ _See code: [src/commands/agent/activate.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/activate.ts)_
120
120
 
121
121
  ## `sf agent create`
122
122
 
@@ -183,7 +183,7 @@ EXAMPLES
183
183
  $ sf agent create --name "Resort Manager" --spec specs/resortManagerAgent.yaml --preview
184
184
  ```
185
185
 
186
- _See code: [src/commands/agent/create.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/create.ts)_
186
+ _See code: [src/commands/agent/create.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/create.ts)_
187
187
 
188
188
  ## `sf agent deactivate`
189
189
 
@@ -223,7 +223,7 @@ EXAMPLES
223
223
  $ sf agent deactivate --api-name Resort_Manager --target-org my-org
224
224
  ```
225
225
 
226
- _See code: [src/commands/agent/deactivate.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/deactivate.ts)_
226
+ _See code: [src/commands/agent/deactivate.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/deactivate.ts)_
227
227
 
228
228
  ## `sf agent generate agent-spec`
229
229
 
@@ -330,7 +330,7 @@ EXAMPLES
330
330
  $ sf agent generate agent-spec --tone formal --agent-user resortmanager@myorg.com
331
331
  ```
332
332
 
333
- _See code: [src/commands/agent/generate/agent-spec.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/generate/agent-spec.ts)_
333
+ _See code: [src/commands/agent/generate/agent-spec.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/generate/agent-spec.ts)_
334
334
 
335
335
  ## `sf agent generate authoring-bundle`
336
336
 
@@ -407,7 +407,7 @@ EXAMPLES
407
407
  other-package-dir/main/default --target-org my-dev-org
408
408
  ```
409
409
 
410
- _See code: [src/commands/agent/generate/authoring-bundle.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/generate/authoring-bundle.ts)_
410
+ _See code: [src/commands/agent/generate/authoring-bundle.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/generate/authoring-bundle.ts)_
411
411
 
412
412
  ## `sf agent generate template`
413
413
 
@@ -455,7 +455,7 @@ EXAMPLES
455
455
  force-app/main/default/bots/My_Awesome_Agent/My_Awesome_Agent.bot-meta.xml --agent-version 1
456
456
  ```
457
457
 
458
- _See code: [src/commands/agent/generate/template.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/generate/template.ts)_
458
+ _See code: [src/commands/agent/generate/template.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/generate/template.ts)_
459
459
 
460
460
  ## `sf agent generate test-spec`
461
461
 
@@ -516,7 +516,7 @@ EXAMPLES
516
516
  force-app//main/default/aiEvaluationDefinitions/Resort_Manager_Tests.aiEvaluationDefinition-meta.xml
517
517
  ```
518
518
 
519
- _See code: [src/commands/agent/generate/test-spec.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/generate/test-spec.ts)_
519
+ _See code: [src/commands/agent/generate/test-spec.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/generate/test-spec.ts)_
520
520
 
521
521
  ## `sf agent preview`
522
522
 
@@ -589,7 +589,7 @@ EXAMPLES
589
589
  $ sf agent preview --use-live-actions --apex-debug --output-dir transcripts/my-preview
590
590
  ```
591
591
 
592
- _See code: [src/commands/agent/preview.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/preview.ts)_
592
+ _See code: [src/commands/agent/preview.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/preview.ts)_
593
593
 
594
594
  ## `sf agent preview end`
595
595
 
@@ -644,7 +644,7 @@ EXAMPLES
644
644
  $ sf agent preview end --authoring-bundle My_Local_Agent
645
645
  ```
646
646
 
647
- _See code: [src/commands/agent/preview/end.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/preview/end.ts)_
647
+ _See code: [src/commands/agent/preview/end.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/preview/end.ts)_
648
648
 
649
649
  ## `sf agent preview send`
650
650
 
@@ -702,7 +702,7 @@ EXAMPLES
702
702
  $ sf agent preview send --utterance "what can you help me with?" --authoring-bundle My_Local_Agent
703
703
  ```
704
704
 
705
- _See code: [src/commands/agent/preview/send.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/preview/send.ts)_
705
+ _See code: [src/commands/agent/preview/send.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/preview/send.ts)_
706
706
 
707
707
  ## `sf agent preview sessions`
708
708
 
@@ -735,7 +735,7 @@ EXAMPLES
735
735
  $ sf agent preview sessions
736
736
  ```
737
737
 
738
- _See code: [src/commands/agent/preview/sessions.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/preview/sessions.ts)_
738
+ _See code: [src/commands/agent/preview/sessions.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/preview/sessions.ts)_
739
739
 
740
740
  ## `sf agent preview start`
741
741
 
@@ -792,7 +792,7 @@ EXAMPLES
792
792
  $ sf agent preview start --api-name My_Published_Agent
793
793
  ```
794
794
 
795
- _See code: [src/commands/agent/preview/start.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/preview/start.ts)_
795
+ _See code: [src/commands/agent/preview/start.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/preview/start.ts)_
796
796
 
797
797
  ## `sf agent publish authoring-bundle`
798
798
 
@@ -841,7 +841,7 @@ EXAMPLES
841
841
  $ sf agent publish authoring-bundle --api-name MyAuthoringbundle --target-org my-dev-org
842
842
  ```
843
843
 
844
- _See code: [src/commands/agent/publish/authoring-bundle.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/publish/authoring-bundle.ts)_
844
+ _See code: [src/commands/agent/publish/authoring-bundle.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/publish/authoring-bundle.ts)_
845
845
 
846
846
  ## `sf agent test create`
847
847
 
@@ -896,7 +896,7 @@ EXAMPLES
896
896
  $ sf agent test create --spec specs/Resort_Manager-testSpec.yaml --api-name Resort_Manager_Test --preview
897
897
  ```
898
898
 
899
- _See code: [src/commands/agent/test/create.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/test/create.ts)_
899
+ _See code: [src/commands/agent/test/create.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/test/create.ts)_
900
900
 
901
901
  ## `sf agent test list`
902
902
 
@@ -931,7 +931,7 @@ EXAMPLES
931
931
  $ sf agent test list --target-org my-org
932
932
  ```
933
933
 
934
- _See code: [src/commands/agent/test/list.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/test/list.ts)_
934
+ _See code: [src/commands/agent/test/list.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/test/list.ts)_
935
935
 
936
936
  ## `sf agent test results`
937
937
 
@@ -997,7 +997,7 @@ FLAG DESCRIPTIONS
997
997
  expression when using custom evaluations.
998
998
  ```
999
999
 
1000
- _See code: [src/commands/agent/test/results.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/test/results.ts)_
1000
+ _See code: [src/commands/agent/test/results.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/test/results.ts)_
1001
1001
 
1002
1002
  ## `sf agent test resume`
1003
1003
 
@@ -1070,7 +1070,7 @@ FLAG DESCRIPTIONS
1070
1070
  expression when using custom evaluations.
1071
1071
  ```
1072
1072
 
1073
- _See code: [src/commands/agent/test/resume.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/test/resume.ts)_
1073
+ _See code: [src/commands/agent/test/resume.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/test/resume.ts)_
1074
1074
 
1075
1075
  ## `sf agent test run`
1076
1076
 
@@ -1144,7 +1144,7 @@ FLAG DESCRIPTIONS
1144
1144
  expression when using custom evaluations.
1145
1145
  ```
1146
1146
 
1147
- _See code: [src/commands/agent/test/run.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/test/run.ts)_
1147
+ _See code: [src/commands/agent/test/run.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/test/run.ts)_
1148
1148
 
1149
1149
  ## `sf agent validate authoring-bundle`
1150
1150
 
@@ -1191,6 +1191,6 @@ EXAMPLES
1191
1191
  $ sf agent validate authoring-bundle --api-name MyAuthoringBundle --target-org my-dev-org
1192
1192
  ```
1193
1193
 
1194
- _See code: [src/commands/agent/validate/authoring-bundle.ts](https://github.com/salesforcecli/plugin-agent/blob/1.30.11/src/commands/agent/validate/authoring-bundle.ts)_
1194
+ _See code: [src/commands/agent/validate/authoring-bundle.ts](https://github.com/salesforcecli/plugin-agent/blob/1.31.0/src/commands/agent/validate/authoring-bundle.ts)_
1195
1195
 
1196
1196
  <!-- commandsstop -->
@@ -0,0 +1,33 @@
1
+ import { SfCommand } from '@salesforce/sf-plugins-core';
2
+ import { Org } from '@salesforce/core';
3
/**
 * Structured result returned by the `AgentTestRunEval` command.
 */
export type RunEvalResult = {
    /** One entry per test result, carrying its id, a status string and the raw evaluation entries. */
    tests: {
        id: string;
        status: string;
        evaluations: unknown[];
    }[];
    /** Counters aggregated across every test in `tests`. */
    summary: {
        passed: number;
        failed: number;
        scored: number;
        errors: number;
    };
};
16
/**
 * Type declaration for the `AgentTestRunEval` command class, which resolves to a
 * {@link RunEvalResult}.
 */
export default class AgentTestRunEval extends SfCommand<RunEvalResult> {
    /** Short, one-line help text. */
    static readonly summary: string;
    /** Long-form help text. */
    static readonly description: string;
    /** Usage examples. */
    static readonly examples: string[];
    /** Release state marker of the command. */
    static state: string;
    /** Always `true` for this command. NOTE(review): presumably keeps it out of generated help; confirm against the CLI framework docs. */
    static readonly hidden = true;
    /** Flag definitions accepted by the command, keyed by flag name. */
    static readonly flags: {
        'target-org': import("@oclif/core/interfaces").OptionFlag<Org, import("@oclif/core/interfaces").CustomOptions>;
        'api-version': import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
        spec: import("@oclif/core/interfaces").OptionFlag<string, import("@oclif/core/interfaces").CustomOptions>;
        'api-name': import("@oclif/core/interfaces").OptionFlag<string | undefined, import("@oclif/core/interfaces").CustomOptions>;
        wait: import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
        'result-format': import("@oclif/core/interfaces").OptionFlag<"json" | "human" | "junit" | "tap", import("@oclif/core/interfaces").CustomOptions>;
        'batch-size': import("@oclif/core/interfaces").OptionFlag<number, import("@oclif/core/interfaces").CustomOptions>;
        'no-normalize': import("@oclif/core/interfaces").BooleanFlag<boolean>;
    };
    /** Executes the command and resolves with the structured result. */
    run(): Promise<RunEvalResult>;
}
@@ -0,0 +1,221 @@
1
+ /*
2
+ * Copyright 2026, Salesforce, Inc.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+ import { readFile } from 'node:fs/promises';
17
+ import { Flags, SfCommand } from '@salesforce/sf-plugins-core';
18
+ import { Messages } from '@salesforce/core';
19
+ import { normalizePayload, splitIntoBatches } from '../../../evalNormalizer.js';
20
+ import { formatResults } from '../../../evalFormatter.js';
21
+ import { resultFormatFlag } from '../../../flags.js';
22
+ import { isYamlTestSpec, parseTestSpec, translateTestSpec } from '../../../yamlSpecTranslator.js';
23
+ Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
24
+ const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.run-eval');
25
/**
 * Gathers the identifiers later used to build the evaluation API request headers.
 *
 * Requests `<instanceUrl>/services/oauth2/userinfo` through the org's connection
 * and reads `user_id` from the response.
 *
 * @param org Org object exposing `getConnection()` and `getOrgId()`.
 * @returns An object with `orgId`, `userId` and `instanceUrl`.
 */
async function getApiHeaders(org) {
    const connection = org.getConnection();
    const userInfoUrl = `${connection.instanceUrl}/services/oauth2/userinfo`;
    const profile = await connection.request(userInfoUrl);
    const orgId = org.getOrgId();
    const userId = profile.user_id;
    return { orgId, userId, instanceUrl: connection.instanceUrl };
}
34
/**
 * POSTs one evaluation payload to the Einstein evaluation endpoint.
 *
 * @param org Org object whose connection performs the request.
 * @param payload Value serialized with `JSON.stringify` as the request body.
 * @param headers Object with `orgId`, `userId` and `instanceUrl` (the shape `getApiHeaders` returns).
 * @returns Whatever the connection's `request` call resolves to.
 */
async function callEvalApi(org, payload, headers) {
    const connection = org.getConnection();
    const requestHeaders = {
        'Content-Type': 'application/json',
        'x-sfdc-core-tenant-id': `core/prod/${headers.orgId}`,
        'x-org-id': headers.orgId,
        'x-sfdc-core-instance-url': headers.instanceUrl,
        'x-sfdc-user-id': headers.userId,
        'x-client-feature-id': 'AIPlatformEvaluation',
        'x-sfdc-app-context': 'EinsteinGPT',
    };
    const body = JSON.stringify(payload);
    return connection.request({
        url: 'https://api.salesforce.com/einstein/evaluation/v1/tests',
        method: 'POST',
        headers: requestHeaders,
        body,
    });
}
51
/**
 * Looks up the BotDefinition whose DeveloperName equals `apiName`, then the
 * BotVersion with the highest VersionNumber for that definition.
 *
 * @param org Org object whose connection runs the SOQL queries.
 * @param apiName Developer name of the agent; treated as untrusted input.
 * @returns `{ agentId, versionId }` with the Ids of the matching records.
 * @throws The `error.agentNotFound` message error when no BotDefinition matches,
 *         or `error.agentVersionNotFound` when the definition has no BotVersion.
 */
async function resolveAgent(org, apiName) {
    const conn = org.getConnection();
    // Escape backslashes BEFORE single quotes. Escaping only the quote lets an
    // input such as `\'` become `\\'`, where the backslash is neutralized and
    // the quote terminates the SOQL string literal.
    const escapedApiName = apiName.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
    const botResult = await conn.query(`SELECT Id FROM BotDefinition WHERE DeveloperName = '${escapedApiName}'`);
    if (!botResult.records.length) {
        throw messages.createError('error.agentNotFound', [apiName]);
    }
    const agentId = botResult.records[0].Id;
    // Takes the most recent version by VersionNumber.
    // NOTE(review): the query has no status filter, so the newest version is
    // used whether or not it is active — confirm this is intended.
    const versionResult = await conn.query(`SELECT Id FROM BotVersion WHERE BotDefinitionId = '${agentId}' ORDER BY VersionNumber DESC LIMIT 1`);
    if (!versionResult.records.length) {
        throw messages.createError('error.agentVersionNotFound', [apiName]);
    }
    const versionId = versionResult.records[0].Id;
    return { agentId, versionId };
}
68
/**
 * Sends every batch to the evaluation API concurrently and concatenates the results.
 *
 * @param org Org object passed through to `getApiHeaders` and `callEvalApi`.
 * @param batches Array of batches; each batch becomes the `tests` array of one request.
 * @param log Callback that receives the progress message when there is more than one batch.
 * @returns A single flat array with the `results` of every batch, in batch order.
 */
async function executeBatches(org, batches, log) {
    // Headers are resolved once and shared by all requests.
    const headers = await getApiHeaders(org);
    const batchCount = batches.length;
    if (batchCount > 1) {
        log(messages.getMessage('info.batchProgress', [batchCount, batchCount, 'total']));
    }
    const runBatch = async (tests) => {
        const response = await callEvalApi(org, { tests }, headers);
        // A response without `results` contributes nothing.
        return response.results ?? [];
    };
    // All requests are started before any of them is awaited.
    const perBatch = await Promise.all(batches.map((batch) => runBatch(batch)));
    return perBatch.flat();
}
83
/**
 * Reduces a merged evaluation response to aggregate counters plus one summary per test.
 *
 * An evaluation counts as passed when `is_pass === true`, failed when
 * `is_pass === false`, and scored when it has a non-null `score` but a
 * null/undefined `is_pass`. A test is 'failed' when it has at least one failed
 * evaluation or at least one entry in `errors`; otherwise it is 'passed'.
 *
 * @param mergedResponse Object with an optional `results` array of test results.
 * @returns `{ summary, testSummaries }`.
 */
function buildResultSummary(mergedResponse) {
    const totals = { passed: 0, failed: 0, scored: 0, errors: 0 };
    const perTest = (mergedResponse.results ?? []).map((result) => {
        const id = result.id ?? 'unknown';
        const evaluations = result.evaluation_results ?? [];
        const errorCount = (result.errors ?? []).length;
        let passCount = 0;
        let failCount = 0;
        let scoreOnlyCount = 0;
        for (const evaluation of evaluations) {
            if (evaluation.is_pass === true) {
                passCount += 1;
            }
            else if (evaluation.is_pass === false) {
                failCount += 1;
            }
            else if (evaluation.is_pass == null && evaluation.score != null) {
                scoreOnlyCount += 1;
            }
        }
        totals.passed += passCount;
        totals.failed += failCount;
        totals.scored += scoreOnlyCount;
        totals.errors += errorCount;
        const isClean = failCount === 0 && errorCount === 0;
        return { id, status: isClean ? 'passed' : 'failed', evaluations };
    });
    return { summary: totals, testSummaries: perTest };
}
106
/**
 * Command that runs an evaluation spec against the Einstein evaluation API.
 *
 * The spec is either a YAML test spec (translated to an evaluation payload) or
 * a JSON evaluation payload. It is optionally bound to an agent by API name,
 * normalized, split into batches, executed, formatted and summarized.
 */
export default class AgentTestRunEval extends SfCommand {
    static summary = messages.getMessage('summary');
    static description = messages.getMessage('description');
    static examples = messages.getMessages('examples');
    static state = 'beta';
    static hidden = true;
    static flags = {
        'target-org': Flags.requiredOrg(),
        'api-version': Flags.orgApiVersion(),
        spec: Flags.string({
            char: 's',
            required: true,
            summary: messages.getMessage('flags.spec.summary'),
            allowStdin: true,
        }),
        'api-name': Flags.string({
            char: 'n',
            summary: messages.getMessage('flags.api-name.summary'),
        }),
        // NOTE(review): `wait` is declared but never read by run() — confirm whether it is still needed.
        wait: Flags.integer({
            char: 'w',
            default: 10,
            summary: messages.getMessage('flags.wait.summary'),
        }),
        'result-format': resultFormatFlag(),
        'batch-size': Flags.integer({
            default: 5,
            summary: messages.getMessage('flags.batch-size.summary'),
        }),
        'no-normalize': Flags.boolean({
            default: false,
            summary: messages.getMessage('flags.no-normalize.summary'),
        }),
    };

    /**
     * Runs the evaluation end to end.
     *
     * Sets `process.exitCode` to 1 when any evaluation failed or any test reported errors.
     *
     * @returns `{ tests, summary }` as produced by `buildResultSummary`.
     * @throws The `error.invalidPayload` message error when the spec is not valid JSON
     *         or has no non-empty `tests` array; errors from `resolveAgent` and from
     *         reading the spec file propagate unchanged.
     */
    async run() {
        const { flags } = await this.parse(AgentTestRunEval);
        const org = flags['target-org'];

        // 1. Get spec content. `--spec` holds either inline content (including
        //    stdin via allowStdin) or a file path: if the value is neither a YAML
        //    test spec nor parseable JSON, it is treated as a path and read.
        let rawContent = flags.spec;
        try {
            if (!isYamlTestSpec(rawContent)) {
                JSON.parse(rawContent);
            }
        }
        catch {
            rawContent = await readFile(flags.spec, 'utf-8');
        }

        // 2. Detect format and parse.
        let payload;
        let agentApiName = flags['api-name'];
        if (isYamlTestSpec(rawContent)) {
            // YAML TestSpec detected — translate to EvalPayload.
            const spec = parseTestSpec(rawContent);
            payload = translateTestSpec(spec);
            // Infer api-name from subjectName if not explicitly provided.
            if (!agentApiName) {
                agentApiName = spec.subjectName;
                this.log(messages.getMessage('info.yamlDetected', [spec.subjectName, spec.testCases.length.toString()]));
            }
        }
        else {
            // JSON EvalPayload.
            try {
                payload = JSON.parse(rawContent);
            }
            catch (e) {
                // Anything can be thrown; only Error instances are guaranteed a message.
                throw messages.createError('error.invalidPayload', [e instanceof Error ? e.message : String(e)]);
            }
        }

        // `payload` may legitimately parse to null (the JSON text `null`), so it is
        // checked before its properties are touched.
        if (payload == null || !Array.isArray(payload.tests) || payload.tests.length === 0) {
            throw messages.createError('error.invalidPayload', ['missing or empty "tests" array']);
        }

        // 3. If --api-name was given (or inferred from YAML), resolve IDs and inject
        //    them into every session-creation step.
        if (agentApiName) {
            const { agentId, versionId } = await resolveAgent(org, agentApiName);
            for (const test of payload.tests) {
                // The payload is unvalidated user input: tolerate tests without `steps`.
                for (const step of test?.steps ?? []) {
                    if (step?.type === 'agent.create_session') {
                        // eslint-disable-next-line camelcase
                        step.agent_id = agentId;
                        // eslint-disable-next-line camelcase
                        step.agent_version_id = versionId;
                    }
                }
            }
        }

        // 4. Normalize payload unless --no-normalize.
        if (!flags['no-normalize']) {
            payload = normalizePayload(payload);
        }

        // 5. Clamp batch size to the range 1..5.
        const batchSize = Math.min(Math.max(flags['batch-size'], 1), 5);

        // 6. Split into batches.
        const batches = splitIntoBatches(payload.tests, batchSize);

        // 7. Execute batches and merge their results into a single response.
        const allResults = await executeBatches(org, batches, (msg) => this.log(msg));
        const mergedResponse = { results: allResults };

        // 8. Format output.
        const resultFormat = (flags['result-format'] ?? 'human');
        const formatted = formatResults(mergedResponse, resultFormat);
        this.log(formatted);

        // 9. Build structured result for --json.
        const { summary, testSummaries } = buildResultSummary(mergedResponse);

        // 10. Signal failure through the exit code without throwing, so the
        //     structured result is still returned.
        if (summary.failed > 0 || summary.errors > 0) {
            process.exitCode = 1;
        }
        return { tests: testSummaries, summary };
    }
}
221
+ //# sourceMappingURL=run-eval.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run-eval.js","sourceRoot":"","sources":["../../../../src/commands/agent/test/run-eval.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,QAAQ,EAAO,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAoB,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAClG,OAAO,EAAwB,aAAa,EAAqB,MAAM,2BAA2B,CAAC;AACnG,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAElG,QAAQ,CAAC,kCAAkC,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC7D,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,0BAA0B,EAAE,qBAAqB,CAAC,CAAC;AAe1F,KAAK,UAAU,aAAa,CAAC,GAAQ;IACnC,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAsB,GAAG,IAAI,CAAC,WAAW,2BAA2B,CAAC,CAAC;IAEzG,OAAO;QACL,KAAK,EAAE,GAAG,CAAC,QAAQ,EAAE;QACrB,MAAM,EAAE,QAAQ,CAAC,OAAO;QACxB,WAAW,EAAE,IAAI,CAAC,WAAW;KAC9B,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,WAAW,CAAC,GAAQ,EAAE,OAAoB,EAAE,OAAmB;IAC5E,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,EAAE,CAAC;IAEjC,OAAO,IAAI,CAAC,OAAO,CAA0B;QAC3C,GAAG,EAAE,yDAAyD;QAC9D,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACP,cAAc,EAAE,kBAAkB;YAClC,uBAAuB,EAAE,aAAa,OAAO,CAAC,KAAK,EAAE;YACrD,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,0BAA0B,EAAE,OAAO,CAAC,WAAW;YAC/C,gBAAgB,EAAE,OAAO,CAAC,MAAM;YAChC,qBAAqB,EAAE,sBAAsB;YAC7C,oBAAoB,EAAE,aAAa;SACpC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,GAAQ,EAAE,OAAe;IACnD,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,EAAE,CAAC;IAEjC,iDAAiD;IACjD,MAAM,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;IAEpD,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,KAAK,CAChC,uDAAuD,cAAc,GAAG,CACzE,CAAC;IACF,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;QAC9B,MAAM,QAAQ,CAAC,WAAW,CAAC,qBAAqB,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IAC/D,CAAC;IACD,MAAM,OAAO,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAExC,2CAA2C;IAC3C,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,KAAK,CACpC,sDAAsD,OAAO,wCAAwC,CACtG,CAAC;IACF,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;QAClC,MAAM,QAAQ,
CAAC,WAAW,CAAC,4BAA4B,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,CAAC;IACD,MAAM,SAAS,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAE9C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AAChC,CAAC;AAED,KAAK,UAAU,cAAc,CAC3B,GAAQ,EACR,OAAoC,EACpC,GAA0B;IAE1B,0DAA0D;IAC1D,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,GAAG,CAAC,CAAC;IAEzC,yDAAyD;IACzD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,oBAAoB,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;IAC5F,CAAC;IAED,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAChD,MAAM,YAAY,GAAgB,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;QACnD,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAChE,OAAO,SAAS,CAAC,OAAO,IAAI,EAAE,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;IACtD,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC;AAC7B,CAAC;AAED,SAAS,kBAAkB,CAAC,cAA+B;IAIzD,MAAM,OAAO,GAAG,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IAC/D,MAAM,aAAa,GAAkE,EAAE,CAAC;IAExF,KAAK,MAAM,UAAU,IAAI,cAAc,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC;QACtD,MAAM,EAAE,GAAG,UAAqC,CAAC;QACjD,MAAM,MAAM,GAAI,EAAE,CAAC,EAAa,IAAI,SAAS,CAAC;QAC9C,MAAM,WAAW,GAAI,EAAE,CAAC,kBAAqD,IAAI,EAAE,CAAC;QACpF,MAAM,UAAU,GAAI,EAAE,CAAC,MAAoB,IAAI,EAAE,CAAC;QAElD,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,CAAC,MAAM,CAAC;QACpE,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,KAAK,CAAC,CAAC,MAAM,CAAC;QACrE,MAAM,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,OAAO,IAAI,IAAI,CAAC,CAAC,MAAM,CAAC;QAEtF,OAAO,CAAC,MAAM,IAAI,MAAM,CAAC;QACzB,OAAO,CAAC,MAAM,IAAI,MAAM,CAAC;QACzB,OAAO,CAAC,MAAM,IAAI,MAAM,CAAC;QACzB,OAAO,CAAC,MAAM,IAAI,UAAU,CAAC,MAAM,CAAC;QAEpC,aAAa,CAAC,IAAI,CAAC;YACjB,EAAE,EAAE,MAAM;YACV,MAAM,EAAE,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ;YACjE,WAAW,EAAE,WAAW;SACzB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,a
AAa,EAAE,CAAC;AACpC,CAAC;AAED,MAAM,CAAC,OAAO,OAAO,gBAAiB,SAAQ,SAAwB;IAC7D,MAAM,CAAU,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;IACzD,MAAM,CAAU,WAAW,GAAG,QAAQ,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;IACjE,MAAM,CAAU,QAAQ,GAAG,QAAQ,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;IAC5D,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC;IACtB,MAAM,CAAU,MAAM,GAAG,IAAI,CAAC;IAE9B,MAAM,CAAU,KAAK,GAAG;QAC7B,YAAY,EAAE,KAAK,CAAC,WAAW,EAAE;QACjC,aAAa,EAAE,KAAK,CAAC,aAAa,EAAE;QACpC,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC;YACjB,IAAI,EAAE,GAAG;YACT,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,QAAQ,CAAC,UAAU,CAAC,oBAAoB,CAAC;YAClD,UAAU,EAAE,IAAI;SACjB,CAAC;QACF,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC;YACvB,IAAI,EAAE,GAAG;YACT,OAAO,EAAE,QAAQ,CAAC,UAAU,CAAC,wBAAwB,CAAC;SACvD,CAAC;QACF,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC;YAClB,IAAI,EAAE,GAAG;YACT,OAAO,EAAE,EAAE;YACX,OAAO,EAAE,QAAQ,CAAC,UAAU,CAAC,oBAAoB,CAAC;SACnD,CAAC;QACF,eAAe,EAAE,gBAAgB,EAAE;QACnC,YAAY,EAAE,KAAK,CAAC,OAAO,CAAC;YAC1B,OAAO,EAAE,CAAC;YACV,OAAO,EAAE,QAAQ,CAAC,UAAU,CAAC,0BAA0B,CAAC;SACzD,CAAC;QACF,cAAc,EAAE,KAAK,CAAC,OAAO,CAAC;YAC5B,OAAO,EAAE,KAAK;YACd,OAAO,EAAE,QAAQ,CAAC,UAAU,CAAC,4BAA4B,CAAC;SAC3D,CAAC;KACH,CAAC;IAEK,KAAK,CAAC,GAAG;QACd,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QACrD,MAAM,GAAG,GAAG,KAAK,CAAC,YAAY,CAAC,CAAC;QAEhC,0DAA0D;QAC1D,IAAI,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC;QAE5B,oFAAoF;QACpF,IAAI,CAAC;YACH,sDAAsD;YACtD,wEAAwE;YACxE,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,EAAE,CAAC;gBAChC,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YACzB,CAAC;YACD,qCAAqC;QACvC,CAAC;QAAC,MAAM,CAAC;YACP,mDAAmD;YACnD,UAAU,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACnD,CAAC;QAED,6BAA6B;QAC7B,IAAI,OAAoB,CAAC;QACzB,IAAI,YAAY,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC;QAErC,IAAI,cAAc,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,oDAAoD;YACpD,MAAM,IAAI,GAAG,aAAa,CAAC,UAAU,CAAC,CAAC;YACvC,OAAO,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAElC,kEAAkE;YAClE,IAAI,CAAC,YAAY,EAAE,CAAC;gBAClB,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC;gBAChC,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,mBAAmB,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC
,CAAC,CAAC;YAC3G,CAAC;QACH,CAAC;aAAM,CAAC;YACN,uCAAuC;YACvC,IAAI,CAAC;gBACH,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAgB,CAAC;YAClD,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,MAAM,QAAQ,CAAC,WAAW,CAAC,sBAAsB,EAAE,CAAE,CAAW,CAAC,OAAO,CAAC,CAAC,CAAC;YAC7E,CAAC;QACH,CAAC;QAED,IAAI,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClF,MAAM,QAAQ,CAAC,WAAW,CAAC,sBAAsB,EAAE,CAAC,gCAAgC,CAAC,CAAC,CAAC;QACzF,CAAC;QAED,wEAAwE;QACxE,IAAI,YAAY,EAAE,CAAC;YACjB,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC;YACrE,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;gBACjC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;oBAC9B,IAAI,IAAI,CAAC,IAAI,KAAK,sBAAsB,EAAE,CAAC;wBACzC,qCAAqC;wBACrC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;wBACxB,qCAAqC;wBACrC,IAAI,CAAC,gBAAgB,GAAG,SAAS,CAAC;oBACpC,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE,CAAC;YAC3B,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;QACtC,CAAC;QAED,sBAAsB;QACtB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEhE,wBAAwB;QACxB,MAAM,OAAO,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QAE3D,qBAAqB;QACrB,MAAM,UAAU,GAAG,MAAM,cAAc,CAAC,GAAG,EAAE,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;QAE9E,MAAM,cAAc,GAAoB,EAAE,OAAO,EAAE,UAAwC,EAAE,CAAC;QAE9F,mBAAmB;QACnB,MAAM,YAAY,GAAG,CAAC,KAAK,CAAC,eAAe,CAAC,IAAI,OAAO,CAAiB,CAAC;QACzE,MAAM,SAAS,GAAG,aAAa,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAC9D,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAEpB,yCAAyC;QACzC,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,kBAAkB,CAAC,cAAc,CAAC,CAAC;QAEtE,yCAAyC;QACzC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7C,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACvB,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC;IAC3C,CAAC"}
@@ -0,0 +1,30 @@
1
/** Output formats accepted by {@link formatResults}. */
export type ResultFormat = 'human' | 'json' | 'junit' | 'tap';

/** One output entry attached to a test result; every field is optional. */
type EvalOutput = {
    type?: string;
    id?: string;
    session_id?: string;
    response?: unknown;
};

/** Result of a single evaluation: an optional score and/or pass flag plus compared values. */
type EvalResult = {
    id?: string;
    score?: number | null;
    is_pass?: boolean | null;
    actual_value?: string;
    expected_value?: string;
    error_message?: string;
};

/** Error entry reported for a test. */
type TestError = {
    id?: string;
    error_message?: string;
};

/** Result of one test: its outputs, evaluation results and errors. */
type TestResult = {
    id?: string;
    outputs?: EvalOutput[];
    evaluation_results?: EvalResult[];
    errors?: TestError[];
};

/** Response envelope holding the list of test results. */
export type EvalApiResponse = {
    results?: TestResult[];
};

/**
 * Renders an evaluation response as a string in the requested format.
 *
 * @param results Response to render.
 * @param format Target output format.
 * @returns The formatted text.
 */
export declare function formatResults(results: EvalApiResponse, format: ResultFormat): string;
export {};