@microsoft/m365-copilot-eval 1.3.0-preview.1 → 1.5.0-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/README.md +135 -100
  2. package/package.json +7 -4
  3. package/schema/CHANGELOG.md +7 -0
  4. package/schema/v1/eval-document.schema.json +143 -11
  5. package/schema/v1/examples/invalid/multi-turn-empty-turns.json +8 -0
  6. package/schema/v1/examples/invalid/multi-turn-has-both-prompt-and-turns.json +13 -0
  7. package/schema/v1/examples/invalid/multi-turn-missing-prompt.json +12 -0
  8. package/schema/v1/examples/invalid/multi-turn-typo-in-turn.json +13 -0
  9. package/schema/v1/examples/invalid/multi-turn-unknown-evaluator.json +15 -0
  10. package/schema/v1/examples/valid/mixed-single-and-multi-turn.json +30 -0
  11. package/schema/v1/examples/valid/multi-turn-output.json +59 -0
  12. package/schema/v1/examples/valid/multi-turn-simple.json +21 -0
  13. package/schema/v1/examples/valid/multi-turn-with-evaluators.json +34 -0
  14. package/schema/version.json +2 -2
  15. package/src/clients/cli/agent_selector.py +74 -0
  16. package/src/clients/cli/api_clients/A2A/__init__.py +3 -0
  17. package/src/clients/cli/api_clients/A2A/a2a_client.py +475 -0
  18. package/src/clients/cli/api_clients/__init__.py +3 -0
  19. package/src/clients/cli/api_clients/base_agent_client.py +77 -0
  20. package/src/clients/cli/cli_args.py +136 -0
  21. package/src/clients/cli/cli_logging/cli_logger.py +33 -0
  22. package/src/clients/cli/cli_logging/console_diagnostics.py +56 -2
  23. package/src/clients/cli/cli_logging/logging_utils.py +0 -1
  24. package/src/clients/cli/common.py +64 -0
  25. package/src/clients/cli/env_validator.py +73 -0
  26. package/src/clients/cli/evaluation_runner.py +653 -0
  27. package/src/clients/cli/evaluator_resolver.py +9 -6
  28. package/src/clients/cli/generate_report.py +272 -129
  29. package/src/clients/cli/main.py +157 -1174
  30. package/src/clients/cli/parallel_executor.py +57 -0
  31. package/src/clients/cli/prompt_loader.py +148 -0
  32. package/src/clients/cli/readme.md +9 -53
  33. package/src/clients/cli/requirements.txt +1 -1
  34. package/src/clients/cli/response_extractor.py +4 -603
  35. package/src/clients/cli/result_writer.py +488 -0
  36. package/src/clients/cli/retry_policy.py +52 -0
  37. package/src/clients/cli/samples/multiturn_example.json +35 -0
  38. package/src/clients/cli/throttle_gate.py +82 -0
  39. package/src/clients/node-js/bin/runevals.js +82 -20
  40. package/src/clients/node-js/config/default.js +12 -11
  41. package/src/clients/node-js/lib/agent-id.js +12 -0
  42. package/src/clients/node-js/lib/env-loader.js +14 -20
  43. package/src/clients/node-js/lib/eula-manager.js +78 -0
  44. package/src/clients/node-js/lib/progress.js +13 -11
@@ -8,8 +8,10 @@ import { ensurePythonRuntime, getCacheDir } from '../lib/python-runtime.js';
8
8
  import { ensureVenv, executePythonCli } from '../lib/venv-manager.js';
9
9
  import { getCacheStats, clearCache, formatBytes } from '../lib/cache-utils.js';
10
10
  import { checkPackageExpiry } from '../lib/expiry-check.js';
11
+ import { recordAcceptance, checkAcceptance } from '../lib/eula-manager.js';
11
12
  import { ProgressReporter } from '../lib/progress.js';
12
13
  import { _loadEnvFile as loadEnvFile, _loadUserEnvOverride } from '../lib/env-loader.js';
14
+ import { normalizeAgentId } from '../lib/agent-id.js';
13
15
 
14
16
  // Check package expiry (exits if expired, warns if close to expiry)
15
17
  checkPackageExpiry();
@@ -22,20 +24,13 @@ const packageJsonPath = path.join(__dirname, '..', '..', '..', '..', 'package.js
22
24
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
23
25
  const VERSION = packageJson.version;
24
26
 
27
+ const EULA_URL = 'https://aka.ms/evaltoolterms';
28
+
25
29
  // Path to Python CLI and requirements
26
30
  const PYTHON_CLI_DIR = path.join(__dirname, '..', '..', 'cli');
27
31
  const MAIN_SCRIPT = path.join(PYTHON_CLI_DIR, 'main.py');
28
32
  const REQUIREMENTS_FILE = path.join(PYTHON_CLI_DIR, 'requirements.txt');
29
33
 
30
- /**
31
- * Display usage terms notice
32
- * Called before running evaluations (but not for --init-only, cache commands, or --signout).
33
- * This notice is always displayed per legal requirements (FR-006).
34
- */
35
- function displayUsageTerms() {
36
- console.log('By using this tool, you agree to the Terms of Use: https://aka.ms/evaltoolterms\n');
37
- }
38
-
39
34
  /**
40
35
  * Set default environment constants that cannot be overridden
41
36
  * This ensures these values are always set regardless of .env files
@@ -43,10 +38,9 @@ function displayUsageTerms() {
43
38
  */
44
39
  async function setDefaultEnvironmentConstants() {
45
40
  const config = (await import('../config/default.js')).default;
46
- process.env.M365_EVAL_CLIENT_ID = config.copilotApi.m365EvalClientId;
47
- process.env.COPILOT_API_ENDPOINT = config.copilotApi.copilotApiEndpoint;
48
- process.env.COPILOT_SCOPES = config.copilotApi.copilotScopes;
49
- process.env.X_SCENARIO_HEADER = config.copilotApi.scenarioHeader;
41
+ process.env.WORK_IQ_A2A_ENDPOINT = config.workIq.a2aEndpoint;
42
+ process.env.WORK_IQ_A2A_CLIENT_ID = config.workIq.a2aClientId;
43
+ process.env.WORK_IQ_A2A_SCOPES = config.workIq.a2aScopes;
50
44
  }
51
45
 
52
46
  /**
@@ -176,21 +170,80 @@ async function main() {
176
170
  .option('--prompts-file <file>', 'JSON file with prompts and expected responses')
177
171
  .option('-o, --output <file>', 'output file (JSON, CSV, or HTML)')
178
172
  .option('-i, --interactive', 'interactive mode (enter prompts interactively)')
173
+ .option('--concurrency <number>', 'max prompts to process in parallel (1-5)')
179
174
  .option('--m365-agent-id <id>', 'agent ID (overrides env vars and auto-construction)')
180
175
  .option('--env <environment>', 'environment name (loads env/.env.<environment>)', 'local')
181
176
  .option('--init-only', 'only initialize Python environment, don\'t run evaluations')
182
177
  .option('--cache-info', 'show cache information and statistics')
183
178
  .option('--cache-clear', 'clear the cache (removes Python runtime and venv)')
184
179
  .option('--cache-dir', 'print the cache directory path')
185
- .option('--signout', 'sign out and clear cached authentication tokens');
180
+ .option('--signout', 'sign out and clear cached authentication tokens')
181
+ .action(() => {
182
+ // Default command — handled by the main flow below parseAsync()
183
+ });
184
+
185
+ program
186
+ .command('accept-eula')
187
+ .description('Accept the End User License Agreement (EULA)')
188
+ .action(async () => {
189
+ const config = (await import('../config/default.js')).default;
190
+ try {
191
+ await recordAcceptance(config.eula.version);
192
+ console.log('EULA has been accepted');
193
+ process.exit(0);
194
+ } catch (err) {
195
+ console.error(
196
+ `⚠️ Unable to persist EULA acceptance: ${err.message}`,
197
+ );
198
+ console.error(
199
+ 'Please ensure the directory ~/.m365-copilot-agent-evals/ is writable.',
200
+ );
201
+ process.exit(1);
202
+ }
203
+ });
186
204
 
187
- program.parse(process.argv);
205
+ await program.parseAsync(process.argv);
188
206
  const options = program.opts();
189
207
  const effectiveLogLevel = resolveLogLevel(options);
190
208
  const outputMode = deriveWrapperOutputMode(effectiveLogLevel);
191
209
  const wrapperVerbose = outputMode.verbose;
192
210
  const wrapperQuiet = outputMode.quiet;
193
211
 
212
+ // === EULA Enforcement Gate ===
213
+ // Block all commands until EULA is accepted (FR-010, FR-011).
214
+ // accept-eula subcommand, --help, and --version are already handled
215
+ // by Commander during program.parseAsync() and exit before reaching here.
216
+ const config = (await import('../config/default.js')).default;
217
+ const { accepted, stale } = await checkAcceptance(config.eula.version);
218
+ if (!accepted) {
219
+ if (stale) {
220
+ console.error(
221
+ `==============================================================
222
+ The End User License Agreement (EULA) has been updated.
223
+ Please review the updated terms at:
224
+ ${EULA_URL}
225
+
226
+ To accept the updated EULA, please execute the following command:
227
+
228
+ runevals accept-eula
229
+
230
+ ==============================================================`);
231
+ } else {
232
+ console.error(
233
+ `==============================================================
234
+ In order to use this tool you must accept the End User License
235
+ Agreement (EULA) found at:
236
+ ${EULA_URL}
237
+
238
+ To accept the EULA, please execute the following command:
239
+
240
+ runevals accept-eula
241
+
242
+ ==============================================================`);
243
+ }
244
+ process.exit(2);
245
+ }
246
+
194
247
  // Handle cache commands first (they don't need environment validation or config)
195
248
  if (options.cacheInfo) {
196
249
  console.log('🗂️ Cache Information\n');
@@ -251,8 +304,7 @@ async function main() {
251
304
 
252
305
  // === From here on, we're running actual evals - load config and env files ===
253
306
 
254
- displayUsageTerms();
255
- // Load build-time config
307
+ // Load build-time config (already loaded above for EULA check)
256
308
  await setDefaultEnvironmentConstants();
257
309
 
258
310
  // Load environment files
@@ -322,11 +374,17 @@ async function main() {
322
374
  }
323
375
  }
324
376
 
325
- // Resolve agent ID from environment if not explicitly provided via CLI flag
326
- // loadEnvFile already resolved aliases (e.g. M365_TITLE_ID) into M365_AGENT_ID
377
+ // Resolve agent ID from environment if not explicitly provided via CLI flag.
378
+ // loadEnvFile already resolved aliases (e.g. M365_TITLE_ID) into M365_AGENT_ID.
379
+ // Then normalize via shared helper and sync to process.env so downstream
380
+ // readers (and the python CLI) see the canonical form.
327
381
  if (!resolvedAgentId) {
328
382
  resolvedAgentId = envVars['M365_AGENT_ID'] || process.env.M365_AGENT_ID;
329
- if (resolvedAgentId && !wrapperQuiet) {
383
+ }
384
+ resolvedAgentId = normalizeAgentId(resolvedAgentId);
385
+ if (resolvedAgentId) {
386
+ process.env.M365_AGENT_ID = resolvedAgentId;
387
+ if (!options.m365AgentId && !wrapperQuiet) {
330
388
  console.log(`🤖 Agent ID: ${resolvedAgentId}`);
331
389
  }
332
390
  }
@@ -458,6 +516,10 @@ async function main() {
458
516
  if (options.prompts && options.prompts.length > 0) {
459
517
  pythonArgs.push('--prompts', ...options.prompts);
460
518
  }
519
+
520
+ if (options.concurrency !== undefined) {
521
+ pythonArgs.push('--concurrency', String(options.concurrency));
522
+ }
461
523
 
462
524
  if (options.expected && options.expected.length > 0) {
463
525
  pythonArgs.push('--expected', ...options.expected);
@@ -2,24 +2,25 @@
2
2
  * Build-time injected default values
3
3
  * DO NOT EDIT - This file is auto-generated during build.
4
4
  *
5
- * Generated: 2026-04-01T19:33:48.937Z
5
+ * Generated: 2026-04-30T18:03:21.788Z
6
6
  *
7
7
  * @copyright Microsoft Corporation. All rights reserved.
8
8
  * @license MIT
9
9
  */
10
10
 
11
11
  export default {
12
- copilotApi: {
13
- /** Microsoft M365 Evaluation Client ID */
14
- m365EvalClientId: "c678803a-d8e9-4d67-849c-3a8b2d7ba5d3",
12
+ workIq: {
13
+ /** Work IQ A2A Endpoint */
14
+ a2aEndpoint: "https://graph.microsoft.com/rp/workiq",
15
15
 
16
- /** Copilot OAuth Scopes */
17
- copilotScopes: "https://substrate.office.com/sydney/.default",
16
+ /** Work IQ A2A Client ID */
17
+ a2aClientId: "ba081686-5d24-4bc6-a0d6-d034ecffed87",
18
18
 
19
- /** Copilot API Endpoint */
20
- copilotApiEndpoint: "https://substrate.office.com/m365Copilot",
21
-
22
- /** Scenario Header for Copilot API */
23
- scenarioHeader: "agenticevaluation"
19
+ /** Work IQ A2A OAuth Scopes */
20
+ a2aScopes: "Sites.Read.All Mail.Read People.Read.All OnlineMeetingTranscript.Read.All Chat.Read ChannelMessage.Read.All ExternalItem.Read.All"
21
+ },
22
+ eula: {
23
+ /** EULA version string for acceptance tracking */
24
+ version: "2026-04-01"
24
25
  }
25
26
  };
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Normalize an M365 agent ID by appending '.declarativeAgent' when the value
3
+ * has no '.' segment. Returns the input unchanged when null/undefined/empty
4
+ * or when it already contains a dot.
5
+ *
6
+ * @param {string|null|undefined} id - The raw agent ID value.
7
+ * @returns {string|null|undefined} The normalized agent ID.
8
+ */
9
+ export function normalizeAgentId(id) {
10
+ if (!id) return id;
11
+ return id.includes('.') ? id : `${id}.declarativeAgent`;
12
+ }
@@ -3,15 +3,15 @@
3
3
  * Handles .env.local, .env.local.user, and other env file formats.
4
4
  */
5
5
 
6
+ import { parse as dotenvParse } from 'dotenv';
6
7
  import fs from 'fs';
7
8
  import path from 'path';
8
9
 
9
10
  // Keys that cannot be overridden from .env files (baked in via default.js config)
10
11
  const PROTECTED_KEYS = [
11
- 'M365_EVAL_CLIENT_ID',
12
- 'COPILOT_API_ENDPOINT',
13
- 'COPILOT_SCOPES',
14
- 'X_SCENARIO_HEADER',
12
+ 'WORK_IQ_A2A_ENDPOINT',
13
+ 'WORK_IQ_A2A_CLIENT_ID',
14
+ 'WORK_IQ_A2A_SCOPES',
15
15
  ];
16
16
 
17
17
  // Aliases resolved into M365_AGENT_ID (first match wins)
@@ -21,7 +21,8 @@ const AGENT_ID_ALIASES = [
21
21
 
22
22
  /**
23
23
  * Load environment variables from a .env-style file.
24
- * Skips blank lines and comments. Protected keys are ignored with a warning.
24
+ * Uses dotenv.parse() for standards-compliant parsing (handles quoted values,
25
+ * inline comments, escape sequences). Protected keys are ignored with a warning.
25
26
  * Malformed lines (no '=' separator) are skipped with a warning.
26
27
  * @param {string} envFilePath - Absolute path to the env file
27
28
  * @returns {Object|null} Parsed key-value pairs, or null if file cannot be read
@@ -34,32 +35,25 @@ export function _loadEnvFile(envFilePath) {
34
35
  const envVars = {};
35
36
  try {
36
37
  const content = fs.readFileSync(envFilePath, 'utf-8');
37
- const lines = content.split('\n');
38
38
 
39
- for (const line of lines) {
39
+ // Pre-scan for malformed lines (no '=') and emit warnings
40
+ for (const line of content.split('\n')) {
40
41
  const trimmedLine = line.trim();
41
42
  if (!trimmedLine || trimmedLine.startsWith('#')) {
42
43
  continue;
43
44
  }
44
-
45
- const eqIndex = trimmedLine.indexOf('=');
46
- if (eqIndex === -1) {
45
+ if (trimmedLine.indexOf('=') === -1) {
47
46
  console.warn(
48
47
  `⚠️ Ignoring malformed line in env file (missing '='): ${trimmedLine}`
49
48
  );
50
- continue;
51
49
  }
50
+ }
52
51
 
53
- const keyName = trimmedLine.slice(0, eqIndex).trim();
54
- const value = trimmedLine
55
- .slice(eqIndex + 1)
56
- .trim()
57
- .replace(/^(['"])(.*)\1$/, '$2');
58
-
59
- if (!keyName) {
60
- continue;
61
- }
52
+ // Use dotenv.parse() for standards-compliant .env parsing
53
+ // (handles quoted values, inline comments, escape sequences, export prefix)
54
+ const parsed = dotenvParse(content);
62
55
 
56
+ for (const [keyName, value] of Object.entries(parsed)) {
63
57
  if (PROTECTED_KEYS.includes(keyName)) {
64
58
  console.warn(
65
59
  `⚠️ Ignoring ${keyName} from .env file (using built-in value)`
@@ -0,0 +1,78 @@
1
+ /**
2
+ * EULA acceptance manager
3
+ *
4
+ * Manages reading and writing the EULA acceptance marker file at
5
+ * ~/.m365-copilot-agent-evals/eula-acceptance.json.
6
+ * This location is independent of the cache directory so acceptance
7
+ * survives --cache-clear operations.
8
+ */
9
+
10
+ import fs from 'node:fs/promises';
11
+ import path from 'node:path';
12
+ import os from 'node:os';
13
+
14
+ const EULA_DIR_NAME = '.m365-copilot-agent-evals';
15
+ const EULA_FILE_NAME = 'eula-acceptance.json';
16
+
17
+ /**
18
+ * Returns the EULA directory path (~/.m365-copilot-agent-evals/).
19
+ * @returns {string}
20
+ */
21
+ export function getEulaDir() {
22
+ return path.join(os.homedir(), EULA_DIR_NAME);
23
+ }
24
+
25
+ /**
26
+ * Returns the full path to the acceptance marker file.
27
+ * @returns {string}
28
+ */
29
+ export function getEulaFilePath() {
30
+ return path.join(getEulaDir(), EULA_FILE_NAME);
31
+ }
32
+
33
+ /**
34
+ * Write an acceptance marker for the given EULA version.
35
+ * Creates the directory if it doesn't exist.
36
+ * @param {string} version - EULA version string
37
+ * @returns {Promise<void>}
38
+ */
39
+ export async function recordAcceptance(version) {
40
+ const dir = getEulaDir();
41
+ await fs.mkdir(dir, { recursive: true });
42
+ const marker = { version, acceptedAt: new Date().toISOString() };
43
+ await fs.writeFile(
44
+ getEulaFilePath(),
45
+ JSON.stringify(marker, null, 2),
46
+ 'utf-8'
47
+ );
48
+ }
49
+
50
+ /**
51
+ * Check whether the EULA has been accepted for the required version.
52
+ * @param {string} requiredVersion - The version to check against
53
+ * @returns {Promise<{accepted: boolean, stale: boolean, marker: object|null}>}
54
+ */
55
+ export async function checkAcceptance(requiredVersion) {
56
+ const marker = await _readMarker();
57
+ if (!marker) return { accepted: false, stale: false, marker: null };
58
+ if (marker.version !== requiredVersion)
59
+ return { accepted: false, stale: true, marker };
60
+ return { accepted: true, stale: false, marker };
61
+ }
62
+
63
+ /**
64
+ * Read and parse the acceptance marker file.
65
+ * Returns null if the file is missing, unreadable, or malformed.
66
+ * Exported with _ prefix for unit testing.
67
+ * @returns {Promise<object|null>}
68
+ */
69
+ export async function _readMarker() {
70
+ try {
71
+ const raw = await fs.readFile(getEulaFilePath(), 'utf-8');
72
+ const parsed = JSON.parse(raw);
73
+ if (!parsed.version || !parsed.acceptedAt) return null;
74
+ return parsed;
75
+ } catch {
76
+ return null;
77
+ }
78
+ }
@@ -605,18 +605,20 @@ export class ProgressReporter {
605
605
 
606
606
  this.phaseStatuses.set(phaseId, 'failed');
607
607
 
608
- // Clear current line and display error
609
- if (this.isInteractive) {
610
- readline.clearLine(process.stdout, 0);
611
- readline.cursorTo(process.stdout, 0);
612
- }
608
+ if (!this.options.quiet) {
609
+ // Clear current line and display error
610
+ if (this.isInteractive) {
611
+ readline.clearLine(process.stdout, 0);
612
+ readline.cursorTo(process.stdout, 0);
613
+ }
613
614
 
614
- console.log(`\n❌ Failed: ${phase.name}`);
615
- console.log(`\nError: ${error.message}`);
616
- console.log(`\nSuggested actions:`);
617
- console.log(` • Check your internet connection`);
618
- console.log(` • If behind a proxy, set HTTP_PROXY/HTTPS_PROXY`);
619
- console.log(` • Run with --verbose for detailed output`);
615
+ console.error(`\n❌ Failed: ${phase.name}`);
616
+ console.error(`\nError: ${error.message}`);
617
+ console.error(`\nSuggested actions:`);
618
+ console.error(` • Check your internet connection`);
619
+ console.error(` • If behind a proxy, set HTTP_PROXY/HTTPS_PROXY`);
620
+ console.error(` • Run with --verbose for detailed output`);
621
+ }
620
622
 
621
623
  this.currentPhase = null;
622
624
  this.phaseStartTime = null;