@microsoft/m365-copilot-eval 1.2.1-preview.1 → 1.4.0-preview.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -101
- package/package.json +7 -4
- package/schema/CHANGELOG.md +8 -0
- package/schema/v1/eval-document.schema.json +256 -8
- package/schema/v1/examples/invalid/multi-turn-empty-turns.json +8 -0
- package/schema/v1/examples/invalid/multi-turn-has-both-prompt-and-turns.json +13 -0
- package/schema/v1/examples/invalid/multi-turn-missing-prompt.json +12 -0
- package/schema/v1/examples/invalid/multi-turn-typo-in-turn.json +13 -0
- package/schema/v1/examples/invalid/multi-turn-unknown-evaluator.json +15 -0
- package/schema/v1/examples/valid/comprehensive.json +27 -2
- package/schema/v1/examples/valid/mixed-single-and-multi-turn.json +30 -0
- package/schema/v1/examples/valid/multi-turn-output.json +59 -0
- package/schema/v1/examples/valid/multi-turn-simple.json +21 -0
- package/schema/v1/examples/valid/multi-turn-with-evaluators.json +34 -0
- package/schema/version.json +2 -2
- package/src/clients/cli/api_clients/A2A/__init__.py +3 -0
- package/src/clients/cli/api_clients/A2A/a2a_client.py +456 -0
- package/src/clients/cli/api_clients/REST/__init__.py +3 -0
- package/src/clients/cli/api_clients/REST/sydney_client.py +204 -0
- package/src/clients/cli/api_clients/__init__.py +3 -0
- package/src/clients/cli/api_clients/base_agent_client.py +78 -0
- package/src/clients/cli/cli_logging/__init__.py +0 -0
- package/src/clients/cli/cli_logging/console_diagnostics.py +107 -0
- package/src/clients/cli/cli_logging/logging_utils.py +144 -0
- package/src/clients/cli/common.py +62 -0
- package/src/clients/cli/custom_evaluators/CitationsEvaluator.py +3 -3
- package/src/clients/cli/custom_evaluators/ExactMatchEvaluator.py +11 -11
- package/src/clients/cli/custom_evaluators/PartialMatchEvaluator.py +1 -11
- package/src/clients/cli/evaluator_resolver.py +150 -0
- package/src/clients/cli/generate_report.py +347 -184
- package/src/clients/cli/main.py +1288 -481
- package/src/clients/cli/parallel_executor.py +57 -0
- package/src/clients/cli/readme.md +14 -7
- package/src/clients/cli/requirements.txt +1 -1
- package/src/clients/cli/response_extractor.py +30 -14
- package/src/clients/cli/retry_policy.py +52 -0
- package/src/clients/cli/samples/multiturn_example.json +35 -0
- package/src/clients/cli/throttle_gate.py +82 -0
- package/src/clients/node-js/bin/runevals.js +134 -41
- package/src/clients/node-js/config/default.js +5 -1
- package/src/clients/node-js/lib/agent-id.js +12 -0
- package/src/clients/node-js/lib/env-loader.js +11 -16
- package/src/clients/node-js/lib/eula-manager.js +78 -0
- package/src/clients/node-js/lib/progress.js +13 -11
|
@@ -8,8 +8,10 @@ import { ensurePythonRuntime, getCacheDir } from '../lib/python-runtime.js';
|
|
|
8
8
|
import { ensureVenv, executePythonCli } from '../lib/venv-manager.js';
|
|
9
9
|
import { getCacheStats, clearCache, formatBytes } from '../lib/cache-utils.js';
|
|
10
10
|
import { checkPackageExpiry } from '../lib/expiry-check.js';
|
|
11
|
+
import { recordAcceptance, checkAcceptance } from '../lib/eula-manager.js';
|
|
11
12
|
import { ProgressReporter } from '../lib/progress.js';
|
|
12
13
|
import { _loadEnvFile as loadEnvFile, _loadUserEnvOverride } from '../lib/env-loader.js';
|
|
14
|
+
import { normalizeAgentId } from '../lib/agent-id.js';
|
|
13
15
|
|
|
14
16
|
// Check package expiry (exits if expired, warns if close to expiry)
|
|
15
17
|
checkPackageExpiry();
|
|
@@ -22,20 +24,13 @@ const packageJsonPath = path.join(__dirname, '..', '..', '..', '..', 'package.js
|
|
|
22
24
|
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
|
|
23
25
|
const VERSION = packageJson.version;
|
|
24
26
|
|
|
27
|
+
const EULA_URL = 'https://aka.ms/evaltoolterms';
|
|
28
|
+
|
|
25
29
|
// Path to Python CLI and requirements
|
|
26
30
|
const PYTHON_CLI_DIR = path.join(__dirname, '..', '..', 'cli');
|
|
27
31
|
const MAIN_SCRIPT = path.join(PYTHON_CLI_DIR, 'main.py');
|
|
28
32
|
const REQUIREMENTS_FILE = path.join(PYTHON_CLI_DIR, 'requirements.txt');
|
|
29
33
|
|
|
30
|
-
/**
|
|
31
|
-
* Display usage terms notice
|
|
32
|
-
* Called before running evaluations (but not for --init-only, cache commands, or --signout)
|
|
33
|
-
* This notice MUST be displayed even in quiet mode per legal requirements (FR-006)
|
|
34
|
-
*/
|
|
35
|
-
function displayUsageTerms() {
|
|
36
|
-
console.log('By using this tool, you agree to the Terms of Use: https://aka.ms/evaltoolterms\n');
|
|
37
|
-
}
|
|
38
|
-
|
|
39
34
|
/**
|
|
40
35
|
* Set default environment constants that cannot be overridden
|
|
41
36
|
* This ensures these values are always set regardless of .env files
|
|
@@ -52,10 +47,10 @@ async function setDefaultEnvironmentConstants() {
|
|
|
52
47
|
/**
|
|
53
48
|
* Check for required environment variables and provide helpful guidance.
|
|
54
49
|
* @param {string} envName - Environment name (e.g. 'dev')
|
|
55
|
-
* @param {boolean} [
|
|
50
|
+
* @param {boolean} [suppressOutput=false] - Suppress guidance output
|
|
56
51
|
* @returns {boolean} true if all required vars are present
|
|
57
52
|
*/
|
|
58
|
-
function validateEnvironmentVariables(envName,
|
|
53
|
+
function validateEnvironmentVariables(envName, suppressOutput = false) {
|
|
59
54
|
const required = [
|
|
60
55
|
{ key: 'TENANT_ID', description: 'Your Tenant ID' },
|
|
61
56
|
{ key: 'AZURE_AI_OPENAI_ENDPOINT', description: 'Azure OpenAI endpoint URL' },
|
|
@@ -72,8 +67,8 @@ function validateEnvironmentVariables(envName, quiet = false) {
|
|
|
72
67
|
return true; // All required vars present
|
|
73
68
|
}
|
|
74
69
|
|
|
75
|
-
// Show error with helpful guidance
|
|
76
|
-
if (!
|
|
70
|
+
// Show error with helpful guidance unless output is suppressed.
|
|
71
|
+
if (!suppressOutput) {
|
|
77
72
|
console.error('\n❌ Missing required environment variables:\n');
|
|
78
73
|
|
|
79
74
|
const envFile = envName ? `env/.env.${envName}` : '.env.local or env/.env.local';
|
|
@@ -96,6 +91,34 @@ function validateEnvironmentVariables(envName, quiet = false) {
|
|
|
96
91
|
return false;
|
|
97
92
|
}
|
|
98
93
|
|
|
94
|
+
/**
|
|
95
|
+
* Resolve canonical log level for Python CLI forwarding.
|
|
96
|
+
*/
|
|
97
|
+
function resolveLogLevel(options) {
|
|
98
|
+
const allowedLevels = new Set(['debug', 'info', 'warning', 'error']);
|
|
99
|
+
const rawLogLevel =
|
|
100
|
+
options.logLevel === true || options.logLevel === undefined
|
|
101
|
+
? undefined
|
|
102
|
+
: String(options.logLevel).toLowerCase();
|
|
103
|
+
const effectiveLogLevel = rawLogLevel || 'info';
|
|
104
|
+
|
|
105
|
+
if (!allowedLevels.has(effectiveLogLevel)) {
|
|
106
|
+
console.error(
|
|
107
|
+
`❌ Invalid --log-level value: ${effectiveLogLevel}. Supported values: debug, info, warning, error.`
|
|
108
|
+
);
|
|
109
|
+
process.exit(2);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
return effectiveLogLevel;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
function deriveWrapperOutputMode(effectiveLogLevel) {
|
|
116
|
+
return {
|
|
117
|
+
verbose: effectiveLogLevel === 'debug',
|
|
118
|
+
quiet: effectiveLogLevel === 'warning' || effectiveLogLevel === 'error',
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
|
|
99
122
|
/**
|
|
100
123
|
* Initialize the Python environment (download, venv, pip install)
|
|
101
124
|
* @param {boolean} [verbose=false] - Enable verbose output
|
|
@@ -126,7 +149,7 @@ async function initializePythonEnvironment(verbose = false, quiet = false) {
|
|
|
126
149
|
console.error(' - Check your internet connection');
|
|
127
150
|
console.error(' - If behind a proxy, set HTTP_PROXY/HTTPS_PROXY environment variables');
|
|
128
151
|
console.error(' - For SSL issues, set NODE_EXTRA_CA_CERTS or PIP_CERT');
|
|
129
|
-
console.error(' - Run with --
|
|
152
|
+
console.error(' - Run with --log-level debug for detailed output');
|
|
130
153
|
|
|
131
154
|
process.exit(1);
|
|
132
155
|
}
|
|
@@ -142,23 +165,85 @@ async function main() {
|
|
|
142
165
|
.name('runevals')
|
|
143
166
|
.description('M365 Copilot Agent Evaluations CLI - Zero-config Python evaluation tool')
|
|
144
167
|
.version(VERSION)
|
|
145
|
-
.option('-
|
|
146
|
-
.option('-q, --quiet', 'quiet mode (minimal output)')
|
|
168
|
+
.option('--log-level [level]', 'log level for diagnostics: debug, info, warning, error (bare flag resolves to info)')
|
|
147
169
|
.option('--prompts <prompts...>', 'prompts to evaluate')
|
|
148
170
|
.option('--expected <responses...>', 'expected responses')
|
|
149
171
|
.option('--prompts-file <file>', 'JSON file with prompts and expected responses')
|
|
150
172
|
.option('-o, --output <file>', 'output file (JSON, CSV, or HTML)')
|
|
151
173
|
.option('-i, --interactive', 'interactive mode (enter prompts interactively)')
|
|
174
|
+
.option('--concurrency <number>', 'max prompts to process in parallel (1-5)')
|
|
152
175
|
.option('--m365-agent-id <id>', 'agent ID (overrides env vars and auto-construction)')
|
|
153
176
|
.option('--env <environment>', 'environment name (loads env/.env.<environment>)', 'local')
|
|
154
177
|
.option('--init-only', 'only initialize Python environment, don\'t run evaluations')
|
|
155
178
|
.option('--cache-info', 'show cache information and statistics')
|
|
156
179
|
.option('--cache-clear', 'clear the cache (removes Python runtime and venv)')
|
|
157
180
|
.option('--cache-dir', 'print the cache directory path')
|
|
158
|
-
.option('--signout', 'sign out and clear cached authentication tokens')
|
|
181
|
+
.option('--signout', 'sign out and clear cached authentication tokens')
|
|
182
|
+
.action(() => {
|
|
183
|
+
// Default command — handled by the main flow below parseAsync()
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
program
|
|
187
|
+
.command('accept-eula')
|
|
188
|
+
.description('Accept the End User License Agreement (EULA)')
|
|
189
|
+
.action(async () => {
|
|
190
|
+
const config = (await import('../config/default.js')).default;
|
|
191
|
+
try {
|
|
192
|
+
await recordAcceptance(config.eula.version);
|
|
193
|
+
console.log('EULA has been accepted');
|
|
194
|
+
process.exit(0);
|
|
195
|
+
} catch (err) {
|
|
196
|
+
console.error(
|
|
197
|
+
`⚠️ Unable to persist EULA acceptance: ${err.message}`,
|
|
198
|
+
);
|
|
199
|
+
console.error(
|
|
200
|
+
'Please ensure the directory ~/.m365-copilot-agent-evals/ is writable.',
|
|
201
|
+
);
|
|
202
|
+
process.exit(1);
|
|
203
|
+
}
|
|
204
|
+
});
|
|
159
205
|
|
|
160
|
-
program.
|
|
206
|
+
await program.parseAsync(process.argv);
|
|
161
207
|
const options = program.opts();
|
|
208
|
+
const effectiveLogLevel = resolveLogLevel(options);
|
|
209
|
+
const outputMode = deriveWrapperOutputMode(effectiveLogLevel);
|
|
210
|
+
const wrapperVerbose = outputMode.verbose;
|
|
211
|
+
const wrapperQuiet = outputMode.quiet;
|
|
212
|
+
|
|
213
|
+
// === EULA Enforcement Gate ===
|
|
214
|
+
// Block all commands until EULA is accepted (FR-010, FR-011).
|
|
215
|
+
// accept-eula subcommand, --help, and --version are already handled
|
|
216
|
+
// by Commander during program.parseAsync() and exit before reaching here.
|
|
217
|
+
const config = (await import('../config/default.js')).default;
|
|
218
|
+
const { accepted, stale } = await checkAcceptance(config.eula.version);
|
|
219
|
+
if (!accepted) {
|
|
220
|
+
if (stale) {
|
|
221
|
+
console.error(
|
|
222
|
+
`==============================================================
|
|
223
|
+
The End User License Agreement (EULA) has been updated.
|
|
224
|
+
Please review the updated terms at:
|
|
225
|
+
${EULA_URL}
|
|
226
|
+
|
|
227
|
+
To accept the updated EULA, please execute the following command:
|
|
228
|
+
|
|
229
|
+
runevals accept-eula
|
|
230
|
+
|
|
231
|
+
==============================================================`);
|
|
232
|
+
} else {
|
|
233
|
+
console.error(
|
|
234
|
+
`==============================================================
|
|
235
|
+
In order to use this tool you must accept the End User License
|
|
236
|
+
Agreement (EULA) found at:
|
|
237
|
+
${EULA_URL}
|
|
238
|
+
|
|
239
|
+
To accept the EULA, please execute the following command:
|
|
240
|
+
|
|
241
|
+
runevals accept-eula
|
|
242
|
+
|
|
243
|
+
==============================================================`);
|
|
244
|
+
}
|
|
245
|
+
process.exit(2);
|
|
246
|
+
}
|
|
162
247
|
|
|
163
248
|
// Handle cache commands first (they don't need environment validation or config)
|
|
164
249
|
if (options.cacheInfo) {
|
|
@@ -183,7 +268,7 @@ async function main() {
|
|
|
183
268
|
|
|
184
269
|
if (options.cacheClear) {
|
|
185
270
|
console.log('🗑️ Clearing cache...\n');
|
|
186
|
-
const success = await clearCache(
|
|
271
|
+
const success = await clearCache(wrapperVerbose);
|
|
187
272
|
|
|
188
273
|
if (success) {
|
|
189
274
|
console.log('✅ Cache cleared successfully!');
|
|
@@ -203,14 +288,14 @@ async function main() {
|
|
|
203
288
|
|
|
204
289
|
// Initialize Python environment (do this early for --init-only)
|
|
205
290
|
// Skip env file loading for --init-only since it's not needed
|
|
206
|
-
if (!
|
|
291
|
+
if (!wrapperQuiet) {
|
|
207
292
|
console.log('🚀 M365 Copilot Agent Evaluations CLI\n');
|
|
208
293
|
}
|
|
209
|
-
await initializePythonEnvironment(
|
|
294
|
+
await initializePythonEnvironment(wrapperVerbose, wrapperQuiet);
|
|
210
295
|
|
|
211
296
|
// If --init-only, stop here (no config or env files needed)
|
|
212
297
|
if (options.initOnly) {
|
|
213
|
-
if (!
|
|
298
|
+
if (!wrapperQuiet) {
|
|
214
299
|
console.log('\n✅ Python environment initialized successfully!\n');
|
|
215
300
|
console.log('⚠️ Note: Configure environment variables before running evaluations.');
|
|
216
301
|
console.log('📖 See README.md for complete setup guide.\n');
|
|
@@ -220,8 +305,7 @@ async function main() {
|
|
|
220
305
|
|
|
221
306
|
// === From here on, we're running actual evals - load config and env files ===
|
|
222
307
|
|
|
223
|
-
|
|
224
|
-
// Load build-time config
|
|
308
|
+
// Load build-time config (already loaded above for EULA check)
|
|
225
309
|
await setDefaultEnvironmentConstants();
|
|
226
310
|
|
|
227
311
|
// Load environment files
|
|
@@ -233,7 +317,7 @@ async function main() {
|
|
|
233
317
|
let localEnvFound = false;
|
|
234
318
|
|
|
235
319
|
if (fs.existsSync(localEnvPath)) {
|
|
236
|
-
if (!
|
|
320
|
+
if (!wrapperQuiet && wrapperVerbose) {
|
|
237
321
|
console.log(`📂 Loading .env.local from current directory`);
|
|
238
322
|
}
|
|
239
323
|
const localEnvVars = loadEnvFile(localEnvPath) || {};
|
|
@@ -245,7 +329,7 @@ async function main() {
|
|
|
245
329
|
if (!localEnvFound) {
|
|
246
330
|
localEnvPath = path.join(process.cwd(), 'env', '.env.local');
|
|
247
331
|
if (fs.existsSync(localEnvPath)) {
|
|
248
|
-
if (!
|
|
332
|
+
if (!wrapperQuiet && wrapperVerbose) {
|
|
249
333
|
console.log(`📂 Loading .env.local from current directory env folder`);
|
|
250
334
|
}
|
|
251
335
|
const localEnvVars = loadEnvFile(localEnvPath) || {};
|
|
@@ -263,7 +347,7 @@ async function main() {
|
|
|
263
347
|
let envFileFound = false;
|
|
264
348
|
|
|
265
349
|
if (fs.existsSync(envFilePath)) {
|
|
266
|
-
if (!
|
|
350
|
+
if (!wrapperQuiet) {
|
|
267
351
|
console.log(`📂 Loading environment: ${options.env} from current directory env folder`);
|
|
268
352
|
}
|
|
269
353
|
const fileEnvVars = loadEnvFile(envFilePath) || {};
|
|
@@ -274,7 +358,7 @@ async function main() {
|
|
|
274
358
|
envFilePath = path.join(__dirname, '..', 'env', `.env.${options.env}`);
|
|
275
359
|
|
|
276
360
|
if (fs.existsSync(envFilePath)) {
|
|
277
|
-
if (!
|
|
361
|
+
if (!wrapperQuiet) {
|
|
278
362
|
console.log(`📂 Loading environment: ${options.env} from package env folder`);
|
|
279
363
|
}
|
|
280
364
|
const fileEnvVars = loadEnvFile(envFilePath) || {};
|
|
@@ -291,18 +375,24 @@ async function main() {
|
|
|
291
375
|
}
|
|
292
376
|
}
|
|
293
377
|
|
|
294
|
-
// Resolve agent ID from environment if not explicitly provided via CLI flag
|
|
295
|
-
// loadEnvFile already resolved aliases (e.g. M365_TITLE_ID) into M365_AGENT_ID
|
|
378
|
+
// Resolve agent ID from environment if not explicitly provided via CLI flag.
|
|
379
|
+
// loadEnvFile already resolved aliases (e.g. M365_TITLE_ID) into M365_AGENT_ID.
|
|
380
|
+
// Then normalize via shared helper and sync to process.env so downstream
|
|
381
|
+
// readers (and the Python CLI) see the canonical form.
|
|
296
382
|
if (!resolvedAgentId) {
|
|
297
383
|
resolvedAgentId = envVars['M365_AGENT_ID'] || process.env.M365_AGENT_ID;
|
|
298
|
-
|
|
384
|
+
}
|
|
385
|
+
resolvedAgentId = normalizeAgentId(resolvedAgentId);
|
|
386
|
+
if (resolvedAgentId) {
|
|
387
|
+
process.env.M365_AGENT_ID = resolvedAgentId;
|
|
388
|
+
if (!options.m365AgentId && !wrapperQuiet) {
|
|
299
389
|
console.log(`🤖 Agent ID: ${resolvedAgentId}`);
|
|
300
390
|
}
|
|
301
391
|
}
|
|
302
392
|
|
|
303
393
|
// Validate required environment variables (always validate, quiet just suppresses output)
|
|
304
|
-
if (!validateEnvironmentVariables(options.env,
|
|
305
|
-
if (
|
|
394
|
+
if (!validateEnvironmentVariables(options.env, wrapperQuiet)) {
|
|
395
|
+
if (wrapperQuiet) {
|
|
306
396
|
console.error('📖 Setup guide: https://www.npmjs.com/package/@microsoft/m365-copilot-eval?activeTab=readme\n');
|
|
307
397
|
}
|
|
308
398
|
process.exit(1);
|
|
@@ -311,8 +401,7 @@ async function main() {
|
|
|
311
401
|
// Build arguments to pass to Python CLI
|
|
312
402
|
const pythonArgs = [];
|
|
313
403
|
|
|
314
|
-
|
|
315
|
-
if (options.quiet) pythonArgs.push('--quiet');
|
|
404
|
+
pythonArgs.push('--log-level', effectiveLogLevel);
|
|
316
405
|
if (options.interactive) pythonArgs.push('--interactive');
|
|
317
406
|
if (resolvedAgentId) pythonArgs.push('--m365-agent-id', resolvedAgentId);
|
|
318
407
|
|
|
@@ -327,7 +416,7 @@ async function main() {
|
|
|
327
416
|
console.log('✅ Successfully signed out and cleared cached authentication tokens!');
|
|
328
417
|
} catch (error) {
|
|
329
418
|
console.error('❌ Failed to sign out:', error.message);
|
|
330
|
-
if (
|
|
419
|
+
if (wrapperVerbose) {
|
|
331
420
|
console.error('\nFull error:', error);
|
|
332
421
|
}
|
|
333
422
|
process.exit(1);
|
|
@@ -350,7 +439,7 @@ async function main() {
|
|
|
350
439
|
const candidatePath = path.join(location.dir, filename);
|
|
351
440
|
if (fs.existsSync(candidatePath)) {
|
|
352
441
|
promptsFile = candidatePath;
|
|
353
|
-
if (!
|
|
442
|
+
if (!wrapperQuiet) {
|
|
354
443
|
const displayPath = candidatePath.startsWith(process.cwd())
|
|
355
444
|
? './' + path.relative(process.cwd(), candidatePath)
|
|
356
445
|
: candidatePath;
|
|
@@ -363,7 +452,7 @@ async function main() {
|
|
|
363
452
|
}
|
|
364
453
|
|
|
365
454
|
// If no prompts file found, offer to create starter file
|
|
366
|
-
if (!promptsFile && !
|
|
455
|
+
if (!promptsFile && !wrapperQuiet) {
|
|
367
456
|
console.log('\n⚠️ No prompts file found in current directory or ./evals/\n');
|
|
368
457
|
|
|
369
458
|
// Use readline for interactive prompt
|
|
@@ -413,7 +502,7 @@ async function main() {
|
|
|
413
502
|
const evalsDir = path.join(process.cwd(), '.evals');
|
|
414
503
|
if (!fs.existsSync(evalsDir)) {
|
|
415
504
|
fs.mkdirSync(evalsDir, { recursive: true });
|
|
416
|
-
if (
|
|
505
|
+
if (wrapperVerbose) {
|
|
417
506
|
console.log(`Created .evals directory`);
|
|
418
507
|
}
|
|
419
508
|
}
|
|
@@ -428,13 +517,17 @@ async function main() {
|
|
|
428
517
|
if (options.prompts && options.prompts.length > 0) {
|
|
429
518
|
pythonArgs.push('--prompts', ...options.prompts);
|
|
430
519
|
}
|
|
520
|
+
|
|
521
|
+
if (options.concurrency !== undefined) {
|
|
522
|
+
pythonArgs.push('--concurrency', String(options.concurrency));
|
|
523
|
+
}
|
|
431
524
|
|
|
432
525
|
if (options.expected && options.expected.length > 0) {
|
|
433
526
|
pythonArgs.push('--expected', ...options.expected);
|
|
434
527
|
}
|
|
435
528
|
|
|
436
529
|
// Execute Python CLI with working directory set to Python CLI directory
|
|
437
|
-
if (!
|
|
530
|
+
if (!wrapperQuiet) {
|
|
438
531
|
console.log('\n📊 Running evaluations...\n');
|
|
439
532
|
console.log('─────────────────────────────────────────────────────────────\n');
|
|
440
533
|
}
|
|
@@ -442,7 +535,7 @@ async function main() {
|
|
|
442
535
|
try {
|
|
443
536
|
await executePythonCli(MAIN_SCRIPT, pythonArgs, { cwd: PYTHON_CLI_DIR });
|
|
444
537
|
|
|
445
|
-
if (!
|
|
538
|
+
if (!wrapperQuiet) {
|
|
446
539
|
console.log('\n─────────────────────────────────────────────────────────────\n');
|
|
447
540
|
console.log('✓ Evals completed successfully!');
|
|
448
541
|
if (outputFile) {
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Build-time injected default values
|
|
3
3
|
* DO NOT EDIT - This file is auto-generated during build.
|
|
4
4
|
*
|
|
5
|
-
* Generated: 2026-
|
|
5
|
+
* Generated: 2026-04-22T20:44:41.713Z
|
|
6
6
|
*
|
|
7
7
|
* @copyright Microsoft Corporation. All rights reserved.
|
|
8
8
|
* @license MIT
|
|
@@ -21,5 +21,9 @@ export default {
|
|
|
21
21
|
|
|
22
22
|
/** Scenario Header for Copilot API */
|
|
23
23
|
scenarioHeader: "agenticevaluation"
|
|
24
|
+
},
|
|
25
|
+
eula: {
|
|
26
|
+
/** EULA version string for acceptance tracking */
|
|
27
|
+
version: "2026-04-01"
|
|
24
28
|
}
|
|
25
29
|
};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Normalize an M365 agent ID by appending '.declarativeAgent' when the value
|
|
3
|
+
* has no '.' segment. Returns the input unchanged when null/undefined/empty
|
|
4
|
+
* or when it already contains a dot.
|
|
5
|
+
*
|
|
6
|
+
* @param {string|null|undefined} id - The raw agent ID value.
|
|
7
|
+
* @returns {string|null|undefined} The normalized agent ID.
|
|
8
|
+
*/
|
|
9
|
+
export function normalizeAgentId(id) {
|
|
10
|
+
if (!id) return id;
|
|
11
|
+
return id.includes('.') ? id : `${id}.declarativeAgent`;
|
|
12
|
+
}
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* Handles .env.local, .env.local.user, and other env file formats.
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
+
import { parse as dotenvParse } from 'dotenv';
|
|
6
7
|
import fs from 'fs';
|
|
7
8
|
import path from 'path';
|
|
8
9
|
|
|
@@ -21,7 +22,8 @@ const AGENT_ID_ALIASES = [
|
|
|
21
22
|
|
|
22
23
|
/**
|
|
23
24
|
* Load environment variables from a .env-style file.
|
|
24
|
-
*
|
|
25
|
+
* Uses dotenv.parse() for standards-compliant parsing (handles quoted values,
|
|
26
|
+
* inline comments, escape sequences). Protected keys are ignored with a warning.
|
|
25
27
|
* Malformed lines (no '=' separator) are skipped with a warning.
|
|
26
28
|
* @param {string} envFilePath - Absolute path to the env file
|
|
27
29
|
* @returns {Object|null} Parsed key-value pairs, or null if file cannot be read
|
|
@@ -34,32 +36,25 @@ export function _loadEnvFile(envFilePath) {
|
|
|
34
36
|
const envVars = {};
|
|
35
37
|
try {
|
|
36
38
|
const content = fs.readFileSync(envFilePath, 'utf-8');
|
|
37
|
-
const lines = content.split('\n');
|
|
38
39
|
|
|
39
|
-
for
|
|
40
|
+
// Pre-scan for malformed lines (no '=') and emit warnings
|
|
41
|
+
for (const line of content.split('\n')) {
|
|
40
42
|
const trimmedLine = line.trim();
|
|
41
43
|
if (!trimmedLine || trimmedLine.startsWith('#')) {
|
|
42
44
|
continue;
|
|
43
45
|
}
|
|
44
|
-
|
|
45
|
-
const eqIndex = trimmedLine.indexOf('=');
|
|
46
|
-
if (eqIndex === -1) {
|
|
46
|
+
if (trimmedLine.indexOf('=') === -1) {
|
|
47
47
|
console.warn(
|
|
48
48
|
`⚠️ Ignoring malformed line in env file (missing '='): ${trimmedLine}`
|
|
49
49
|
);
|
|
50
|
-
continue;
|
|
51
50
|
}
|
|
51
|
+
}
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
.trim()
|
|
57
|
-
.replace(/^(['"])(.*)\1$/, '$2');
|
|
58
|
-
|
|
59
|
-
if (!keyName) {
|
|
60
|
-
continue;
|
|
61
|
-
}
|
|
53
|
+
// Use dotenv.parse() for standards-compliant .env parsing
|
|
54
|
+
// (handles quoted values, inline comments, escape sequences, export prefix)
|
|
55
|
+
const parsed = dotenvParse(content);
|
|
62
56
|
|
|
57
|
+
for (const [keyName, value] of Object.entries(parsed)) {
|
|
63
58
|
if (PROTECTED_KEYS.includes(keyName)) {
|
|
64
59
|
console.warn(
|
|
65
60
|
`⚠️ Ignoring ${keyName} from .env file (using built-in value)`
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EULA acceptance manager
|
|
3
|
+
*
|
|
4
|
+
* Manages reading and writing the EULA acceptance marker file at
|
|
5
|
+
* ~/.m365-copilot-agent-evals/eula-acceptance.json.
|
|
6
|
+
* This location is independent of the cache directory so acceptance
|
|
7
|
+
* survives --cache-clear operations.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import fs from 'node:fs/promises';
|
|
11
|
+
import path from 'node:path';
|
|
12
|
+
import os from 'node:os';
|
|
13
|
+
|
|
14
|
+
const EULA_DIR_NAME = '.m365-copilot-agent-evals';
|
|
15
|
+
const EULA_FILE_NAME = 'eula-acceptance.json';
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Returns the EULA directory path (~/.m365-copilot-agent-evals/).
|
|
19
|
+
* @returns {string}
|
|
20
|
+
*/
|
|
21
|
+
export function getEulaDir() {
|
|
22
|
+
return path.join(os.homedir(), EULA_DIR_NAME);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Returns the full path to the acceptance marker file.
|
|
27
|
+
* @returns {string}
|
|
28
|
+
*/
|
|
29
|
+
export function getEulaFilePath() {
|
|
30
|
+
return path.join(getEulaDir(), EULA_FILE_NAME);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Write an acceptance marker for the given EULA version.
|
|
35
|
+
* Creates the directory if it doesn't exist.
|
|
36
|
+
* @param {string} version - EULA version string
|
|
37
|
+
* @returns {Promise<void>}
|
|
38
|
+
*/
|
|
39
|
+
export async function recordAcceptance(version) {
|
|
40
|
+
const dir = getEulaDir();
|
|
41
|
+
await fs.mkdir(dir, { recursive: true });
|
|
42
|
+
const marker = { version, acceptedAt: new Date().toISOString() };
|
|
43
|
+
await fs.writeFile(
|
|
44
|
+
getEulaFilePath(),
|
|
45
|
+
JSON.stringify(marker, null, 2),
|
|
46
|
+
'utf-8'
|
|
47
|
+
);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Check whether the EULA has been accepted for the required version.
|
|
52
|
+
* @param {string} requiredVersion - The version to check against
|
|
53
|
+
* @returns {Promise<{accepted: boolean, stale: boolean, marker: object|null}>}
|
|
54
|
+
*/
|
|
55
|
+
export async function checkAcceptance(requiredVersion) {
|
|
56
|
+
const marker = await _readMarker();
|
|
57
|
+
if (!marker) return { accepted: false, stale: false, marker: null };
|
|
58
|
+
if (marker.version !== requiredVersion)
|
|
59
|
+
return { accepted: false, stale: true, marker };
|
|
60
|
+
return { accepted: true, stale: false, marker };
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Read and parse the acceptance marker file.
|
|
65
|
+
* Returns null if the file is missing, unreadable, or malformed.
|
|
66
|
+
* Exported with _ prefix for unit testing.
|
|
67
|
+
* @returns {Promise<object|null>}
|
|
68
|
+
*/
|
|
69
|
+
export async function _readMarker() {
|
|
70
|
+
try {
|
|
71
|
+
const raw = await fs.readFile(getEulaFilePath(), 'utf-8');
|
|
72
|
+
const parsed = JSON.parse(raw);
|
|
73
|
+
if (!parsed.version || !parsed.acceptedAt) return null;
|
|
74
|
+
return parsed;
|
|
75
|
+
} catch {
|
|
76
|
+
return null;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
@@ -605,18 +605,20 @@ export class ProgressReporter {
|
|
|
605
605
|
|
|
606
606
|
this.phaseStatuses.set(phaseId, 'failed');
|
|
607
607
|
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
608
|
+
if (!this.options.quiet) {
|
|
609
|
+
// Clear current line and display error
|
|
610
|
+
if (this.isInteractive) {
|
|
611
|
+
readline.clearLine(process.stdout, 0);
|
|
612
|
+
readline.cursorTo(process.stdout, 0);
|
|
613
|
+
}
|
|
613
614
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
615
|
+
console.error(`\n❌ Failed: ${phase.name}`);
|
|
616
|
+
console.error(`\nError: ${error.message}`);
|
|
617
|
+
console.error(`\nSuggested actions:`);
|
|
618
|
+
console.error(` • Check your internet connection`);
|
|
619
|
+
console.error(` • If behind a proxy, set HTTP_PROXY/HTTPS_PROXY`);
|
|
620
|
+
console.error(` • Run with --verbose for detailed output`);
|
|
621
|
+
}
|
|
620
622
|
|
|
621
623
|
this.currentPhase = null;
|
|
622
624
|
this.phaseStartTime = null;
|