@microsoft/m365-copilot-eval 1.0.1-preview.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +415 -0
- package/TERMS.txt +65 -0
- package/package.json +82 -0
- package/src/clients/cli/auth/__init__.py +1 -0
- package/src/clients/cli/auth/auth_handler.py +262 -0
- package/src/clients/cli/custom_evaluators/CitationsEvaluator.py +136 -0
- package/src/clients/cli/custom_evaluators/ConcisenessNonLLMEvaluator.py +18 -0
- package/src/clients/cli/custom_evaluators/ExactMatchEvaluator.py +25 -0
- package/src/clients/cli/custom_evaluators/PII/PII.py +45 -0
- package/src/clients/cli/custom_evaluators/PartialMatchEvaluator.py +39 -0
- package/src/clients/cli/custom_evaluators/__init__.py +1 -0
- package/src/clients/cli/demo_usage.py +83 -0
- package/src/clients/cli/generate_report.py +251 -0
- package/src/clients/cli/main.py +766 -0
- package/src/clients/cli/readme.md +301 -0
- package/src/clients/cli/requirements.txt +10 -0
- package/src/clients/cli/response_extractor.py +589 -0
- package/src/clients/cli/samples/PartnerSuccess.json +122 -0
- package/src/clients/cli/samples/example_prompts.json +14 -0
- package/src/clients/cli/samples/example_prompts_alt.json +12 -0
- package/src/clients/cli/samples/prompts_ambiguity.json +22 -0
- package/src/clients/cli/samples/prompts_rag_grounding.json +22 -0
- package/src/clients/cli/samples/prompts_security_injection.json +22 -0
- package/src/clients/cli/samples/prompts_tool_use_negatives.json +22 -0
- package/src/clients/cli/samples/psaSample.json +18 -0
- package/src/clients/cli/samples/starter.json +10 -0
- package/src/clients/node-js/bin/runevals.js +505 -0
- package/src/clients/node-js/config/default.js +25 -0
- package/src/clients/node-js/lib/cache-utils.js +119 -0
- package/src/clients/node-js/lib/expiry-check.js +164 -0
- package/src/clients/node-js/lib/index.js +25 -0
- package/src/clients/node-js/lib/python-runtime.js +253 -0
- package/src/clients/node-js/lib/venv-manager.js +242 -0
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import fs from 'fs';
|
|
6
|
+
import { Command } from 'commander';
|
|
7
|
+
import { ensurePythonRuntime, getCacheDir } from '../lib/python-runtime.js';
|
|
8
|
+
import { ensureVenv, executePythonCli } from '../lib/venv-manager.js';
|
|
9
|
+
import { getCacheStats, clearCache, formatBytes } from '../lib/cache-utils.js';
|
|
10
|
+
import { checkPackageExpiry } from '../lib/expiry-check.js';
|
|
11
|
+
import config from '../config/default.js';
|
|
12
|
+
|
|
13
|
+
// Run the expiry gate before anything else (exits if expired, warns if close to expiry).
checkPackageExpiry();

// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The CLI version is whatever package.json (four directories above this script) declares.
const packageJsonPath = path.join(__dirname, '..', '..', '..', '..', 'package.json');
const packageJsonText = fs.readFileSync(packageJsonPath, 'utf8');
const packageJson = JSON.parse(packageJsonText);
const { version: VERSION } = packageJson;

// Locations of the bundled Python CLI, its entry script, and its requirements file.
const PYTHON_CLI_DIR = path.join(__dirname, '..', '..', 'cli');
const [MAIN_SCRIPT, REQUIREMENTS_FILE] = ['main.py', 'requirements.txt'].map(
  (fileName) => path.join(PYTHON_CLI_DIR, fileName),
);
|
|
28
|
+
|
|
29
|
+
/**
 * Publish the built-in Copilot API settings through process.env.
 *
 * The four values are read from the bundled config module and written
 * unconditionally, replacing anything already present in the environment.
 * loadEnvFile() refuses to overwrite the same keys, so .env files cannot
 * change them afterwards.
 */
function setDefaultEnvironmentConstants() {
  const {
    m365EvalClientId,
    copilotApiEndpoint,
    copilotScopes,
    scenarioHeader,
  } = config.copilotApi;

  Object.assign(process.env, {
    M365_EVAL_CLIENT_ID: m365EvalClientId,
    COPILOT_API_ENDPOINT: copilotApiEndpoint,
    COPILOT_SCOPES: copilotScopes,
    X_SCENARIO_HEADER: scenarioHeader,
  });
}
|
|
39
|
+
|
|
40
|
+
/**
 * Remove one pair of matching quotes wrapping a value, if present.
 *
 * Quote characters inside the value (for example the apostrophe in `it's`)
 * are left untouched.
 *
 * @param {string} value - Already-trimmed raw value.
 * @returns {string} The value without its surrounding quote pair.
 */
function stripSurroundingQuotes(value) {
  if (value.length >= 2) {
    const first = value[0];
    const last = value[value.length - 1];
    if ((first === '"' || first === "'") && first === last) {
      return value.slice(1, -1);
    }
  }
  return value;
}

/**
 * Load environment variables from .env file
 *
 * Each non-blank line that does not start with `#` is split at its first `=`
 * into a key and a value. Accepted pairs are returned AND written to
 * process.env (so the spawned Python script sees them). Pairs with an empty
 * key or an empty value are skipped. Protected keys (the built-in Copilot API
 * settings) are never taken from the file; a warning is printed instead.
 *
 * @param {string} envFilePath - Path of the .env file to read.
 * @returns {Object<string, string>|null} The accepted variables, or null when
 *   the file does not exist or cannot be read.
 */
function loadEnvFile(envFilePath) {
  if (!fs.existsSync(envFilePath)) {
    return null;
  }

  // Protected keys that cannot be overridden from .env files
  const PROTECTED_KEYS = new Set([
    'M365_EVAL_CLIENT_ID',
    'COPILOT_API_ENDPOINT',
    'COPILOT_SCOPES',
    'X_SCENARIO_HEADER',
  ]);

  const envVars = {};
  try {
    const content = fs.readFileSync(envFilePath, 'utf-8');

    // Splitting on '\n' is enough: trim() below removes a trailing '\r' from CRLF files.
    for (const line of content.split('\n')) {
      const trimmedLine = line.trim();
      if (!trimmedLine || trimmedLine.startsWith('#')) {
        continue;
      }

      // Only the first '=' separates key from value; later ones belong to the value.
      const separatorIndex = trimmedLine.indexOf('=');
      const hasSeparator = separatorIndex !== -1;
      const keyName = (hasSeparator ? trimmedLine.slice(0, separatorIndex) : trimmedLine).trim();
      const rawValue = hasSeparator ? trimmedLine.slice(separatorIndex + 1).trim() : '';

      // Strip only a wrapping quote pair. Removing every quote character would
      // silently corrupt secrets or text that legitimately contain ' or ".
      const value = stripSurroundingQuotes(rawValue);

      // Skip built-in defaults - they cannot be overridden.
      if (PROTECTED_KEYS.has(keyName)) {
        console.warn(`⚠️ Ignoring ${keyName} from .env file (using built-in value)`);
        continue;
      }

      if (keyName && value) {
        envVars[keyName] = value;
        // Also set in process.env for Python script
        process.env[keyName] = value;
      }
    }
  } catch (error) {
    console.error(`Failed to read environment file: ${error.message}`);
    return null;
  }

  return envVars;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Verify that every required environment variable is set and, when some are
 * missing, print setup guidance to stderr.
 *
 * Required variables are looked up in process.env only. M365_TITLE_ID is
 * optional and is merely echoed back as "already detected" when it is present
 * in process.env or in detectedVars.
 *
 * @param {string} envName - Environment name used in the guidance text.
 * @param {Object<string, string>} detectedVars - Variables loaded from .env files.
 * @returns {boolean} true when nothing is missing, false otherwise.
 */
function validateEnvironmentVariables(envName, detectedVars) {
  const requiredVars = [
    { key: 'TENANT_ID', description: 'Your Tenant ID' },
    { key: 'AZURE_AI_OPENAI_ENDPOINT', description: 'Azure OpenAI endpoint URL' },
    { key: 'AZURE_AI_API_KEY', description: 'Azure OpenAI API key' }
  ];
  const optionalVars = [
    { key: 'M365_TITLE_ID', description: 'M365 Agent Title ID' }
  ];

  const lookup = (key) => process.env[key] || detectedVars[key];

  const alreadySet = optionalVars.filter(({ key }) => lookup(key));
  const absent = requiredVars.filter(({ key }) => !process.env[key]);

  // All required vars present
  if (absent.length === 0) {
    return true;
  }

  // Assemble the guidance first, then emit it one console.error call per line.
  const envFile = envName ? `env/.env.${envName}` : '.env.local or env/env.local';
  const guidance = [
    '\n❌ Missing required environment variables:\n',
    `Create ${envFile} with:\n`,
  ];

  for (const { key, description } of absent) {
    // Turn the description into a placeholder such as <your-azure-openai-api-key>.
    const placeholder = description.toLowerCase().replace(/\s+/g, '-');
    guidance.push(` ${key}="<your-${placeholder}>"`);
  }

  if (alreadySet.length > 0) {
    guidance.push(`\n✓ Already detected:`);
    for (const { key } of alreadySet) {
      guidance.push(` ${key}="${lookup(key)}"`);
    }
  }

  guidance.push(`\nThen run: npx runevals${envName ? ` --env ${envName}` : ''}\n`);
  guidance.push('📖 See README.md for complete setup guide.\n');

  guidance.forEach((text) => console.error(text));

  return false;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Derive the agent ID from the M365_TITLE_ID entry of the given variables.
 *
 * @param {Object<string, string>} envVars - Loaded environment variables.
 * @returns {string|null} `{M365_TITLE_ID}.declarativeAgent`, or null when
 *   M365_TITLE_ID is missing or empty.
 */
function constructAgentId(envVars) {
  const { M365_TITLE_ID: titleId } = envVars;
  return titleId ? `${titleId}.declarativeAgent` : null;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Prepare everything the Python CLI needs: first the Python runtime, then the
 * virtual environment built from the bundled requirements file.
 *
 * On any failure the error and a short troubleshooting list are printed to
 * stderr and the process exits with status 1, so this function only returns
 * on success.
 *
 * @param {boolean} [verbose=false] - Forwarded to both setup steps; also
 *   prints the full error object on failure.
 * @returns {Promise<void>}
 */
async function initializePythonEnvironment(verbose = false) {
  const troubleshootingTips = [
    ' - Check your internet connection',
    ' - If behind a proxy, set HTTP_PROXY/HTTPS_PROXY environment variables',
    ' - For SSL issues, set NODE_EXTRA_CA_CERTS or PIP_CERT',
    ' - Run with --verbose for detailed output',
  ];

  try {
    // The runtime must exist before a venv can be created from it.
    await ensurePythonRuntime(verbose);
    await ensureVenv(REQUIREMENTS_FILE, verbose);
  } catch (setupError) {
    console.error('\n❌ Failed to initialize Python environment:');
    console.error(setupError.message);

    if (verbose) {
      console.error('\nFull error:', setupError);
    }

    console.error('\nTroubleshooting:');
    for (const tip of troubleshootingTips) {
      console.error(tip);
    }

    process.exit(1);
  }
}
|
|
175
|
+
|
|
176
|
+
/**
 * Main CLI entry point
 *
 * Parses the command line and then, in order:
 *   1. serves the cache commands (--cache-info, --cache-clear, --cache-dir) and exits;
 *   2. loads .env files from the current directory and for the selected --env;
 *   3. resolves the agent ID (--agent-id, then M365_TITLE_ID, then M365_AGENT_ID);
 *   4. validates required environment variables (skipped for --init-only and --quiet);
 *   5. prepares the Python runtime and virtual environment;
 *   6. handles --signout, or locates a prompts file and runs the Python CLI.
 *
 * Most terminal paths end with process.exit(); the function returns normally
 * only after --init-only or after a successful evaluation run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Default for --env. A missing env file is only worth a warning when the
  // user asked for some other environment.
  const DEFAULT_ENV_NAME = 'local';

  // Set default environment constants.
  setDefaultEnvironmentConstants();

  const program = new Command();

  program
    .name('runevals')
    .description('M365 Copilot Agent Evaluations CLI - Zero-config Python evaluation tool')
    .version(VERSION)
    .option('-v, --verbose', 'verbose output (shows detailed processing steps)')
    .option('-q, --quiet', 'quiet mode (minimal output)')
    .option('--prompts <prompts...>', 'prompts to evaluate')
    .option('--expected <responses...>', 'expected responses')
    .option('--prompts-file <file>', 'JSON file with prompts and expected responses')
    .option('-o, --output <file>', 'output file (JSON, CSV, or HTML)')
    .option('-i, --interactive', 'interactive mode (enter prompts interactively)')
    .option('--agent-id <id>', 'agent ID (overrides env vars and auto-construction)')
    .option('--env <environment>', 'environment name (loads env/.env.<environment>)', DEFAULT_ENV_NAME)
    .option('--init-only', 'only initialize Python environment, don\'t run evaluations')
    .option('--cache-info', 'show cache information and statistics')
    .option('--cache-clear', 'clear the cache (removes Python runtime and venv)')
    .option('--cache-dir', 'print the cache directory path')
    .option('--signout', 'sign out and clear cached authentication tokens');

  program.parse(process.argv);
  const options = program.opts();

  // Handle cache commands first (they don't need environment validation)
  if (options.cacheInfo) {
    console.log('🗂️ Cache Information\n');
    const stats = await getCacheStats();

    console.log(`Cache Directory: ${stats.cacheDir}`);
    console.log(`Cache Exists: ${stats.exists ? '✓' : '✗'}`);

    if (stats.exists) {
      console.log(`Total Size: ${formatBytes(stats.size)}`);
      console.log('\nComponents:');
      console.log(` Python Runtime: ${stats.pythonRuntime ? '✓' : '✗'}`);
      console.log(` Virtual Env: ${stats.venv ? '✓' : '✗'}`);
      console.log(` Downloads: ${stats.downloads ? '✓' : '✗'}`);
    } else {
      console.log('\nCache is empty. Run "runevals --init-only" to initialize.');
    }

    process.exit(0);
  }

  if (options.cacheClear) {
    console.log('🗑️ Clearing cache...\n');
    const success = await clearCache(options.verbose);

    if (success) {
      console.log('✅ Cache cleared successfully!');
      console.log('Run "runevals --init-only" to reinitialize.');
    } else {
      console.error('❌ Failed to clear cache');
      process.exit(1);
    }

    process.exit(0);
  }

  if (options.cacheDir) {
    console.log(getCacheDir());
    process.exit(0);
  }

  // Variables collected from every .env file that gets loaded below.
  const envVars = {};
  let resolvedAgentId = options.agentId;

  // Check for .env.local in current directory (ATK projects)
  // First check for .env.local directly in current directory
  let localEnvPath = path.join(process.cwd(), '.env.local');
  let localEnvFound = false;

  if (fs.existsSync(localEnvPath)) {
    if (!options.quiet && options.verbose) {
      console.log(`📂 Loading .env.local from current directory`);
    }
    Object.assign(envVars, loadEnvFile(localEnvPath) || {});
    localEnvFound = true;
  }

  // If not found, check for env.local in env subfolder of current directory
  if (!localEnvFound) {
    localEnvPath = path.join(process.cwd(), 'env', 'env.local');
    if (fs.existsSync(localEnvPath)) {
      if (!options.quiet && options.verbose) {
        console.log(`📂 Loading env.local from current directory env folder`);
      }
      Object.assign(envVars, loadEnvFile(localEnvPath) || {});
      localEnvFound = true;
    }
  }

  if (options.env) {
    const cwdEnvDir = path.join(process.cwd(), 'env');
    const packageEnvDir = path.join(__dirname, '..', 'env');
    const envFileName = `.env.${options.env}`;
    let envFileFound = false;

    // First check current directory's env folder
    const cwdEnvFile = path.join(cwdEnvDir, envFileName);
    // Fallback to package's env directory
    const packageEnvFile = path.join(packageEnvDir, envFileName);

    if (fs.existsSync(cwdEnvFile)) {
      if (!options.quiet) {
        console.log(`📂 Loading environment: ${options.env} from current directory env folder`);
      }
      Object.assign(envVars, loadEnvFile(cwdEnvFile) || {});
      envFileFound = true;
    } else if (fs.existsSync(packageEnvFile)) {
      if (!options.quiet) {
        console.log(`📂 Loading environment: ${options.env} from package env folder`);
      }
      Object.assign(envVars, loadEnvFile(packageEnvFile) || {});
      envFileFound = true;
    }

    // Only warn if non-default env specified. The comparison must use the
    // actual --env default ('local'); comparing against 'dev' warned on every
    // default run and stayed silent for a missing .env.dev.
    if (!envFileFound && options.env !== DEFAULT_ENV_NAME) {
      console.warn(`⚠️ Environment file not found: .env.${options.env}`);
      console.warn(` Searched in: ${cwdEnvDir} and ${packageEnvDir}`);
      console.warn(` Continuing with system environment variables...\n`);
    }
  }

  // Auto-construct agent ID from loaded env vars if not explicitly provided.
  // This single step covers every env file loaded above.
  if (!resolvedAgentId && Object.keys(envVars).length > 0) {
    resolvedAgentId = constructAgentId(envVars);
    if (resolvedAgentId && !options.quiet) {
      console.log(`🤖 Agent ID (from M365_TITLE_ID): ${resolvedAgentId}`);
    }
  }

  // Fallback to M365_AGENT_ID env var if still not resolved
  if (!resolvedAgentId) {
    resolvedAgentId = process.env.M365_AGENT_ID;
  }

  // Validate required environment variables (skip for init-only and cache commands)
  // NOTE(review): --quiet also skips this validation; confirm that is intended.
  if (!options.initOnly && !options.quiet) {
    if (!validateEnvironmentVariables(options.env, envVars)) {
      process.exit(1);
    }
  }

  // Initialize Python environment
  console.log('🚀 M365 Copilot Agent Evaluations CLI\n');
  await initializePythonEnvironment(options.verbose);

  // If --init-only, stop here
  if (options.initOnly) {
    console.log('\n✅ Python environment initialized successfully!\n');
    console.log('⚠️ Note: Configure environment variables before running evaluations.');
    console.log('📖 See README.md for complete setup guide.\n');
    return;
  }

  // Build arguments to pass to Python CLI
  const pythonArgs = [];

  if (options.verbose) pythonArgs.push('--verbose');
  if (options.quiet) pythonArgs.push('--quiet');
  if (options.interactive) pythonArgs.push('--interactive');
  if (resolvedAgentId) pythonArgs.push('--agent-id', resolvedAgentId);

  // Handle signout
  if (options.signout) {
    console.log('🔓 Signing out...\n');

    try {
      // Execute Python CLI with --signout flag
      await executePythonCli(MAIN_SCRIPT, [...pythonArgs, '--signout'], { cwd: PYTHON_CLI_DIR });

      console.log('✅ Successfully signed out and cleared cached authentication tokens!');
    } catch (error) {
      console.error('❌ Failed to sign out:', error.message);
      if (options.verbose) {
        console.error('\nFull error:', error);
      }
      process.exit(1);
    }

    process.exit(0);
  }

  // Default prompts file lookup in current working directory
  let promptsFile = options.promptsFile;
  if (!promptsFile && !options.prompts && !options.interactive) {
    // Look for common prompts file names in current directory and ./evals/ subdirectory
    const candidateNames = ['prompts.json', 'evals.json', 'tests.json'];
    const candidateDirs = [process.cwd(), path.join(process.cwd(), 'evals')];
    const candidatePaths = candidateDirs.flatMap(
      (dir) => candidateNames.map((name) => path.join(dir, name)),
    );

    // First existing candidate wins: current directory first, then ./evals/.
    promptsFile = candidatePaths.find((candidate) => fs.existsSync(candidate));

    if (promptsFile && !options.quiet) {
      const displayPath = promptsFile.startsWith(process.cwd())
        ? './' + path.relative(process.cwd(), promptsFile)
        : promptsFile;
      console.log(`📄 Using prompts file: ${displayPath}`);
    }

    // If no prompts file found, offer to create starter file
    if (!promptsFile && !options.quiet) {
      console.log('\n⚠️ No prompts file found in current directory or ./evals/\n');

      // Use readline for interactive prompt
      const readline = await import('readline');
      const rl = readline.createInterface({
        input: process.stdin,
        output: process.stdout
      });

      const answer = await new Promise((resolve) => {
        rl.question('Create a starter evals file with sample prompts? (Y/n): ', resolve);
      });
      rl.close();

      // An empty answer counts as "yes" (the prompt advertises Y as the default).
      const response = answer.trim().toLowerCase();
      if (response === '' || response === 'y' || response === 'yes') {
        const sourcePath = path.join(PYTHON_CLI_DIR, 'samples', 'starter.json');

        // Create ./evals/ directory if it doesn't exist
        const evalsDir = path.join(process.cwd(), 'evals');
        if (!fs.existsSync(evalsDir)) {
          fs.mkdirSync(evalsDir, { recursive: true });
        }

        const targetPath = path.join(evalsDir, 'evals.json');
        fs.copyFileSync(sourcePath, targetPath);

        console.log('\n✅ Created ./evals/evals.json with 2 starter prompts');
        console.log('Edit this file with your own prompts, then run "npx runevals" again.\n');
        process.exit(0);
      } else {
        console.log('\nUse --prompts or --interactive to continue without a file:\n');
        console.log(' npx runevals --prompts "Your prompt here"');
        console.log(' npx runevals --interactive');
        console.log(' npx runevals --prompts-file path/to/prompts.json\n');
        process.exit(0);
      }
    }
  }

  // Handle output file with automatic .evals directory and timestamping
  let outputFile = options.output;
  if (outputFile) {
    pythonArgs.push('--output', outputFile);
  } else if (promptsFile) {
    // Auto-generate timestamped output in .evals directory
    const evalsDir = path.join(process.cwd(), '.evals');
    if (!fs.existsSync(evalsDir)) {
      fs.mkdirSync(evalsDir, { recursive: true });
      if (options.verbose) {
        console.log(`Created .evals directory`);
      }
    }

    // ISO timestamp made filename-safe, e.g. 2026-01-21_21-10-04-863
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-').replace('T', '_').split('Z')[0];
    outputFile = path.join(evalsDir, `${timestamp}.html`);
    pythonArgs.push('--output', outputFile);
  }

  if (promptsFile) pythonArgs.push('--prompts-file', promptsFile);

  if (options.prompts && options.prompts.length > 0) {
    pythonArgs.push('--prompts', ...options.prompts);
  }

  if (options.expected && options.expected.length > 0) {
    pythonArgs.push('--expected', ...options.expected);
  }

  // Execute Python CLI with working directory set to Python CLI directory
  if (!options.quiet) {
    console.log('\n📊 Running evaluations...\n');
    console.log('─────────────────────────────────────────────────────────────\n');
  }

  try {
    await executePythonCli(MAIN_SCRIPT, pythonArgs, { cwd: PYTHON_CLI_DIR });

    if (!options.quiet) {
      console.log('\n─────────────────────────────────────────────────────────────\n');
      console.log('✓ Evals completed successfully!');
      if (outputFile) {
        console.log(`\nResults saved to: ${outputFile}\n`);
      }
    }
  } catch (error) {
    console.error('\n─────────────────────────────────────────────────────────────\n');
    console.error('❌ Evaluation failed:', error.message);
    process.exit(1);
  }
}
|
|
500
|
+
|
|
501
|
+
// Start the CLI. Any rejection that escapes main() is logged and becomes exit status 1.
function reportFatalError(fatalError) {
  console.error('Fatal error:', fatalError);
  process.exit(1);
}

main().catch(reportFatalError);
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Build-time injected default values
 * DO NOT EDIT - This file is auto-generated during build.
 *
 * Generated: 2026-01-21T21:10:04.863Z
 *
 * @copyright Microsoft Corporation. All rights reserved.
 * @license MIT
 */

// Fixed Copilot API settings, grouped under the `copilotApi` key of the default export.
const copilotApi = {
  /** Microsoft M365 Evaluation Client ID */
  m365EvalClientId: "c678803a-d8e9-4d67-849c-3a8b2d7ba5d3",

  /** Copilot OAuth Scopes */
  copilotScopes: "https://substrate.office.com/sydney/.default",

  /** Copilot API Endpoint */
  copilotApiEndpoint: "https://substrate.office.com/m365Copilot",

  /** Scenario Header for Copilot API */
  scenarioHeader: "agenticevaluation"
};

export default { copilotApi };
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { getCacheDir } from './python-runtime.js';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
|
|
4
|
+
/**
 * Describe the current state of the cache directory.
 *
 * @returns {Promise<{cacheDir: string, exists: boolean, size: number,
 *   pythonRuntime: boolean, venv: boolean, downloads: boolean}>}
 *   `exists` and the three component flags tell whether the cache directory
 *   and its `python`, `venv` and `downloads` entries are accessible; `size`
 *   is the approximate total in bytes (0 when the cache does not exist).
 */
export async function getCacheStats() {
  const cacheDir = getCacheDir();

  // fs.access rejects when the path is missing or inaccessible; map that to a boolean.
  const isAccessible = async (targetPath) => {
    try {
      await fs.access(targetPath);
      return true;
    } catch {
      return false;
    }
  };

  const stats = {
    cacheDir,
    exists: false,
    size: 0,
    pythonRuntime: false,
    venv: false,
    downloads: false,
  };

  // Without the cache directory itself there is nothing further to inspect.
  if (!(await isAccessible(cacheDir))) {
    return stats;
  }
  stats.exists = true;

  stats.pythonRuntime = await isAccessible(`${cacheDir}/python`);
  stats.venv = await isAccessible(`${cacheDir}/venv`);
  stats.downloads = await isAccessible(`${cacheDir}/downloads`);

  // Calculate total size (approximate)
  stats.size = await getDirectorySize(cacheDir);

  return stats;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Sum the sizes of all regular files below a directory, recursing into
 * subdirectories. Entries that are neither files nor directories are not counted.
 *
 * Errors are swallowed on purpose (permission denied, etc.): the first error
 * stops the scan of that directory and whatever was counted so far is returned.
 *
 * @param {string} dirPath - Directory to measure.
 * @returns {Promise<number>} Total size in bytes; 0 when the directory cannot be read.
 */
async function getDirectorySize(dirPath) {
  let byteCount = 0;

  try {
    const entries = await fs.readdir(dirPath, { withFileTypes: true });

    for (const entry of entries) {
      const entryPath = `${dirPath}/${entry.name}`;

      if (entry.isDirectory()) {
        byteCount += await getDirectorySize(entryPath);
        continue;
      }

      if (entry.isFile()) {
        const { size } = await fs.stat(entryPath);
        byteCount += size;
      }
    }
  } catch {
    // Best-effort measurement: keep the partial total.
  }

  return byteCount;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Delete the whole cache directory.
 *
 * The removal is recursive and forced, so a cache directory that does not
 * exist still counts as success.
 *
 * @param {boolean} [verbose=false] - Print progress and error details.
 * @returns {Promise<boolean>} true when the removal succeeded, false when it threw.
 */
export async function clearCache(verbose = false) {
  const cacheDir = getCacheDir();
  let cleared = false;

  if (verbose) {
    console.log(`Clearing cache: ${cacheDir}`);
  }

  try {
    await fs.rm(cacheDir, { recursive: true, force: true });
    cleared = true;
    if (verbose) {
      console.log('Cache cleared successfully');
    }
  } catch (err) {
    // The failure is reported through the return value; details only in verbose mode.
    if (verbose) {
      console.error('Error clearing cache:', err.message);
    }
  }

  return cleared;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Format bytes to human-readable string
 *
 * Uses binary multiples (1 KB = 1024 B) and two decimal places, e.g.
 * `1536` -> `"1.50 KB"`. Values beyond the largest unit stay in that unit
 * (`"5120.00 TB"`) instead of indexing past the unit table, and values
 * below one byte are reported in bytes.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Formatted size; `"0 B"` for zero, negative, or non-finite input.
 */
export function formatBytes(bytes) {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';

  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];

  // Repeated division instead of Math.log avoids floating-point rounding at
  // exact unit boundaries and keeps the unit index inside `sizes`.
  let value = bytes;
  let unitIndex = 0;
  while (value >= k && unitIndex < sizes.length - 1) {
    value /= k;
    unitIndex += 1;
  }

  return `${value.toFixed(2)} ${sizes[unitIndex]}`;
}
|