@microsoft/m365-copilot-eval 1.2.1-preview.1 → 1.3.0-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -97,21 +97,28 @@ python main.py --interactive
97
97
 
98
98
  #### Additional Options
99
99
  ```bash
100
- # Verbose output (shows detailed processing steps)
101
- python main.py --verbose
100
+ # Logging verbosity (canonical control surface)
101
+ python main.py --log-level debug
102
+ python main.py --log-level info
103
+ python main.py --log-level warning
104
+ python main.py --log-level error
105
+
106
+ # Bare flag resolves to info
107
+ python main.py --log-level
102
108
 
103
- # Quiet mode (minimal output)
109
+ # Legacy flags (no longer supported; use --log-level instead)
110
+ # The following will fail with "unrecognized arguments" errors:
111
+ python main.py --verbose
104
112
  python main.py --quiet
105
113
 
114
+ # Share diagnostics with support (console-based, no archive artifacts)
115
+ python main.py --log-level debug --prompts-file samples/example_prompts.json
116
+
106
117
  # Get help and see all options
107
118
  python main.py --help
108
119
 
109
120
  # Specify / override the Agent ID (takes precedence over M365_AGENT_ID env var)
110
121
  python main.py --m365-agent-id "00000000-0000-0000-0000-000000000000"
111
-
112
- # Citation format options
113
- python main.py --citation-format oai_unicode # Default: New OAI format
114
- python main.py --citation-format legacy_bracket # Old [^i^] format
115
122
  ```
116
123
 
117
124
  #### File Format Examples
@@ -27,14 +27,20 @@ Date: September 21, 2025
27
27
 
28
28
  import json
29
29
  import logging
30
- from typing import Dict, List, Any, Optional
30
+ from typing import Dict, List, Any, Optional, Tuple
31
31
  from datetime import datetime
32
32
  from enum import Enum
33
+ from cli_logging.logging_utils import LOG_LEVEL_MAP, LogLevel
33
34
 
34
35
  # Configure logging
35
- logging.basicConfig(level=logging.INFO)
36
+ if not logging.getLogger().handlers:
37
+ logging.basicConfig(level=logging.INFO)
36
38
  logger = logging.getLogger(__name__)
37
39
 
40
+ def _log_level_to_python_level(log_level: str) -> int:
41
+ normalized = (log_level or "info").strip().lower()
42
+ return LOG_LEVEL_MAP.get(normalized, logging.INFO)
43
+
38
44
  class MessageRole(Enum):
39
45
  """Enumeration for message roles."""
40
46
  USER = "user"
@@ -71,8 +77,10 @@ class EnhancedResponseExtractor:
71
77
  "generate_response"
72
78
  }
73
79
 
74
- def __init__(self):
80
+ def __init__(self, log_level: str = "info"):
75
81
  self.tool_call_counter = 0
82
+ self.log_level = (log_level or "info").strip().lower()
83
+ logger.setLevel(_log_level_to_python_level(self.log_level))
76
84
 
77
85
  def _generate_tool_call_id(self, tool_name: str) -> str:
78
86
  """Generate a unique tool call ID."""
@@ -461,6 +469,7 @@ class EnhancedResponseExtractor:
461
469
  "metadata": {
462
470
  "conversation_id": response_data.get("conversationId"),
463
471
  "request_id": response_data.get("requestId"),
472
+ "message_id": None,
464
473
  "telemetry_available": False
465
474
  }
466
475
  }
@@ -470,6 +479,11 @@ class EnhancedResponseExtractor:
470
479
  if isinstance(response_data, dict):
471
480
  # Messages are directly in the response_data object
472
481
  messages = response_data.get("messages", [])
482
+
483
+ # Extract message_id from the last bot message in this response
484
+ bot_messages = [m for m in messages if m.get("author") != "user"]
485
+ if bot_messages and bot_messages[-1].get("messageId"):
486
+ enhanced_response["metadata"]["message_id"] = bot_messages[-1]["messageId"]
473
487
 
474
488
  # Extract telemetry tools if available
475
489
  telemetry_tools = []
@@ -526,6 +540,7 @@ class EnhancedResponseExtractor:
526
540
  "metadata": {
527
541
  "conversation_id": None,
528
542
  "request_id": None,
543
+ "message_id": None,
529
544
  "telemetry_available": False
530
545
  }
531
546
  }
@@ -552,27 +567,30 @@ class EnhancedResponseExtractor:
552
567
  "metadata": {
553
568
  "conversation_id": None,
554
569
  "request_id": None,
570
+ "message_id": None,
555
571
  "telemetry_available": False,
556
572
  "error": str(e)
557
573
  }
558
574
  }
559
575
 
560
- def extract_enhanced_responses(responses: Dict[str, str]) -> Dict[str, Dict[str, Any]]:
576
+ def extract_enhanced_responses(responses: List[Tuple[str, str]], log_level: str = "info") -> List[Dict[str, Any]]:
561
577
  """
562
578
  Extract enhanced response information for multiple responses.
563
-
579
+
564
580
  Args:
565
- responses: Dictionary mapping prompts to raw response strings
566
-
581
+ responses: List of (prompt_text, raw_response_string) tuples, one per prompt
582
+ sent to the chat API. Order and duplicates are preserved.
583
+
567
584
  Returns:
568
- Dictionary mapping prompts to enhanced response data
585
+ List of enhanced response dicts (one per prompt, same order as input).
569
586
  """
570
- extractor = EnhancedResponseExtractor()
571
- enhanced_responses = {}
572
-
573
- for prompt, raw_response in responses.items():
574
- enhanced_responses[prompt] = extractor.extract_enhanced_response(raw_response)
575
-
587
+ extractor = EnhancedResponseExtractor(log_level=log_level)
588
+ enhanced_responses = []
589
+
590
+ for prompt, raw_response in responses:
591
+ enhanced = extractor.extract_enhanced_response(raw_response)
592
+ enhanced_responses.append(enhanced)
593
+
576
594
  return enhanced_responses
577
595
 
578
596
  def get_response_text_for_evaluation(enhanced_response: Dict[str, Any]) -> str:
@@ -29,8 +29,8 @@ const REQUIREMENTS_FILE = path.join(PYTHON_CLI_DIR, 'requirements.txt');
29
29
 
30
30
  /**
31
31
  * Display usage terms notice
32
- * Called before running evaluations (but not for --init-only, cache commands, or --signout)
33
- * This notice MUST be displayed even in quiet mode per legal requirements (FR-006)
32
+ * Called before running evaluations (but not for --init-only, cache commands, or --signout).
33
+ * This notice is always displayed per legal requirements (FR-006).
34
34
  */
35
35
  function displayUsageTerms() {
36
36
  console.log('By using this tool, you agree to the Terms of Use: https://aka.ms/evaltoolterms\n');
@@ -52,10 +52,10 @@ async function setDefaultEnvironmentConstants() {
52
52
  /**
53
53
  * Check for required environment variables and provide helpful guidance.
54
54
  * @param {string} envName - Environment name (e.g. 'dev')
55
- * @param {boolean} [quiet=false] - Suppress output
55
+ * @param {boolean} [suppressOutput=false] - Suppress guidance output
56
56
  * @returns {boolean} true if all required vars are present
57
57
  */
58
- function validateEnvironmentVariables(envName, quiet = false) {
58
+ function validateEnvironmentVariables(envName, suppressOutput = false) {
59
59
  const required = [
60
60
  { key: 'TENANT_ID', description: 'Your Tenant ID' },
61
61
  { key: 'AZURE_AI_OPENAI_ENDPOINT', description: 'Azure OpenAI endpoint URL' },
@@ -72,8 +72,8 @@ function validateEnvironmentVariables(envName, quiet = false) {
72
72
  return true; // All required vars present
73
73
  }
74
74
 
75
- // Show error with helpful guidance (skip output in quiet mode, but still return false)
76
- if (!quiet) {
75
+ // Show error with helpful guidance unless output is suppressed.
76
+ if (!suppressOutput) {
77
77
  console.error('\n❌ Missing required environment variables:\n');
78
78
 
79
79
  const envFile = envName ? `env/.env.${envName}` : '.env.local or env/.env.local';
@@ -96,6 +96,34 @@ function validateEnvironmentVariables(envName, quiet = false) {
96
96
  return false;
97
97
  }
98
98
 
99
+ /**
100
+ * Resolve canonical log level for Python CLI forwarding.
101
+ */
102
+ function resolveLogLevel(options) {
103
+ const allowedLevels = new Set(['debug', 'info', 'warning', 'error']);
104
+ const rawLogLevel =
105
+ options.logLevel === true || options.logLevel === undefined
106
+ ? undefined
107
+ : String(options.logLevel).toLowerCase();
108
+ const effectiveLogLevel = rawLogLevel || 'info';
109
+
110
+ if (!allowedLevels.has(effectiveLogLevel)) {
111
+ console.error(
112
+ `āŒ Invalid --log-level value: ${effectiveLogLevel}. Supported values: debug, info, warning, error.`
113
+ );
114
+ process.exit(2);
115
+ }
116
+
117
+ return effectiveLogLevel;
118
+ }
119
+
120
+ function deriveWrapperOutputMode(effectiveLogLevel) {
121
+ return {
122
+ verbose: effectiveLogLevel === 'debug',
123
+ quiet: effectiveLogLevel === 'warning' || effectiveLogLevel === 'error',
124
+ };
125
+ }
126
+
99
127
  /**
100
128
  * Initialize the Python environment (download, venv, pip install)
101
129
  * @param {boolean} [verbose=false] - Enable verbose output
@@ -126,7 +154,7 @@ async function initializePythonEnvironment(verbose = false, quiet = false) {
126
154
  console.error(' - Check your internet connection');
127
155
  console.error(' - If behind a proxy, set HTTP_PROXY/HTTPS_PROXY environment variables');
128
156
  console.error(' - For SSL issues, set NODE_EXTRA_CA_CERTS or PIP_CERT');
129
- console.error(' - Run with --verbose for detailed output');
157
+ console.error(' - Run with --log-level debug for detailed output');
130
158
 
131
159
  process.exit(1);
132
160
  }
@@ -142,8 +170,7 @@ async function main() {
142
170
  .name('runevals')
143
171
  .description('M365 Copilot Agent Evaluations CLI - Zero-config Python evaluation tool')
144
172
  .version(VERSION)
145
- .option('-v, --verbose', 'verbose output (shows detailed processing steps)')
146
- .option('-q, --quiet', 'quiet mode (minimal output)')
173
+ .option('--log-level [level]', 'log level for diagnostics: debug, info, warning, error (bare flag resolves to info)')
147
174
  .option('--prompts <prompts...>', 'prompts to evaluate')
148
175
  .option('--expected <responses...>', 'expected responses')
149
176
  .option('--prompts-file <file>', 'JSON file with prompts and expected responses')
@@ -159,6 +186,10 @@ async function main() {
159
186
 
160
187
  program.parse(process.argv);
161
188
  const options = program.opts();
189
+ const effectiveLogLevel = resolveLogLevel(options);
190
+ const outputMode = deriveWrapperOutputMode(effectiveLogLevel);
191
+ const wrapperVerbose = outputMode.verbose;
192
+ const wrapperQuiet = outputMode.quiet;
162
193
 
163
194
  // Handle cache commands first (they don't need environment validation or config)
164
195
  if (options.cacheInfo) {
@@ -183,7 +214,7 @@ async function main() {
183
214
 
184
215
  if (options.cacheClear) {
185
216
  console.log('🗑️ Clearing cache...\n');
186
- const success = await clearCache(options.verbose);
217
+ const success = await clearCache(wrapperVerbose);
187
218
 
188
219
  if (success) {
189
220
  console.log('✅ Cache cleared successfully!');
@@ -203,14 +234,14 @@ async function main() {
203
234
 
204
235
  // Initialize Python environment (do this early for --init-only)
205
236
  // Skip env file loading for --init-only since it's not needed
206
- if (!options.quiet) {
237
+ if (!wrapperQuiet) {
207
238
  console.log('🚀 M365 Copilot Agent Evaluations CLI\n');
208
239
  }
209
- await initializePythonEnvironment(options.verbose, options.quiet);
240
+ await initializePythonEnvironment(wrapperVerbose, wrapperQuiet);
210
241
 
211
242
  // If --init-only, stop here (no config or env files needed)
212
243
  if (options.initOnly) {
213
- if (!options.quiet) {
244
+ if (!wrapperQuiet) {
214
245
  console.log('\n✅ Python environment initialized successfully!\n');
215
246
  console.log('⚠️ Note: Configure environment variables before running evaluations.');
216
247
  console.log('📖 See README.md for complete setup guide.\n');
@@ -233,7 +264,7 @@ async function main() {
233
264
  let localEnvFound = false;
234
265
 
235
266
  if (fs.existsSync(localEnvPath)) {
236
- if (!options.quiet && options.verbose) {
267
+ if (!wrapperQuiet && wrapperVerbose) {
237
268
  console.log(`📂 Loading .env.local from current directory`);
238
269
  }
239
270
  const localEnvVars = loadEnvFile(localEnvPath) || {};
@@ -245,7 +276,7 @@ async function main() {
245
276
  if (!localEnvFound) {
246
277
  localEnvPath = path.join(process.cwd(), 'env', '.env.local');
247
278
  if (fs.existsSync(localEnvPath)) {
248
- if (!options.quiet && options.verbose) {
279
+ if (!wrapperQuiet && wrapperVerbose) {
249
280
  console.log(`📂 Loading .env.local from current directory env folder`);
250
281
  }
251
282
  const localEnvVars = loadEnvFile(localEnvPath) || {};
@@ -263,7 +294,7 @@ async function main() {
263
294
  let envFileFound = false;
264
295
 
265
296
  if (fs.existsSync(envFilePath)) {
266
- if (!options.quiet) {
297
+ if (!wrapperQuiet) {
267
298
  console.log(`📂 Loading environment: ${options.env} from current directory env folder`);
268
299
  }
269
300
  const fileEnvVars = loadEnvFile(envFilePath) || {};
@@ -274,7 +305,7 @@ async function main() {
274
305
  envFilePath = path.join(__dirname, '..', 'env', `.env.${options.env}`);
275
306
 
276
307
  if (fs.existsSync(envFilePath)) {
277
- if (!options.quiet) {
308
+ if (!wrapperQuiet) {
278
309
  console.log(`📂 Loading environment: ${options.env} from package env folder`);
279
310
  }
280
311
  const fileEnvVars = loadEnvFile(envFilePath) || {};
@@ -295,14 +326,14 @@ async function main() {
295
326
  // loadEnvFile already resolved aliases (e.g. M365_TITLE_ID) into M365_AGENT_ID
296
327
  if (!resolvedAgentId) {
297
328
  resolvedAgentId = envVars['M365_AGENT_ID'] || process.env.M365_AGENT_ID;
298
- if (resolvedAgentId && !options.quiet) {
329
+ if (resolvedAgentId && !wrapperQuiet) {
299
330
  console.log(`🤖 Agent ID: ${resolvedAgentId}`);
300
331
  }
301
332
  }
302
333
 
303
334
  // Validate required environment variables (always validate, quiet just suppresses output)
304
- if (!validateEnvironmentVariables(options.env, options.quiet)) {
305
- if (options.quiet) {
335
+ if (!validateEnvironmentVariables(options.env, wrapperQuiet)) {
336
+ if (wrapperQuiet) {
306
337
  console.error('📖 Setup guide: https://www.npmjs.com/package/@microsoft/m365-copilot-eval?activeTab=readme\n');
307
338
  }
308
339
  process.exit(1);
@@ -311,8 +342,7 @@ async function main() {
311
342
  // Build arguments to pass to Python CLI
312
343
  const pythonArgs = [];
313
344
 
314
- if (options.verbose) pythonArgs.push('--verbose');
315
- if (options.quiet) pythonArgs.push('--quiet');
345
+ pythonArgs.push('--log-level', effectiveLogLevel);
316
346
  if (options.interactive) pythonArgs.push('--interactive');
317
347
  if (resolvedAgentId) pythonArgs.push('--m365-agent-id', resolvedAgentId);
318
348
 
@@ -327,7 +357,7 @@ async function main() {
327
357
  console.log('✅ Successfully signed out and cleared cached authentication tokens!');
328
358
  } catch (error) {
329
359
  console.error('āŒ Failed to sign out:', error.message);
330
- if (options.verbose) {
360
+ if (wrapperVerbose) {
331
361
  console.error('\nFull error:', error);
332
362
  }
333
363
  process.exit(1);
@@ -350,7 +380,7 @@ async function main() {
350
380
  const candidatePath = path.join(location.dir, filename);
351
381
  if (fs.existsSync(candidatePath)) {
352
382
  promptsFile = candidatePath;
353
- if (!options.quiet) {
383
+ if (!wrapperQuiet) {
354
384
  const displayPath = candidatePath.startsWith(process.cwd())
355
385
  ? './' + path.relative(process.cwd(), candidatePath)
356
386
  : candidatePath;
@@ -363,7 +393,7 @@ async function main() {
363
393
  }
364
394
 
365
395
  // If no prompts file found, offer to create starter file
366
- if (!promptsFile && !options.quiet) {
396
+ if (!promptsFile && !wrapperQuiet) {
367
397
  console.log('\n⚠️ No prompts file found in current directory or ./evals/\n');
368
398
 
369
399
  // Use readline for interactive prompt
@@ -413,7 +443,7 @@ async function main() {
413
443
  const evalsDir = path.join(process.cwd(), '.evals');
414
444
  if (!fs.existsSync(evalsDir)) {
415
445
  fs.mkdirSync(evalsDir, { recursive: true });
416
- if (options.verbose) {
446
+ if (wrapperVerbose) {
417
447
  console.log(`Created .evals directory`);
418
448
  }
419
449
  }
@@ -434,7 +464,7 @@ async function main() {
434
464
  }
435
465
 
436
466
  // Execute Python CLI with working directory set to Python CLI directory
437
- if (!options.quiet) {
467
+ if (!wrapperQuiet) {
438
468
  console.log('\n📊 Running evaluations...\n');
439
469
  console.log('─────────────────────────────────────────────────────────────\n');
440
470
  }
@@ -442,7 +472,7 @@ async function main() {
442
472
  try {
443
473
  await executePythonCli(MAIN_SCRIPT, pythonArgs, { cwd: PYTHON_CLI_DIR });
444
474
 
445
- if (!options.quiet) {
475
+ if (!wrapperQuiet) {
446
476
  console.log('\n─────────────────────────────────────────────────────────────\n');
447
477
  console.log('✓ Evals completed successfully!');
448
478
  if (outputFile) {
@@ -2,7 +2,7 @@
2
2
  * Build-time injected default values
3
3
  * DO NOT EDIT - This file is auto-generated during build.
4
4
  *
5
- * Generated: 2026-03-23T18:11:37.402Z
5
+ * Generated: 2026-04-01T19:33:48.937Z
6
6
  *
7
7
  * @copyright Microsoft Corporation. All rights reserved.
8
8
  * @license MIT