npm - @microsoft/m365-copilot-eval - Versions diffs - 1.2.1-preview.1 → 1.3.0-preview.1 - Mend

@microsoft/m365-copilot-eval 1.2.1-preview.1 → 1.3.0-preview.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +11 -4
package/package.json +2 -2
package/schema/CHANGELOG.md +8 -0
package/schema/v1/eval-document.schema.json +117 -1
package/schema/v1/examples/valid/comprehensive.json +27 -2
package/schema/version.json +2 -2
package/src/clients/cli/cli_logging/__init__.py +0 -0
package/src/clients/cli/cli_logging/console_diagnostics.py +55 -0
package/src/clients/cli/cli_logging/logging_utils.py +145 -0
package/src/clients/cli/common.py +51 -0
package/src/clients/cli/custom_evaluators/CitationsEvaluator.py +3 -3
package/src/clients/cli/custom_evaluators/ExactMatchEvaluator.py +11 -11
package/src/clients/cli/custom_evaluators/PartialMatchEvaluator.py +1 -11
package/src/clients/cli/evaluator_resolver.py +150 -0
package/src/clients/cli/generate_report.py +130 -110
package/src/clients/cli/main.py +513 -236
package/src/clients/cli/readme.md +14 -7
package/src/clients/cli/response_extractor.py +32 -14
package/src/clients/node-js/bin/runevals.js +58 -28
package/src/clients/node-js/config/default.js +1 -1

package/src/clients/cli/readme.md CHANGED Viewed

@@ -97,21 +97,28 @@ python main.py --interactive
 #### Additional Options
 ```bash
-# Verbose output (shows detailed processing steps)
-python main.py --verbose
+# Logging verbosity (canonical control surface)
+python main.py --log-level debug
+python main.py --log-level info
+python main.py --log-level warning
+python main.py --log-level error
+# Bare flag resolves to info
+python main.py --log-level
-# Quiet mode (minimal output)
+# Legacy flags (no longer supported; use --log-level instead)
+# The following will fail with "unrecognized arguments" errors:
+python main.py --verbose
 python main.py --quiet
+# Share diagnostics with support (console-based, no archive artifacts)
+python main.py --log-level debug --prompts-file samples/example_prompts.json
 # Get help and see all options
 python main.py --help
 # Specify / override the Agent ID (takes precedence over M365_AGENT_ID env var)
 python main.py --m365-agent-id "00000000-0000-0000-0000-000000000000"
-# Citation format options
-python main.py --citation-format oai_unicode      # Default: New OAI format
-python main.py --citation-format legacy_bracket   # Old [^i^] format
 ```
 #### File Format Examples

package/src/clients/cli/response_extractor.py CHANGED Viewed

@@ -27,14 +27,20 @@ Date: September 21, 2025
 import json
 import logging
-from typing import Dict, List, Any, Optional
+from typing import Dict, List, Any, Optional, Tuple
 from datetime import datetime
 from enum import Enum
+from cli_logging.logging_utils import LOG_LEVEL_MAP, LogLevel
 # Configure logging
-logging.basicConfig(level=logging.INFO)
+if not logging.getLogger().handlers:
+    logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+def _log_level_to_python_level(log_level: str) -> int:
+    normalized = (log_level or "info").strip().lower()
+    return LOG_LEVEL_MAP.get(normalized, logging.INFO)
 class MessageRole(Enum):
     """Enumeration for message roles."""
     USER = "user"
@@ -71,8 +77,10 @@ class EnhancedResponseExtractor:
         "generate_response"
     }
-    def __init__(self):
+    def __init__(self, log_level: str = "info"):
         self.tool_call_counter = 0
+        self.log_level = (log_level or "info").strip().lower()
+        logger.setLevel(_log_level_to_python_level(self.log_level))
     def _generate_tool_call_id(self, tool_name: str) -> str:
         """Generate a unique tool call ID."""
@@ -461,6 +469,7 @@ class EnhancedResponseExtractor:
                 "metadata": {
                     "conversation_id": response_data.get("conversationId"),
                     "request_id": response_data.get("requestId"),
+                    "message_id": None,
                     "telemetry_available": False
                 }
             }
@@ -470,6 +479,11 @@ class EnhancedResponseExtractor:
             if isinstance(response_data, dict):
                 # Messages are directly in the response_data object
                 messages = response_data.get("messages", [])
+            # Extract message_id from the last bot message in this response
+            bot_messages = [m for m in messages if m.get("author") != "user"]
+            if bot_messages and bot_messages[-1].get("messageId"):
+                enhanced_response["metadata"]["message_id"] = bot_messages[-1]["messageId"]
             # Extract telemetry tools if available
             telemetry_tools = []
@@ -526,6 +540,7 @@ class EnhancedResponseExtractor:
                 "metadata": {
                     "conversation_id": None,
                     "request_id": None,
+                    "message_id": None,
                     "telemetry_available": False
                 }
             }
@@ -552,27 +567,30 @@ class EnhancedResponseExtractor:
                 "metadata": {
                     "conversation_id": None,
                     "request_id": None,
+                    "message_id": None,
                     "telemetry_available": False,
                     "error": str(e)
                 }
             }
-def extract_enhanced_responses(responses: Dict[str, str]) -> Dict[str, Dict[str, Any]]:
+def extract_enhanced_responses(responses: List[Tuple[str, str]], log_level: str = "info") -> List[Dict[str, Any]]:
     """
     Extract enhanced response information for multiple responses.
     Args:
-        responses: Dictionary mapping prompts to raw response strings
+        responses: List of (prompt_text, raw_response_string) tuples, one per prompt
+                   sent to the chat API. Order and duplicates are preserved.
     Returns:
-        Dictionary mapping prompts to enhanced response data
+        List of enhanced response dicts (one per prompt, same order as input).
     """
-    extractor = EnhancedResponseExtractor()
-    enhanced_responses = {}
-    for prompt, raw_response in responses.items():
-        enhanced_responses[prompt] = extractor.extract_enhanced_response(raw_response)
+    extractor = EnhancedResponseExtractor(log_level=log_level)
+    enhanced_responses = []
+    for prompt, raw_response in responses:
+        enhanced = extractor.extract_enhanced_response(raw_response)
+        enhanced_responses.append(enhanced)
     return enhanced_responses
 def get_response_text_for_evaluation(enhanced_response: Dict[str, Any]) -> str:

package/src/clients/node-js/bin/runevals.js CHANGED Viewed

@@ -29,8 +29,8 @@ const REQUIREMENTS_FILE = path.join(PYTHON_CLI_DIR, 'requirements.txt');
 /**
  * Display usage terms notice
- * Called before running evaluations (but not for --init-only, cache commands, or --signout)
- * This notice MUST be displayed even in quiet mode per legal requirements (FR-006)
+ * Called before running evaluations (but not for --init-only, cache commands, or --signout).
+ * This notice is always displayed per legal requirements (FR-006).
  */
 function displayUsageTerms() {
   console.log('By using this tool, you agree to the Terms of Use: https://aka.ms/evaltoolterms\n');
@@ -52,10 +52,10 @@ async function setDefaultEnvironmentConstants() {
 /**
  * Check for required environment variables and provide helpful guidance.
  * @param {string} envName - Environment name (e.g. 'dev')
- * @param {boolean} [quiet=false] - Suppress output
+ * @param {boolean} [suppressOutput=false] - Suppress guidance output
  * @returns {boolean} true if all required vars are present
  */
-function validateEnvironmentVariables(envName, quiet = false) {
+function validateEnvironmentVariables(envName, suppressOutput = false) {
   const required = [
     { key: 'TENANT_ID', description: 'Your Tenant ID' },
     { key: 'AZURE_AI_OPENAI_ENDPOINT', description: 'Azure OpenAI endpoint URL' },
@@ -72,8 +72,8 @@ function validateEnvironmentVariables(envName, quiet = false) {
     return true; // All required vars present
   }
-  // Show error with helpful guidance (skip output in quiet mode, but still return false)
-  if (!quiet) {
+  // Show error with helpful guidance unless output is suppressed.
+  if (!suppressOutput) {
     console.error('\n❌ Missing required environment variables:\n');
     const envFile = envName ? `env/.env.${envName}` : '.env.local or env/.env.local';
@@ -96,6 +96,34 @@ function validateEnvironmentVariables(envName, quiet = false) {
   return false;
 }
+/**
+ * Resolve canonical log level for Python CLI forwarding.
+ */
+function resolveLogLevel(options) {
+  const allowedLevels = new Set(['debug', 'info', 'warning', 'error']);
+  const rawLogLevel =
+    options.logLevel === true || options.logLevel === undefined
+      ? undefined
+      : String(options.logLevel).toLowerCase();
+  const effectiveLogLevel = rawLogLevel || 'info';
+  if (!allowedLevels.has(effectiveLogLevel)) {
+    console.error(
+      `❌ Invalid --log-level value: ${effectiveLogLevel}. Supported values: debug, info, warning, error.`
+    );
+    process.exit(2);
+  }
+  return effectiveLogLevel;
+}
+function deriveWrapperOutputMode(effectiveLogLevel) {
+  return {
+    verbose: effectiveLogLevel === 'debug',
+    quiet: effectiveLogLevel === 'warning' || effectiveLogLevel === 'error',
+  };
+}
 /**
  * Initialize the Python environment (download, venv, pip install)
  * @param {boolean} [verbose=false] - Enable verbose output
@@ -126,7 +154,7 @@ async function initializePythonEnvironment(verbose = false, quiet = false) {
     console.error('  - Check your internet connection');
     console.error('  - If behind a proxy, set HTTP_PROXY/HTTPS_PROXY environment variables');
     console.error('  - For SSL issues, set NODE_EXTRA_CA_CERTS or PIP_CERT');
-    console.error('  - Run with --verbose for detailed output');
+    console.error('  - Run with --log-level debug for detailed output');
     process.exit(1);
   }
@@ -142,8 +170,7 @@ async function main() {
     .name('runevals')
     .description('M365 Copilot Agent Evaluations CLI - Zero-config Python evaluation tool')
     .version(VERSION)
-    .option('-v, --verbose', 'verbose output (shows detailed processing steps)')
-    .option('-q, --quiet', 'quiet mode (minimal output)')
+    .option('--log-level [level]', 'log level for diagnostics: debug, info, warning, error (bare flag resolves to info)')
     .option('--prompts <prompts...>', 'prompts to evaluate')
     .option('--expected <responses...>', 'expected responses')
     .option('--prompts-file <file>', 'JSON file with prompts and expected responses')
@@ -159,6 +186,10 @@ async function main() {
   program.parse(process.argv);
   const options = program.opts();
+  const effectiveLogLevel = resolveLogLevel(options);
+  const outputMode = deriveWrapperOutputMode(effectiveLogLevel);
+  const wrapperVerbose = outputMode.verbose;
+  const wrapperQuiet = outputMode.quiet;
   // Handle cache commands first (they don't need environment validation or config)
   if (options.cacheInfo) {
@@ -183,7 +214,7 @@ async function main() {
   if (options.cacheClear) {
     console.log('🗑️  Clearing cache...\n');
-    const success = await clearCache(options.verbose);
+    const success = await clearCache(wrapperVerbose);
     if (success) {
       console.log('✅ Cache cleared successfully!');
@@ -203,14 +234,14 @@ async function main() {
   // Initialize Python environment (do this early for --init-only)
   // Skip env file loading for --init-only since it's not needed
-  if (!options.quiet) {
+  if (!wrapperQuiet) {
     console.log('🚀 M365 Copilot Agent Evaluations CLI\n');
   }
-  await initializePythonEnvironment(options.verbose, options.quiet);
+  await initializePythonEnvironment(wrapperVerbose, wrapperQuiet);
   // If --init-only, stop here (no config or env files needed)
   if (options.initOnly) {
-    if (!options.quiet) {
+    if (!wrapperQuiet) {
       console.log('\n✅ Python environment initialized successfully!\n');
       console.log('⚠️  Note: Configure environment variables before running evaluations.');
       console.log('📖 See README.md for complete setup guide.\n');
@@ -233,7 +264,7 @@ async function main() {
   let localEnvFound = false;
   if (fs.existsSync(localEnvPath)) {
-    if (!options.quiet && options.verbose) {
+    if (!wrapperQuiet && wrapperVerbose) {
       console.log(`📂 Loading .env.local from current directory`);
     }
     const localEnvVars = loadEnvFile(localEnvPath) || {};
@@ -245,7 +276,7 @@ async function main() {
   if (!localEnvFound) {
     localEnvPath = path.join(process.cwd(), 'env', '.env.local');
     if (fs.existsSync(localEnvPath)) {
-      if (!options.quiet && options.verbose) {
+      if (!wrapperQuiet && wrapperVerbose) {
         console.log(`📂 Loading .env.local from current directory env folder`);
       }
       const localEnvVars = loadEnvFile(localEnvPath) || {};
@@ -263,7 +294,7 @@ async function main() {
     let envFileFound = false;
     if (fs.existsSync(envFilePath)) {
-      if (!options.quiet) {
+      if (!wrapperQuiet) {
         console.log(`📂 Loading environment: ${options.env} from current directory env folder`);
       }
       const fileEnvVars = loadEnvFile(envFilePath) || {};
@@ -274,7 +305,7 @@ async function main() {
       envFilePath = path.join(__dirname, '..', 'env', `.env.${options.env}`);
       if (fs.existsSync(envFilePath)) {
-        if (!options.quiet) {
+        if (!wrapperQuiet) {
           console.log(`📂 Loading environment: ${options.env} from package env folder`);
         }
         const fileEnvVars = loadEnvFile(envFilePath) || {};
@@ -295,14 +326,14 @@ async function main() {
   // loadEnvFile already resolved aliases (e.g. M365_TITLE_ID) into M365_AGENT_ID
   if (!resolvedAgentId) {
     resolvedAgentId = envVars['M365_AGENT_ID'] || process.env.M365_AGENT_ID;
-    if (resolvedAgentId && !options.quiet) {
+    if (resolvedAgentId && !wrapperQuiet) {
       console.log(`🤖 Agent ID: ${resolvedAgentId}`);
     }
   }
   // Validate required environment variables (always validate, quiet just suppresses output)
-  if (!validateEnvironmentVariables(options.env, options.quiet)) {
-    if (options.quiet) {
+  if (!validateEnvironmentVariables(options.env, wrapperQuiet)) {
+    if (wrapperQuiet) {
       console.error('📖 Setup guide: https://www.npmjs.com/package/@microsoft/m365-copilot-eval?activeTab=readme\n');
     }
     process.exit(1);
@@ -311,8 +342,7 @@ async function main() {
   // Build arguments to pass to Python CLI
   const pythonArgs = [];
-  if (options.verbose) pythonArgs.push('--verbose');
-  if (options.quiet) pythonArgs.push('--quiet');
+  pythonArgs.push('--log-level', effectiveLogLevel);
   if (options.interactive) pythonArgs.push('--interactive');
   if (resolvedAgentId) pythonArgs.push('--m365-agent-id', resolvedAgentId);
@@ -327,7 +357,7 @@ async function main() {
       console.log('✅ Successfully signed out and cleared cached authentication tokens!');
     } catch (error) {
       console.error('❌ Failed to sign out:', error.message);
-      if (options.verbose) {
+      if (wrapperVerbose) {
         console.error('\nFull error:', error);
       }
       process.exit(1);
@@ -350,7 +380,7 @@ async function main() {
         const candidatePath = path.join(location.dir, filename);
         if (fs.existsSync(candidatePath)) {
           promptsFile = candidatePath;
-          if (!options.quiet) {
+          if (!wrapperQuiet) {
             const displayPath = candidatePath.startsWith(process.cwd())
               ? './' + path.relative(process.cwd(), candidatePath)
               : candidatePath;
@@ -363,7 +393,7 @@ async function main() {
     }
     // If no prompts file found, offer to create starter file
-    if (!promptsFile && !options.quiet) {
+    if (!promptsFile && !wrapperQuiet) {
       console.log('\n⚠️  No prompts file found in current directory or ./evals/\n');
       // Use readline for interactive prompt
@@ -413,7 +443,7 @@ async function main() {
     const evalsDir = path.join(process.cwd(), '.evals');
     if (!fs.existsSync(evalsDir)) {
       fs.mkdirSync(evalsDir, { recursive: true });
-      if (options.verbose) {
+      if (wrapperVerbose) {
         console.log(`Created .evals directory`);
       }
     }
@@ -434,7 +464,7 @@ async function main() {
   }
   // Execute Python CLI with working directory set to Python CLI directory
-  if (!options.quiet) {
+  if (!wrapperQuiet) {
     console.log('\n📊 Running evaluations...\n');
     console.log('─────────────────────────────────────────────────────────────\n');
   }
@@ -442,7 +472,7 @@ async function main() {
   try {
     await executePythonCli(MAIN_SCRIPT, pythonArgs, { cwd: PYTHON_CLI_DIR });
-    if (!options.quiet) {
+    if (!wrapperQuiet) {
       console.log('\n─────────────────────────────────────────────────────────────\n');
       console.log('✓ Evals completed successfully!');
       if (outputFile) {

package/src/clients/node-js/config/default.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Build-time injected default values
  * DO NOT EDIT - This file is auto-generated during build.
  *
- * Generated: 2026-03-23T18:11:37.402Z
+ * Generated: 2026-04-01T19:33:48.937Z
  *
  * @copyright Microsoft Corporation. All rights reserved.
  * @license MIT