@microsoft/m365-copilot-eval 1.3.0-preview.1 → 1.4.0-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +129 -97
  2. package/package.json +7 -4
  3. package/schema/v1/eval-document.schema.json +140 -8
  4. package/schema/v1/examples/invalid/multi-turn-empty-turns.json +8 -0
  5. package/schema/v1/examples/invalid/multi-turn-has-both-prompt-and-turns.json +13 -0
  6. package/schema/v1/examples/invalid/multi-turn-missing-prompt.json +12 -0
  7. package/schema/v1/examples/invalid/multi-turn-typo-in-turn.json +13 -0
  8. package/schema/v1/examples/invalid/multi-turn-unknown-evaluator.json +15 -0
  9. package/schema/v1/examples/valid/mixed-single-and-multi-turn.json +30 -0
  10. package/schema/v1/examples/valid/multi-turn-output.json +59 -0
  11. package/schema/v1/examples/valid/multi-turn-simple.json +21 -0
  12. package/schema/v1/examples/valid/multi-turn-with-evaluators.json +34 -0
  13. package/schema/version.json +2 -2
  14. package/src/clients/cli/api_clients/A2A/__init__.py +3 -0
  15. package/src/clients/cli/api_clients/A2A/a2a_client.py +456 -0
  16. package/src/clients/cli/api_clients/REST/__init__.py +3 -0
  17. package/src/clients/cli/api_clients/REST/sydney_client.py +204 -0
  18. package/src/clients/cli/api_clients/__init__.py +3 -0
  19. package/src/clients/cli/api_clients/base_agent_client.py +78 -0
  20. package/src/clients/cli/cli_logging/console_diagnostics.py +54 -2
  21. package/src/clients/cli/cli_logging/logging_utils.py +0 -1
  22. package/src/clients/cli/common.py +11 -0
  23. package/src/clients/cli/generate_report.py +272 -129
  24. package/src/clients/cli/main.py +1006 -476
  25. package/src/clients/cli/parallel_executor.py +57 -0
  26. package/src/clients/cli/requirements.txt +1 -1
  27. package/src/clients/cli/response_extractor.py +12 -14
  28. package/src/clients/cli/retry_policy.py +52 -0
  29. package/src/clients/cli/samples/multiturn_example.json +35 -0
  30. package/src/clients/cli/throttle_gate.py +82 -0
  31. package/src/clients/node-js/bin/runevals.js +79 -16
  32. package/src/clients/node-js/config/default.js +5 -1
  33. package/src/clients/node-js/lib/agent-id.js +12 -0
  34. package/src/clients/node-js/lib/env-loader.js +11 -16
  35. package/src/clients/node-js/lib/eula-manager.js +78 -0
  36. package/src/clients/node-js/lib/progress.js +13 -11
@@ -0,0 +1,57 @@
1
+ """Parallel prompt execution utilities.
2
+
3
+ This module provides a minimal reusable executor that preserves input order.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from dataclasses import dataclass
10
+ from typing import Any, Callable, Generic, Iterable, List, Optional, TypeVar
11
+
12
+
13
+ T = TypeVar("T")
14
+ R = TypeVar("R")
15
+
16
+
17
@dataclass
class WorkerResult(Generic[R]):
    """Outcome of running the worker on a single input item.

    ``execute_in_parallel`` keeps one instance per input position so that the
    returned list follows input order even though workers finish out of order.
    """

    # Zero-based position of the item in the original input sequence.
    index: int
    # Value returned by the worker; remains None when the worker raised.
    value: Optional[R] = None
    # Exception raised by the worker; None when the call succeeded.
    error: Optional[Exception] = None
24
+
25
+
26
def execute_in_parallel(
    items: Iterable[T],
    worker: Callable[[T, int], R],
    max_workers: int,
) -> List[WorkerResult[R]]:
    """Run ``worker`` over ``items`` on a thread pool, returning results in input order.

    Args:
        items: Inputs to process. The iterable is consumed fully up front.
        worker: Callable invoked as ``worker(item, index)`` for every item.
        max_workers: Requested pool size; clamped to ``1..len(items)``.

    Returns:
        One ``WorkerResult`` per input item, positioned by input index. A
        worker that raises an ``Exception`` is reported through ``error``
        instead of aborting the whole batch. Empty input yields ``[]``.

    Raises:
        KeyboardInterrupt, SystemExit: propagated unchanged when surfaced by a
            worker's future.
    """
    pairs = list(enumerate(items))
    total = len(pairs)
    if total == 0:
        return []

    pool_size = max(1, min(max_workers, total))
    # Pre-fill a placeholder for every slot so completions can land in any order.
    ordered: List[WorkerResult[R]] = [
        WorkerResult(index=position) for position, _ in pairs
    ]

    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        pending = {}
        for position, item in pairs:
            pending[pool.submit(worker, item, position)] = position

        for finished in as_completed(pending):
            position = pending[finished]
            try:
                outcome = finished.result()
            except (KeyboardInterrupt, SystemExit):
                # Never convert an interpreter shutdown request into a result.
                raise
            except Exception as exc:
                ordered[position] = WorkerResult(index=position, error=exc)
            else:
                ordered[position] = WorkerResult(index=position, value=outcome)

    return ordered
@@ -6,7 +6,7 @@ msal[broker]>=1.34,<2
6
6
  msal-extensions>=1.3.1
7
7
  packaging>=20.0
8
8
  PyJWT>=2.11.0
9
- python-dotenv==1.1.1
9
+ python-dotenv==1.2.2
10
10
  markdown==3.8.2
11
11
  promptflow>=1.18.1
12
12
  questionary>=2.1.1
@@ -573,25 +573,23 @@ class EnhancedResponseExtractor:
573
573
  }
574
574
  }
575
575
 
576
- def extract_enhanced_responses(responses: List[Tuple[str, str]], log_level: str = "info") -> List[Dict[str, Any]]:
576
+ def extract_enhanced_response(raw_response: str, log_level: str = "info") -> Dict[str, Any]:
577
577
  """
578
- Extract enhanced response information for multiple responses.
579
-
578
+ Extract enhanced response information from a raw response string.
579
+
580
580
  Args:
581
- responses: List of (prompt_text, raw_response_string) tuples, one per prompt
582
- sent to the chat API. Order and duplicates are preserved.
583
-
581
+ raw_response: Raw response string from the agent
582
+ log_level: Logging level for the extraction process (default: "info")
583
+
584
584
  Returns:
585
- List of enhanced response dicts (one per prompt, same order as input).
585
+ A dictionary containing the enhanced response information, including:
586
+ - "response": Reconstructed message flow with tool calls and results
587
+ - "tool_definitions": List of tool definitions extracted from telemetry
588
+ - "raw_response_text": Original response text for backward compatibility
589
+ - "metadata": Additional metadata such as conversation ID, request ID, etc.
586
590
  """
587
591
  extractor = EnhancedResponseExtractor(log_level=log_level)
588
- enhanced_responses = []
589
-
590
- for prompt, raw_response in responses:
591
- enhanced = extractor.extract_enhanced_response(raw_response)
592
- enhanced_responses.append(enhanced)
593
-
594
- return enhanced_responses
592
+ return extractor.extract_enhanced_response(raw_response)
595
593
 
596
594
  def get_response_text_for_evaluation(enhanced_response: Dict[str, Any]) -> str:
597
595
  """
@@ -0,0 +1,52 @@
1
+ """Retry utilities for transient HTTP failures in evaluation flows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timezone
6
+ from email.utils import parsedate_to_datetime
7
+ from typing import Optional
8
+
9
# HTTP statuses treated as transient and therefore worth retrying:
# 429 Too Many Requests, 503 Service Unavailable, 504 Gateway Timeout.
RETRYABLE_HTTP_STATUS_CODES = {429, 503, 504}
# Upper bound, in seconds, applied to the delay computed by get_backoff_seconds().
MAX_BACKOFF_SECONDS = 60
11
+
12
+
13
def is_retryable_status(status_code: Optional[int]) -> bool:
    """Tell whether an HTTP status code should be retried.

    Args:
        status_code: Status code of the response, or ``None`` when no status
            is available. The value is coerced with ``int()`` before lookup.

    Returns:
        ``True`` when the code is listed in ``RETRYABLE_HTTP_STATUS_CODES``;
        ``False`` otherwise, including for ``None``.
    """
    return status_code is not None and int(status_code) in RETRYABLE_HTTP_STATUS_CODES
18
+
19
+
20
def get_backoff_seconds(attempt: int) -> int:
    """Compute the exponential backoff delay for a retry attempt.

    The delay doubles with every attempt (2, 4, 8 seconds for attempts 1, 2, 3)
    and never exceeds ``MAX_BACKOFF_SECONDS``.

    Args:
        attempt: 1-based retry attempt number.

    Returns:
        Number of seconds to wait before the next attempt.

    Raises:
        ValueError: If ``attempt`` is smaller than 1.
    """
    if attempt < 1:
        raise ValueError("attempt must be >= 1")
    uncapped_delay = 2 ** attempt
    return min(uncapped_delay, MAX_BACKOFF_SECONDS)
28
+
29
+
30
def get_retry_after_seconds(retry_after_header: Optional[str]) -> Optional[int]:
    """Parse a ``Retry-After`` header into a non-negative number of seconds.

    Both forms allowed by RFC 7231 §7.1.3 are supported: ``delay-seconds``
    (an integer) and ``HTTP-date``.

    Args:
        retry_after_header: Raw header value, or ``None`` when the header is
            absent.

    Returns:
        Seconds to wait (never negative; dates in the past yield ``0``), or
        ``None`` when the header is missing, blank, or unparseable.
    """
    if retry_after_header is None:
        return None

    value = retry_after_header.strip()
    if not value:
        return None

    # delay-seconds form: a plain integer.
    try:
        return max(0, int(value))
    except ValueError:
        pass

    # HTTP-date form. Only the parse is guarded so that unrelated errors in
    # the arithmetic below are not silently turned into "no header".
    try:
        retry_date = parsedate_to_datetime(value)
    except (ValueError, TypeError):
        return None

    # parsedate_to_datetime returns a naive datetime for a "-0000" zone.
    # Subtracting an aware "now" from it raises TypeError, which previously
    # caused a valid date to be dropped. HTTP dates are UTC, so treat it so.
    if retry_date.tzinfo is None:
        retry_date = retry_date.replace(tzinfo=timezone.utc)

    remaining = retry_date - datetime.now(timezone.utc)
    return max(0, int(remaining.total_seconds()))
@@ -0,0 +1,35 @@
1
+ {
2
+ "schemaVersion": "1.2.0",
3
+ "default_evaluators": {
4
+ "Relevance": {},
5
+ "Coherence": {}
6
+ },
7
+ "items": [
8
+ {
9
+ "prompt": "What is Microsoft Graph?",
10
+ "expected_response": "Microsoft Graph is a gateway to data and intelligence in Microsoft 365."
11
+ },
12
+ {
13
+ "name": "Travel planning conversation",
14
+ "description": "Multi-turn thread testing context retention across turns",
15
+ "turns": [
16
+ {
17
+ "prompt": "I'm planning a trip to Seattle next week.",
18
+ "expected_response": "I can help you plan your Seattle trip."
19
+ },
20
+ {
21
+ "prompt": "What's the weather going to be like?",
22
+ "expected_response": "Seattle weather is typically mild with possible rain."
23
+ },
24
+ {
25
+ "prompt": "Should I bring a rain jacket?",
26
+ "expected_response": "Yes, it's always a good idea to bring rain gear to Seattle.",
27
+ "evaluators": {
28
+ "Groundedness": { "threshold": 4 }
29
+ },
30
+ "evaluators_mode": "extend"
31
+ }
32
+ ]
33
+ }
34
+ ]
35
+ }
@@ -0,0 +1,82 @@
1
+ """Per-API throttle gate support for transient HTTP 429 handling."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ import time
7
+ from dataclasses import dataclass
8
+ from typing import Optional
9
+
10
+
11
@dataclass
class GateState:
    """Point-in-time snapshot of a ``ThrottleGate``, for diagnostics and tests."""

    # Name of the API the gate belongs to (copied from ThrottleGate.api_name).
    api_name: str
    # time.time()-based timestamp until which the gate is blocked; 0.0 after clear().
    blocked_until_epoch: float
    # True when blocked_until_epoch was still in the future at snapshot time.
    is_blocked: bool
    # Retry-after value (seconds) that last extended the block window, if any.
    last_retry_after_seconds: Optional[int]
19
+
20
+
21
class ThrottleGate:
    """Per-API gate that makes callers wait while a throttling window is active.

    All mutable state is guarded by an internal ``threading.Lock``.
    """

    # Ceiling on the cumulative time a single wait_if_blocked() call may sleep.
    MAX_GATE_WAIT_SECONDS = 300.0

    def __init__(self, api_name: str) -> None:
        """Create an open (unblocked) gate for ``api_name``."""
        self.api_name = api_name
        self._lock = threading.Lock()
        self._blocked_until_epoch = 0.0
        self._last_retry_after_seconds: Optional[int] = None

    def apply_retry_after(self, retry_after_seconds: int) -> float:
        """Register a retry-after delay, never shortening an existing window.

        Negative values are treated as zero. The stored window only changes
        when the new deadline is later than the current one.

        Returns:
            The effective blocked-until timestamp after the update.
        """
        seconds = int(retry_after_seconds)
        if seconds < 0:
            seconds = 0
        proposed_deadline = time.time() + seconds

        with self._lock:
            if proposed_deadline > self._blocked_until_epoch:
                self._blocked_until_epoch = proposed_deadline
                self._last_retry_after_seconds = seconds
            return self._blocked_until_epoch

    def wait_if_blocked(self) -> float:
        """Block the calling thread until the gate is open.

        The deadline is re-read after every sleep so that a concurrent
        ``apply_retry_after`` call extending the window is honoured.

        Returns:
            Total number of seconds slept (``0.0`` when the gate was open).

        Raises:
            TimeoutError: If sleeping for the remaining window would push the
                cumulative wait past ``MAX_GATE_WAIT_SECONDS``.
        """
        slept_so_far = 0.0
        while True:
            # Hold the lock only for the read; sleeping happens outside it.
            with self._lock:
                remaining = self._blocked_until_epoch - time.time()
            if remaining <= 0:
                return slept_so_far
            if slept_so_far + remaining > self.MAX_GATE_WAIT_SECONDS:
                raise TimeoutError(
                    f"ThrottleGate '{self.api_name}' exceeded maximum wait of "
                    f"{self.MAX_GATE_WAIT_SECONDS}s (slept {slept_so_far:.1f}s so far)."
                )
            time.sleep(remaining)
            slept_so_far += remaining

    def clear(self) -> None:
        """Open the gate and forget the last recorded retry-after value."""
        with self._lock:
            self._last_retry_after_seconds = None
            self._blocked_until_epoch = 0.0

    def state(self) -> GateState:
        """Return a ``GateState`` snapshot of the gate taken under the lock."""
        with self._lock:
            current_time = time.time()
            deadline = self._blocked_until_epoch
            return GateState(
                api_name=self.api_name,
                blocked_until_epoch=deadline,
                is_blocked=deadline > current_time,
                last_retry_after_seconds=self._last_retry_after_seconds,
            )
@@ -8,8 +8,10 @@ import { ensurePythonRuntime, getCacheDir } from '../lib/python-runtime.js';
8
8
  import { ensureVenv, executePythonCli } from '../lib/venv-manager.js';
9
9
  import { getCacheStats, clearCache, formatBytes } from '../lib/cache-utils.js';
10
10
  import { checkPackageExpiry } from '../lib/expiry-check.js';
11
+ import { recordAcceptance, checkAcceptance } from '../lib/eula-manager.js';
11
12
  import { ProgressReporter } from '../lib/progress.js';
12
13
  import { _loadEnvFile as loadEnvFile, _loadUserEnvOverride } from '../lib/env-loader.js';
14
+ import { normalizeAgentId } from '../lib/agent-id.js';
13
15
 
14
16
  // Check package expiry (exits if expired, warns if close to expiry)
15
17
  checkPackageExpiry();
@@ -22,20 +24,13 @@ const packageJsonPath = path.join(__dirname, '..', '..', '..', '..', 'package.js
22
24
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8'));
23
25
  const VERSION = packageJson.version;
24
26
 
27
+ const EULA_URL = 'https://aka.ms/evaltoolterms';
28
+
25
29
  // Path to Python CLI and requirements
26
30
  const PYTHON_CLI_DIR = path.join(__dirname, '..', '..', 'cli');
27
31
  const MAIN_SCRIPT = path.join(PYTHON_CLI_DIR, 'main.py');
28
32
  const REQUIREMENTS_FILE = path.join(PYTHON_CLI_DIR, 'requirements.txt');
29
33
 
30
- /**
31
- * Display usage terms notice
32
- * Called before running evaluations (but not for --init-only, cache commands, or --signout).
33
- * This notice is always displayed per legal requirements (FR-006).
34
- */
35
- function displayUsageTerms() {
36
- console.log('By using this tool, you agree to the Terms of Use: https://aka.ms/evaltoolterms\n');
37
- }
38
-
39
34
  /**
40
35
  * Set default environment constants that cannot be overridden
41
36
  * This ensures these values are always set regardless of .env files
@@ -176,21 +171,80 @@ async function main() {
176
171
  .option('--prompts-file <file>', 'JSON file with prompts and expected responses')
177
172
  .option('-o, --output <file>', 'output file (JSON, CSV, or HTML)')
178
173
  .option('-i, --interactive', 'interactive mode (enter prompts interactively)')
174
+ .option('--concurrency <number>', 'max prompts to process in parallel (1-5)')
179
175
  .option('--m365-agent-id <id>', 'agent ID (overrides env vars and auto-construction)')
180
176
  .option('--env <environment>', 'environment name (loads env/.env.<environment>)', 'local')
181
177
  .option('--init-only', 'only initialize Python environment, don\'t run evaluations')
182
178
  .option('--cache-info', 'show cache information and statistics')
183
179
  .option('--cache-clear', 'clear the cache (removes Python runtime and venv)')
184
180
  .option('--cache-dir', 'print the cache directory path')
185
- .option('--signout', 'sign out and clear cached authentication tokens');
181
+ .option('--signout', 'sign out and clear cached authentication tokens')
182
+ .action(() => {
183
+ // Default command — handled by the main flow below parseAsync()
184
+ });
185
+
186
+ program
187
+ .command('accept-eula')
188
+ .description('Accept the End User License Agreement (EULA)')
189
+ .action(async () => {
190
+ const config = (await import('../config/default.js')).default;
191
+ try {
192
+ await recordAcceptance(config.eula.version);
193
+ console.log('EULA has been accepted');
194
+ process.exit(0);
195
+ } catch (err) {
196
+ console.error(
197
+ `⚠️ Unable to persist EULA acceptance: ${err.message}`,
198
+ );
199
+ console.error(
200
+ 'Please ensure the directory ~/.m365-copilot-agent-evals/ is writable.',
201
+ );
202
+ process.exit(1);
203
+ }
204
+ });
186
205
 
187
- program.parse(process.argv);
206
+ await program.parseAsync(process.argv);
188
207
  const options = program.opts();
189
208
  const effectiveLogLevel = resolveLogLevel(options);
190
209
  const outputMode = deriveWrapperOutputMode(effectiveLogLevel);
191
210
  const wrapperVerbose = outputMode.verbose;
192
211
  const wrapperQuiet = outputMode.quiet;
193
212
 
213
+ // === EULA Enforcement Gate ===
214
+ // Block all commands until EULA is accepted (FR-010, FR-011).
215
+ // accept-eula subcommand, --help, and --version are already handled
216
+ // by Commander during program.parseAsync() and exit before reaching here.
217
+ const config = (await import('../config/default.js')).default;
218
+ const { accepted, stale } = await checkAcceptance(config.eula.version);
219
+ if (!accepted) {
220
+ if (stale) {
221
+ console.error(
222
+ `==============================================================
223
+ The End User License Agreement (EULA) has been updated.
224
+ Please review the updated terms at:
225
+ ${EULA_URL}
226
+
227
+ To accept the updated EULA, please execute the following command:
228
+
229
+ runevals accept-eula
230
+
231
+ ==============================================================`);
232
+ } else {
233
+ console.error(
234
+ `==============================================================
235
+ In order to use this tool you must accept the End User License
236
+ Agreement (EULA) found at:
237
+ ${EULA_URL}
238
+
239
+ To accept the EULA, please execute the following command:
240
+
241
+ runevals accept-eula
242
+
243
+ ==============================================================`);
244
+ }
245
+ process.exit(2);
246
+ }
247
+
194
248
  // Handle cache commands first (they don't need environment validation or config)
195
249
  if (options.cacheInfo) {
196
250
  console.log('🗂️ Cache Information\n');
@@ -251,8 +305,7 @@ async function main() {
251
305
 
252
306
  // === From here on, we're running actual evals - load config and env files ===
253
307
 
254
- displayUsageTerms();
255
- // Load build-time config
308
+ // Load build-time config (already loaded above for EULA check)
256
309
  await setDefaultEnvironmentConstants();
257
310
 
258
311
  // Load environment files
@@ -322,11 +375,17 @@ async function main() {
322
375
  }
323
376
  }
324
377
 
325
- // Resolve agent ID from environment if not explicitly provided via CLI flag
326
- // loadEnvFile already resolved aliases (e.g. M365_TITLE_ID) into M365_AGENT_ID
378
+ // Resolve agent ID from environment if not explicitly provided via CLI flag.
379
+ // loadEnvFile already resolved aliases (e.g. M365_TITLE_ID) into M365_AGENT_ID.
380
+ // Then normalize via shared helper and sync to process.env so downstream
381
+ // readers (and the python CLI) see the canonical form.
327
382
  if (!resolvedAgentId) {
328
383
  resolvedAgentId = envVars['M365_AGENT_ID'] || process.env.M365_AGENT_ID;
329
- if (resolvedAgentId && !wrapperQuiet) {
384
+ }
385
+ resolvedAgentId = normalizeAgentId(resolvedAgentId);
386
+ if (resolvedAgentId) {
387
+ process.env.M365_AGENT_ID = resolvedAgentId;
388
+ if (!options.m365AgentId && !wrapperQuiet) {
330
389
  console.log(`🤖 Agent ID: ${resolvedAgentId}`);
331
390
  }
332
391
  }
@@ -458,6 +517,10 @@ async function main() {
458
517
  if (options.prompts && options.prompts.length > 0) {
459
518
  pythonArgs.push('--prompts', ...options.prompts);
460
519
  }
520
+
521
+ if (options.concurrency !== undefined) {
522
+ pythonArgs.push('--concurrency', String(options.concurrency));
523
+ }
461
524
 
462
525
  if (options.expected && options.expected.length > 0) {
463
526
  pythonArgs.push('--expected', ...options.expected);
@@ -2,7 +2,7 @@
2
2
  * Build-time injected default values
3
3
  * DO NOT EDIT - This file is auto-generated during build.
4
4
  *
5
- * Generated: 2026-04-01T19:33:48.937Z
5
+ * Generated: 2026-04-22T20:44:41.713Z
6
6
  *
7
7
  * @copyright Microsoft Corporation. All rights reserved.
8
8
  * @license MIT
@@ -21,5 +21,9 @@ export default {
21
21
 
22
22
  /** Scenario Header for Copilot API */
23
23
  scenarioHeader: "agenticevaluation"
24
+ },
25
+ eula: {
26
+ /** EULA version string for acceptance tracking */
27
+ version: "2026-04-01"
24
28
  }
25
29
  };
@@ -0,0 +1,12 @@
1
/**
 * Produce the canonical form of an M365 agent ID.
 *
 * An ID that contains no '.' gets the '.declarativeAgent' suffix appended.
 * Falsy input (null, undefined, empty string) and IDs that already contain
 * a '.' are returned as-is.
 *
 * @param {string|null|undefined} id - The raw agent ID value.
 * @returns {string|null|undefined} The normalized agent ID.
 */
export function normalizeAgentId(id) {
  if (id && !id.includes('.')) {
    return `${id}.declarativeAgent`;
  }
  return id;
}
@@ -3,6 +3,7 @@
3
3
  * Handles .env.local, .env.local.user, and other env file formats.
4
4
  */
5
5
 
6
+ import { parse as dotenvParse } from 'dotenv';
6
7
  import fs from 'fs';
7
8
  import path from 'path';
8
9
 
@@ -21,7 +22,8 @@ const AGENT_ID_ALIASES = [
21
22
 
22
23
  /**
23
24
  * Load environment variables from a .env-style file.
24
- * Skips blank lines and comments. Protected keys are ignored with a warning.
25
+ * Uses dotenv.parse() for standards-compliant parsing (handles quoted values,
26
+ * inline comments, escape sequences). Protected keys are ignored with a warning.
25
27
  * Malformed lines (no '=' separator) are skipped with a warning.
26
28
  * @param {string} envFilePath - Absolute path to the env file
27
29
  * @returns {Object|null} Parsed key-value pairs, or null if file cannot be read
@@ -34,32 +36,25 @@ export function _loadEnvFile(envFilePath) {
34
36
  const envVars = {};
35
37
  try {
36
38
  const content = fs.readFileSync(envFilePath, 'utf-8');
37
- const lines = content.split('\n');
38
39
 
39
- for (const line of lines) {
40
+ // Pre-scan for malformed lines (no '=') and emit warnings
41
+ for (const line of content.split('\n')) {
40
42
  const trimmedLine = line.trim();
41
43
  if (!trimmedLine || trimmedLine.startsWith('#')) {
42
44
  continue;
43
45
  }
44
-
45
- const eqIndex = trimmedLine.indexOf('=');
46
- if (eqIndex === -1) {
46
+ if (trimmedLine.indexOf('=') === -1) {
47
47
  console.warn(
48
48
  `⚠️ Ignoring malformed line in env file (missing '='): ${trimmedLine}`
49
49
  );
50
- continue;
51
50
  }
51
+ }
52
52
 
53
- const keyName = trimmedLine.slice(0, eqIndex).trim();
54
- const value = trimmedLine
55
- .slice(eqIndex + 1)
56
- .trim()
57
- .replace(/^(['"])(.*)\1$/, '$2');
58
-
59
- if (!keyName) {
60
- continue;
61
- }
53
+ // Use dotenv.parse() for standards-compliant .env parsing
54
+ // (handles quoted values, inline comments, escape sequences, export prefix)
55
+ const parsed = dotenvParse(content);
62
56
 
57
+ for (const [keyName, value] of Object.entries(parsed)) {
63
58
  if (PROTECTED_KEYS.includes(keyName)) {
64
59
  console.warn(
65
60
  `⚠️ Ignoring ${keyName} from .env file (using built-in value)`
@@ -0,0 +1,78 @@
1
+ /**
2
+ * EULA acceptance manager
3
+ *
4
+ * Manages reading and writing the EULA acceptance marker file at
5
+ * ~/.m365-copilot-agent-evals/eula-acceptance.json.
6
+ * This location is independent of the cache directory so acceptance
7
+ * survives --cache-clear operations.
8
+ */
9
+
10
+ import fs from 'node:fs/promises';
11
+ import path from 'node:path';
12
+ import os from 'node:os';
13
+
14
+ const EULA_DIR_NAME = '.m365-copilot-agent-evals';
15
+ const EULA_FILE_NAME = 'eula-acceptance.json';
16
+
17
/**
 * Resolve the directory that holds the EULA acceptance marker:
 * the '.m365-copilot-agent-evals' folder inside the user's home directory.
 * @returns {string}
 */
export function getEulaDir() {
  const homeDirectory = os.homedir();
  return path.join(homeDirectory, EULA_DIR_NAME);
}
24
+
25
/**
 * Resolve the full path of the acceptance marker file
 * ('eula-acceptance.json' inside the EULA directory).
 * @returns {string}
 */
export function getEulaFilePath() {
  const eulaDirectory = getEulaDir();
  return path.join(eulaDirectory, EULA_FILE_NAME);
}
32
+
33
/**
 * Persist an acceptance marker for the given EULA version.
 *
 * The EULA directory is created first when missing. The marker is written
 * as pretty-printed JSON containing the version and the ISO-8601 timestamp
 * of acceptance. File-system errors reject the returned promise.
 *
 * @param {string} version - EULA version string
 * @returns {Promise<void>}
 */
export async function recordAcceptance(version) {
  await fs.mkdir(getEulaDir(), { recursive: true });
  const acceptedAt = new Date().toISOString();
  const payload = JSON.stringify({ version, acceptedAt }, null, 2);
  await fs.writeFile(getEulaFilePath(), payload, 'utf-8');
}
49
+
50
/**
 * Determine whether the stored acceptance marker covers the required version.
 *
 * - No usable marker: `{ accepted: false, stale: false, marker: null }`
 * - Marker for a different version: `{ accepted: false, stale: true, marker }`
 * - Marker for the required version: `{ accepted: true, stale: false, marker }`
 *
 * @param {string} requiredVersion - The version to check against
 * @returns {Promise<{accepted: boolean, stale: boolean, marker: object|null}>}
 */
export async function checkAcceptance(requiredVersion) {
  const marker = await _readMarker();
  const hasMarker = Boolean(marker);
  const accepted = hasMarker && marker.version === requiredVersion;
  return {
    accepted,
    stale: hasMarker && !accepted,
    marker: hasMarker ? marker : null,
  };
}
62
+
63
/**
 * Load the acceptance marker from disk.
 *
 * Any failure (missing or unreadable file, invalid JSON, or a document
 * lacking a truthy `version` or `acceptedAt`) results in null.
 * Exported with _ prefix for unit testing.
 * @returns {Promise<object|null>}
 */
export async function _readMarker() {
  try {
    const contents = await fs.readFile(getEulaFilePath(), 'utf-8');
    const marker = JSON.parse(contents);
    // Property access stays inside the try: a JSON `null` document throws here.
    const complete = Boolean(marker.version) && Boolean(marker.acceptedAt);
    return complete ? marker : null;
  } catch {
    return null;
  }
}
+ }
@@ -605,18 +605,20 @@ export class ProgressReporter {
605
605
 
606
606
  this.phaseStatuses.set(phaseId, 'failed');
607
607
 
608
- // Clear current line and display error
609
- if (this.isInteractive) {
610
- readline.clearLine(process.stdout, 0);
611
- readline.cursorTo(process.stdout, 0);
612
- }
608
+ if (!this.options.quiet) {
609
+ // Clear current line and display error
610
+ if (this.isInteractive) {
611
+ readline.clearLine(process.stdout, 0);
612
+ readline.cursorTo(process.stdout, 0);
613
+ }
613
614
 
614
- console.log(`\n❌ Failed: ${phase.name}`);
615
- console.log(`\nError: ${error.message}`);
616
- console.log(`\nSuggested actions:`);
617
- console.log(` • Check your internet connection`);
618
- console.log(` • If behind a proxy, set HTTP_PROXY/HTTPS_PROXY`);
619
- console.log(` • Run with --verbose for detailed output`);
615
+ console.error(`\n❌ Failed: ${phase.name}`);
616
+ console.error(`\nError: ${error.message}`);
617
+ console.error(`\nSuggested actions:`);
618
+ console.error(` • Check your internet connection`);
619
+ console.error(` • If behind a proxy, set HTTP_PROXY/HTTPS_PROXY`);
620
+ console.error(` • Run with --verbose for detailed output`);
621
+ }
620
622
 
621
623
  this.currentPhase = null;
622
624
  this.phaseStartTime = null;