@microsoft/m365-copilot-eval 1.4.0-preview.1 → 1.6.0-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,488 @@
1
+ """Output formatting, score conversion, and result writing."""
2
+
3
+ import csv
4
+ import json
5
+ import os
6
+ import sys
7
+ import webbrowser
8
+ from datetime import datetime, timezone
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from cli_logging.cli_logger import emit_structured_log
12
+ from cli_logging.logging_utils import Operation
13
+ from common import (
14
+ DEFAULT_PASS_THRESHOLD,
15
+ RELEVANCE,
16
+ COHERENCE,
17
+ GROUNDEDNESS,
18
+ SIMILARITY,
19
+ TOOL_CALL_ACCURACY,
20
+ CITATIONS,
21
+ EXACT_MATCH,
22
+ PARTIAL_MATCH,
23
+ METRIC_IDS,
24
+ STATUS_PASS,
25
+ STATUS_FAIL,
26
+ STATUS_ERROR,
27
+ STATUS_PARTIAL,
28
+ STATUS_UNKNOWN,
29
+ pascal_case_to_title,
30
+ RunConfig,
31
+ )
32
+ from generate_report import generate_html_report, calculate_aggregate_statistics
33
+ from schema_handler import SchemaVersionManager
34
+
35
+
36
def write_results_to_html(results: List[Dict], output_file: str,
                          agent_name: Optional[str] = None, agent_id: Optional[str] = None,
                          cli_version: Optional[str] = None):
    """Render ``results`` with ``generate_html_report`` and save the page to ``output_file``.

    The parent directory of ``output_file`` is created when it does not exist.
    Any exception raised while rendering or writing is logged as an error and
    the process exits with status 1.
    """
    try:
        report_markup = generate_html_report(
            results,
            agent_name=agent_name,
            agent_id=agent_id,
            cli_version=cli_version,
        )
        target_dir = os.path.dirname(os.path.abspath(output_file))
        os.makedirs(target_dir, exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as handle:
            handle.write(report_markup)
        emit_structured_log("info", f"HTML report saved to {output_file}", operation=Operation.WRITE_OUTPUT)
    except Exception as exc:
        emit_structured_log("error", f"Error writing to HTML file: {exc}", operation=Operation.WRITE_OUTPUT)
        sys.exit(1)
50
+
51
+
52
def write_results_to_console(results, agent_name: Optional[str] = None,
                             agent_id: Optional[str] = None,
                             cli_version: Optional[str] = None):
    """Print evaluation results to the console with ANSI colouring.

    Prints, in order: a metadata banner (when any of ``agent_name``,
    ``agent_id`` or ``cli_version`` is set), aggregate statistics (only when
    more than one prompt is reported), and then every individual result.
    Results whose ``type`` is ``"multi_turn"`` are printed turn by turn and
    closed with a thread summary; all other results are printed as one prompt.
    """
    # ANSI escape sequences
    RESET = '\033[0m'
    BOLD = '\033[1m'
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    MAGENTA = '\033[95m'
    CYAN = '\033[96m'
    ORANGE = '\033[38;5;208m'

    # Metrics with a dedicated heading colour; every other metric is blue.
    metric_colors = {RELEVANCE: MAGENTA, COHERENCE: ORANGE}

    def _print_evaluated_item(response: str, expected_response: str,
                              evaluators_ran: List[str], item_results: Dict[str, Any],
                              error: Optional[str] = None) -> None:
        """Print everything below the header of one prompt or turn.

        The caller prints the "Prompt X" / "Turn X" header line. This helper
        prints the evaluators, response, expected response and error (each
        only when non-empty), followed by one JSON dump per non-None metric.
        """
        if evaluators_ran:
            joined = ', '.join(evaluators_ran)
            print(f"{BOLD}{CYAN}Evaluators:{RESET} {joined}")
        if response:
            print(f"{BOLD}{CYAN}Response:{RESET} {response}")
        if expected_response:
            print(f"{BOLD}{YELLOW}Expected Response:{RESET} {expected_response}")
        if error:
            print(f"{BOLD}{RED}Error:{RESET} {error}")

        for eval_name, payload in item_results.items():
            if payload is None:
                continue
            heading = pascal_case_to_title(eval_name)
            heading_color = metric_colors.get(eval_name, BLUE)
            rendered = json.dumps(payload, indent=4)
            print(f"{BOLD}{heading_color}{heading}:{RESET} {rendered}")

    # Metadata banner
    banner = []
    if agent_name:
        banner.append(f"Agent Name: {agent_name}")
    if agent_id:
        banner.append(f"Agent ID: {agent_id}")
    if cli_version:
        banner.append(f"CLI Version: {cli_version}")
    if banner:
        print(f"{BOLD}{CYAN}{' | '.join(banner)}{RESET}")
        print()

    # Aggregate statistics; skipped for a single prompt.
    aggregates = calculate_aggregate_statistics(results)
    if aggregates:
        first_stats = next(iter(aggregates.values()))
        total_items = first_stats.get('total_prompts', len(results))
        if total_items > 1:
            heavy_rule = f"{BLUE}{'=' * 60}{RESET}"
            print(f"{BOLD}{BLUE}Aggregate Statistics ({total_items} prompts):{RESET}")
            print(heavy_rule)

            for metric_name, stats in aggregates.items():
                rate = stats['pass_rate']
                if rate >= 80:
                    pass_color = GREEN
                elif rate >= 60:
                    pass_color = YELLOW
                else:
                    pass_color = RED
                prompts_evaluated = stats.get('prompts_evaluated', stats['total_evaluated'])
                total_prompts = stats.get('total_prompts', total_items)
                print(f"{BOLD}{CYAN}{metric_name}:{RESET} ({prompts_evaluated}/{total_prompts} prompts)")
                print(f" Pass Rate: {pass_color}{stats['pass_rate']:.1f}%{RESET} ({stats['pass_count']}/{stats['total_evaluated']} passed)")
                print(f" Avg Score: {MAGENTA}{stats['avg_score']:.2f}{RESET}")
                if stats.get('threshold') is not None:
                    print(f" Threshold: {YELLOW}{stats['threshold']}{RESET}")
                print()

            print(heavy_rule)
            print()

    print(f"{BOLD}{BLUE}Individual Results:{RESET}")
    print(f"{BLUE}{'=' * 50}{RESET}")
    light_rule = f"{BLUE}{'-' * 30}{RESET}"

    for index, result in enumerate(results, 1):
        if result.get("type") != "multi_turn":
            # Single-turn prompt
            print(f"{BOLD}{GREEN}Prompt {index}:{RESET} {result['prompt']}")
            _print_evaluated_item(
                response=result.get('response', ''),
                expected_response=result.get('expected_response', ''),
                evaluators_ran=result.get('evaluators_ran', []),
                item_results=result.get('results', {}),
                error=result.get('errorDetails'),
            )
            print(light_rule)
            continue

        # Multi-turn thread
        thread_name = result.get("name", "Unnamed Thread")
        summary = result.get("summary", {})
        status = summary.get("overall_status", STATUS_UNKNOWN)
        if status == STATUS_PASS:
            status_color = GREEN
        elif status == STATUS_PARTIAL:
            status_color = YELLOW
        else:
            status_color = RED

        print(f"{BOLD}{MAGENTA}Thread {index}: {thread_name}{RESET}")
        for turn_number, turn in enumerate(result.get("turns", []), 1):
            turn_status = turn.get("status", STATUS_UNKNOWN)
            if turn_status == STATUS_PASS:
                turn_color = GREEN
            elif turn_status in (STATUS_FAIL, STATUS_ERROR):
                turn_color = RED
            else:
                turn_color = YELLOW
            print(f"{BOLD}{turn_color}Turn {turn_number}:{RESET} [{turn_status}] {turn.get('prompt', '')}")
            _print_evaluated_item(
                response=turn.get("response", ""),
                expected_response=turn.get("expected_response", ""),
                evaluators_ran=turn.get("evaluators_ran", []),
                item_results=turn.get("results", {}),
                error=turn.get("error"),
            )
            print()
        print(f"{BOLD}{MAGENTA}Thread {index} Summary:{RESET}")
        print(f" Status: {status_color}{status.upper()}{RESET}")
        print(f" Turns passed: {status_color}{summary.get('turns_passed', 0)}/{summary.get('turns_total', 0)}{RESET}")
        print(light_rule)
165
+
166
+
167
def extract_eval_score(data: dict, metric_id: str) -> Optional[Dict]:
    """Build a schema ``EvalScore`` dict from a decorated metric dict.

    Returns ``None`` unless ``data[metric_id]`` exists and is an int or float.
    Otherwise returns a dict with ``score``, ``result`` and ``threshold``.
    ``result`` is taken from ``data["result"]`` when it is already a pass/fail
    status; any other value is replaced by comparing the score against the
    threshold. ``reason`` is added when ``data`` holds a non-empty
    ``<metric_id>_reason`` or ``reason`` entry.
    """
    if metric_id not in data:
        return None
    score = data[metric_id]
    if not isinstance(score, (int, float)):
        return None

    threshold = data.get("threshold", DEFAULT_PASS_THRESHOLD)

    verdict = data.get("result")
    if verdict not in (STATUS_PASS, STATUS_FAIL):
        # No usable verdict recorded: derive it from the score.
        verdict = STATUS_PASS if score >= threshold else STATUS_FAIL

    eval_score: Dict[str, Any] = {
        "score": score,
        "result": verdict,
        "threshold": threshold,
    }

    # The metric-specific reason takes precedence over the generic one.
    explanation = data.get(f"{metric_id}_reason") or data.get("reason")
    if explanation:
        eval_score["reason"] = explanation
    return eval_score
192
+
193
+
194
def _convert_scores_to_schema(results_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Convert raw evaluator results to schema-compliant score objects.

    Each value in ``results_dict`` is either a metric dict or ``None`` (the
    evaluator was skipped or crashed). ``None`` entries are omitted from the
    returned mapping.

    Output keys and shapes:
      * ``relevance``, ``coherence``, ``groundedness``, ``similarity``,
        ``toolCallAccuracy``: whatever ``extract_eval_score`` returns
        (omitted when it returns a falsy value).
      * ``citations``: ``{count, result, threshold}`` plus ``format`` when
        the metric dict has ``citation_format``.
      * ``exactMatch``: ``{match, result, reason}``.
      * ``partialMatch``: ``{score, result, threshold, reason}``.
    """
    scores: Dict[str, Any] = {}

    # Metrics that share the generic EvalScore shape.
    for eval_key, schema_key in (
        (RELEVANCE, "relevance"),
        (COHERENCE, "coherence"),
        (GROUNDEDNESS, "groundedness"),
        (SIMILARITY, "similarity"),
        (TOOL_CALL_ACCURACY, "toolCallAccuracy"),
    ):
        data = results_dict.get(eval_key)
        if data is None:
            continue
        eval_score = extract_eval_score(data, METRIC_IDS[eval_key])
        if eval_score:
            scores[schema_key] = eval_score

    citations = results_dict.get(CITATIONS)
    if citations is not None:
        count = citations.get("citations", 0)
        citation_threshold = citations.get("threshold", 1)
        citation_result = citations.get("result")
        if citation_result not in (STATUS_PASS, STATUS_FAIL):
            citation_result = STATUS_PASS if count >= citation_threshold else STATUS_FAIL
        citation_score: Dict[str, Any] = {
            "count": count,
            "result": citation_result,
            "threshold": citation_threshold,
        }
        if "citation_format" in citations:
            citation_score["format"] = citations["citation_format"]
        scores["citations"] = citation_score

    exact = results_dict.get(EXACT_MATCH)
    if exact is not None:
        is_match = exact.get("exact_match", 0.0) == 1.0
        scores["exactMatch"] = {
            "match": is_match,
            "result": exact.get("result", STATUS_PASS if is_match else STATUS_FAIL),
            "reason": exact.get("exact_match_reason", ""),
        }

    partial = results_dict.get(PARTIAL_MATCH)
    if partial is not None:
        partial_score = partial.get("partial_match", 0.0)
        partial_threshold = partial.get("threshold", 0.5)
        partial_result = partial.get("result")
        if partial_result is None:
            # No verdict recorded: derive it from score vs. threshold, as the
            # other metrics do, instead of unconditionally reporting a fail.
            try:
                passed = partial_score >= partial_threshold
            except TypeError:
                # Non-comparable score/threshold: keep the conservative fail.
                passed = False
            partial_result = STATUS_PASS if passed else STATUS_FAIL
        scores["partialMatch"] = {
            "score": partial_score,
            "result": partial_result,
            "threshold": partial_threshold,
            "reason": partial.get("partial_match_reason", ""),
        }

    return scores
250
+
251
+
252
def convert_result_to_eval_item(result: Dict) -> Dict:
    """Turn an internal single-turn result dict into a schema EvalItem dict.

    ``prompt``, ``response`` and ``expected_response`` are required (a missing
    key raises ``KeyError``). ``evaluators`` and ``evaluators_mode`` are copied
    when present, and ``scores`` is added when the converted ``results`` entry
    is non-empty.
    """
    required_keys = ("prompt", "response", "expected_response")
    item: Dict[str, Any] = {key: result[key] for key in required_keys}

    for optional_key in ("evaluators", "evaluators_mode"):
        if optional_key in result:
            item[optional_key] = result[optional_key]

    converted = _convert_scores_to_schema(result.get("results", {}))
    if converted:
        item["scores"] = converted

    return item
270
+
271
+
272
def convert_thread_result_to_output(thread_result: Dict) -> Dict:
    """Convert a multi-turn thread result into its output representation.

    Each turn keeps its ``prompt`` (empty string when absent), any of the
    pass-through keys it carries, and a ``scores`` entry when its ``results``
    convert to a non-empty mapping. The thread itself keeps ``name``,
    ``description``, ``conversation_id`` and ``summary`` only when they are
    truthy; ``turns`` is always present.
    """
    # Copied verbatim, in this order, whenever the turn has the key.
    turn_passthrough = (
        "expected_response",
        "response",
        "status",
        "error",
        "evaluators",
        "evaluators_mode",
    )

    converted_turns = []
    for turn in thread_result.get("turns", []):
        converted: Dict[str, Any] = {"prompt": turn.get("prompt", "")}
        for key in turn_passthrough:
            if key in turn:
                converted[key] = turn[key]

        turn_scores = _convert_scores_to_schema(turn.get("results", {}))
        if turn_scores:
            converted["scores"] = turn_scores

        converted_turns.append(converted)

    output: Dict[str, Any] = {}
    for key in ("name", "description", "conversation_id"):
        value = thread_result.get(key)
        if value:
            output[key] = value
    output["turns"] = converted_turns
    summary = thread_result.get("summary")
    if summary:
        output["summary"] = summary

    return output
308
+
309
+
310
def convert_result_to_output_item(result: Dict) -> Dict:
    """Dispatch a result to the multi-turn or single-turn converter by its ``type``."""
    converter = (
        convert_thread_result_to_output
        if result.get("type") == "multi_turn"
        else convert_result_to_eval_item
    )
    return converter(result)
315
+
316
+
317
def write_results_to_json(results: List[Dict], output_file: str, agent_id: Optional[str] = None,
                          default_evaluators: Optional[Dict[str, Any]] = None,
                          agent_name: Optional[str] = None,
                          cli_version: Optional[str] = None):
    """Write results as an eval-document JSON file.

    The document has the shape
    ``{schemaVersion, metadata, default_evaluators?, items}``.
    ``schemaVersion`` comes from ``SchemaVersionManager`` and falls back to
    ``"1.0.0"`` when that lookup raises. ``metadata`` always carries a UTC
    ``evaluatedAt`` timestamp, plus ``agentId`` / ``agentName`` /
    ``cliVersion`` when the matching argument is truthy.

    Any other failure is logged as an error and the process exits with
    status 1.
    """
    try:
        try:
            schema_version = SchemaVersionManager().get_current_version()
        except Exception:
            schema_version = "1.0.0"

        items = list(map(convert_result_to_output_item, results))

        metadata: Dict[str, Any] = {
            "evaluatedAt": datetime.now(timezone.utc).isoformat(),
        }
        optional_metadata = (
            ("agentId", agent_id),
            ("agentName", agent_name),
            ("cliVersion", cli_version),
        )
        for field, value in optional_metadata:
            if value:
                metadata[field] = value

        document: Dict[str, Any] = {
            "schemaVersion": schema_version,
            "metadata": metadata,
        }
        if default_evaluators is not None:
            document["default_evaluators"] = default_evaluators
        # Inserted last so "items" follows the header fields in the file.
        document["items"] = items

        target_dir = os.path.dirname(os.path.abspath(output_file))
        os.makedirs(target_dir, exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as handle:
            json.dump(document, handle, indent=2, ensure_ascii=False)
        emit_structured_log("info", f"Results saved to {output_file}", operation=Operation.WRITE_OUTPUT)
    except Exception as exc:
        emit_structured_log("error", f"Error writing to JSON file: {exc}", operation=Operation.WRITE_OUTPUT)
        sys.exit(1)
361
+
362
+
363
+ def _results_to_csv_json(results_dict: Dict) -> str:
364
+ """Serialize evaluator results dict to a CSV-safe JSON string.
365
+
366
+ Skips None (crashed/skipped evaluators). Results are dicts produced
367
+ by _decorate_metric.
368
+ """
369
+ if not results_dict:
370
+ return ""
371
+ non_null = {k: v for k, v in results_dict.items() if v is not None}
372
+ return json.dumps(non_null) if non_null else ""
373
+
374
+
375
def write_results_to_csv(results: List[Dict], output_file: str,
                         agent_name: Optional[str] = None, agent_id: Optional[str] = None,
                         cli_version: Optional[str] = None):
    """Write results to a CSV file.

    File layout (nothing is written when ``results`` is empty):
      1. ``# Agent Name: ... | Agent ID: ... | CLI Version: ...`` comment
         line, when at least one of those values is truthy.
      2. ``# AGGREGATE STATISTICS`` table, only when more than one prompt is
         reported by ``calculate_aggregate_statistics``.
      3. Single-turn rows (header taken from the union of row keys, in first
         appearance order), with evaluator results serialized into a
         ``scores`` column.
      4. Multi-turn rows: one row per turn plus a ``summary`` row per thread.

    The parent directory of ``output_file`` is created when missing. Any
    exception is logged as an error and the process exits with status 1.
    """
    # Keys of a single-turn result that never become CSV columns
    # ("results" is re-added as the serialized "scores" column).
    exclude_keys = {'evaluators_ran', 'evaluators', 'evaluators_mode', '_enhanced_response', 'results'}
    multi_turn_fields = ["thread_name", "turn_index", "prompt", "response",
                         "expected_response", "status", "error", "scores"]

    try:
        os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            if results:
                metadata_parts = []
                if agent_name:
                    metadata_parts.append(f"Agent Name: {agent_name}")
                if agent_id:
                    metadata_parts.append(f"Agent ID: {agent_id}")
                if cli_version:
                    metadata_parts.append(f"CLI Version: {cli_version}")
                if metadata_parts:
                    f.write(f"# {' | '.join(metadata_parts)}\n")

                aggregates = calculate_aggregate_statistics(results)
                if aggregates:
                    total_items = aggregates[next(iter(aggregates))].get('total_prompts', len(results))
                    if total_items > 1:
                        f.write("# AGGREGATE STATISTICS\n")
                        # csv.writer quotes metric names containing commas or
                        # quotes; "\n" keeps the line ending this section has
                        # always used.
                        aggregate_writer = csv.writer(f, lineterminator='\n')
                        aggregate_writer.writerow([
                            "Metric", "Prompts Evaluated", "Total Prompts", "Pass Rate (%)",
                            "Passed", "Failed", "Avg Score", "Threshold",
                        ])
                        for metric_name, stats in aggregates.items():
                            threshold = stats.get('threshold')
                            # A None threshold means "no threshold", not the text "None".
                            threshold_str = 'N/A' if threshold is None else str(threshold)
                            prompts_evaluated = stats.get('prompts_evaluated', stats['total_evaluated'])
                            total_prompts = stats.get('total_prompts', total_items)
                            aggregate_writer.writerow([
                                f"{metric_name}",
                                f"{prompts_evaluated}",
                                f"{total_prompts}",
                                f"{stats['pass_rate']:.1f}",
                                f"{stats['pass_count']}",
                                f"{stats['fail_count']}",
                                f"{stats['avg_score']:.2f}",
                                threshold_str,
                            ])
                        f.write("\n# INDIVIDUAL RESULTS\n")

                single_turn_rows = []
                multi_turn_rows = []
                for result in results:
                    if result.get("type") == "multi_turn":
                        thread_name = result.get("name", "")
                        for turn_number, turn in enumerate(result.get("turns", []), 1):
                            multi_turn_rows.append({
                                "thread_name": thread_name,
                                "turn_index": turn_number,
                                "prompt": turn.get("prompt", ""),
                                "response": turn.get("response", ""),
                                "expected_response": turn.get("expected_response", ""),
                                "status": turn.get("status", ""),
                                "error": turn.get("error", ""),
                                "scores": _results_to_csv_json(turn.get("results", {})),
                            })
                        # One trailing row per thread carrying the overall verdict.
                        summary = result.get("summary", {})
                        multi_turn_rows.append({
                            "thread_name": thread_name,
                            "turn_index": "summary",
                            "prompt": "",
                            "response": "",
                            "expected_response": "",
                            "status": summary.get("overall_status", ""),
                            "scores": f"{summary.get('turns_passed', 0)}/{summary.get('turns_total', 0)} turns passed",
                        })
                    else:
                        row = {k: v for k, v in result.items() if k not in exclude_keys}
                        if "results" in result:
                            row["scores"] = _results_to_csv_json(result["results"])
                        single_turn_rows.append(row)

                if single_turn_rows:
                    if multi_turn_rows:
                        f.write("# SINGLE-TURN RESULTS\n")
                    # Ordered union of all row keys (first appearance wins).
                    fieldnames = list(dict.fromkeys(
                        key for row in single_turn_rows for key in row
                    ))
                    writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
                    writer.writeheader()
                    writer.writerows(single_turn_rows)

                if multi_turn_rows:
                    if single_turn_rows:
                        f.write("\n")
                    f.write("# MULTI-TURN RESULTS\n")
                    writer = csv.DictWriter(f, fieldnames=multi_turn_fields, extrasaction='ignore')
                    writer.writeheader()
                    writer.writerows(multi_turn_rows)
        emit_structured_log("info", f"Results saved to {output_file}", operation=Operation.WRITE_OUTPUT)
    except Exception as e:
        emit_structured_log("error", f"Error writing to CSV file: {e}", operation=Operation.WRITE_OUTPUT)
        sys.exit(1)
463
+
464
+
465
def output_results(results: List[Dict], config: RunConfig, default_evaluators: Optional[Dict[str, Any]] = None,
                   agent_name: Optional[str] = None, cli_version: Optional[str] = None):
    """Write ``results`` in the format selected by ``config.output``.

    * ``config.output`` falsy: print to the console.
    * ``*.csv``: CSV file.
    * ``*.html``: HTML report, which is then opened in the default browser.
    * ``*.json`` or any other extension: JSON eval document.

    The extension check is case-insensitive. ``agent_name``, ``cli_version``
    and ``config.m365_agent_id`` are forwarded to every writer as metadata.
    """
    # Imported locally so this function does not rely on a module-level import.
    from pathlib import Path

    metadata_kwargs = dict(
        agent_name=agent_name,
        agent_id=config.m365_agent_id,
        cli_version=cli_version,
    )

    if not config.output:
        write_results_to_console(results, **metadata_kwargs)
        return

    output_lower = config.output.lower()
    if output_lower.endswith('.csv'):
        write_results_to_csv(results, config.output, **metadata_kwargs)
    elif output_lower.endswith('.html'):
        write_results_to_html(results, config.output, **metadata_kwargs)
        # as_uri() builds a valid file URI on every platform (file:///C:/...
        # on Windows) and percent-encodes spaces, '#', etc., which a plain
        # 'file://' + path concatenation does not.
        report_uri = Path(os.path.abspath(config.output)).as_uri()
        webbrowser.open(report_uri)
    else:
        # '.json' and unrecognized extensions both produce the JSON document.
        write_results_to_json(results, config.output, default_evaluators=default_evaluators,
                              **metadata_kwargs)
@@ -9,8 +9,13 @@ import { ensureVenv, executePythonCli } from '../lib/venv-manager.js';
9
9
  import { getCacheStats, clearCache, formatBytes } from '../lib/cache-utils.js';
10
10
  import { checkPackageExpiry } from '../lib/expiry-check.js';
11
11
  import { recordAcceptance, checkAcceptance } from '../lib/eula-manager.js';
12
- import { ProgressReporter } from '../lib/progress.js';
12
+ import { ProgressReporter, isInteractiveTerminal } from '../lib/progress.js';
13
13
  import { _loadEnvFile as loadEnvFile, _loadUserEnvOverride } from '../lib/env-loader.js';
14
+ import {
15
+ _handlePythonVersionMismatch,
16
+ _buildInitializationFailureLines,
17
+ _promptForContinueWithMismatch,
18
+ } from '../lib/version-check.js';
14
19
  import { normalizeAgentId } from '../lib/agent-id.js';
15
20
 
16
21
  // Check package expiry (exits if expired, warns if close to expiry)
@@ -38,10 +43,9 @@ const REQUIREMENTS_FILE = path.join(PYTHON_CLI_DIR, 'requirements.txt');
38
43
  */
39
44
  async function setDefaultEnvironmentConstants() {
40
45
  const config = (await import('../config/default.js')).default;
41
- process.env.M365_EVAL_CLIENT_ID = config.copilotApi.m365EvalClientId;
42
- process.env.COPILOT_API_ENDPOINT = config.copilotApi.copilotApiEndpoint;
43
- process.env.COPILOT_SCOPES = config.copilotApi.copilotScopes;
44
- process.env.X_SCENARIO_HEADER = config.copilotApi.scenarioHeader;
46
+ process.env.WORK_IQ_A2A_ENDPOINT = config.workIq.a2aEndpoint;
47
+ process.env.WORK_IQ_A2A_CLIENT_ID = config.workIq.a2aClientId;
48
+ process.env.WORK_IQ_A2A_SCOPES = config.workIq.a2aScopes;
45
49
  }
46
50
 
47
51
  /**
@@ -131,10 +135,26 @@ async function initializePythonEnvironment(verbose = false, quiet = false) {
131
135
 
132
136
  try {
133
137
  // Step 1: Ensure Python runtime is available (handles download + extract phases)
134
- await ensurePythonRuntime(verbose, onProgress);
138
+ const runtime = await ensurePythonRuntime(verbose, onProgress);
139
+
140
+ // Step 2: Handle version mismatch from PYTHON_PATH fallback.
141
+ // The decision tree (EOL block, interactive prompt, non-interactive
142
+ // auto-reject) lives in _handlePythonVersionMismatch so it is
143
+ // unit-testable without spawning the CLI; we only own the readline
144
+ // wiring and the actual process.exit here.
145
+ const mismatch = await _handlePythonVersionMismatch({
146
+ runtime,
147
+ isInteractive: isInteractiveTerminal(),
148
+ promptForContinue: _promptForContinueWithMismatch,
149
+ warn: (msg) => console.warn(msg),
150
+ error: (msg) => console.error(msg),
151
+ });
152
+ if (mismatch.shouldExit) {
153
+ process.exit(mismatch.exitCode ?? 1);
154
+ }
135
155
 
136
- // Step 2: Ensure venv with dependencies is set up (handles venv + deps phases)
137
- await ensureVenv(REQUIREMENTS_FILE, verbose, onProgress);
156
+ // Step 3: Ensure venv with dependencies is set up (handles venv + deps phases)
157
+ await ensureVenv(REQUIREMENTS_FILE, verbose, onProgress, runtime.pythonPath);
138
158
 
139
159
  // Show completion summary
140
160
  reporter.complete();
@@ -145,11 +165,12 @@ async function initializePythonEnvironment(verbose = false, quiet = false) {
145
165
  console.error('\nFull error:', error);
146
166
  }
147
167
 
148
- console.error('\nTroubleshooting:');
149
- console.error(' - Check your internet connection');
150
- console.error(' - If behind a proxy, set HTTP_PROXY/HTTPS_PROXY environment variables');
151
- console.error(' - For SSL issues, set NODE_EXTRA_CA_CERTS or PIP_CERT');
152
- console.error(' - Run with --log-level debug for detailed output');
168
+ for (const line of _buildInitializationFailureLines({
169
+ error,
170
+ platform: process.platform,
171
+ })) {
172
+ console.error(line);
173
+ }
153
174
 
154
175
  process.exit(1);
155
176
  }
@@ -2,25 +2,22 @@
2
2
  * Build-time injected default values
3
3
  * DO NOT EDIT - This file is auto-generated during build.
4
4
  *
5
- * Generated: 2026-04-22T20:44:41.713Z
5
+ * Generated: 2026-05-07T22:53:22.056Z
6
6
  *
7
7
  * @copyright Microsoft Corporation. All rights reserved.
8
8
  * @license MIT
9
9
  */
10
10
 
11
11
  export default {
12
- copilotApi: {
13
- /** Microsoft M365 Evaluation Client ID */
14
- m365EvalClientId: "c678803a-d8e9-4d67-849c-3a8b2d7ba5d3",
12
+ workIq: {
13
+ /** Work IQ A2A Endpoint */
14
+ a2aEndpoint: "https://graph.microsoft.com/rp/workiq",
15
15
 
16
- /** Copilot OAuth Scopes */
17
- copilotScopes: "https://substrate.office.com/sydney/.default",
16
+ /** Work IQ A2A Client ID */
17
+ a2aClientId: "ba081686-5d24-4bc6-a0d6-d034ecffed87",
18
18
 
19
- /** Copilot API Endpoint */
20
- copilotApiEndpoint: "https://substrate.office.com/m365Copilot",
21
-
22
- /** Scenario Header for Copilot API */
23
- scenarioHeader: "agenticevaluation"
19
+ /** Work IQ A2A OAuth Scopes */
20
+ a2aScopes: "Sites.Read.All Mail.Read People.Read.All OnlineMeetingTranscript.Read.All Chat.Read ChannelMessage.Read.All ExternalItem.Read.All"
24
21
  },
25
22
  eula: {
26
23
  /** EULA version string for acceptance tracking */
@@ -9,10 +9,9 @@ import path from 'path';
9
9
 
10
10
  // Keys that cannot be overridden from .env files (baked in via default.js config)
11
11
  const PROTECTED_KEYS = [
12
- 'M365_EVAL_CLIENT_ID',
13
- 'COPILOT_API_ENDPOINT',
14
- 'COPILOT_SCOPES',
15
- 'X_SCENARIO_HEADER',
12
+ 'WORK_IQ_A2A_ENDPOINT',
13
+ 'WORK_IQ_A2A_CLIENT_ID',
14
+ 'WORK_IQ_A2A_SCOPES',
16
15
  ];
17
16
 
18
17
  // Aliases resolved into M365_AGENT_ID (first match wins)