@microsoft/m365-copilot-eval 1.4.0-preview.1 → 1.5.0-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  # M365 Copilot Agent Evaluations
2
2
 
3
- > **🔒 PRIVATE PREVIEW:** This tool is currently in private preview. And the instructions below are for Private Preview.
3
+ > **PUBLIC PREVIEW:** This tool is currently in public preview; refer to the instructions below to get started.
4
4
 
5
- A **zero-configuration** CLI for evaluating M365 Copilot agents. Send prompts to your agent, get responses, and automatically score them with Azure AI Evaluation metrics (relevance, coherence, groundedness).
5
+ A CLI for evaluating M365 Copilot agents. Send prompts to your agent, get responses, and automatically score them with Azure AI Evaluation metrics (relevance, coherence, groundedness).
6
6
  - Send a batch (or interactive set) of prompts to a configured chat API endpoint.
7
7
  - Collect agent responses and evaluate them locally using Azure AI Evaluation SDK.
8
8
  - The CLI supports 7 evaluator types. Evaluators marked with ⭐ are **enabled by default**.
@@ -12,7 +12,7 @@ A **zero-configuration** CLI for evaluating M365 Copilot agents. Send prompts to
12
12
  | **Relevance** ⭐ | LLM-based | 1-5 | 3 | Yes |
13
13
  | **Coherence** ⭐ | LLM-based | 1-5 | 3 | Yes |
14
14
  | **Groundedness** | LLM-based | 1-5 | 3 | No |
15
- | **ToolCallAccuracy** | LLM-based | 1-5 | 3 | No |
15
+ | **Similarity** | LLM-based | 1-5 | 3 | No |
16
16
  | **Citations** | Count-based | >= 0 | 1 | No |
17
17
  | **ExactMatch** | String match | boolean | N/A | No |
18
18
  | **PartialMatch** | String match | 0.0-1.0 | 0.5 | No |
@@ -26,6 +26,7 @@ A **zero-configuration** CLI for evaluating M365 Copilot agents. Send prompts to
26
26
  - **Node.js 24.12.0+** (check: `node --version`)
27
27
  - **Environment file** with your credentials and agent ID (see [Environment Setup](#-environment-setup) below)
28
28
  - **Your Tenant ID** - get your tenant id using the instructions [here](https://learn.microsoft.com/en-us/azure/azure-portal/get-subscription-tenant-id)
29
 + - Admin approval to run the WorkIQ Client App for your tenant — see the instructions [here](https://github.com/microsoft/work-iq/blob/main/ADMIN-INSTRUCTIONS.md)
29
30
  - **Azure OpenAI endpoint, and API key** (see [Getting Variables](#-getting-variables) below)
30
31
 
31
32
  > Note: Authentication is currently supported on Windows only. Support for other operating systems is coming soon.
@@ -66,6 +67,8 @@ M365_TITLE_ID="T_your-title-id-here" # Auto-generated by ATK
66
67
  # .env.local.user (NOT checked in — secrets go here)
67
68
  AZURE_AI_OPENAI_ENDPOINT="<your-azure-openai-endpoint>"
68
69
  AZURE_AI_API_KEY="<your-api-key-from-azure-portal>"
70
+ AZURE_AI_API_VERSION="2024-12-01-preview" # default
71
+ AZURE_AI_MODEL_NAME="gpt-4o-mini" # recommended
69
72
  TENANT_ID="<your-tenant-id>"
70
73
  ```
71
74
 
@@ -90,7 +93,7 @@ M365_AGENT_ID="your-agent-id" # e.g., U_0dc4a8a2-b95f-edac-91c8-d802023ec2d4
90
93
  AZURE_AI_OPENAI_ENDPOINT="<your-azure-openai-endpoint>"
91
94
  AZURE_AI_API_KEY="<your-api-key-from-azure-portal>"
92
95
  AZURE_AI_API_VERSION="2024-12-01-preview" # default
93
- AZURE_AI_MODEL_NAME="gpt-4o-mini" # default
96
+ AZURE_AI_MODEL_NAME="gpt-4o-mini" # recommended
94
97
  TENANT_ID="<your-tenant-id>"
95
98
  ```
96
99
 
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "name": "@microsoft/m365-copilot-eval",
3
- "version": "1.4.0-preview.1",
3
+ "version": "1.5.0-preview.1",
4
4
  "minCliVersion": "1.0.1-preview.1",
5
5
  "description": "Zero-config Node.js wrapper for M365 Copilot Agent Evaluations CLI (Python-based Azure AI Evaluation SDK)",
6
- "publishDate": "2026-04-22",
6
+ "publishDate": "2026-04-30",
7
7
  "main": "src/clients/node-js/lib/index.js",
8
8
  "type": "module",
9
9
  "bin": {
@@ -5,6 +5,13 @@ All notable changes to the eval document schema will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.2.0](https://github.com/microsoft/M365-Copilot-Agent-Evals/compare/schema-v1.1.0...schema-v1.2.0) (2026-04-22)
9
+
10
+
11
+ ### Features
12
+
13
+ * **schema:** add multi-turn evaluation support (v1.2.0) ([#208](https://github.com/microsoft/M365-Copilot-Agent-Evals/issues/208)) ([a5ad22b](https://github.com/microsoft/M365-Copilot-Agent-Evals/commit/a5ad22bb4f6ac8ba548dc7f431ace073fa5970ce))
14
+
8
15
  ## [1.1.0](https://github.com/microsoft/M365-Copilot-Agent-Evals/compare/schema-v1.0.0...schema-v1.1.0) (2026-03-30)
9
16
 
10
17
 
@@ -287,9 +287,9 @@
287
287
  "$ref": "#/$defs/EvalScore",
288
288
  "description": "Groundedness score (1-5)"
289
289
  },
290
- "toolCallAccuracy": {
290
+ "similarity": {
291
291
  "$ref": "#/$defs/EvalScore",
292
- "description": "Tool call accuracy score (1-5)"
292
+ "description": "Similarity score (1-5)"
293
293
  },
294
294
  "citations": {
295
295
  "$ref": "#/$defs/CitationScore",
@@ -427,7 +427,7 @@
427
427
  "type": "object",
428
428
  "description": "Map of evaluator names to their configuration options",
429
429
  "propertyNames": {
430
- "enum": ["Relevance", "Coherence", "Groundedness", "ToolCallAccuracy", "Citations", "ExactMatch", "PartialMatch"]
430
+ "enum": ["Relevance", "Coherence", "Groundedness", "Similarity", "Citations", "ExactMatch", "PartialMatch"]
431
431
  },
432
432
  "additionalProperties": {
433
433
  "$ref": "#/$defs/EvaluatorOptions"
@@ -0,0 +1,74 @@
1
+ """Interactive agent selection and agent-id utilities."""
2
+
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+
5
+ import questionary
6
+
7
+ from cli_logging.cli_logger import emit_structured_log
8
+ from cli_logging.logging_utils import Operation
9
+
10
+
11
def normalize_agent_id(agent_id):
    """Ensure an agent id carries an explicit agent-type suffix.

    A bare id (one containing no '.') gets '.declarativeAgent' appended.
    Ids that are None/empty or already dotted come back untouched.
    """
    if not agent_id:
        return agent_id
    if '.' in agent_id:
        return agent_id
    return f"{agent_id}.declarativeAgent"
19
+
20
+
21
def select_agent_interactively(agents: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]:
    """Prompt the user to pick one agent from *agents* via questionary.

    Args:
        agents: List of agent dictionaries.

    Returns:
        Tuple of (agent_id, agent_name), or (None, None) when the list is
        empty, no entry has a usable gptId, or the user cancels the prompt.
    """
    if not agents:
        return None, None

    names_by_id: Dict[str, str] = {}
    menu_choices = []
    # Present agents alphabetically by display name.
    for entry in sorted(agents, key=lambda item: item.get("name", "")):
        display_name = entry.get("name", "Unknown")
        gpt_id = (entry.get("gptId") or "").strip()
        if not gpt_id:
            # An agent without an id cannot be selected; warn and move on.
            emit_structured_log("warning", f"Skipping agent '{display_name}': missing or empty gptId.", operation=Operation.FETCH_AGENTS)
            continue
        names_by_id[gpt_id] = display_name

        provider = entry.get("provider")
        owner_flag = entry.get('isOwner')
        description = entry.get("description")

        # Compose the menu row: "name [- provider] (id) [- IsOwner: …] [- description]"
        if provider:
            headline = f"{display_name} - {provider} ({gpt_id})"
        else:
            headline = f"{display_name} ({gpt_id})"
        row_parts = [headline]
        if owner_flag:
            row_parts.append(f"IsOwner: {owner_flag}")
        if description:
            row_parts.append(description)

        menu_choices.append(questionary.Choice(title=" - ".join(row_parts), value=gpt_id))

    if not menu_choices:
        return None, None

    # Shortcut keys only make sense while questionary can assign one per row.
    picked = questionary.select(
        "Select an agent to evaluate:",
        choices=menu_choices,
        use_shortcuts=len(menu_choices) <= 35,
        use_arrow_keys=True
    ).ask()

    return picked, names_by_id.get(picked) if picked else None
@@ -61,7 +61,7 @@ class A2AClient(BaseAgentClient):
61
61
  """Fetch agents from the A2A discovery endpoint.
62
62
 
63
63
  Calls GET {endpoint}/.agents. Each A2A agent card is normalized to
64
- include 'gptId', 'name', and 'description' so it is compatible with
64
+ include 'gptId', 'name', and 'provider' so it is compatible with
65
65
  the shared select_agent_interactively selector.
66
66
 
67
67
  Returns an empty list if the endpoint is unreachable or returns an
@@ -79,8 +79,14 @@ class A2AClient(BaseAgentClient):
79
79
  )
80
80
  req = urllib.request.Request(agents_url, headers=headers, method="GET")
81
81
  with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT_SECS) as resp:
82
- data = json.loads(resp.read().decode("utf-8"))
83
- agents = data if isinstance(data, list) else data.get("agents", [])
82
+ agents = json.loads(resp.read().decode("utf-8"))
83
+ emit_structured_log(
84
+ "debug",
85
+ f"[A2A] Available agents response: {json.dumps(agents)}",
86
+ Operation.FETCH_AGENTS,
87
+ logger=self._logger,
88
+ diagnostic_records=self._diagnostic_records
89
+ )
84
90
  return [self._normalize_agent_card(a) for a in agents]
85
91
  except urllib.error.HTTPError as e:
86
92
  emit_structured_log(
@@ -104,22 +110,11 @@ class A2AClient(BaseAgentClient):
104
110
  @staticmethod
105
111
  def _normalize_agent_card(agent: Dict[str, Any]) -> Dict[str, Any]:
106
112
  """Normalize an A2A agent card to the shape expected by the selector.
107
-
108
- A2A agent cards use a 'url' field rather than a discrete ID. The
109
- agent ID is extracted as the last path segment of that URL, falling
110
- back to the agent name when the URL is absent.
111
113
  """
112
- agent_url = agent.get("url", "")
113
- agent_id = (
114
- agent_url.rstrip("/").rsplit("/", 1)[-1]
115
- if agent_url
116
- else agent.get("name", "")
117
- )
118
114
  return {
119
- "gptId": agent_id,
120
- "name": agent.get("name", agent_id),
121
- "description": agent.get("description", ""),
122
- "isOwner": False,
115
+ "gptId": agent.get("agentId"),
116
+ "name": agent.get("name"),
117
+ "provider": agent.get("provider")
123
118
  }
124
119
 
125
120
  def send_prompt(
@@ -334,15 +329,39 @@ class A2AClient(BaseAgentClient):
334
329
  state = result.get("status", {}).get("state")
335
330
  if state == "completed":
336
331
  msg = result.get("status", {}).get("message") or {}
332
+ all_parts = list(msg.get("parts", []))
333
+ for artifact in result.get("artifacts", []):
334
+ all_parts.extend(artifact.get("parts", []))
337
335
  text = "\n".join(
338
336
  p.get("text", "")
339
- for p in msg.get("parts", [])
337
+ for p in all_parts
340
338
  if p.get("kind") == "text"
341
339
  )
342
340
  attributions = msg.get("metadata", {}).get("attributions", [])
343
- elif state in ("failed", "canceled"):
341
+ elif state in ("failed", "canceled", "rejected"):
342
+ status_msg = result.get("status", {}).get("message") or {}
343
+ detail = "\n".join(
344
+ p.get("text", "")
345
+ for p in status_msg.get("parts", [])
346
+ if p.get("kind") == "text"
347
+ ).strip()
348
+ suffix = f" Detail: {detail}" if detail else ""
349
+ raise RuntimeError(
350
+ f"A2A task {state}. Task id: {result.get('id')}{suffix}"
351
+ )
352
+ elif state in ("input_required", "auth_required"):
353
+ requirement = {
354
+ "input_required": "user input",
355
+ "auth_required": "authentication",
356
+ }.get(state, state.replace("_", " "))
357
+ raise RuntimeError(
358
+ f"A2A task requires {requirement} and cannot proceed automatically."
359
+ f" Task id: {result.get('id')}"
360
+ )
361
+ elif state in ("submitted", "working"):
344
362
  raise RuntimeError(
345
- f"A2A task {state}. Task id: {result.get('id')}"
363
+ f"A2A task is still {state}; synchronous send returned before completion."
364
+ f" Task id: {result.get('id')}"
346
365
  )
347
366
  else:
348
367
  raise RuntimeError(
@@ -44,7 +44,6 @@ class BaseAgentClient(ABC):
44
44
  The conversation_context should be passed to the next turn
45
45
  in a multi-turn conversation, or discarded for single-turn.
46
46
  The context structure is implementation-specific:
47
- - Sydney/REST: {"conversation_id": str}
48
47
  - A2A: {"context_id": str}
49
48
  Returns None as context when no conversation state is established.
50
49
  """
@@ -0,0 +1,136 @@
1
+ """CLI argument parsing and version-check bypass logic."""
2
+
3
+ import argparse
4
+ import os
5
+
6
+ from cli_logging.cli_logger import emit_structured_log
7
+ from cli_logging.logging_utils import Operation
8
+ from common import MAX_CONCURRENCY, RunConfig
9
+ from agent_selector import normalize_agent_id
10
+
11
+
12
# Flags that should bypass remote min-version enforcement.
# --help is not needed here because argparse exits before runtime checks.
VERSION_CHECK_BYPASS_FLAGS = (
    "signout",
)


def should_bypass_min_version_check(config: "RunConfig") -> bool:
    """Return True if the current invocation should skip min-version checks."""
    for flag_name in VERSION_CHECK_BYPASS_FLAGS:
        if getattr(config, flag_name, False):
            return True
    return False
22
+
23
+
24
def parse_arguments():
    """Parse command line arguments for the evaluation CLI.

    Returns:
        argparse.Namespace where ``m365_agent_id`` has been normalized
        (``.declarativeAgent`` suffix added when missing) and
        ``concurrency`` has been validated and clamped to
        [1, MAX_CONCURRENCY].
    """
    parser = argparse.ArgumentParser(
        description="M365 Copilot Agent Evaluation CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run with default prompts
  python main.py

  # Run with custom prompts
  python main.py --prompts "What is Microsoft Graph?" --expected "Microsoft Graph is a gateway..."

  # Run with prompts from file
  python main.py --prompts-file prompts.json

  # Interactive mode
  python main.py --interactive

  # Save results to JSON
  python main.py --output results.json

  # Save results to CSV
  python main.py --output results.csv

  # Save results to HTML and open in browser
  python main.py --output report.html

  # Debug-level diagnostics
  python main.py --log-level debug

  # Sign out and clear cached authentication tokens
  python main.py --signout
        """
    )

    # Input options (mutually exclusive)
    input_group = parser.add_mutually_exclusive_group()
    input_group.add_argument(
        '--prompts',
        nargs='+',
        help='List of prompts to evaluate'
    )
    input_group.add_argument(
        '--prompts-file',
        type=str,
        help='JSON file containing prompts and expected responses'
    )
    input_group.add_argument(
        '--interactive',
        action='store_true',
        help='Interactive mode to enter prompts'
    )

    # Expected responses (only used with --prompts)
    parser.add_argument(
        '--expected',
        nargs='+',
        help='List of expected responses (must match number of prompts)'
    )

    # Agent ID (--m365-agent-id is primary, --agent-id kept for backward compatibility)
    parser.add_argument(
        '--m365-agent-id', '--agent-id',
        type=str,
        default=os.environ.get("M365_AGENT_ID") or os.environ.get("AGENT_ID"),
        help='Agent ID (default from M365_AGENT_ID environment variable)'
    )

    # Output options
    parser.add_argument(
        '--output',
        type=str,
        help='Output file path. Format is determined by file extension: .json, .csv, .html. If not provided, results are printed to console.'
    )

    # Behavior options
    parser.add_argument(
        '--log-level',
        nargs='?',
        const='info',
        action='append',
        help='Set log verbosity: debug, info, warning, error. Bare --log-level resolves to info.'
    )

    parser.add_argument(
        '--signout',
        action='store_true',
        help='Sign out and clear cached authentication tokens'
    )

    parser.add_argument(
        '--concurrency',
        type=int,
        default=MAX_CONCURRENCY,
        help=f'Number of parallel workers for prompt processing (1-{MAX_CONCURRENCY}, default: {MAX_CONCURRENCY})'
    )

    args = parser.parse_args()

    # FIX: the --expected help text promises a 1:1 pairing with --prompts,
    # but it was never enforced; a silent mismatch would mis-pair expected
    # responses with prompts downstream. Fail fast at parse time instead.
    if args.expected is not None:
        if not args.prompts:
            parser.error('--expected can only be used together with --prompts.')
        if len(args.expected) != len(args.prompts):
            parser.error(
                f'--expected count ({len(args.expected)}) must match '
                f'--prompts count ({len(args.prompts)}).'
            )

    args.m365_agent_id = normalize_agent_id(args.m365_agent_id)

    if args.concurrency < 1:
        parser.error('--concurrency must be an integer >= 1.')
    if args.concurrency > MAX_CONCURRENCY:
        # Over-large values are clamped (with a warning) rather than rejected.
        emit_structured_log(
            "warning",
            f"--concurrency {args.concurrency} exceeds max {MAX_CONCURRENCY}; clamping to {MAX_CONCURRENCY}.",
            operation=Operation.SETUP,
        )
        args.concurrency = MAX_CONCURRENCY

    return args
@@ -0,0 +1,33 @@
1
+ """Shared CLI logger instance and structured-log convenience wrapper.
2
+
3
+ Every module in the CLI layer that needs to emit diagnostics imports from here
4
+ instead of main.py, which avoids circular-import issues.
5
+ """
6
+
7
+ import logging
8
+ import sys
9
+ from typing import Any, Dict, List
10
+
11
+ from cli_logging.console_diagnostics import emit_structured_log as _emit_structured_log
12
+ from cli_logging.logging_utils import LOG_LEVEL_MAP, Operation
13
+
14
+ CLI_LOGGER_NAME = "m365.eval.cli"
15
+ CLI_LOGGER = logging.getLogger(CLI_LOGGER_NAME)
16
+ DIAGNOSTIC_RECORDS: List[Dict[str, Any]] = []
17
+
18
+
19
def configure_cli_logging(effective_log_level: str) -> None:
    """Attach a plain stdout handler once, then apply the requested level.

    Safe to call repeatedly: the handler/propagation setup runs only on the
    first call, while the level is (re)applied every time.
    """
    if not CLI_LOGGER.handlers:
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(logging.Formatter("%(message)s"))
        CLI_LOGGER.addHandler(stdout_handler)
        CLI_LOGGER.propagate = False
    CLI_LOGGER.setLevel(LOG_LEVEL_MAP[effective_log_level])
26
+
27
+
28
def emit_structured_log(level: str, message: str, operation: str = Operation.EVALUATE) -> None:
    """Forward a structured log entry through the shared CLI logger and records."""
    shared_sinks = {
        "logger": CLI_LOGGER,
        "diagnostic_records": DIAGNOSTIC_RECORDS,
    }
    _emit_structured_log(level, message, operation, **shared_sinks)
@@ -8,6 +8,7 @@ from cli_logging.logging_utils import (
8
8
  STRUCTURED_LOG_FIELDS,
9
9
  Operation,
10
10
  format_structured_log_entry,
11
+ redact_sensitive_content,
11
12
  )
12
13
 
13
14
  _ANSI_COLORS = {
@@ -102,6 +103,7 @@ def emit_structured_log(
102
103
  if diagnostic_records is not None:
103
104
  diagnostic_records.append(entry)
104
105
  try:
105
- logger.log(getattr(logging, level.upper(), logging.INFO), render_diagnostic(entry))
106
+ rendered, _ = redact_sensitive_content(render_diagnostic(entry))
107
+ logger.log(getattr(logging, level.upper(), logging.INFO), rendered)
106
108
  except Exception:
107
109
  pass
@@ -4,6 +4,22 @@ import re
4
4
  from dataclasses import dataclass
5
5
  from typing import List, Optional
6
6
 
7
+ MAX_CONCURRENCY = 5
8
+ MAX_ATTEMPTS = 4 # Initial attempt + 3 retries
9
+ MAX_TURNS_PER_THREAD = 20
10
+ LONG_THREAD_WARNING_THRESHOLD = 10
11
+ DEFAULT_PASS_THRESHOLD = 3
12
+
13
+ # ── Environment variable name constants ──────────────────────────────
14
+ ENV_AZURE_AI_OPENAI_ENDPOINT = "AZURE_AI_OPENAI_ENDPOINT"
15
+ ENV_AZURE_AI_API_KEY = "AZURE_AI_API_KEY"
16
+ ENV_AZURE_AI_API_VERSION = "AZURE_AI_API_VERSION"
17
+ ENV_AZURE_AI_MODEL_NAME = "AZURE_AI_MODEL_NAME"
18
+ ENV_TENANT_ID = "TENANT_ID"
19
+ ENV_WORK_IQ_A2A_ENDPOINT = "WORK_IQ_A2A_ENDPOINT"
20
+ ENV_WORK_IQ_A2A_CLIENT_ID = "WORK_IQ_A2A_CLIENT_ID"
21
+ ENV_WORK_IQ_A2A_SCOPES = "WORK_IQ_A2A_SCOPES"
22
+
7
23
 
8
24
  def pascal_case_to_title(eval_name: str) -> str:
9
25
  """Convert PascalCase evaluator name to space-separated display name.
@@ -12,10 +28,12 @@ def pascal_case_to_title(eval_name: str) -> str:
12
28
  """
13
29
  return re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', eval_name)
14
30
 
31
+
15
32
  # Canonical evaluator name constants
16
33
  RELEVANCE = "Relevance"
17
34
  COHERENCE = "Coherence"
18
35
  GROUNDEDNESS = "Groundedness"
36
+ SIMILARITY = "Similarity"
19
37
  TOOL_CALL_ACCURACY = "ToolCallAccuracy"
20
38
  CITATIONS = "Citations"
21
39
  EXACT_MATCH = "ExactMatch"
@@ -48,6 +66,7 @@ METRIC_IDS = {
48
66
  RELEVANCE: "relevance",
49
67
  COHERENCE: "coherence",
50
68
  GROUNDEDNESS: "groundedness",
69
+ SIMILARITY: "similarity",
51
70
  TOOL_CALL_ACCURACY: "tool_call_accuracy",
52
71
  CITATIONS: "citations",
53
72
  EXACT_MATCH: "exact_match",
@@ -60,3 +79,37 @@ class RegistryEntry:
60
79
  type: str # "llm", "tool", or "non-llm"
61
80
  requires: List[str]
62
81
  default_threshold: Optional[float]
82
+
83
+
84
@dataclass(frozen=True)
class RunConfig:
    """Typed, immutable runtime configuration passed across module boundaries.

    Use ``RunConfig.from_namespace(args)`` to build from argparse output.
    Use ``dataclasses.replace(config, field=value)`` to derive new configs.
    """
    prompts: Optional[List[str]] = None        # inline prompt strings
    expected: Optional[List[str]] = None       # expected responses paired with prompts
    prompts_file: Optional[str] = None         # path to a JSON prompts file
    interactive: bool = False                  # gather prompts interactively
    m365_agent_id: Optional[str] = None        # normalized agent id
    output: Optional[str] = None               # output path (.json/.csv/.html)
    log_level: Optional[List[str]] = None      # raw --log-level occurrences
    effective_log_level: str = "info"          # resolved verbosity
    signout: bool = False                      # clear cached auth tokens and exit
    concurrency: int = MAX_CONCURRENCY         # parallel prompt workers

    @classmethod
    def from_namespace(cls, args) -> "RunConfig":
        """Build a RunConfig from an argparse.Namespace."""
        # effective_log_level is intentionally left at its default here;
        # it is resolved later from the raw log_level occurrences.
        copied_fields = (
            "prompts", "expected", "prompts_file", "interactive",
            "m365_agent_id", "output", "log_level", "signout", "concurrency",
        )
        return cls(**{name: getattr(args, name) for name in copied_fields})
@@ -0,0 +1,73 @@
1
+ """Environment validation and URL security checks."""
2
+
3
+ import os
4
+ import sys
5
+ import urllib.parse
6
+ from typing import List
7
+
8
+ from cli_logging.cli_logger import emit_structured_log
9
+ from cli_logging.logging_utils import Operation
10
+ from common import (
11
+ ENV_AZURE_AI_OPENAI_ENDPOINT,
12
+ ENV_AZURE_AI_API_KEY,
13
+ ENV_AZURE_AI_API_VERSION,
14
+ ENV_AZURE_AI_MODEL_NAME,
15
+ ENV_WORK_IQ_A2A_ENDPOINT,
16
+ ENV_WORK_IQ_A2A_CLIENT_ID,
17
+ ENV_WORK_IQ_A2A_SCOPES,
18
+ ENV_TENANT_ID,
19
+ )
20
+
21
+
22
+ # Allowed endpoints for URL validation
23
+ ALLOWED_ENDPOINTS = [
24
+ 'substrate.office.com',
25
+ 'graph.microsoft.com',
26
+ ]
27
+
28
+
29
def validate_environment() -> None:
    """Exit(1) with a structured error if any required env var is unset/empty."""
    required = (
        ENV_AZURE_AI_OPENAI_ENDPOINT,
        ENV_AZURE_AI_API_KEY,
        ENV_AZURE_AI_API_VERSION,
        ENV_AZURE_AI_MODEL_NAME,
        ENV_WORK_IQ_A2A_ENDPOINT,
        ENV_WORK_IQ_A2A_CLIENT_ID,
        ENV_WORK_IQ_A2A_SCOPES,
        ENV_TENANT_ID,
    )

    # Treat empty strings the same as missing variables.
    missing_vars = [name for name in required if not os.environ.get(name)]
    if not missing_vars:
        return

    emit_structured_log(
        "error",
        "Missing required environment variables: "
        f"{', '.join(missing_vars)}. Please ensure your .env file"
        " contains all required configuration.",
        operation=Operation.VALIDATE_ENV,
    )
    sys.exit(1)
54
+
55
+
56
def validate_endpoint_url(url: str, allowed_domains: List[str]) -> None:
    """Validate URL against security requirements.

    Args:
        url: The endpoint URL to validate.
        allowed_domains: Hostnames (compared case-insensitively) the URL
            may target.

    Raises:
        ValueError: If the URL is malformed, uses a dangerous or
            non-HTTPS scheme, targets a host outside allowed_domains,
            or carries a fragment.
    """
    try:
        parsed = urllib.parse.urlparse(url)
    except Exception as e:
        raise ValueError(f"Invalid URL format: {url}") from e

    # urlparse lowercases the scheme, so these comparisons are case-safe.
    if parsed.scheme in ['javascript', 'data']:
        raise ValueError(f"Dangerous URL scheme detected: {parsed.scheme}")

    if parsed.scheme != 'https':
        raise ValueError(f"Only HTTPS URLs are allowed, got: {parsed.scheme}")

    # FIX: compare the parsed hostname (lowercased, stripped of port and
    # userinfo) rather than the raw netloc — the old netloc check rejected
    # legitimate URLs like 'https://Graph.Microsoft.com' or ones carrying
    # an explicit ':443' port.
    host = parsed.hostname or ""
    if host not in {domain.lower() for domain in allowed_domains}:
        raise ValueError(f"Domain not in allowed list: {parsed.netloc}")

    if parsed.fragment:
        raise ValueError("Fragment URLs are not allowed")