@microsoft/m365-copilot-eval 1.0.1-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +415 -0
  3. package/TERMS.txt +65 -0
  4. package/package.json +82 -0
  5. package/src/clients/cli/auth/__init__.py +1 -0
  6. package/src/clients/cli/auth/auth_handler.py +262 -0
  7. package/src/clients/cli/custom_evaluators/CitationsEvaluator.py +136 -0
  8. package/src/clients/cli/custom_evaluators/ConcisenessNonLLMEvaluator.py +18 -0
  9. package/src/clients/cli/custom_evaluators/ExactMatchEvaluator.py +25 -0
  10. package/src/clients/cli/custom_evaluators/PII/PII.py +45 -0
  11. package/src/clients/cli/custom_evaluators/PartialMatchEvaluator.py +39 -0
  12. package/src/clients/cli/custom_evaluators/__init__.py +1 -0
  13. package/src/clients/cli/demo_usage.py +83 -0
  14. package/src/clients/cli/generate_report.py +251 -0
  15. package/src/clients/cli/main.py +766 -0
  16. package/src/clients/cli/readme.md +301 -0
  17. package/src/clients/cli/requirements.txt +10 -0
  18. package/src/clients/cli/response_extractor.py +589 -0
  19. package/src/clients/cli/samples/PartnerSuccess.json +122 -0
  20. package/src/clients/cli/samples/example_prompts.json +14 -0
  21. package/src/clients/cli/samples/example_prompts_alt.json +12 -0
  22. package/src/clients/cli/samples/prompts_ambiguity.json +22 -0
  23. package/src/clients/cli/samples/prompts_rag_grounding.json +22 -0
  24. package/src/clients/cli/samples/prompts_security_injection.json +22 -0
  25. package/src/clients/cli/samples/prompts_tool_use_negatives.json +22 -0
  26. package/src/clients/cli/samples/psaSample.json +18 -0
  27. package/src/clients/cli/samples/starter.json +10 -0
  28. package/src/clients/node-js/bin/runevals.js +505 -0
  29. package/src/clients/node-js/config/default.js +25 -0
  30. package/src/clients/node-js/lib/cache-utils.js +119 -0
  31. package/src/clients/node-js/lib/expiry-check.js +164 -0
  32. package/src/clients/node-js/lib/index.js +25 -0
  33. package/src/clients/node-js/lib/python-runtime.js +253 -0
  34. package/src/clients/node-js/lib/venv-manager.js +242 -0
@@ -0,0 +1,262 @@
1
+ """
2
+ WAM (Windows Account Manager) based authentication handler.
3
+
4
+ This module provides functionality to acquire access tokens using MSAL Python
5
+ with Windows Account Manager (WAM) as the broker. WAM is available on Windows 10+
6
+ and Windows Server 2019+.
7
+
8
+ For more information, see:
9
+ https://learn.microsoft.com/en-us/entra/msal/python/advanced/wam
10
+ https://github.com/AzureAD/microsoft-authentication-extensions-for-python
11
+ """
12
+
13
+ import os
14
+ import platform
15
+ import logging
16
+ from typing import Optional
17
+ from pathlib import Path
18
+ import jwt
19
+ from msal import PublicClientApplication
20
+ from msal_extensions import PersistedTokenCache, build_encrypted_persistence
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
class AuthHandler:
    """Handler for Windows Account Manager (WAM) based authentication."""

    def __init__(self, client_id: str, tenant_id: str, scopes_str: str, cache_dir: Optional[str] = None) -> None:
        """
        Initialize the WAM auth handler.

        Args:
            client_id: App registration client ID (required).
            tenant_id: Directory/tenant ID (required).
            scopes_str: Comma-separated scopes (required).
            cache_dir: Optional directory for token cache file. Defaults to user's home directory.

        Raises:
            RuntimeError: If not running on Windows (the WAM broker is Windows-only).
            ValueError: If client_id, tenant_id, or scopes_str is empty or None.
        """
        # WAM is a Windows-only broker, so refuse to initialize anywhere else.
        current_os = platform.system()
        if current_os != "Windows":
            raise RuntimeError(
                f"Authentication is only supported on Windows. Detected OS: {current_os}. "
                "Support for other operating systems is coming soon."
            )

        if not client_id:
            raise ValueError("client_id is required")

        if not tenant_id:
            raise ValueError("tenant_id is required")

        if not scopes_str:
            raise ValueError("scopes_str is required")

        # Split the comma-separated scope string, dropping blanks and surrounding whitespace.
        scopes = [s.strip() for s in scopes_str.split(",") if s.strip()]

        self.client_id = client_id
        self.authority = f"https://login.microsoftonline.com/{tenant_id}"
        self.scopes = scopes

        # Initialize the public client application with WAM broker enabled and token cache
        try:
            self.app = PublicClientApplication(
                client_id=self.client_id,
                authority=self.authority,
                enable_broker_on_windows=True,
                token_cache=self._setup_token_cache(cache_dir)
            )
            logger.info("WAM auth handler initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize WAM auth handler: {e}")
            raise

    def _setup_token_cache(self, cache_dir: Optional[str] = None) -> PersistedTokenCache:
        """
        Setup encrypted persistent token cache using MSAL Extensions.

        Creates a platform-dependent encrypted cache (DPAPI on Windows, Keychain on Mac,
        Libsecret on Linux) and returns it for use by the client application.

        NOTE(review): only the DPAPI path is reachable in practice, since
        __init__ rejects non-Windows platforms before this is called.

        Args:
            cache_dir: Optional directory for token cache file. Defaults to ~/.msal_cache.

        Returns:
            PersistedTokenCache: The initialized encrypted token cache.

        Raises:
            Exception: If cache initialization fails.
        """
        if cache_dir is None:
            cache_dir = str(Path.home() / ".msal_cache")

        cache_path = os.path.join(cache_dir, "token_cache.bin")

        try:
            # Create cache directory if it doesn't exist
            Path(cache_dir).mkdir(parents=True, exist_ok=True)

            # Build encrypted persistence (DPAPI on Windows, Keychain on Mac, Libsecret on Linux)
            persistence = build_encrypted_persistence(cache_path)
            token_cache = PersistedTokenCache(persistence)
            logger.info(f"Encrypted token cache initialized at {cache_path} (encrypted: {persistence.is_encrypted})")
            return token_cache
        except Exception as e:
            logger.error(f"Failed to initialize token cache: {e}")
            raise

    def acquire_token_interactive(self) -> Optional[dict]:
        """
        Acquire a token interactively using WAM and return the full MSAL result dict.

        This method first attempts to retrieve a cached token. If no valid cached
        token is found or if it has expired, it will prompt the user to authenticate
        via the WAM dialog.

        Returns:
            The full result dictionary from MSAL (contains access_token, id_token, etc.).
            On failure this may be None or MSAL's raw error dictionary (with
            "error" / "error_description" keys), so callers should check for
            "access_token" before using the result.

        Raises:
            ImportError: If broker-related dependencies are not installed.
                Install with: pip install 'msal[broker]>=1.20,<2'
            Exception: If WAM communication fails or user denies the request.
        """
        try:
            # First, attempt to acquire a token silently from cache
            accounts = self.get_accounts()
            if accounts:
                logger.info(f"Attempting silent token acquisition from {len(accounts)} cached account(s)")
                # Try each cached account in turn; the first valid token wins.
                for account in accounts:
                    silent_result = self.app.acquire_token_silent(
                        scopes=self.scopes, account=account
                    )
                    if silent_result and "access_token" in silent_result:
                        logger.info("Access token acquired successfully from cache")
                        return silent_result
                logger.info("No valid cached token found; proceeding with interactive authentication")
            else:
                logger.info("No cached accounts found; proceeding with interactive authentication")

            # If no cached token is valid, proceed with interactive acquisition
            result = self.app.acquire_token_interactive(
                scopes=self.scopes,
                parent_window_handle=self.app.CONSOLE_WINDOW_HANDLE,
            )

            if result and "access_token" in result:
                logger.info("Access token acquired successfully via interactive authentication")
                return result

            # Surface the most specific error message MSAL provided, if any.
            error_msg = None
            if result:
                error_msg = result.get(
                    "error_description", result.get("error", "Unknown error")
                )
            logger.error(f"Failed to acquire token: {error_msg or 'Unknown error'}")
            # Return the raw result so the caller can inspect error details.
            return result

        except ImportError as e:
            logger.error(
                f"WAM broker dependencies not installed: {e}. "
                "Install with: pip install 'msal[broker]>=1.20,<2'"
            )
            raise
        except Exception as e:
            logger.error(f"Error during token acquisition: {e}")
            raise

    def acquire_token_silent(self, account: Optional[dict] = None) -> Optional[str]:
        """
        Acquire an access token silently (without user interaction).

        This is useful for cached tokens or when you have a known account.
        Returns None if a valid token is not available and interaction would be required.

        Args:
            account: Optional account object from previous interactive authentication.
                If None, attempts to get a cached token for any account.

        Returns:
            The access token string if successful, None if no cached token available.
        """
        try:
            result = self.app.acquire_token_silent(scopes=self.scopes, account=account)

            if result and "access_token" in result:
                logger.info("Access token acquired silently")
                return result["access_token"]

            logger.debug(
                "No cached token available; interactive acquisition would be required"
            )
            return None

        except Exception as e:
            # Silent acquisition is best-effort: never raise, just report "no token".
            logger.debug(f"Silent token acquisition failed: {e}")
            return None

    def get_accounts(self) -> list:
        """
        Get the list of accounts available in WAM cache.

        Returns:
            List of account objects cached by WAM. Empty list if lookup fails.
        """
        try:
            accounts = self.app.get_accounts()
            logger.info(f"Found {len(accounts)} cached account(s)")
            return accounts
        except Exception as e:
            logger.error(f"Error retrieving cached accounts: {e}")
            return []

    def clear_cache(self) -> bool:
        """
        Clear all cached tokens and accounts from the token cache.

        This method removes all cached authentication data, forcing the user
        to authenticate interactively on the next acquire_token_interactive call.

        Returns:
            True if cache was successfully cleared, False otherwise.
        """
        try:
            # Get all cached accounts
            accounts = self.app.get_accounts()

            # Remove each account from the cache
            for account in accounts:
                self.app.remove_account(account)
                logger.info(f"Removed account {account.get('username', 'unknown')} from cache")

            logger.info("Token cache cleared successfully")
            return True
        except Exception as e:
            logger.error(f"Error clearing token cache: {e}")
            return False

    @staticmethod
    def extract_user_oid_from_access_token(access_token: str) -> str:
        """
        Extract the user OID from an access token using MSAL's JWT decoding.

        MSAL includes PyJWT under the hood, so we can use jwt.decode
        directly without adding new dependencies.

        Args:
            access_token: The access token string

        Returns:
            The user OID claim value (never empty; missing OID raises instead).

        Raises:
            ValueError: If the token cannot be decoded or the OID claim is missing.
        """
        try:
            # Decode without verification (we're just reading claims, not validating signature)
            decoded = jwt.decode(access_token, options={"verify_signature": False})
            oid = decoded.get('oid', '')
            if not oid:
                raise ValueError("OID not found in token claims")
            return oid
        except jwt.DecodeError as e:
            # Only decode failures are translated to ValueError; other jwt
            # exceptions propagate unchanged.
            raise ValueError(f"Failed to decode token: {e}")
@@ -0,0 +1,136 @@
1
+ """
2
+ CitationsEvaluator - A custom evaluator for analyzing citations in M365 Copilot responses.
3
+
4
+ This evaluator uses regex-based pattern matching to detect citations in two formats:
5
+ 1. New OAI format: \ue200cite\ue202turn{X}search{Y}\ue201
6
+ 2. Old format: [^i^] where i is the citation index
7
+
8
+ Where X, Y, and i are natural numbers representing conversation turn, search result index, or citation index.
9
+ """
10
+
11
+ import re
12
+ from enum import Enum
13
+ from typing import Dict, Any, Optional
14
+
15
+
16
class CitationFormat(Enum):
    """Enum for different citation formats supported by the evaluator."""
    OAI_UNICODE = "oai_unicode"  # New format: U+E200 cite U+E202 turn{X}search{Y} U+E201
    LEGACY_BRACKET = "legacy_bracket"  # Old format: [^i^]


class CitationsEvaluator:
    """
    A custom evaluator that analyzes citations in response text without using an LLM.

    This evaluator detects citation patterns and returns:
    - Whether at least one citation is present
    - The number of unique citations found

    Supports both new OAI unicode format and legacy bracket format.
    """

    # Detail-extraction patterns, compiled once and shared by all instances
    # (the original recompiled them per citation inside the __call__ loop).
    _OAI_DETAIL_RE = re.compile(r'turn(\d+)search(\d+)')
    _BRACKET_DETAIL_RE = re.compile(r'\[\^(\d+)\^\]')

    def __init__(self, citation_format: CitationFormat = CitationFormat.OAI_UNICODE):
        """
        Initialize the CitationsEvaluator with the specified citation format.

        Args:
            citation_format (CitationFormat): The format of citations to detect.
                Defaults to OAI_UNICODE format.

        Raises:
            ValueError: If an unsupported citation format is supplied.
        """
        self.citation_format = citation_format

        if citation_format == CitationFormat.OAI_UNICODE:
            # Pattern to match citations: \ue200cite\ue202turn{number}search{number}\ue201
            self.citation_pattern = r'\ue200cite\ue202turn\d+search\d+\ue201'
        elif citation_format == CitationFormat.LEGACY_BRACKET:
            # Pattern to match citations: [^number^]
            self.citation_pattern = r'\[\^\d+\^\]'
        else:
            raise ValueError(f"Unsupported citation format: {citation_format}")

        self.compiled_pattern = re.compile(self.citation_pattern)

    def _describe_citation(self, citation: str) -> Optional[str]:
        """Return a short human-readable identifier for one matched citation, or None."""
        if self.citation_format == CitationFormat.OAI_UNICODE:
            match = self._OAI_DETAIL_RE.search(citation)
            if match:
                return f"turn{match.group(1)}search{match.group(2)}"
        elif self.citation_format == CitationFormat.LEGACY_BRACKET:
            match = self._BRACKET_DETAIL_RE.search(citation)
            if match:
                return f"citation{match.group(1)}"
        return None

    def __call__(self, *, response: str, **kwargs) -> Dict[str, Any]:
        """
        Evaluate the response text for citations.

        Args:
            response (str): The response text from the M365 Copilot agent
            **kwargs: Additional keyword arguments (not used but kept for compatibility)

        Returns:
            Dict[str, Any]: Evaluation results containing:
                - citation_format (str): The format used for detection
                - score (int): Number of unique citations found
                - result (str): "pass" if citations found, "fail" otherwise
                - threshold (int): Minimum threshold for passing (1)
                - reason (str): Explanation of the result with citation details
        """
        # Coerce non-string input defensively (None becomes empty string).
        if not isinstance(response, str):
            response = str(response) if response is not None else ""

        # Find all citation matches
        citation_matches = self.compiled_pattern.findall(response)

        # De-duplicate while preserving first-occurrence order. The original
        # used list(set(...)), which made the reported order nondeterministic
        # across runs due to hash randomization.
        unique_citations = list(dict.fromkeys(citation_matches))

        # Extract citation identifiers for reporting.
        citation_details = [
            detail
            for detail in (self._describe_citation(c) for c in unique_citations)
            if detail
        ]

        # Prepare results in a format compatible with the HTML report generator
        return {
            "citation_format": self.citation_format.value,
            # HTML report compatible fields
            "score": len(unique_citations),  # Use citation count as the score
            "result": "pass" if len(unique_citations) > 0 else "fail",  # Pass if citations found
            "threshold": 1,  # Threshold of 1 citation minimum
            "reason": f"Found {len(unique_citations)} unique citation(s): {', '.join(citation_details) if citation_details else 'None'}"
        }

    def get_name(self) -> str:
        """Return the name of this evaluator."""
        return "CitationsEvaluator"

    def get_description(self) -> str:
        """Return a description of what this evaluator does."""
        return f"Analyzes response text for M365 Copilot citations using regex pattern matching ({self.citation_format.value} format)"
115
+
116
+
117
def citations_evaluator(*, response: str, citation_format: CitationFormat = CitationFormat.OAI_UNICODE, **kwargs) -> Dict[str, Any]:
    """
    Standalone function wrapper for the CitationsEvaluator.

    Provides a simple callable interface compatible with the Azure AI
    Evaluation SDK by constructing a throwaway evaluator per call.

    Args:
        response (str): The response text to evaluate
        citation_format (CitationFormat): The format of citations to detect
        **kwargs: Additional keyword arguments

    Returns:
        Dict[str, Any]: Citation evaluation results
    """
    return CitationsEvaluator(citation_format=citation_format)(response=response, **kwargs)


# For convenience, export the main classes and functions
__all__ = ['CitationsEvaluator', 'CitationFormat', 'citations_evaluator']
@@ -0,0 +1,18 @@
1
+ #from azure.ai.evaluation import evaluate
2
+
3
class ConcisenessNonLLMEvaluator:
    """Heuristic (non-LLM) conciseness scorer based purely on response length.

    Maps the character count onto a 0-5 scale: an empty response scores 5,
    the score falls linearly with length, and anything of 1000 characters or
    more bottoms out at 0. A score of 3 or above counts as a pass.
    """

    def __init__(self):
        pass

    # Instances are used directly as evaluator callables via __call__.
    def __call__(self, *, response: str, **kwargs):
        char_count = len(response)
        # Linear 0-5 scale: 5 at 0 chars, 0 at >= 1000 chars (clamped at 0),
        # then rounded to the nearest integer.
        score = int(round(max(0, (100 - char_count / 10) / 20)))
        return {
            "concisenessnonllm_score": score,
            "concisenessnonllm_threshold": 3,
            "concisenessnonllm_result": "pass" if score >= 3 else "fail",
            "concisenessnonllm_reason": f"Any response greater than 1000 characters is given zero score. The longer the answer, the lesser the score. The length of the response is {char_count} chars. And hence, the score!"
        }
@@ -0,0 +1,25 @@
1
+ from azure.ai.evaluation import evaluate
2
+
3
class ExactMatchEvaluator:
    """Case-sensitive exact-match evaluator.

    Compares the whitespace-stripped response against the whitespace-stripped
    expected answer (mimics C# StringComparison.InvariantCulture semantics).
    """

    def __init__(self):
        pass

    def __call__(self, *, response: str, expected_answer: str, **kwargs):
        # Guard clauses: reject unusable inputs up front.
        if response is None or not response.strip():
            raise ValueError("Response is null, empty, or whitespace.")
        if expected_answer is None:
            raise ValueError("Expected answer cannot be None.")

        matched = response.strip() == expected_answer.strip()
        return {
            "exact_match": 1.0 if matched else 0.0,
            "exact_match_result": "pass" if matched else "fail",
            "exact_match_threshold": 1.0,
            "exact_match_reason": "Exact match found" if matched else "No exact match found",
        }


# Shared, ready-to-use evaluator instance.
exact_match_evaluator = ExactMatchEvaluator()
@@ -0,0 +1,45 @@
1
+ import os
2
+ import json
3
+ import sys
4
+ import re
5
+ from promptflow.client import load_flow
6
+
7
+
8
class PIIEvaluator:
    """Prompty-backed PII evaluator.

    Loads the ``pii.prompty`` flow that ships next to this module and uses it
    to grade a response for PII. The LLM output is expected to be JSON; a
    fallback parser handles ``<S1>explanation</S1>`` / ``<S2>score</S2>`` tags.
    """

    def __init__(self, model_config):
        # The prompty definition lives alongside this module.
        flow_dir = os.path.dirname(__file__)
        self._flow = load_flow(
            source=os.path.join(flow_dir, "pii.prompty"),
            model={"configuration": model_config},
        )

    def __call__(self, *, response: str, **kwargs):
        raw = self._flow(response=response)

        # Preferred path: the model replied with well-formed JSON.
        try:
            return json.loads(raw)
        except Exception:
            pass

        # Fallback path: pull score/explanation out of XML-like tags.
        if isinstance(raw, str):
            score_tag = re.search(r'<S2>(\d+)</S2>', raw)
            reason_tag = re.search(r'<S1>(.*?)</S1>', raw, re.DOTALL)

            if score_tag:
                score = int(score_tag.group(1))
                result = {
                    'PII': score,
                    'score': score,
                    'raw_response': raw,
                }
                # Attach the explanation under every key downstream code reads.
                if reason_tag:
                    text = reason_tag.group(1).strip()
                    result['explanation'] = text
                    result['reason'] = text
                    result['PII_reason'] = text
                return result

        # Last resort: hand back whatever the model produced.
        return {'raw_response': raw}
@@ -0,0 +1,39 @@
1
+ from azure.ai.evaluation import evaluate
2
+
3
class PartialMatchEvaluator:
    """Length-ratio partial-match evaluator.

    If the (stripped) expected answer appears as a substring of the (stripped)
    response, the score is ``len(expected) / len(response)`` — i.e. the share
    of the response that the expected text accounts for; otherwise the score
    is 0. Scores of at least 0.5 count as a pass.
    """

    def __init__(self, case_sensitive=False):
        self.case_sensitive = case_sensitive

    def __call__(self, *, response: str, expected_answer: str, **kwargs):
        # Guard clauses: reject unusable inputs up front.
        if response is None or response.strip() == "":
            raise ValueError("Response cannot be null or empty.")
        if expected_answer is None:
            raise ValueError("Expected answer cannot be null.")

        haystack = response.strip()
        needle = expected_answer.strip()
        if not self.case_sensitive:
            haystack = haystack.lower()
            needle = needle.lower()

        # A substring hit scores by how much of the response the expected
        # text covers (length ratio); a miss scores zero.
        score = len(needle) / len(haystack) if needle in haystack else 0.0

        threshold = 0.5  # 50% match threshold
        is_pass = score >= threshold

        return {
            "partial_match": score,
            "partial_match_result": "pass" if is_pass else "fail",
            "partial_match_threshold": threshold,
            "partial_match_reason": f"Match score: {score:.3f} ({'above' if is_pass else 'below'} threshold {threshold})"
        }


# Default shared instance: case-insensitive matching.
partial_match_evaluator = PartialMatchEvaluator(case_sensitive=False)
@@ -0,0 +1 @@
1
+ # Custom evaluators package
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Example usage script for the M365 Copilot Agent Evaluation CLI
4
+ This script demonstrates various ways to use the CLI tool.
5
+ """
6
+
7
+ import subprocess
8
+ import sys
9
+ import os
10
+ from pathlib import Path
11
+
12
def run_command(cmd, description):
    """Print a banner for *description*, then run *cmd* in a shell and report the outcome."""
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"Example: {description}")
    print(f"Command: {cmd}")
    print(banner)

    try:
        # Note: In a real scenario, you would have your Azure credentials configured
        # This will fail without proper environment variables, but shows the CLI interface
        completed = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=30)
        if completed.returncode == 0:
            print("✅ Command executed successfully")
            print(completed.stdout)
        else:
            print("❌ Command failed (likely due to missing Azure credentials)")
            print("Error:", completed.stderr)
    except subprocess.TimeoutExpired:
        print("⏱️ Command timed out (this is expected without Azure credentials)")
    except Exception as e:
        print(f"❌ Error running command: {e}")
+
34
def main():
    """Demonstrate CLI usage examples."""
    # Run from this script's directory so relative sample paths resolve.
    os.chdir(Path(__file__).parent)

    print("M365 Copilot Agent Evaluation CLI - Usage Examples")
    print("=" * 60)
    print("Note: These examples will fail without proper Azure credentials.")
    print("This script demonstrates the CLI interface and available options.")

    # (command, description) pairs, executed in order.
    examples = [
        ("python main.py --help",
         "Display help and all available options"),
        ('python main.py --prompts "What is Microsoft Graph?" --expected "Microsoft Graph is a gateway to data and intelligence in Microsoft 365."',
         "Run evaluation with custom prompt and expected response"),
        ("python main.py --prompts-file samples/example_prompts.json --output results.json",
         "Load prompts from JSON file and save results to JSON"),
        ("python main.py --prompts-file samples/example_prompts.json --output results.csv --format csv",
         "Load prompts from file and save results to CSV"),
    ]
    for cmd, description in examples:
        run_command(cmd, description)

    print(f"\n{'='*60}")
    print("Setup Instructions:")
    print("1. Install Azure CLI and run 'az login' to authenticate")
    print("2. Create a .env file with your Azure AI configuration")
    print("3. Set the following environment variables:")
    print("   - AZURE_AI_FOUNDRY_PROJECT_ENDPOINT")
    print("   - AZURE_AI_AGENT_ID")
    print("   - AZURE_AI_OPENAI_ENDPOINT")
    print("   - AZURE_AI_API_KEY")
    print("   - AZURE_AI_API_VERSION")
    print("   - AZURE_AI_MODEL_NAME")
    print("4. Run: python main.py")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()