@microsoft/m365-copilot-eval 1.0.1-preview.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +415 -0
- package/TERMS.txt +65 -0
- package/package.json +82 -0
- package/src/clients/cli/auth/__init__.py +1 -0
- package/src/clients/cli/auth/auth_handler.py +262 -0
- package/src/clients/cli/custom_evaluators/CitationsEvaluator.py +136 -0
- package/src/clients/cli/custom_evaluators/ConcisenessNonLLMEvaluator.py +18 -0
- package/src/clients/cli/custom_evaluators/ExactMatchEvaluator.py +25 -0
- package/src/clients/cli/custom_evaluators/PII/PII.py +45 -0
- package/src/clients/cli/custom_evaluators/PartialMatchEvaluator.py +39 -0
- package/src/clients/cli/custom_evaluators/__init__.py +1 -0
- package/src/clients/cli/demo_usage.py +83 -0
- package/src/clients/cli/generate_report.py +251 -0
- package/src/clients/cli/main.py +766 -0
- package/src/clients/cli/readme.md +301 -0
- package/src/clients/cli/requirements.txt +10 -0
- package/src/clients/cli/response_extractor.py +589 -0
- package/src/clients/cli/samples/PartnerSuccess.json +122 -0
- package/src/clients/cli/samples/example_prompts.json +14 -0
- package/src/clients/cli/samples/example_prompts_alt.json +12 -0
- package/src/clients/cli/samples/prompts_ambiguity.json +22 -0
- package/src/clients/cli/samples/prompts_rag_grounding.json +22 -0
- package/src/clients/cli/samples/prompts_security_injection.json +22 -0
- package/src/clients/cli/samples/prompts_tool_use_negatives.json +22 -0
- package/src/clients/cli/samples/psaSample.json +18 -0
- package/src/clients/cli/samples/starter.json +10 -0
- package/src/clients/node-js/bin/runevals.js +505 -0
- package/src/clients/node-js/config/default.js +25 -0
- package/src/clients/node-js/lib/cache-utils.js +119 -0
- package/src/clients/node-js/lib/expiry-check.js +164 -0
- package/src/clients/node-js/lib/index.js +25 -0
- package/src/clients/node-js/lib/python-runtime.js +253 -0
- package/src/clients/node-js/lib/venv-manager.js +242 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) Microsoft Corporation.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
# M365 Copilot Agent Evaluations
|
|
2
|
+
|
|
3
|
+
> **🔒 PRIVATE PREVIEW:** This tool is currently in private preview, and the instructions below apply to the private preview.
|
|
4
|
+
|
|
5
|
+
A **zero-configuration** CLI for evaluating M365 Copilot agents. Send prompts to your agent, get responses, and automatically score them with Azure AI Evaluation metrics (relevance, coherence, groundedness).
|
|
6
|
+
- Send a batch (or interactive set) of prompts to a configured chat API endpoint.
|
|
7
|
+
- Collect agent responses and evaluate them locally using Azure AI Evaluation SDK.
|
|
8
|
+
- Metrics produced per prompt:
|
|
9
|
+
- - Relevance (1–5)
|
|
10
|
+
- - Coherence (1–5)
|
|
11
|
+
- - Groundedness (1–5)
|
|
12
|
+
- Multiple input modes: command‑line list, JSON file, interactive.
|
|
13
|
+
- Multiple output formats: console (colorized), JSON, CSV, HTML (auto‑opens report).
|
|
14
|
+
|
|
15
|
+
## 📋 Prerequisites
|
|
16
|
+
|
|
17
|
+
- **M365 Copilot Agent** deployed to your tenant (can be created with [M365 Agents Toolkit](https://learn.microsoft.com/en-us/microsoft-365/developer/overview-m365-agents-toolkit) or any other method)
|
|
18
|
+
- **Node.js 24.12.0+** (check: `node --version`)
|
|
19
|
+
- **Environment file** with your credentials and agent ID (see [Environment Setup](#-environment-setup) below)
|
|
20
|
+
- **Your Tenant ID, Azure OpenAI endpoint, and API key** (see [Getting Variables](#-getting-variables) below)
|
|
21
|
+
|
|
22
|
+
> Note: Authentication is currently supported on Windows only. Support for other operating systems is coming soon.
|
|
23
|
+
|
|
24
|
+
## 🔧 Environment Setup
|
|
25
|
+
|
|
26
|
+
### Install the Tool
|
|
27
|
+
|
|
28
|
+
1. Go to the releases page → https://github.com/microsoft/M365-Copilot-Agent-Evals/releases and click on the latest release.
|
|
29
|
+
2. Click on the `Source code (tar.gz)`
|
|
30
|
+
3. This should download the package to your device.
|
|
31
|
+
4. Go to the folder where this tar.gz file is. This will now be your project root folder.
|
|
32
|
+
5. Run `npm install -g <filename.tar.gz>` e.g., `npm install -g M365-Copilot-Agent-Evals-<version>.tar.gz`
|
|
33
|
+
|
|
34
|
+
### Setup Steps
|
|
35
|
+
|
|
36
|
+
Now, set up where you'll store your environment variables:
|
|
37
|
+
|
|
38
|
+
**Are you using M365 Agents Toolkit (ATK)?**
|
|
39
|
+
- ✅ **Yes** → You already have `.env.local` in your project with `M365_TITLE_ID`. You'll add Azure OpenAI variables to this file.
|
|
40
|
+
- ✅ **No** → Create a new `env/.env.dev` file in your project directory. You'll add all variables there.
|
|
41
|
+
|
|
42
|
+
The CLI loads environment variables from multiple sources (in order of precedence):
|
|
43
|
+
|
|
44
|
+
1. **`.env.local`** in current directory (auto-detected, ideal for ATK projects)
|
|
45
|
+
2. **`env/.env.{environment}`** via `--env` flag (e.g., `--env dev` loads `env/.env.dev`)
|
|
46
|
+
3. **System environment variables**
|
|
47
|
+
|
|
48
|
+
#### Option 1: For M365 Agents Toolkit (ATK) Projects
|
|
49
|
+
|
|
50
|
+
If you're working in an ATK project, you already have `.env.local` with `M365_TITLE_ID`. Just add your Azure credentials and tenant ID:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# .env.local (existing ATK project file)
|
|
54
|
+
# Already present from ATK:
|
|
55
|
+
M365_TITLE_ID="T_your-title-id-here" # Auto-generated by ATK
|
|
56
|
+
|
|
57
|
+
# You'll add these (see Getting Variables section below):
|
|
58
|
+
AZURE_AI_OPENAI_ENDPOINT="<your-azure-openai-endpoint>"
|
|
59
|
+
AZURE_AI_API_KEY="<your-api-key-from-azure-portal>"
|
|
60
|
+
TENANT_ID="<your-tenant-id>"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
#### Option 2: For Non-ATK Projects
|
|
64
|
+
|
|
65
|
+
Create `env/.env.dev` in your project directory:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# env/.env.dev (new file you create)
|
|
69
|
+
# Your agent ID (Optional):
|
|
70
|
+
M365_AGENT_ID="your-agent-id" # e.g., U_0dc4a8a2-b95f-edac-91c8-d802023ec2d4
|
|
71
|
+
|
|
72
|
+
# You'll add these (see Getting Variables section below):
|
|
73
|
+
AZURE_AI_OPENAI_ENDPOINT="<your-azure-openai-endpoint>"
|
|
74
|
+
AZURE_AI_API_KEY="<your-api-key-from-azure-portal>"
|
|
75
|
+
TENANT_ID="<your-tenant-id>"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
#### Optional Overrides
|
|
79
|
+
```bash
|
|
80
|
+
AZURE_AI_API_VERSION="2024-12-01-preview" # default
|
|
81
|
+
AZURE_AI_MODEL_NAME="gpt-4o-mini" # default
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
You can also override the agent ID at runtime: `runevals --agent-id "custom-id"`
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 🔑 Getting Variables
|
|
89
|
+
|
|
90
|
+
Now that you know what's needed, here's how to get the required values:
|
|
91
|
+
|
|
92
|
+
### 1. Tenant ID
|
|
93
|
+
|
|
94
|
+
Your Azure Active Directory (AAD) tenant ID.
|
|
95
|
+
|
|
96
|
+
**How to obtain:**
|
|
97
|
+
|
|
98
|
+
1. Go to [Azure Portal](https://portal.azure.com)
|
|
99
|
+
2. Search for "Azure Active Directory" or "Microsoft Entra ID"
|
|
100
|
+
3. In the Overview section, you'll see **Tenant ID**
|
|
101
|
+
4. Copy this value - this is your `TENANT_ID`
|
|
102
|
+
|
|
103
|
+
Alternatively, if you have the Azure CLI installed:
|
|
104
|
+
```bash
|
|
105
|
+
az account show --query tenantId
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### 2. Agent ID (Only for MSIT)
|
|
109
|
+
- If you created your agent using Agents Toolkit, the agent-id is the `M365_TITLE_ID` value in the `.env.local` file.
|
|
110
|
+
- If you did not, you can get your agent-id by following these steps:
|
|
111
|
+
```
|
|
112
|
+
1. Open aka.ms/devui in your browser
|
|
113
|
+
2. Click on `Configuration`
|
|
114
|
+
3. In the dialog that opens, click on `Untitled Config`
|
|
115
|
+
4. Click on the `Payload` tab
|
|
116
|
+
5. If you scroll down on this tab, you will see `DA (Declarative Agent)`
|
|
117
|
+
6. In this dropdown, you will see all the agents that are installed for you.
|
|
118
|
+
7. Select the agent that you want to evaluate.
|
|
119
|
+
8. Copy the `gpts.id` value before .declarativeAgent.
|
|
120
|
+
9. This is your `agent-id`. It would look like `U_0dc4a8a2-b95f-edac-91c8-d802023ec2d4`
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### 3. Azure OpenAI Endpoint and API Key
|
|
124
|
+
|
|
125
|
+
You need both the endpoint URL and API key from your Azure OpenAI resource for "LLM as a Judge" evaluations.
|
|
126
|
+
|
|
127
|
+
**How to obtain:**
|
|
128
|
+
|
|
129
|
+
1. Go to [Azure Portal](https://portal.azure.com)
|
|
130
|
+
2. Navigate to your Azure OpenAI service
|
|
131
|
+
- **Path:** Portal → All Services → Search "OpenAI" → Select your resource
|
|
132
|
+
- **Or create new:** Portal → Create a resource → Search "OpenAI"
|
|
133
|
+
3. In the **Overview** section, copy the **Endpoint** value
|
|
134
|
+
- Format: `https://YOUR-RESOURCE-NAME.openai.azure.com/`
|
|
135
|
+
- This is your `AZURE_AI_OPENAI_ENDPOINT`
|
|
136
|
+
4. In the left sidebar, click **Keys and Endpoint**
|
|
137
|
+
5. Copy **KEY 1** or **KEY 2**
|
|
138
|
+
- This is your `AZURE_AI_API_KEY`
|
|
139
|
+
6. Add both values to your environment file (`.env.local` for ATK projects, or `env/.env.dev` otherwise) as shown in the [Setup Steps](#setup-steps) above
|
|
140
|
+
|
|
141
|
+
**Required model:** Ensure you have `gpt-4o-mini` (or similar) deployed in your Azure OpenAI resource.
|
|
142
|
+
|
|
143
|
+
**Security tip:** Store keys and endpoints securely and never commit to source control.
|
|
144
|
+
|
|
145
|
+
## 🚀 Quick Start
|
|
146
|
+
|
|
147
|
+
Now that you have your environment variables set up, you're ready to run evaluations!
|
|
148
|
+
|
|
149
|
+
> **Important:** Run this tool FROM your M365 agent project directory (where your agent code lives), **not** from this repository. You don't need to clone or download this repo.
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
# Navigate to YOUR agent project directory
|
|
153
|
+
cd /path/to/your-agent-project
|
|
154
|
+
|
|
155
|
+
# Run evaluations (auto-discovers .env.local for ATK projects)
|
|
156
|
+
runevals
|
|
157
|
+
|
|
158
|
+
# Or specify an environment file
|
|
159
|
+
runevals --env dev
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
**No prompts file?** If you don't have a prompts file yet, the tool will offer to create a starter file with example prompts for you.
|
|
163
|
+
|
|
164
|
+
**Environment file lookup:**
|
|
165
|
+
- Checks `.env.local` first (ATK projects)
|
|
166
|
+
- Then checks `env/.env.{name}` if `--env {name}` is specified
|
|
167
|
+
- Prompts file auto-discovery works the same for all projects
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## 📝 Creating Prompts Files
|
|
173
|
+
|
|
174
|
+
The CLI auto-discovers prompts files in your project:
|
|
175
|
+
|
|
176
|
+
### Auto-Discovery
|
|
177
|
+
|
|
178
|
+
When you run `runevals`, it searches:
|
|
179
|
+
1. Current directory: `prompts.json`, `evals.json`, `tests.json`
|
|
180
|
+
2. `./evals/` subdirectory: `prompts.json`, `evals.json`, `tests.json`
|
|
181
|
+
|
|
182
|
+
**Example project structure:**
|
|
183
|
+
```
|
|
184
|
+
my-agent/
|
|
185
|
+
├── .env.local # Your credentials
|
|
186
|
+
├── evals/
|
|
187
|
+
│ └── evals.json # Your test prompts (auto-discovered!)
|
|
188
|
+
└── .evals/
|
|
189
|
+
└── 2025-12-03_14-30-45.html # Generated reports
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Starter File Creation
|
|
193
|
+
|
|
194
|
+
If no file is found:
|
|
195
|
+
```
|
|
196
|
+
⚠️ No prompts file found in current directory or ./evals/
|
|
197
|
+
|
|
198
|
+
Create a starter evals file with sample prompts? (Y/n):
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Answering "Y" creates `./evals/evals.json` with 2 starter prompts:
|
|
202
|
+
|
|
203
|
+
```json
|
|
204
|
+
[
|
|
205
|
+
{
|
|
206
|
+
"prompt": "What is Microsoft 365?",
|
|
207
|
+
"expected_response": "Microsoft 365 is a cloud-based productivity suite..."
|
|
208
|
+
},
|
|
209
|
+
{
|
|
210
|
+
"prompt": "How can I share a file in Teams?",
|
|
211
|
+
"expected_response": "You can share a file in Teams by uploading it..."
|
|
212
|
+
}
|
|
213
|
+
]
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Edit this file with your own prompts and run again!
|
|
217
|
+
|
|
218
|
+
### Manual Creation
|
|
219
|
+
|
|
220
|
+
Create `./evals/prompts.json`:
|
|
221
|
+
|
|
222
|
+
```json
|
|
223
|
+
[
|
|
224
|
+
{
|
|
225
|
+
"prompt": "Your test prompt here",
|
|
226
|
+
"expected_response": "Expected agent response"
|
|
227
|
+
}
|
|
228
|
+
]
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## 🎯 Usage Examples
|
|
232
|
+
|
|
233
|
+
> **Remember:** All commands below assume you're running them FROM your agent project directory, **not** from this repository.
|
|
234
|
+
|
|
235
|
+
### What to Expect
|
|
236
|
+
|
|
237
|
+
When you run an evaluation from your agent project directory, you'll see:
|
|
238
|
+
```bash
|
|
239
|
+
🚀 M365 Copilot Agent Evaluations CLI
|
|
240
|
+
|
|
241
|
+
📂 Loading environment: dev
|
|
242
|
+
🤖 Agent ID (from M365_TITLE_ID): T_my-agent.declarativeAgent
|
|
243
|
+
📄 Using prompts file: ./evals/evals.json
|
|
244
|
+
|
|
245
|
+
📊 Running evaluations...
|
|
246
|
+
|
|
247
|
+
─────────────────────────────────────────────────────────────
|
|
248
|
+
|
|
249
|
+
✓ Evals completed successfully!
|
|
250
|
+
|
|
251
|
+
Results saved to: ./evals/2025-12-03_14-30-45.html
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
**Commands to run from your project root:**
|
|
255
|
+
|
|
256
|
+
```bash
|
|
257
|
+
# Use .env.local (checked in current dir, then env/ folder)
|
|
258
|
+
runevals
|
|
259
|
+
|
|
260
|
+
# Use env/.env.dev configuration
|
|
261
|
+
runevals --env dev
|
|
262
|
+
|
|
263
|
+
# Use specific prompts file in your project
|
|
264
|
+
runevals --prompts-file ./evals/my-tests.json
|
|
265
|
+
|
|
266
|
+
# Inline prompts (no file needed, useful for quick tests)
|
|
267
|
+
runevals --prompts "What is Microsoft Graph?" --expected "Gateway to M365 data"
|
|
268
|
+
|
|
269
|
+
# Interactive mode (enter prompts interactively)
|
|
270
|
+
runevals --interactive
|
|
271
|
+
|
|
272
|
+
# Custom output location in your project
|
|
273
|
+
runevals --output ./reports/results.html
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Optional: Add Shortcuts to package.json
|
|
277
|
+
|
|
278
|
+
You can add shortcuts (npm scripts) to your agent project's `package.json`:
|
|
279
|
+
|
|
280
|
+
```json
|
|
281
|
+
{
|
|
282
|
+
"scripts": {
|
|
283
|
+
"eval": "runevals",
|
|
284
|
+
"eval:local": "runevals --env local",
|
|
285
|
+
"eval:dev": "runevals --env dev"
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Then use shorter commands:
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
# Uses .env.local (ATK default)
|
|
294
|
+
npm run eval
|
|
295
|
+
|
|
296
|
+
# Uses env/.env.local
|
|
297
|
+
npm run eval:local
|
|
298
|
+
|
|
299
|
+
# Uses env/.env.dev
|
|
300
|
+
npm run eval:dev
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
**Production note:** For production environments, use CI/CD pipelines instead of local `npm run` commands. See [CICD_CACHE_GUIDE.md](CICD_CACHE_GUIDE.md) for examples.
|
|
304
|
+
|
|
305
|
+
## 📊 Output Formats
|
|
306
|
+
|
|
307
|
+
Results are automatically saved to `./evals/YYYY-MM-DD_HH-MM-SS.html` with:
|
|
308
|
+
- **Relevance** score (1-5)
|
|
309
|
+
- **Coherence** score (1-5)
|
|
310
|
+
- **Groundedness** score (1-5)
|
|
311
|
+
- Per-prompt details and aggregate metrics
|
|
312
|
+
|
|
313
|
+
Other formats:
|
|
314
|
+
```bash
|
|
315
|
+
# JSON output
|
|
316
|
+
runevals --output results.json
|
|
317
|
+
|
|
318
|
+
# CSV output
|
|
319
|
+
runevals --output results.csv
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
## 🔧 Command Reference
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
Options:
|
|
326
|
+
-V, --version output version number
|
|
327
|
+
-v, --verbose show detailed processing steps
|
|
328
|
+
-q, --quiet minimal output
|
|
329
|
+
--prompts <prompts...> inline prompts to evaluate
|
|
330
|
+
--expected <responses...> expected responses (with --prompts)
|
|
331
|
+
--prompts-file <file> JSON file with prompts
|
|
332
|
+
-o, --output <file> output file (JSON, CSV, or HTML)
|
|
333
|
+
-i, --interactive interactive prompt entry mode
|
|
334
|
+
--agent-id <id> override agent ID
|
|
335
|
+
--env <environment> environment name (default: dev)
|
|
336
|
+
--init-only just setup, don't run evals
|
|
337
|
+
-h, --help display help
|
|
338
|
+
|
|
339
|
+
Cache Commands:
|
|
340
|
+
cache-info show cache statistics
|
|
341
|
+
cache-clear remove cached Python runtime
|
|
342
|
+
cache-dir print cache directory path
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
## ❓ Troubleshooting
|
|
346
|
+
|
|
347
|
+
### Pre-cache Python Environment (Optional)
|
|
348
|
+
|
|
349
|
+
If you want to set up the Python environment ahead of time without running evaluations:
|
|
350
|
+
|
|
351
|
+
```bash
|
|
352
|
+
runevals --init-only
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
This is useful for:
|
|
356
|
+
- Pre-warming the cache in CI/CD pipelines
|
|
357
|
+
- Testing the setup without running evaluations
|
|
358
|
+
- Troubleshooting installation issues
|
|
359
|
+
|
|
360
|
+
### Cache Issues
|
|
361
|
+
```bash
|
|
362
|
+
# View cache info
|
|
363
|
+
runevals cache-info
|
|
364
|
+
|
|
365
|
+
# Clear and rebuild
|
|
366
|
+
runevals cache-clear
|
|
367
|
+
runevals --init-only --verbose
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### Network/Proxy Issues
|
|
371
|
+
```bash
|
|
372
|
+
# Set proxy
|
|
373
|
+
export HTTPS_PROXY=http://proxy:8080
|
|
374
|
+
|
|
375
|
+
# Retry with verbose output
|
|
376
|
+
runevals --init-only --verbose
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
### Permission Issues
|
|
380
|
+
```bash
|
|
381
|
+
# Check cache directory
|
|
382
|
+
runevals cache-dir
|
|
383
|
+
|
|
384
|
+
# Fix permissions (Unix/macOS)
|
|
385
|
+
chmod -R u+w $(runevals cache-dir)
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
## 📚 Advanced Documentation
|
|
389
|
+
|
|
390
|
+
- **[CI/CD Integration](./CICD_CACHE_GUIDE.md)** - GitHub Actions, Azure DevOps caching
|
|
391
|
+
- **[Testing Guide](./.github/TESTING_GUIDE.md)** - Cross-platform testing procedures
|
|
392
|
+
- **[Python CLI Guide](./PYTHON_CLI.md)** - Direct Python usage (without Node.js)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
## Contributing
|
|
396
|
+
|
|
397
|
+
This project welcomes contributions and suggestions. Most contributions require you to agree to a
|
|
398
|
+
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
|
|
399
|
+
the rights to use your contribution. For details, visit [Contributor License Agreements](https://cla.opensource.microsoft.com).
|
|
400
|
+
|
|
401
|
+
When you submit a pull request, a CLA bot will automatically determine whether you need to provide
|
|
402
|
+
a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
|
|
403
|
+
provided by the bot. You will only need to do this once across all repos using our CLA.
|
|
404
|
+
|
|
405
|
+
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
|
|
406
|
+
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
|
|
407
|
+
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
|
|
408
|
+
|
|
409
|
+
## Trademarks
|
|
410
|
+
|
|
411
|
+
This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
|
|
412
|
+
trademarks or logos is subject to and must follow
|
|
413
|
+
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/legal/intellectualproperty/trademarks/usage/general).
|
|
414
|
+
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
|
|
415
|
+
Any use of third-party trademarks or logos are subject to those third-party's policies.
|
package/TERMS.txt
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
MICROSOFT SOFTWARE LICENSE TERMS
|
|
2
|
+
M365 Copilot Agent Eval Tool
|
|
3
|
+
________________________________________
|
|
4
|
+
IF YOU LIVE IN (OR ARE A BUSINESS WITH A PRINCIPAL PLACE OF BUSINESS IN) THE UNITED STATES, PLEASE READ THE “BINDING ARBITRATION AND CLASS ACTION WAIVER” SECTION BELOW. IT AFFECTS HOW DISPUTES ARE RESOLVED.
|
|
5
|
+
________________________________________
|
|
6
|
+
These license terms are an agreement between you and Microsoft Corporation. They apply to the software named above and any Microsoft services or software updates (except to the extent such services or updates are accompanied by new or additional terms, in which case those different terms apply prospectively and do not alter your or Microsoft’s rights relating to pre-updated software or services). IF YOU COMPLY WITH THESE LICENSE TERMS, YOU HAVE THE RIGHTS BELOW. BY USING THE SOFTWARE, YOU ACCEPT THESE TERMS.
|
|
7
|
+
1. INSTALLATION AND USE RIGHTS.
|
|
8
|
+
a) General. You may install and use any number of copies of the software on your devices.
|
|
9
|
+
b) Included Microsoft Applications. The software may include other Microsoft applications. These license terms apply to those included applications, if any, unless other license terms are provided with the other Microsoft applications.
|
|
10
|
+
c) Work or School Accounts. You can sign into the software with a work or school email address. If you do, you agree that the owner of the domain associated with your email address may control and administer your account, and access and process your data, including the contents of your communications and files. You further agree that your use of the software may be subject to: i) your organization’s guidelines and policies regarding the use of the software; and ii) the agreements Microsoft has with you or your organization, and in such case these terms may not apply. If you already have a Microsoft account and you use a separate work or school email address to access the software, you may be prompted to update the email address associated with your Microsoft account to continue accessing the software.
|
|
11
|
+
d) Third Party Components. The software may include third party components with separate legal notices or governed by other agreements, as may be described in the ThirdPartyNotices file(s) accompanying the software.
|
|
12
|
+
|
|
13
|
+
e) Microsoft Services Agreement. Some features of the software provide access to, or rely on, online services. The use of those services (but not the software) is governed by the separate terms and privacy policies in the Microsoft Services Agreement at http://go.microsoft.com/fwlink/?linkid=398923. Please read them. The services may not be available in all regions.
|
|
14
|
+
|
|
15
|
+
2. TIME-SENSITIVE SOFTWARE.
|
|
16
|
+
a) Period. This agreement is effective on your acceptance and terminates on the earlier of (i) 30 days following first availability of a commercial release of the software or (ii) upon termination by Microsoft. Microsoft may extend this agreement in its discretion.
|
|
17
|
+
b) Notice. You may receive periodic reminder notices of this date through the software.
|
|
18
|
+
c) Access to data. You may not be able to access data used in the software when it stops running.
|
|
19
|
+
3. SCOPE OF LICENSE. The software is licensed, not sold. Microsoft reserves all other rights. Unless applicable law gives you more rights despite this limitation, you will not (and have no right to):
|
|
20
|
+
a) work around any technical limitations in the software that only allow you to use it in certain ways;
|
|
21
|
+
b) reverse engineer, decompile or disassemble the software, or otherwise attempt to derive the source code for the software, except and to the extent required by third party licensing terms governing use of certain open source components that may be included in the software;
|
|
22
|
+
c) remove, minimize, block, or modify any notices of Microsoft or its suppliers in the software;
|
|
23
|
+
d) use the software for commercial, non-profit, or revenue-generating activities unless you have commercial use rights under a separate agreement;
|
|
24
|
+
e) use the software in any way that is against the law or to create or propagate malware; or
|
|
25
|
+
f) share, publish, distribute, or lease the software, provide the software as a stand-alone offering for others to use, or transfer the software or this agreement to any third party.
|
|
26
|
+
4. PRE-RELEASE SOFTWARE. The software is a pre-release version. It may not operate correctly. It may be different from the commercially released version. Customer further acknowledges that the tool is provided in preview and that Customer’s use of any preview features is governed by the Preview Terms within the Product Terms (Microsoft Azure Legal Information | Microsoft Azure, Preview Terms Of Use | Microsoft Azure), as well as the applicable preview provisions of the Data Protection Addendum (DPA).
|
|
27
|
+
5. FEEDBACK. If you give feedback about the software to Microsoft, you give to Microsoft, without charge, the right to use, share and commercialize your feedback in any way and for any purpose. You will not give feedback that is subject to a license that requires Microsoft to license its software or documentation to third parties because Microsoft includes your feedback in them. These rights survive this agreement.
|
|
28
|
+
6. DATA. This software may interact with other Microsoft products that collect data that is transmitted to Microsoft. To learn more about how Microsoft processes personal data we collect, please see the Microsoft Privacy Statement at https://go.microsoft.com/fwlink/?LinkId=248681. Customer is solely responsible for determining what data is submitted for evaluation and for ensuring that such data is handled in compliance with applicable laws, internal policies, and contractual obligations.
|
|
29
|
+
7. EXPORT RESTRICTIONS. You must comply with all domestic and international export laws and regulations that apply to the software, which include restrictions on destinations, end users, and end use. For further information on export restrictions, visit https://aka.ms/exporting.
|
|
30
|
+
8. SUPPORT SERVICES. Microsoft is not obligated under this agreement to provide any support services for the software. Any support provided is “as is”, “with all faults”, and without warranty of any kind.
|
|
31
|
+
9. UPDATES. The software may periodically check for updates, and download and install them for you. You may obtain updates only from Microsoft or authorized sources. Microsoft may need to update your system to provide you with updates. You agree to receive these automatic updates without any additional notice. Updates may not include or support all existing software features, services, or peripheral devices.
|
|
32
|
+
10. BINDING ARBITRATION AND CLASS ACTION WAIVER. This Section applies if you live in (or, if a business, your principal place of business is in) the United States. If you and Microsoft have a dispute, you and Microsoft agree to try for 60 days to resolve it informally. If you and Microsoft can’t, you and Microsoft agree to binding individual arbitration before the American Arbitration Association under the Federal Arbitration Act (“FAA”), and not to sue in court in front of a judge or jury. Instead, a neutral arbitrator will decide. Class action lawsuits, class-wide arbitrations, private attorney-general actions, and any other proceeding where someone acts in a representative capacity are not allowed; nor is combining individual proceedings without the consent of all parties. The complete Arbitration Agreement contains more terms and is at https://aka.ms/arb-agreement-4. You and Microsoft agree to these terms.
|
|
33
|
+
11. TERMINATION. Without prejudice to any other rights, Microsoft may terminate this agreement if you fail to comply with any of its terms or conditions. In such event, you must destroy all copies of the software and all of its component parts.
|
|
34
|
+
12. ENTIRE AGREEMENT. This agreement, and any other terms Microsoft may provide for supplements, updates, or third-party applications, is the entire agreement for the software.
|
|
35
|
+
13. APPLICABLE LAW AND PLACE TO RESOLVE DISPUTES. If you acquired the software in the United States or Canada, the laws of the state or province where you live (or, if a business, where your principal place of business is located) govern the interpretation of this agreement, claims for its breach, and all other claims (including consumer protection, unfair competition, and tort claims), regardless of conflict of laws principles, except that the FAA governs everything related to arbitration. If you acquired the software in any other country, its laws apply, except that the FAA governs everything related to arbitration. If U.S. federal jurisdiction exists, you and Microsoft consent to exclusive jurisdiction and venue in the federal court in King County, Washington for all disputes heard in court (excluding arbitration). If not, you and Microsoft consent to exclusive jurisdiction and venue in the Superior Court of King County, Washington for all disputes heard in court (excluding arbitration).
|
|
36
|
+
14. CONSUMER RIGHTS; REGIONAL VARIATIONS. This agreement describes certain legal rights. You may have other rights, including consumer rights, under the laws of your state, province, or country. Separate and apart from your relationship with Microsoft, you may also have rights with respect to the party from which you acquired the software. This agreement does not change those other rights if the laws of your state, province, or country do not permit it to do so. For example, if you acquired the software in one of the below regions, or mandatory country law applies, then the following provisions apply to you:
|
|
37
|
+
a) Australia. You have statutory guarantees under the Australian Consumer Law and nothing in this agreement is intended to affect those rights.
|
|
38
|
+
b) Canada. If you acquired this software in Canada, you may stop receiving updates by turning off the automatic update feature, disconnecting your device from the Internet (if and when you re-connect to the Internet, however, the software will resume checking for and installing updates), or uninstalling the software. The product documentation, if any, may also specify how to turn off updates for your specific device or software.
|
|
39
|
+
c) Germany and Austria.
|
|
40
|
+
i. Warranty. The properly licensed software will perform substantially as described in any Microsoft materials that accompany the software. However, Microsoft gives no contractual guarantee in relation to the licensed software.
|
|
41
|
+
ii. Limitation of Liability. In case of intentional conduct, gross negligence, claims based on the Product Liability Act, as well as, in case of death or personal or physical injury, Microsoft is liable according to the statutory law.
|
|
42
|
+
Subject to the foregoing clause ii., Microsoft will only be liable for slight negligence if Microsoft is in breach of such material contractual obligations, the fulfillment of which facilitate the due performance of this agreement, the breach of which would endanger the purpose of this agreement and the compliance with which a party may constantly trust in (so-called "cardinal obligations"). In other cases of slight negligence, Microsoft will not be liable for slight negligence.
|
|
43
|
+
|
|
44
|
+
15. CONFIDENTIAL INFORMATION. The software, including its user interface, features and documentation, is confidential and proprietary to Microsoft and its suppliers.
|
|
45
|
+
a) Use. For five years after installation of the software or its commercial release, whichever is first, you may not disclose confidential information to third parties. You may disclose confidential information only to your employees and consultants who need to know the information. You must have written agreements with them that protect the confidential information at least as much as this agreement.
|
|
46
|
+
b) Survival. Your duty to protect confidential information survives this agreement.
|
|
47
|
+
c) Exclusions. You may disclose confidential information in response to a judicial or governmental order. You must first give written notice to Microsoft to allow it to seek a protective order or otherwise protect the information. Confidential information does not include information that:
|
|
48
|
+
1. becomes publicly known through no wrongful act;
|
|
49
|
+
2. you received from a third party who did not breach confidentiality obligations to Microsoft or its suppliers; or
|
|
50
|
+
3. you developed independently.
|
|
51
|
+
|
|
52
|
+
16. DISCLAIMER OF WARRANTY. THE SOFTWARE IS LICENSED “AS IS.” YOU BEAR THE RISK OF USING IT. MICROSOFT GIVES NO EXPRESS WARRANTIES, GUARANTEES, OR CONDITIONS. TO THE EXTENT PERMITTED UNDER APPLICABLE LAWS, MICROSOFT EXCLUDES ALL IMPLIED WARRANTIES, INCLUDING MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.
|
|
53
|
+
17. LIMITATION ON AND EXCLUSION OF DAMAGES. IF YOU HAVE ANY BASIS FOR RECOVERING DAMAGES DESPITE THE PRECEDING DISCLAIMER OF WARRANTY, YOU CAN RECOVER FROM MICROSOFT AND ITS SUPPLIERS ONLY DIRECT DAMAGES UP TO U.S. $5.00. YOU CANNOT RECOVER ANY OTHER DAMAGES, INCLUDING CONSEQUENTIAL, LOST PROFITS, SPECIAL, INDIRECT OR INCIDENTAL DAMAGES.
|
|
54
|
+
This limitation applies to (a) anything related to the software, services, content (including code) on third party Internet sites, or third party applications; and (b) claims for breach of contract, warranty, guarantee, or condition; strict liability, negligence, or other tort; or any other claim; in each case to the extent permitted by applicable law.
|
|
55
|
+
It also applies even if Microsoft knew or should have known about the possibility of the damages. The above limitation or exclusion may not apply to you because your state, province, or country may not allow the exclusion or limitation of incidental, consequential, or other damages.
|
|
56
|
+
|
|
57
|
+
Please note: As this software is distributed in Canada, some of the clauses in this agreement are provided below in French.
|
|
58
|
+
Remarque: Ce logiciel étant distribué au Canada, certaines des clauses dans ce contrat sont fournies ci-dessous en français.
|
|
59
|
+
EXONÉRATION DE GARANTIE. Le logiciel visé par une licence est offert « tel quel ». Toute utilisation de ce logiciel est à vos seuls risques et périls. Microsoft n’accorde aucune autre garantie expresse. Vous pouvez bénéficier de droits additionnels en vertu du droit local sur la protection des consommateurs, que ce contrat ne peut modifier. Là où elles sont permises par le droit local, les garanties implicites de qualité marchande, d’adéquation à un usage particulier et d’absence de contrefaçon sont exclues.
|
|
60
|
+
LIMITATION DES DOMMAGES-INTÉRÊTS ET EXCLUSION DE RESPONSABILITÉ POUR LES DOMMAGES. Vous pouvez obtenir de Microsoft et de ses fournisseurs une indemnisation en cas de dommages directs uniquement à hauteur de 5,00 $ US. Vous ne pouvez prétendre à aucune indemnisation pour les autres dommages, y compris les dommages spéciaux, indirects ou accessoires et pertes de bénéfices.
|
|
61
|
+
Cette limitation concerne:
|
|
62
|
+
• tout ce qui est relié au logiciel, aux services ou au contenu (y compris le code) figurant sur des sites Internet tiers ou dans des programmes tiers; et
|
|
63
|
+
• les réclamations au titre de violation de contrat ou de garantie, ou au titre de responsabilité stricte, de négligence ou d’une autre faute dans la limite autorisée par la loi en vigueur.
|
|
64
|
+
Elle s’applique également, même si Microsoft connaissait ou devrait connaître l’éventualité d’un tel dommage. Si votre pays n’autorise pas l’exclusion ou la limitation de responsabilité pour les dommages indirects, accessoires ou de quelque nature que ce soit, il se peut que la limitation ou l’exclusion ci-dessus ne s’applique pas à votre égard.
|
|
65
|
+
EFFET JURIDIQUE. Le présent contrat décrit certains droits juridiques. Vous pourriez avoir d’autres droits prévus par les lois de votre pays. Le présent contrat ne modifie pas les droits que vous confèrent les lois de votre pays si celles-ci ne le permettent pas.
|
package/package.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@microsoft/m365-copilot-eval",
|
|
3
|
+
"version": "1.0.1-preview.1",
|
|
4
|
+
"description": "Zero-config Node.js wrapper for M365 Copilot Agent Evaluations CLI (Python-based Azure AI Evaluation SDK)",
|
|
5
|
+
"publishDate": "2026-01-21",
|
|
6
|
+
"main": "src/clients/node-js/lib/index.js",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"bin": {
|
|
9
|
+
"runevals": "./src/clients/node-js/bin/runevals.js"
|
|
10
|
+
},
|
|
11
|
+
"scripts": {
|
|
12
|
+
"postinstall": "node -e \"console.log('By installing this package, you agree to:\\n'); require('fs').readFile('./TERMS.txt', 'utf8', (err, content) => { if (!err) console.log(content); else console.error('Read about the Terms for M365 Copilot Agent Eval Tool here - aka.ms/evaltoolterms'); });\"",
|
|
13
|
+
"prebuild": "node scripts/generate-defaults.js",
|
|
14
|
+
"build": "npm run prettier:check && npm run clean && npm run lint",
|
|
15
|
+
"clean": "rimraf node_modules/.cache dist coverage",
|
|
16
|
+
"test": "node --test tests/clients/node-js/**/*.test.js",
|
|
17
|
+
"set-publish-date": "node scripts/set-publish-date.js",
|
|
18
|
+
"prepublishOnly": "node scripts/set-publish-date.js",
|
|
19
|
+
"prettier:base": "prettier --parser babel",
|
|
20
|
+
"prettier:check": "npm run prettier:base -- --check \"src/clients/node-js/**/*.js\"",
|
|
21
|
+
"prettier:write": "npm run prettier:base -- --write \"src/clients/node-js/**/*.js\"",
|
|
22
|
+
"lint": "npm run lint:eslint",
|
|
23
|
+
"lint:eslint": "eslint --quiet \"src/clients/node-js/**/*.js\"",
|
|
24
|
+
"lint:eslint:fix": "npm run lint:eslint -- --fix",
|
|
25
|
+
"lint:eslint:loud": "eslint \"src/clients/node-js/**/*.js\"",
|
|
26
|
+
"prepare": "husky"
|
|
27
|
+
},
|
|
28
|
+
"keywords": [
|
|
29
|
+
"m365",
|
|
30
|
+
"copilot",
|
|
31
|
+
"evaluation",
|
|
32
|
+
"azure-ai",
|
|
33
|
+
"agent",
|
|
34
|
+
"python-wrapper"
|
|
35
|
+
],
|
|
36
|
+
"author": "Microsoft",
|
|
37
|
+
"license": "MICROSOFT SOFTWARE LICENSE TERMS",
|
|
38
|
+
"engines": {
|
|
39
|
+
"node": ">=22.21.1"
|
|
40
|
+
},
|
|
41
|
+
"dependencies": {
|
|
42
|
+
"commander": "^12.1.0",
|
|
43
|
+
"node-fetch": "^3.3.2",
|
|
44
|
+
"tar": "^7.4.3",
|
|
45
|
+
"https-proxy-agent": "^7.0.5"
|
|
46
|
+
},
|
|
47
|
+
"devDependencies": {
|
|
48
|
+
"@microsoft/eslint-config-msgraph": "^5.0.0",
|
|
49
|
+
"@tony.ganchev/eslint-plugin-header": "^3.1.2",
|
|
50
|
+
"@vitest/browser": "^3.0.0",
|
|
51
|
+
"@vitest/coverage-istanbul": "^3.0.0",
|
|
52
|
+
"@vitest/coverage-v8": "^3.0.0",
|
|
53
|
+
"@vitest/ui": "^3.0.0",
|
|
54
|
+
"eslint": "^9.7.0",
|
|
55
|
+
"eslint-config-prettier": "^10.0.0",
|
|
56
|
+
"eslint-plugin-jsdoc": "^50.1.0",
|
|
57
|
+
"eslint-plugin-prefer-arrow": "^1.2.3",
|
|
58
|
+
"eslint-plugin-prettier": "^5.1.3",
|
|
59
|
+
"eslint-plugin-simple-import-sort": "^12.0.0",
|
|
60
|
+
"husky": "^9.0.11",
|
|
61
|
+
"lerna": "^9.0.3",
|
|
62
|
+
"prettier": "^3.2.4",
|
|
63
|
+
"rimraf": "^6.0.0",
|
|
64
|
+
"sinon": "^21.0.1",
|
|
65
|
+
"vitest": "^3.0.0",
|
|
66
|
+
"webdriverio": "^9.0.1"
|
|
67
|
+
},
|
|
68
|
+
"files": [
|
|
69
|
+
"src/clients/node-js/bin/",
|
|
70
|
+
"src/clients/node-js/lib/",
|
|
71
|
+
"src/clients/node-js/config/",
|
|
72
|
+
"src/clients/cli/**/*.py",
|
|
73
|
+
"src/clients/cli/requirements.txt",
|
|
74
|
+
"src/clients/cli/samples/",
|
|
75
|
+
"README.md",
|
|
76
|
+
"TERMS.txt"
|
|
77
|
+
],
|
|
78
|
+
"repository": {
|
|
79
|
+
"type": "git",
|
|
80
|
+
"url": "https://github.com/microsoft/M365-Copilot-Agent-Evals.git"
|
|
81
|
+
}
|
|
82
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Authentication module for token acquisition
|