@microsoft/m365-copilot-eval 1.2.1-preview.1 → 1.4.0-preview.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -101
- package/package.json +7 -4
- package/schema/CHANGELOG.md +8 -0
- package/schema/v1/eval-document.schema.json +256 -8
- package/schema/v1/examples/invalid/multi-turn-empty-turns.json +8 -0
- package/schema/v1/examples/invalid/multi-turn-has-both-prompt-and-turns.json +13 -0
- package/schema/v1/examples/invalid/multi-turn-missing-prompt.json +12 -0
- package/schema/v1/examples/invalid/multi-turn-typo-in-turn.json +13 -0
- package/schema/v1/examples/invalid/multi-turn-unknown-evaluator.json +15 -0
- package/schema/v1/examples/valid/comprehensive.json +27 -2
- package/schema/v1/examples/valid/mixed-single-and-multi-turn.json +30 -0
- package/schema/v1/examples/valid/multi-turn-output.json +59 -0
- package/schema/v1/examples/valid/multi-turn-simple.json +21 -0
- package/schema/v1/examples/valid/multi-turn-with-evaluators.json +34 -0
- package/schema/version.json +2 -2
- package/src/clients/cli/api_clients/A2A/__init__.py +3 -0
- package/src/clients/cli/api_clients/A2A/a2a_client.py +456 -0
- package/src/clients/cli/api_clients/REST/__init__.py +3 -0
- package/src/clients/cli/api_clients/REST/sydney_client.py +204 -0
- package/src/clients/cli/api_clients/__init__.py +3 -0
- package/src/clients/cli/api_clients/base_agent_client.py +78 -0
- package/src/clients/cli/cli_logging/__init__.py +0 -0
- package/src/clients/cli/cli_logging/console_diagnostics.py +107 -0
- package/src/clients/cli/cli_logging/logging_utils.py +144 -0
- package/src/clients/cli/common.py +62 -0
- package/src/clients/cli/custom_evaluators/CitationsEvaluator.py +3 -3
- package/src/clients/cli/custom_evaluators/ExactMatchEvaluator.py +11 -11
- package/src/clients/cli/custom_evaluators/PartialMatchEvaluator.py +1 -11
- package/src/clients/cli/evaluator_resolver.py +150 -0
- package/src/clients/cli/generate_report.py +347 -184
- package/src/clients/cli/main.py +1288 -481
- package/src/clients/cli/parallel_executor.py +57 -0
- package/src/clients/cli/readme.md +14 -7
- package/src/clients/cli/requirements.txt +1 -1
- package/src/clients/cli/response_extractor.py +30 -14
- package/src/clients/cli/retry_policy.py +52 -0
- package/src/clients/cli/samples/multiturn_example.json +35 -0
- package/src/clients/cli/throttle_gate.py +82 -0
- package/src/clients/node-js/bin/runevals.js +134 -41
- package/src/clients/node-js/config/default.js +5 -1
- package/src/clients/node-js/lib/agent-id.js +12 -0
- package/src/clients/node-js/lib/env-loader.js +11 -16
- package/src/clients/node-js/lib/eula-manager.js +78 -0
- package/src/clients/node-js/lib/progress.js +13 -11
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import json
|
|
5
|
+
import locale
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
import urllib.error
|
|
9
|
+
import urllib.request
|
|
10
|
+
import uuid
|
|
11
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
12
|
+
|
|
13
|
+
from api_clients.base_agent_client import BaseAgentClient
|
|
14
|
+
from cli_logging.console_diagnostics import emit_structured_log
|
|
15
|
+
from cli_logging.logging_utils import Operation
|
|
16
|
+
|
|
17
|
+
# Feature flag required by the experimental A2A surface; sent as the value of
# the "X-variants" request header.
_A2A_FEATURE_FLAG = "feature.EnableA2AServer"

# Timeout, in seconds, passed to every urllib.request.urlopen call in this module.
_REQUEST_TIMEOUT_SECS = 120

# OAI citation marker patterns — compiled once at module level.
# Marker format: \ue200cite(\ue202turn{X}search{Y})+\ue201
# _CITATION_REF_PAT matches one turn/search reference and captures the search
# number; _CITATION_BLOCK_PAT matches a whole marker (one or more references
# between the \ue200 / \ue201 bookend characters).
_CITATION_REF_PAT = re.compile(r"\ue202turn\d+search(\d+)")
_CITATION_BLOCK_PAT = re.compile(r"\ue200cite(?:\ue202turn\d+search\d+)+\ue201")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class A2AClient(BaseAgentClient):
    """A2A (Agent-to-Agent) JSON-RPC 2.0 client for Work IQ agents.

    Talks to an A2A endpoint over plain ``urllib``: discovers agents via
    ``GET {endpoint}/.agents``, resolves an agent's URL from its agent card,
    and sends prompts with the JSON-RPC ``message/send`` method.
    """

    def __init__(
        self,
        *,
        a2a_endpoint: str,
        access_token: str,
        logger: Optional[logging.Logger] = None,
        diagnostic_records: Optional[List[Dict[str, Any]]] = None,
    ) -> None:
        """
        Args:
            a2a_endpoint: Base URL of the A2A endpoint. Trailing slashes are
                removed so paths can be appended with a single "/".
            access_token: Bearer token for A2A authentication.
            logger: Logger to use. Defaults to a module-level logger if not provided.
            diagnostic_records: List to accumulate structured log entries.
        """
        self._endpoint = a2a_endpoint.rstrip("/")
        self._access_token = access_token
        self._logger = logger or logging.getLogger(__name__)
        self._diagnostic_records = diagnostic_records
        # Filled in by resolve_agent(); when set, send_prompt() reuses it
        # instead of fetching the agent card again.
        self._resolved_agent_url: Optional[str] = None

    # ------------------------------------------------------------------ #
    # BaseAgentClient implementation                                       #
    # ------------------------------------------------------------------ #

    def resolve_agent(self, agent_id: str) -> None:
        """Pre-resolve agent URL from agent card and cache for the session."""
        self._resolved_agent_url = self._resolve_agent_url(agent_id)

    def fetch_available_agents(self) -> List[Dict[str, Any]]:
        """Fetch agents from the A2A discovery endpoint.

        Calls GET {endpoint}/.agents. The response may be either a JSON list
        of agent cards or a JSON object with an "agents" list. Each A2A agent
        card is normalized to include 'gptId', 'name', and 'description' so it
        is compatible with the shared select_agent_interactively selector.
        Entries that are not JSON objects are skipped.

        Returns an empty list if the endpoint is unreachable or returns an
        error. This method is best-effort: every failure is logged as a
        warning and never raised.
        """
        try:
            agents_url = f"{self._endpoint}/.agents"
            headers = self._build_request_headers()
            emit_structured_log(
                "debug",
                f"[A2A] Fetching available agents from: {agents_url}",
                Operation.FETCH_AGENTS,
                logger=self._logger,
                diagnostic_records=self._diagnostic_records,
            )
            req = urllib.request.Request(agents_url, headers=headers, method="GET")
            with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT_SECS) as resp:
                data = json.loads(resp.read().decode("utf-8"))

            if isinstance(data, list):
                agents = data
            elif isinstance(data, dict):
                # "agents": null is treated like a missing key.
                agents = data.get("agents") or []
            else:
                # Routed through the generic handler below so the unexpected
                # payload shape is logged rather than silently ignored.
                raise ValueError(
                    f"unexpected agents payload type: {type(data).__name__}"
                )
            return [
                self._normalize_agent_card(card)
                for card in agents
                if isinstance(card, dict)
            ]
        except urllib.error.HTTPError as e:
            emit_structured_log(
                "warning",
                f"[A2A] Unable to fetch agents list (HTTP {e.code}).",
                Operation.FETCH_AGENTS,
                logger=self._logger,
                diagnostic_records=self._diagnostic_records,
            )
            return []
        except Exception as e:
            # Deliberately broad: discovery must never abort the caller.
            emit_structured_log(
                "warning",
                f"[A2A] Error fetching agents: {e}",
                Operation.FETCH_AGENTS,
                logger=self._logger,
                diagnostic_records=self._diagnostic_records,
            )
            return []

    @staticmethod
    def _normalize_agent_card(agent: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize an A2A agent card to the shape expected by the selector.

        A2A agent cards use a 'url' field rather than a discrete ID. The
        agent ID is extracted as the last path segment of that URL, falling
        back to the agent name when the URL is absent.

        Keys that are present but null are treated the same as missing keys,
        so 'name' and 'description' in the result are never None.

        Returns:
            Dict with 'gptId', 'name', 'description' and 'isOwner' (always False).
        """
        agent_url = agent.get("url") or ""
        name = agent.get("name") or ""
        if agent_url:
            agent_id = str(agent_url).rstrip("/").rsplit("/", 1)[-1]
        else:
            agent_id = name
        return {
            "gptId": agent_id,
            "name": name or agent_id,
            "description": agent.get("description") or "",
            "isOwner": False,
        }

    def send_prompt(
        self,
        prompt: str,
        agent_id: str | None = None,
        conversation_context: Optional[Dict[str, Any]] = None,
    ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]]]:
        """Send a single prompt to the A2A endpoint and return the response with context.

        Args:
            prompt: The prompt string to send.
            agent_id: Target agent ID. Required for A2A — no fallback discovery.
            conversation_context: Context from a previous turn (contains context_id),
                or None for the first turn / single-turn usage.

        Returns:
            Tuple of (enhanced_response_dict, conversation_context). The
            context is ``{"context_id": ...}`` when a context id is known
            (from the response, else carried over from the input), otherwise None.

        Raises:
            ValueError: If agent_id is missing or blank.
            RuntimeError: Propagated from _send_and_parse_message on transport
                or protocol errors.
        """
        agent_id = (agent_id or "").strip()
        if not agent_id:
            raise ValueError("agent_id is required for A2A requests.")

        headers = self._build_request_headers(include_content_type=True)
        # Prefer the URL cached by resolve_agent(); otherwise resolve on demand.
        agent_url = self._resolved_agent_url or self._resolve_agent_url(agent_id)

        context_id = conversation_context.get("context_id") if conversation_context else None

        emit_structured_log(
            "debug",
            "[A2A] Sending prompt to agent.",
            Operation.SEND_PROMPT,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
        )

        payload = self._build_chat_payload(prompt, context_id)

        emit_structured_log(
            "debug",
            f"[A2A] Sending to {agent_url}: {payload.decode('utf-8')[:500]}",
            Operation.SEND_PROMPT,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
        )

        result_dict, raw_result = self._send_and_parse_message(agent_url, payload, headers)

        # Build updated context for subsequent turns
        new_context_id = context_id
        if raw_result:
            new_context_id = raw_result.get("contextId") or context_id
        updated_context = {"context_id": new_context_id} if new_context_id else None

        return result_dict, updated_context

    # ------------------------------------------------------------------ #
    # Private helpers                                                      #
    # ------------------------------------------------------------------ #

    def _build_request_headers(self, *, include_content_type: bool = False) -> Dict[str, str]:
        """Return the auth + feature-flag headers, optionally with a JSON Content-Type."""
        headers: Dict[str, str] = {
            "Authorization": f"Bearer {self._access_token}",
            "X-variants": _A2A_FEATURE_FLAG,
        }
        if include_content_type:
            headers["Content-Type"] = "application/json"
        return headers

    def _build_chat_payload(self, prompt: str, context_id: str | None = None) -> bytes:
        """Build the UTF-8 encoded JSON-RPC ``message/send`` request body.

        Args:
            prompt: Text placed in the single text part of the user message.
            context_id: When truthy, added to the message as "contextId".

        Returns:
            The serialized request; message id and request id are fresh UUID4s.
        """
        message: Dict[str, Any] = {
            "kind": "message",
            "role": "user",
            "parts": [{"kind": "text", "text": prompt}],
            "messageId": str(uuid.uuid4()),
            "metadata": {
                "location": self._get_a2a_location(),
            },
        }
        if context_id:
            message["contextId"] = context_id
        return json.dumps({
            "jsonrpc": "2.0",
            "method": "message/send",
            "params": {"message": message},
            "id": str(uuid.uuid4()),
        }).encode("utf-8")

    @staticmethod
    @functools.lru_cache(maxsize=1)
    def _get_a2a_location() -> Dict[str, Any]:
        """Return the location metadata attached to every outgoing message.

        The result is computed once per process (lru_cache) and the same dict
        object is returned on every call, so callers must not mutate it.
        """
        try:
            locale_str = locale.getlocale()[0] or ""
        except ValueError:
            # getlocale() raises ValueError for locale names it cannot parse;
            # location is advisory, so degrade to "unknown" instead of failing.
            locale_str = ""
        # The country is whatever follows the last "_" (e.g. "en_US" -> "US").
        # NOTE(review): some platforms report names like "English_United States",
        # which would yield a non-code value here — confirm what the service expects.
        country = locale_str.split("_")[-1] if "_" in locale_str else ""
        return {
            "countryOrRegion": country,
            "countryOrRegionConfidence": 1.0,
            "timeZone": BaseAgentClient._get_iana_timezone_name(),
        }

    def _resolve_agent_url(self, agent_id: str) -> str:
        """Resolve the agent URL from the agent card, falling back to base URL.

        Fetches ``{endpoint}/{agent_id}/.well-known/agent-card.json`` and uses
        its "url" field when it is a non-empty string. Any transport failure
        (HTTP error, connection error, timeout), undecodable body, invalid
        JSON, or a card that is not a JSON object falls back to
        ``{endpoint}/{agent_id}``.
        """
        headers = self._build_request_headers(include_content_type=True)
        base_agent_url = f"{self._endpoint}/{agent_id}"
        card_url = f"{base_agent_url}/.well-known/agent-card.json"
        agent_url = base_agent_url
        emit_structured_log(
            "debug",
            f"[A2A] Fetching agent card from: {card_url}",
            Operation.FETCH_AGENTS,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
        )
        try:
            card_req = urllib.request.Request(card_url, headers=headers)
            with urllib.request.urlopen(card_req, timeout=_REQUEST_TIMEOUT_SECS) as resp:
                raw_card = resp.read().decode("utf-8")
            if raw_card.strip():
                card = json.loads(raw_card)
                card_agent_url = card.get("url") if isinstance(card, dict) else None
                if isinstance(card_agent_url, str) and card_agent_url:
                    agent_url = card_agent_url
        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
            # OSError covers urllib's HTTPError/URLError as well as timeouts
            # raised while reading the body, which urlopen does not wrap.
            emit_structured_log(
                "debug",
                f"[A2A] Agent card fetch failed ({e}); using base URL: {base_agent_url}",
                Operation.FETCH_AGENTS,
                logger=self._logger,
                diagnostic_records=self._diagnostic_records,
            )
        emit_structured_log(
            "debug",
            f"[A2A] Resolved agent URL: {agent_url}",
            Operation.SEND_PROMPT,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
        )
        return agent_url

    @staticmethod
    def _extract_message_content(message: Dict[str, Any]) -> Tuple[str, List[Dict[str, Any]]]:
        """Return (newline-joined text parts, attribution dicts) of an A2A message object."""
        parts = message.get("parts") or []
        text = "\n".join(
            # "text": null must not reach str.join, which rejects None.
            part.get("text") or ""
            for part in parts
            if isinstance(part, dict) and part.get("kind") == "text"
        )
        metadata = message.get("metadata")
        raw_attributions = metadata.get("attributions") if isinstance(metadata, dict) else None
        if not isinstance(raw_attributions, list):
            raw_attributions = []
        attributions = [a for a in raw_attributions if isinstance(a, dict)]
        return text, attributions

    def _send_and_parse_message(
        self,
        agent_url: str,
        payload: bytes,
        headers: Dict[str, str],
    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """Send a JSON-RPC message to the agent and parse the response.

        Accepts a result of kind "message", or of kind "task" whose status
        state is "completed" (the text is then read from the status message).

        Returns:
            A tuple of (result_dict, raw_result) where result_dict is the
            normalized response dict (raw_response_text, display_response_text,
            a2a_attributions, metadata.conversation_id) and raw_result is the
            parsed JSON result object.

        Raises:
            RuntimeError: On HTTP errors, connection errors (including
                timeouts while reading the body), JSON parse errors,
                or A2A protocol errors.
        """
        req = urllib.request.Request(agent_url, data=payload, headers=headers, method="POST")
        try:
            with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT_SECS) as resp:
                raw = resp.read().decode("utf-8", errors="replace")
        except urllib.error.HTTPError as e:
            body = ""
            try:
                body = e.read().decode("utf-8", errors="replace")
            except Exception:
                # Best-effort only: the error body is optional diagnostic detail.
                pass
            raise RuntimeError(
                f"A2A request failed (HTTP {e.code} {e.reason})."
                + (f" Body: {body[:500]}" if body else "")
            ) from e
        except urllib.error.URLError as e:
            raise RuntimeError(
                f"A2A connection error: {getattr(e, 'reason', str(e))}"
            ) from e
        except OSError as e:
            # Timeouts / resets during resp.read() are not wrapped in URLError.
            raise RuntimeError(f"A2A connection error: {e}") from e

        emit_structured_log(
            "debug",
            f"[A2A] Raw response: {raw[:500]}",
            Operation.SEND_PROMPT,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
        )

        try:
            data = json.loads(raw)
        except json.JSONDecodeError as e:
            raise RuntimeError(f"A2A response is not valid JSON: {e}") from e

        if not isinstance(data, dict):
            raise RuntimeError(
                f"A2A response is not a JSON object (got {type(data).__name__})."
            )

        # An explicit "error": null is treated as "no error".
        err = data.get("error")
        if err is not None:
            if isinstance(err, dict):
                code = err.get("code", "unknown")
                message = err.get("message", "no message")
            else:
                code, message = "unknown", err
            raise RuntimeError(f"A2A JSON-RPC error {code}: {message}")

        if "result" not in data:
            raise RuntimeError(
                f"A2A response missing 'result' key. Keys present: {list(data.keys())}"
            )

        result = data["result"]
        if not isinstance(result, dict):
            raise RuntimeError(
                f"A2A result is not a JSON object (got {type(result).__name__})."
            )

        kind = result.get("kind")
        if kind == "message":
            text, attributions = self._extract_message_content(result)
        elif kind == "task":
            status = result.get("status")
            if not isinstance(status, dict):
                status = {}
            state = status.get("state")
            if state == "completed":
                status_message = status.get("message")
                if not isinstance(status_message, dict):
                    status_message = {}
                text, attributions = self._extract_message_content(status_message)
            elif state in ("failed", "canceled"):
                raise RuntimeError(
                    f"A2A task {state}. Task id: {result.get('id')}"
                )
            else:
                raise RuntimeError(
                    f"A2A task in unexpected state: {state!r}. Task id: {result.get('id')}"
                )
        else:
            raise RuntimeError(f"Unexpected A2A result kind: {kind!r}")

        if attributions:
            # The message is built eagerly, so every label must be a str even
            # when the field is present but null.
            labels = ", ".join(
                str(a.get("providerDisplayName") or a.get("attributionType") or "")
                for a in attributions
            )
            emit_structured_log(
                "debug",
                f"[A2A] Attributions ({len(attributions)}): " + labels,
                Operation.SEND_PROMPT,
                logger=self._logger,
                diagnostic_records=self._diagnostic_records,
            )

        display_text = self._replace_citation_markers(
            text, attributions, self._logger, self._diagnostic_records
        )

        result_dict = {
            "raw_response_text": text,
            "display_response_text": display_text,
            "a2a_attributions": attributions,
            "metadata": {
                "conversation_id": result.get("contextId"),
            },
        }
        return result_dict, result

    @staticmethod
    def _replace_citation_markers(
        text: str,
        attributions: List[Dict[str, Any]],
        logger: Optional[logging.Logger] = None,
        diagnostic_records: Optional[List[Dict[str, Any]]] = None,
    ) -> str:
        """Replace OAI Unicode citation markers with markdown links.

        Marker format: \\ue200cite(\\ue202turn{X}search{Y})+\\ue201
        Compound markers (multiple turn/search refs between a single pair of
        bookend characters) are also handled; their links are joined with a
        single space. A reference that cannot be turned into a link (no
        matching attribution, or an attribution without 'seeMoreWebUrl') is
        dropped with a warning.

        The search{Y} number is NOT a direct array index — it's a grounding result
        number. Mapping: unique search numbers in first-appearance order →
        citation_attrs[0, 1, ...].

        Args:
            text: Response text that may contain OAI citation markers.
            attributions: Attribution objects from A2A response metadata. Only
                those whose 'attributionType' is "citation" are used.
            logger: Logger for debug messages. Defaults to module logger if not provided.
            diagnostic_records: List to accumulate structured log entries.

        Returns:
            Text with citation markers replaced by markdown links, or the
            original text unchanged if it is empty or there are no citation
            attributions.
        """
        _logger = logger or logging.getLogger(__name__)
        citation_attrs = [a for a in attributions if a.get("attributionType") == "citation"]
        if not text:
            if text == "" and attributions:
                emit_structured_log(
                    "warning",
                    "[A2A] Response text is empty; skipping citation replacement.",
                    Operation.SEND_PROMPT,
                    logger=_logger,
                    diagnostic_records=diagnostic_records,
                )
            return text
        if not citation_attrs:
            return text

        # Build ordered map: search-number-string → 0-based index into citation_attrs
        seen: Dict[str, int] = {}
        for m in _CITATION_REF_PAT.finditer(text):
            k = m.group(1)
            if k not in seen:
                seen[k] = len(seen)

        def replace_citation(m: re.Match) -> str:
            links = []
            for idx_str in _CITATION_REF_PAT.findall(m.group(0)):
                pos = seen.get(idx_str)
                if pos is not None and pos < len(citation_attrs):
                    attr = citation_attrs[pos]
                    url = attr.get("seeMoreWebUrl") or ""
                    label = attr.get("providerDisplayName") or url or idx_str
                    if url:
                        links.append(f"[{label}]({url})")
                    else:
                        emit_structured_log(
                            "warning",
                            f"[A2A] Citation search#{idx_str} has no URL; skipping link.",
                            Operation.SEND_PROMPT,
                            logger=_logger,
                            diagnostic_records=diagnostic_records,
                        )
                else:
                    emit_structured_log(
                        "warning",
                        f"[A2A] Citation search#{idx_str} has no matching attribution; skipping link.",
                        Operation.SEND_PROMPT,
                        logger=_logger,
                        diagnostic_records=diagnostic_records,
                    )
            return " ".join(links)

        return _CITATION_BLOCK_PAT.sub(replace_citation, text)
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import urllib.error
|
|
7
|
+
import urllib.parse
|
|
8
|
+
import urllib.request
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
from api_clients.base_agent_client import BaseAgentClient
|
|
13
|
+
from cli_logging.console_diagnostics import emit_structured_log
|
|
14
|
+
from cli_logging.logging_utils import Operation
|
|
15
|
+
from response_extractor import extract_enhanced_response
|
|
16
|
+
|
|
17
|
+
# Timeout, in seconds, passed to every urllib.request.urlopen call in this module.
_REQUEST_TIMEOUT_SECS = 120

# int → str used when passing log level to extract_enhanced_response.
# Only the four standard levels below are listed; SydneyClient falls back to
# "info" for any effective level that is not a key here.
_LEVEL_INT_TO_STR: Dict[int, str] = {
    logging.DEBUG: "debug",
    logging.INFO: "info",
    logging.WARNING: "warning",
    logging.ERROR: "error",
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SydneyClient(BaseAgentClient):
    """REST client for the Microsoft Copilot Sydney chat API.

    Uses plain ``urllib`` to list agents (``GET {endpoint}/GetGptList``) and
    to send prompts (``POST {endpoint}/chat``).
    """

    def __init__(
        self,
        *,
        copilot_api_endpoint: str,
        access_token: str,
        user_oid: str,
        logger: Optional[logging.Logger] = None,
        diagnostic_records: Optional[List[Dict[str, Any]]] = None,
    ) -> None:
        """
        Args:
            copilot_api_endpoint: Base URL for the Copilot API. Trailing
                slashes are removed so paths can be appended with a single "/".
            access_token: Bearer token for API authentication.
            user_oid: User object ID included in request payloads.
            logger: Logger to use for all client logging. Defaults to a module-level
                logger if not provided.
            diagnostic_records: List to accumulate structured log entries.
        """
        # Normalized so f"{endpoint}/chat" never produces a "//" path.
        self._endpoint = copilot_api_endpoint.rstrip("/")
        self._access_token = access_token
        self._user_oid = user_oid
        self._logger = logger or logging.getLogger(__name__)
        self._diagnostic_records = diagnostic_records
        # Captured once at construction; later changes to the logger's level
        # are not reflected in the value passed to extract_enhanced_response.
        self._log_level = self._level_name_for(self._logger.getEffectiveLevel())

    @staticmethod
    def _level_name_for(level: int) -> str:
        """Map a numeric logging level to the name passed to extract_enhanced_response.

        Exact matches in _LEVEL_INT_TO_STR are returned as-is. Any other level
        (CRITICAL, custom levels) snaps down to the nearest listed level at or
        below it, so a CRITICAL-only logger maps to "error" rather than "info".
        Levels below every listed level keep the historical "info" default.
        """
        exact = _LEVEL_INT_TO_STR.get(level)
        if exact is not None:
            return exact
        lower_or_equal = [known for known in _LEVEL_INT_TO_STR if known <= level]
        if lower_or_equal:
            return _LEVEL_INT_TO_STR[max(lower_or_equal)]
        return "info"

    # ------------------------------------------------------------------ #
    # BaseAgentClient implementation                                       #
    # ------------------------------------------------------------------ #

    def fetch_available_agents(self) -> List[Dict[str, Any]]:
        """Fetch agents available to the user from the Copilot API.

        Calls GET {endpoint}/GetGptList with the user's object ID in a
        URL-quoted JSON "request" query parameter and returns the response's
        "gptList" value.

        Returns an empty list if the endpoint is unavailable or returns an error.
        This method is best-effort: every failure is logged as a warning and
        never raised.
        """
        try:
            request_data = json.dumps({"participant": {"id": self._user_oid}})
            query_param = urllib.parse.quote(request_data)
            agents_url = f"{self._endpoint}/GetGptList?request={query_param}"
            emit_structured_log(
                "debug",
                f"[REST] Fetching available agents from: {agents_url}",
                Operation.FETCH_AGENTS,
                logger=self._logger,
                diagnostic_records=self._diagnostic_records,
            )
            req = urllib.request.Request(
                agents_url,
                headers=self._build_request_headers(),
                method="GET",
            )
            with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT_SECS) as resp:
                data = json.loads(resp.read().decode("utf-8"))
            # "gptList": null is treated like a missing key so callers always
            # receive a list.
            return data.get("gptList") or []
        except urllib.error.HTTPError as e:
            emit_structured_log(
                "warning",
                f"[REST] Unable to fetch agents list (HTTP {e.code}).",
                Operation.FETCH_AGENTS,
                logger=self._logger,
                diagnostic_records=self._diagnostic_records,
            )
            return []
        except Exception as e:
            # Deliberately broad: agent listing must never abort the caller.
            emit_structured_log(
                "warning",
                f"[REST] Error fetching agents: {e}",
                Operation.FETCH_AGENTS,
                logger=self._logger,
                diagnostic_records=self._diagnostic_records,
            )
            return []

    def send_prompt(
        self,
        prompt: str,
        agent_id: str | None = None,
        conversation_context: Optional[Dict[str, Any]] = None,
    ) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]]]:
        """Send a prompt to the Sydney /chat endpoint and return the response with context.

        Args:
            prompt: Prompt string to send to the agent.
            agent_id: Optional agent ID to target a specific Copilot agent.
            conversation_context: Context from a previous turn (contains conversation_id),
                or None for the first turn / single-turn usage.

        Returns:
            Tuple of (enhanced_response_dict, conversation_context). The
            context is ``{"conversation_id": ...}`` when a conversation id is
            known (from the response metadata, else carried over from the
            input), otherwise None.

        Raises:
            RuntimeError: On HTTP errors and connection errors, including
                timeouts while reading the response body.
        """
        request_headers = self._build_request_headers()
        conversation_id = conversation_context.get("conversation_id") if conversation_context else None

        emit_structured_log(
            "debug",
            "[REST] Sending prompt to agent.",
            Operation.SEND_PROMPT,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
        )

        payload = self._build_chat_payload(prompt, agent_id, conversation_id)
        emit_structured_log(
            "debug",
            f"[REST] Sending payload: {payload.decode('utf-8')[:500]}",
            Operation.SEND_PROMPT,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
        )

        req = urllib.request.Request(
            f"{self._endpoint}/chat",
            data=payload,
            headers=request_headers,
            method="POST",
        )
        try:
            with urllib.request.urlopen(req, timeout=_REQUEST_TIMEOUT_SECS) as resp:
                raw = resp.read().decode("utf-8", errors="replace")
        except urllib.error.HTTPError as e:
            error_body = None
            try:
                error_body = e.read().decode("utf-8", errors="replace")
            except Exception:
                # Best-effort only: the error body is optional diagnostic detail.
                pass
            msg = f"[REST] Chat API request failed (HTTP {e.code} {e.reason})."
            if error_body:
                msg += f" Body: {error_body[:500]}"
            raise RuntimeError(msg) from e
        except urllib.error.URLError as e:
            raise RuntimeError(
                f"[REST] Chat API connection error: {getattr(e, 'reason', str(e))}"
            ) from e
        except OSError as e:
            # Timeouts / resets during resp.read() are not wrapped in URLError.
            raise RuntimeError(f"[REST] Chat API connection error: {e}") from e

        emit_structured_log(
            "debug",
            f"[REST] Raw response: {raw[:500]}",
            Operation.SEND_PROMPT,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
        )

        enhanced_response = extract_enhanced_response(raw.strip(), self._log_level)

        # "metadata": None is treated like a missing key.
        metadata = enhanced_response.get("metadata") or {}
        emit_structured_log(
            "debug",
            "Response IDs for prompt.",
            Operation.SEND_PROMPT,
            logger=self._logger,
            diagnostic_records=self._diagnostic_records,
            run_context={
                "operation": Operation.SEND_PROMPT,
                "request-id": metadata.get("request_id"),
                "conversation-id": metadata.get("conversation_id"),
                "message-id": metadata.get("message_id"),
            },
        )

        # Build updated context for subsequent turns
        new_conversation_id = metadata.get("conversation_id") or conversation_id
        updated_context = {"conversation_id": new_conversation_id} if new_conversation_id else None

        return enhanced_response, updated_context

    # ------------------------------------------------------------------ #
    # Private helpers                                                      #
    # ------------------------------------------------------------------ #

    def _build_request_headers(self) -> Dict[str, str]:
        """Return request headers; "X-Scenario" is included only when the
        X_SCENARIO_HEADER environment variable is set."""
        headers = {
            "Content-Type": "application/json",
            "X-Scenario": os.environ.get("X_SCENARIO_HEADER"),
            "Authorization": f"Bearer {self._access_token}",
        }
        # Drop unset values: urllib requires header values to be strings.
        return {k: v for k, v in headers.items() if v is not None}

    def _build_chat_payload(
        self,
        prompt: str,
        agent_id: str | None,
        conversation_id: str | None = None,
    ) -> bytes:
        """Build the UTF-8 encoded JSON body for the /chat request.

        Args:
            prompt: Message text.
            agent_id: When non-blank after stripping whitespace, targets that
                agent via "gpts" and sets the "disable_action_confirmation"
                option set. None, empty or whitespace-only means no agent.
            conversation_id: When truthy, added as "conversationId" to continue
                an existing conversation.

        Returns:
            The serialized request body.
        """
        message: Dict[str, Any] = {
            "message": {
                "text": prompt,
                "author": "user",
                "messageType": "chat",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "locationInfo": self._get_location_info(),
                "from": {
                    "id": self._user_oid,
                },
            },
            "verbosity": "verbose",
        }

        # Strip before testing so a whitespace-only id is not sent as "".
        target_agent_id = (agent_id or "").strip()
        if target_agent_id:
            message["gpts"] = [{"id": target_agent_id, "source": "MOS3"}]
            message["optionsSets"] = ["disable_action_confirmation"]

        if conversation_id:
            message["conversationId"] = conversation_id

        return json.dumps(message).encode("utf-8")
|