devops-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
devops_mcp/client.py ADDED
@@ -0,0 +1,893 @@
1
+ """Azure DevOps HTTP client with Microsoft Entra ID authentication and lifecycle management."""
2
+
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ import os
7
+ import shutil
8
+ import time
9
+ from collections.abc import AsyncIterator
10
+ from contextlib import asynccontextmanager
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from urllib.parse import quote
14
+
15
+ import httpx
16
+ from azure.core.exceptions import ClientAuthenticationError
17
+ from azure.identity import (
18
+ AuthenticationRecord,
19
+ AzureCliCredential,
20
+ ClientSecretCredential,
21
+ DefaultAzureCredential,
22
+ InteractiveBrowserCredential,
23
+ ManagedIdentityCredential,
24
+ TokenCachePersistenceOptions,
25
+ )
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ API_VERSION = "7.1"
30
+
31
+ _AZDO_SCOPE = "499b84ac-1321-427f-aa17-267ca6975798/.default"
32
+ _TOKEN_REFRESH_BUFFER_SECONDS = 300
33
+
34
+ _AZ_CLI_CANDIDATE_PATHS = [
35
+ r"C:\Program Files\Microsoft SDKs\Azure\CLI2\wbin",
36
+ r"C:\Program Files (x86)\Microsoft SDKs\Azure\CLI2\wbin",
37
+ ]
38
+
39
+
40
+ _DEFAULT_AUTH_TIMEOUT_SECONDS = 30.0
41
+
42
+
43
@dataclass
class AppContext:
    """Shared server state: default scope, credential, HTTP client and token cache."""

    # Default organization / project; ``None`` means each call must supply one.
    organization: str | None
    project: str | None
    # azure-identity credential whose ``get_token`` yields bearer tokens.
    credential: (
        AzureCliCredential
        | InteractiveBrowserCredential
        | ClientSecretCredential
        | ManagedIdentityCredential
        | DefaultAzureCredential
    )
    # Shared async HTTP client; defaults to None until one is supplied.
    http_client: httpx.AsyncClient = None  # type: ignore[assignment]
    # scope -> (token string, expiry as epoch seconds)
    _token_cache: dict[str, tuple[str, float]] = field(default_factory=dict)
    # scope -> lock serializing token acquisition for that scope
    _token_locks: dict[str, asyncio.Lock] = field(default_factory=dict)
59
+
60
+
61
def _ensure_az_cli_on_path() -> None:
    """Make ``az`` discoverable by prepending known Windows install dirs to PATH.

    Does nothing when ``az`` already resolves. On non-Windows platforms it only
    logs a warning. On Windows it prepends every existing candidate directory
    that is not already listed in PATH, or warns when none qualifies.
    """
    if shutil.which("az"):
        return
    if os.name != "nt":
        logger.warning("Azure CLI not found on PATH.")
        return

    def _canonical(entry: str) -> str:
        # Normalize case and separators so equivalent spellings compare equal.
        return os.path.normcase(os.path.normpath(entry))

    current_path = os.environ.get("PATH", "")
    already_listed = {
        _canonical(entry) for entry in current_path.split(os.pathsep) if entry
    }

    additions = []
    for candidate in _AZ_CLI_CANDIDATE_PATHS:
        if os.path.isdir(candidate) and _canonical(candidate) not in already_listed:
            additions.append(candidate)

    if not additions:
        logger.warning("Azure CLI not found. Ensure az is installed and on PATH.")
        return

    # New directories go first so they take precedence over existing entries.
    os.environ["PATH"] = os.pathsep.join([*additions, current_path])
    logger.info("Added Azure CLI path(s) to PATH: %s", additions)
76
+
77
+
78
def _get_cached_bearer_token(app_ctx: AppContext) -> str | None:
    """Return the cached Azure DevOps token, or None if absent or near expiry.

    A token counts as usable only while the current time is more than
    ``_TOKEN_REFRESH_BUFFER_SECONDS`` before its recorded expiry.
    """
    entry = app_ctx._token_cache.get(_AZDO_SCOPE)
    if not entry:
        return None
    bearer, expiry = entry
    still_fresh = time.time() < expiry - _TOKEN_REFRESH_BUFFER_SECONDS
    return bearer if still_fresh else None
85
+
86
+
87
def get_bearer_token(app_ctx: AppContext) -> str:
    """Acquire a fresh Azure DevOps token and record it in the context cache.

    This is a blocking call on ``app_ctx.credential.get_token``. In this module
    ``build_headers`` invokes it via ``asyncio.to_thread`` while holding the
    per-scope ``asyncio.Lock``, so cold-cache acquisitions for a scope are
    serialized. The cache update is a single dict item assignment.

    Returns:
        The newly acquired token string.
    """
    fresh = app_ctx.credential.get_token(_AZDO_SCOPE)
    bearer = fresh.token
    # Store expiry as a float so it compares directly against time.time().
    app_ctx._token_cache[_AZDO_SCOPE] = (bearer, float(fresh.expires_on))
    return bearer
98
+
99
+
100
async def build_headers(
    app_ctx: AppContext,
    *,
    include_content_type: bool = False,
    extra: dict[str, str] | None = None,
) -> dict[str, str]:
    """Return Azure DevOps request headers carrying a (cached) Bearer token.

    On a cache miss the token is acquired in a worker thread so the event loop
    stays responsive. A per-scope lock plus a second cache check means that
    callers arriving together on a cold cache trigger one acquisition.

    Args:
        app_ctx: shared context holding the credential, cache and locks.
        include_content_type: add ``Content-Type: application/json`` when True.
        extra: additional headers merged last (they may override the defaults).

    Raises:
        ClientAuthenticationError: when acquisition exceeds the auth timeout.
    """
    token = _get_cached_bearer_token(app_ctx)
    if token is None:
        # setdefault keeps the first lock ever stored for the scope.
        scope_lock = app_ctx._token_locks.setdefault(_AZDO_SCOPE, asyncio.Lock())
        async with scope_lock:
            # Someone ahead of us in the queue may already have filled the cache.
            token = _get_cached_bearer_token(app_ctx)
            if token is None:
                auth_timeout = _get_auth_timeout_seconds()
                # The worker thread itself is not stopped by the timeout; the
                # timeout only frees this coroutine (and the lock) so queued
                # callers are not stuck behind a hung credential call.
                acquisition = asyncio.to_thread(get_bearer_token, app_ctx)
                try:
                    token = await asyncio.wait_for(acquisition, timeout=auth_timeout)
                except asyncio.TimeoutError as exc:
                    raise ClientAuthenticationError(
                        message=(
                            f"Credential acquisition timed out after {auth_timeout:.0f}s. "
                            "Check your Azure CLI session (az login), the AZDO_AUTH_TYPE "
                            "setting, or increase AZDO_AUTH_TIMEOUT_SECONDS."
                        )
                    ) from exc

    headers: dict[str, str] = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }
    if include_content_type:
        headers["Content-Type"] = "application/json"
    if extra:
        headers.update(extra)
    return headers
150
+
151
+
152
def extract_error_message(response: httpx.Response) -> str:
    """Return a human-readable error string for a failed Azure DevOps response.

    Resolution order:
    1. JSON object with a ``message`` key: ``"<typeKey>: <message>"``, or just
       the message when ``typeKey`` is missing or empty.
    2. Any other JSON body (object without ``message``, list, string, number):
       the body re-serialized as JSON.
    3. Body that is not JSON: the first 500 characters of the text, or
       ``"HTTP <status>"`` when the body is empty.

    This function never raises, so it is safe to call inside error handling.
    """
    try:
        body = response.json()
    except Exception:
        # Deliberately broad: error reporting must not fail on a bad body.
        return response.text[:500] if response.text else f"HTTP {response.status_code}"

    # Only a JSON object can carry the message/typeKey envelope; checking the
    # type avoids calling dict methods on lists or strings.
    if isinstance(body, dict) and "message" in body:
        message = body["message"]
        if not isinstance(message, str):
            # Keep the declared ``str`` return type for null/number/object values.
            message = json.dumps(message)
        type_key = body.get("typeKey", "")
        return f"{type_key}: {message}" if type_key else message
    return json.dumps(body)
162
+
163
+
164
def resolve_org(app_ctx: AppContext, organization: str | None) -> str:
    """Resolve the effective organization name.

    The per-call ``organization`` wins over ``app_ctx.organization``. Each
    candidate is stripped *before* it is tested, so a blank or whitespace-only
    value counts as "not provided" and falls through to the next candidate
    instead of being returned as an empty string.

    Raises:
        ValueError: when neither source yields a non-blank name.
    """
    for candidate in (organization, app_ctx.organization):
        cleaned = (candidate or "").strip()
        if cleaned:
            return cleaned
    raise ValueError(
        "No Azure DevOps organization provided. Supply 'organization' on the tool "
        "input, or set AZDO_ORGANIZATION as a default."
    )
173
+
174
+
175
def resolve_project(app_ctx: AppContext, project: str | None) -> str:
    """Resolve the effective project name.

    The per-call ``project`` wins over ``app_ctx.project``. Each candidate is
    stripped *before* it is tested, so a blank or whitespace-only value counts
    as "not provided" and falls through to the next candidate instead of being
    returned as an empty string.

    Raises:
        ValueError: when neither source yields a non-blank name.
    """
    for candidate in (project, app_ctx.project):
        cleaned = (candidate or "").strip()
        if cleaned:
            return cleaned
    raise ValueError(
        "No Azure DevOps project provided. Supply 'project' on the tool "
        "input, or set AZDO_PROJECT as a default."
    )
184
+
185
+
186
def build_url(organization: str, project: str, path: str) -> str:
    """Assemble a percent-encoded ``dev.azure.com`` REST endpoint URL.

    ``organization`` and ``project`` each occupy exactly one URL segment, so
    they are quoted with no safe characters: spaces and even ``/`` inside a
    name are escaped.

    ``path`` is a route made of several segments (for example
    ``git/repositories/{id}/pullrequests/{n}``); its ``/`` separators are kept
    intact while everything else that needs escaping is escaped.
    """
    segments = [
        quote(organization, safe=""),
        quote(project, safe=""),
        "_apis",
        quote(path, safe="/"),
    ]
    return "https://dev.azure.com/" + "/".join(segments)
203
+
204
+
205
def build_params(**kwargs) -> dict:
    """Return query parameters: ``api-version`` plus every non-None keyword.

    Keywords whose value is ``None`` are dropped. A keyword literally named
    ``api-version`` overrides the default version.
    """
    supplied = {name: value for name, value in kwargs.items() if value is not None}
    return {"api-version": API_VERSION, **supplied}
210
+
211
+
212
+ # ---------------------------------------------------------------------------
213
+ # Resilience helpers
214
+ # ---------------------------------------------------------------------------
215
+
216
+ # Status codes that are safe to retry under the right conditions.
217
+ _RETRYABLE_STATUS_CODES = (429, 502, 503, 504)
218
+
219
+ # HTTP methods where a retried request cannot cause a double-write.
220
+ # POST and PATCH are intentionally excluded: a 5xx may mean the write
221
+ # already committed on the server side, so we must NOT re-issue those.
222
+ # 429 (throttling) is the exception — a throttled POST was never executed,
223
+ # so retrying a 429 is safe for all methods (handled separately below).
224
+ _IDEMPOTENT_METHODS = {"GET", "PUT", "DELETE"}
225
+
226
+ _RETRY_MAX_WAIT_SECONDS = 30
227
+
228
+
229
+ def _parse_retry_after(value: str | None) -> float | None:
230
+ """Parse a ``Retry-After`` header value into a bounded wait duration.
231
+
232
+ Returns the header value as a float capped at ``_RETRY_MAX_WAIT_SECONDS``,
233
+ or ``None`` when the header is absent or not a valid number. Only positive
234
+ values are returned; zero and negative values are treated as absent.
235
+ """
236
+ if value is None:
237
+ return None
238
+ try:
239
+ seconds = float(value)
240
+ except ValueError:
241
+ return None
242
+ if seconds <= 0:
243
+ return None
244
+ return min(seconds, float(_RETRY_MAX_WAIT_SECONDS))
245
+
246
+
247
async def request_with_retry(
    http_client: httpx.AsyncClient,
    method: str,
    url: str,
    *,
    headers: dict | None = None,
    params=None,
    json: dict | None = None,
    content: bytes | None = None,
    max_attempts: int = 3,
    **kwargs,
) -> httpx.Response:
    """Issue an HTTP request, retrying transient failures without double-writes.

    Retry rules:
    - 429 (throttling): retried for ALL methods, since a throttled request was
      not executed. Waits for ``Retry-After`` (capped at
      ``_RETRY_MAX_WAIT_SECONDS``), else exponential back-off (2^attempt, capped).
    - 502/503/504: retried ONLY for idempotent methods (GET, PUT, DELETE). For
      POST/PATCH the response is returned at once, because the write may have
      committed before the error surfaced.
    - Network errors: connection-phase failures (``ConnectError``,
      ``ConnectTimeout``, ``PoolTimeout``) mean the request was never sent, so
      they are retried for all methods. Other timeouts (read/write) are retried
      only for idempotent methods; for POST/PATCH they are re-raised
      immediately for the same may-have-committed reason.
    - Any other status is returned immediately. If it carries a positive
      ``Retry-After`` header, this function sleeps for that (capped) duration
      first to honour proactive throttling.
    - After ``max_attempts`` the last response is returned, or the last network
      error is re-raised, so the caller's own error handling takes over.

    Args:
        http_client: client used to send the request.
        method: HTTP method; compared case-insensitively.
        url: request URL.
        headers, params, json, content, **kwargs: forwarded to
            ``http_client.request`` unchanged. (``json`` shadows the module of
            the same name inside this function only.)
        max_attempts: total number of tries, including the first; must be >= 1.

    Raises:
        ValueError: if ``max_attempts`` is less than 1.
        httpx.TimeoutException, httpx.ConnectError: when a network failure is
            not retryable or the attempts are exhausted.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")

    method_upper = method.upper()
    is_idempotent = method_upper in _IDEMPOTENT_METHODS

    for attempt in range(max_attempts):
        is_last_attempt = attempt == max_attempts - 1
        backoff = min(2 ** attempt, _RETRY_MAX_WAIT_SECONDS)

        try:
            response = await http_client.request(
                method_upper,
                url,
                headers=headers,
                params=params,
                json=json,
                content=content,
                **kwargs,
            )
        except (httpx.TimeoutException, httpx.ConnectError) as exc:
            if is_last_attempt:
                raise
            # These failures happen before any request bytes are written, so
            # re-issuing even a POST/PATCH cannot duplicate a write.
            never_sent = isinstance(
                exc, (httpx.ConnectError, httpx.ConnectTimeout, httpx.PoolTimeout)
            )
            if not is_idempotent and not never_sent:
                logger.warning(
                    "Network error on non-idempotent %s %s: %s — "
                    "not retrying (request may have reached the server)",
                    method_upper,
                    url,
                    type(exc).__name__,
                )
                raise
            logger.warning(
                "Network error on %s %s (attempt %d/%d): %s — retrying in %.1fs",
                method_upper,
                url,
                attempt + 1,
                max_attempts,
                type(exc).__name__,
                backoff,
            )
            await asyncio.sleep(backoff)
            continue

        if response.status_code not in _RETRYABLE_STATUS_CODES:
            # Proactive throttle signals on non-retryable (including 2xx)
            # responses: the server may answer successfully while asking the
            # client to slow down via Retry-After and/or
            # X-RateLimit-Remaining=0, before escalating to HTTP 429.
            #
            # 1. Retry-After present: sleep for the bounded value, then return.
            # 2. Only X-RateLimit-Remaining=0: warn, but do not sleep.
            # When both are present a single warning covers both.
            rate_remaining = response.headers.get("x-ratelimit-remaining")
            proactive_wait = _parse_retry_after(response.headers.get("retry-after"))

            if proactive_wait is not None:
                logger.warning(
                    "Proactive throttle on HTTP %d %s %s — Retry-After: %.1fs "
                    "(X-RateLimit-Remaining: %s); sleeping before returning",
                    response.status_code,
                    method_upper,
                    url,
                    proactive_wait,
                    rate_remaining if rate_remaining is not None else "n/a",
                )
                await asyncio.sleep(proactive_wait)
            elif rate_remaining == "0":
                logger.warning(
                    "X-RateLimit-Remaining=0 on HTTP %d %s %s — "
                    "approaching rate limit; no Retry-After header present",
                    response.status_code,
                    method_upper,
                    url,
                )

            return response

        is_throttle = response.status_code == 429

        # 502/503/504 on a non-idempotent method: the write may have
        # committed, so never re-issue a POST or PATCH here.
        if not is_throttle and not is_idempotent:
            logger.warning(
                "HTTP %d on non-idempotent %s %s — not retrying (write may have committed)",
                response.status_code,
                method_upper,
                url,
            )
            return response

        if is_last_attempt:
            # Attempts exhausted — hand the response to the caller.
            return response

        wait = _parse_retry_after(response.headers.get("retry-after"))
        if wait is None:
            wait = backoff
        logger.warning(
            "HTTP %d on %s %s (attempt %d/%d) — retrying in %.1fs",
            response.status_code,
            method_upper,
            url,
            attempt + 1,
            max_attempts,
            wait,
        )
        await asyncio.sleep(wait)

    # Every iteration returns, raises, or continues to a later attempt, and the
    # final attempt always returns or raises, so this line cannot be reached.
    raise RuntimeError("request_with_retry exited its retry loop without a result")
390
+
391
+
392
+ _FINALIZE_WARN_BYTES = 1_000_000
393
+ _FINALIZE_CAP_BYTES = 5_000_000
394
+
395
+
396
+ def finalize_response(
397
+ payload: dict,
398
+ *,
399
+ warn_bytes: int = _FINALIZE_WARN_BYTES,
400
+ cap_bytes: int = _FINALIZE_CAP_BYTES,
401
+ ) -> str:
402
+ """Serialize *payload* to a JSON string, enforcing a size cap.
403
+
404
+ - Under warn_bytes: returned as-is.
405
+ - Between warn_bytes and cap_bytes: logged to stderr, returned as-is.
406
+ - Over cap_bytes: returns a JSON error object instead of the payload
407
+ so that the MCP transport is never flooded with multi-MB content.
408
+
409
+ Note: devops_get_run_log_content uses start_line/end_line slicing at the
410
+ API level to bound log content — it does not rely on this cap to limit
411
+ output, and the cap here serves only as a last-resort safeguard.
412
+ """
413
+ encoded = json.dumps(payload)
414
+ size = len(encoded.encode("utf-8"))
415
+ if size > cap_bytes:
416
+ logger.warning(
417
+ "Response payload %d bytes exceeds cap (%d bytes); returning error stub",
418
+ size,
419
+ cap_bytes,
420
+ )
421
+ return json.dumps({
422
+ "error": True,
423
+ "message": (
424
+ f"Response exceeded {cap_bytes:,} bytes. "
425
+ "Narrow your query, use paging (top / continuation_token), "
426
+ "or use start_line/end_line to fetch a portion of log content."
427
+ ),
428
+ })
429
+ if size > warn_bytes:
430
+ logger.warning(
431
+ "Large response payload: %d bytes (warn threshold %d bytes)",
432
+ size,
433
+ warn_bytes,
434
+ )
435
+ return encoded
436
+
437
+
438
async def paginate_results(
    http_client: httpx.AsyncClient,
    url: str,
    headers: dict,
    base_params: dict,
    record_key: str,
    top: int,
    initial_continuation_token: str | None = None,
) -> tuple[list, bool]:
    """Collect records across ``x-ms-continuationtoken`` pages.

    Issues GETs via ``request_with_retry`` in a loop. After each response the
    ``x-ms-continuationtoken`` header is read and sent back as the
    ``continuationToken`` query parameter of the next request. The token is
    passed to httpx *unencoded*: httpx percent-encodes ``params`` values
    itself, so quoting it here would double-encode it (``%`` -> ``%25``) and
    the server would receive a different token. The loop stops when the header
    is absent or when ``top`` records have been collected.

    Any ``count`` field in the response body is ignored; only the presence or
    absence of the header drives paging.

    Args:
        http_client: shared httpx.AsyncClient.
        url: endpoint URL (without continuation token).
        headers: headers sent with every page request.
        base_params: base query parameters; copied per request, never mutated.
        record_key: key in a JSON-object response holding the record list.
            Falls back to ``"value"`` when that key is missing or empty. A
            top-level JSON array is used as the record list directly.
        top: maximum total records to return across all pages.
        initial_continuation_token: optional token to resume mid-sequence.

    Returns:
        ``(records, has_more)``; ``has_more`` is True when ``top`` was reached
        while the server still offered a continuation token.

    Raises:
        httpx.HTTPStatusError: from ``raise_for_status`` on an error response.
    """
    all_records: list = []
    continuation_token: str | None = initial_continuation_token
    has_more = False

    while True:
        params = dict(base_params)
        if continuation_token is not None:
            # Raw token: httpx applies URL encoding when building the query.
            params["continuationToken"] = continuation_token

        response = await request_with_retry(
            http_client, "GET", url, headers=headers, params=params
        )
        response.raise_for_status()

        data = response.json()
        if isinstance(data, dict):
            records = data.get(record_key) or data.get("value") or []
        else:
            records = data or []

        all_records.extend(records)

        continuation_token = response.headers.get("x-ms-continuationtoken")

        if len(all_records) >= top:
            # Trim any overshoot from the final page.
            all_records = all_records[:top]
            has_more = continuation_token is not None
            break

        if continuation_token is None:
            break

    return all_records, has_more
503
+
504
+
505
+ def _get_auth_timeout_seconds() -> float:
506
+ """Return the credential-acquisition timeout from AZDO_AUTH_TIMEOUT_SECONDS.
507
+
508
+ Falls back to _DEFAULT_AUTH_TIMEOUT_SECONDS and logs a warning when the env
509
+ var is present but non-numeric or non-positive.
510
+ """
511
+ raw = os.environ.get("AZDO_AUTH_TIMEOUT_SECONDS", "").strip()
512
+ if raw:
513
+ try:
514
+ value = float(raw)
515
+ if value > 0:
516
+ return value
517
+ logger.warning(
518
+ "AZDO_AUTH_TIMEOUT_SECONDS=%r is non-positive; using default %.1fs",
519
+ raw,
520
+ _DEFAULT_AUTH_TIMEOUT_SECONDS,
521
+ )
522
+ except ValueError:
523
+ logger.warning(
524
+ "AZDO_AUTH_TIMEOUT_SECONDS=%r is not a valid number; using default %.1fs",
525
+ raw,
526
+ _DEFAULT_AUTH_TIMEOUT_SECONDS,
527
+ )
528
+ return _DEFAULT_AUTH_TIMEOUT_SECONDS
529
+
530
+
531
+ def _get_ephemeral_token() -> bool:
532
+ """Return whether the interactive token cache should be ephemeral (in-memory).
533
+
534
+ Reads AZDO_EPHEMERAL_TOKEN (default: false — unset/empty means persist to
535
+ disk). Only an explicit "true", "1", or "yes" (case-insensitive) opts into
536
+ an ephemeral, in-memory-only token cache (no disk cache, no sidecar written).
537
+ Any other unrecognised value falls back to the default (false) with a warning.
538
+ """
539
+ raw = os.environ.get("AZDO_EPHEMERAL_TOKEN", "").strip().lower()
540
+ if not raw:
541
+ return False
542
+ if raw in ("true", "1", "yes"):
543
+ return True
544
+ if raw in ("false", "0", "no"):
545
+ return False
546
+ logger.warning(
547
+ "AZDO_EPHEMERAL_TOKEN=%r is not a recognised boolean value; "
548
+ "using default (false)",
549
+ os.environ.get("AZDO_EPHEMERAL_TOKEN", ""),
550
+ )
551
+ return False
552
+
553
+
554
+ def _get_token_cache_profile() -> str:
555
+ """Return a filename-safe profile suffix for the token cache and sidecar.
556
+
557
+ Reads AZDO_TOKEN_CACHE_PROFILE (default: empty). When set, the value is
558
+ appended to both the MSAL cache name and the AuthenticationRecord sidecar
559
+ filename so that two server processes connecting to different
560
+ tenants/accounts on the same host do not share (and overwrite) each other's
561
+ cache and pinned account. An empty/unset value preserves the original
562
+ single-profile filenames for backwards compatibility.
563
+
564
+ Only ``[A-Za-z0-9_-]`` are permitted. Any other character raises
565
+ ``ValueError`` rather than being silently dropped: sanitizing would let two
566
+ distinct profiles (e.g. ``a/b`` and ``a.b``) collapse to the same value and
567
+ secretly share one cache — the exact cross-tenant collision this option
568
+ exists to prevent. Failing fast forces the operator to pick an unambiguous,
569
+ filesystem-safe name.
570
+ """
571
+ raw = os.environ.get("AZDO_TOKEN_CACHE_PROFILE", "").strip()
572
+ if not raw:
573
+ return ""
574
+ allowed = (
575
+ "abcdefghijklmnopqrstuvwxyz"
576
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
577
+ "0123456789-_"
578
+ )
579
+ if any(c not in allowed for c in raw):
580
+ raise ValueError(
581
+ f"AZDO_TOKEN_CACHE_PROFILE={raw!r} contains characters outside "
582
+ "[A-Za-z0-9_-]. Choose a profile name using only letters, digits, "
583
+ "dashes, or underscores so it is an unambiguous, filesystem-safe "
584
+ "cache identifier."
585
+ )
586
+ return raw
587
+
588
+
589
+ def _get_user_config_dir() -> Path:
590
+ """Return a per-user config directory for devops-mcp, creating it if needed."""
591
+ config_dir = Path.home() / ".devops-mcp"
592
+ config_dir.mkdir(parents=True, exist_ok=True)
593
+ return config_dir
594
+
595
+
596
+ def _load_auth_record(record_path: Path) -> "AuthenticationRecord | None":
597
+ """Load a persisted AuthenticationRecord from *record_path*.
598
+
599
+ Returns None if the file is absent or cannot be parsed, logging a warning
600
+ in the latter case so that corrupt sidecars degrade to a fresh prompt rather
601
+ than crashing the server.
602
+ """
603
+ if not record_path.exists():
604
+ return None
605
+ try:
606
+ text = record_path.read_text(encoding="utf-8")
607
+ return AuthenticationRecord.deserialize(text)
608
+ except Exception as exc:
609
+ logger.warning(
610
+ "Could not load AuthenticationRecord from %s (%s); "
611
+ "a fresh interactive sign-in will be required",
612
+ record_path,
613
+ exc,
614
+ )
615
+ return None
616
+
617
+
618
def _save_auth_record(record: "AuthenticationRecord", record_path: Path) -> None:
    """Write *record* to *record_path*, requesting owner-only (0600) permissions.

    The existing docstring for this helper states that the record carries
    account identifiers only (no secrets); permissions are tightened anyway as
    a precaution. Any failure is logged as a warning and swallowed, because
    losing the sidecar only means the user may be prompted again after restart.
    """
    try:
        destination = str(record_path)
        serialized = record.serialize()
        descriptor = os.open(
            destination,
            os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
            0o600,
        )
        try:
            os.write(descriptor, serialized.encode("utf-8"))
        finally:
            os.close(descriptor)

        # The mode passed to os.open is subject to the umask and has no effect
        # on a pre-existing file, so set it explicitly. Failure is tolerated
        # (e.g. on platforms where chmod has limited effect).
        try:
            os.chmod(destination, 0o600)
        except OSError:
            pass

        logger.info("AuthenticationRecord saved to %s", record_path)
    except Exception as exc:
        logger.warning(
            "Could not save AuthenticationRecord to %s (%s); "
            "restart re-prompts may still occur",
            record_path,
            exc,
        )
650
+
651
+
652
def _build_interactive_credential() -> InteractiveBrowserCredential:
    """Create the InteractiveBrowserCredential, preferring a persistent cache.

    Flow:
    1. ``AZDO_EPHEMERAL_TOKEN`` truthy: return a plain in-memory credential.
    2. Otherwise build ``TokenCachePersistenceOptions`` (unencrypted storage
       disallowed) and load any saved AuthenticationRecord sidecar from the
       user config dir, then construct the credential with both. If either
       construction step raises, log a warning and fall back to the in-memory
       credential.
    3. If no sidecar was loaded, wrap ``credential.get_token`` once so the
       first successful call also saves an AuthenticationRecord for later
       restarts.

    ``AZDO_TENANT_ID`` is passed as ``tenant_id`` whenever it is set, and
    ``AZDO_TOKEN_CACHE_PROFILE`` (if set) suffixes the cache and sidecar names.

    GOTCHA: the wrapper restores the original ``get_token`` BEFORE calling
    ``credential.authenticate()``. Per the original author's note,
    ``authenticate()`` goes through ``self.get_token``; leaving the wrapper
    installed would re-enter it recursively.
    """
    tenant_id = os.environ.get("AZDO_TENANT_ID")

    def _in_memory_credential() -> InteractiveBrowserCredential:
        # Shared fallback: no persistence options, tenant pinned only if given.
        if tenant_id:
            return InteractiveBrowserCredential(tenant_id=tenant_id)
        return InteractiveBrowserCredential()

    if _get_ephemeral_token():
        logger.info(
            "AZDO_EPHEMERAL_TOKEN=true: "
            "interactive credential uses in-memory token cache only"
        )
        return _in_memory_credential()

    # An optional profile suffix keeps cache + sidecar separate per profile.
    profile = _get_token_cache_profile()
    if profile:
        cache_name = f"devops-mcp.{profile}.cache"
        record_filename = f"auth-record.{profile}.json"
        logger.info("Token cache profile active: %r (cache name=%s)", profile, cache_name)
    else:
        cache_name = "devops-mcp.cache"
        record_filename = "auth-record.json"

    try:
        cache_opts = TokenCachePersistenceOptions(
            name=cache_name,
            allow_unencrypted_storage=False,
        )
    except Exception as exc:
        logger.warning(
            "Could not initialise TokenCachePersistenceOptions (%s); "
            "falling back to in-memory token cache. "
            "On headless Linux, install libsecret-1 or set "
            "AZDO_EPHEMERAL_TOKEN=true to suppress this warning.",
            exc,
        )
        return _in_memory_credential()

    record_path = _get_user_config_dir() / record_filename
    auth_record = _load_auth_record(record_path)

    kwargs: dict = {"cache_persistence_options": cache_opts}
    if tenant_id:
        kwargs["tenant_id"] = tenant_id
    if auth_record is not None:
        kwargs["authentication_record"] = auth_record

    try:
        credential = InteractiveBrowserCredential(**kwargs)
    except Exception as exc:
        logger.warning(
            "Could not build InteractiveBrowserCredential with persistent cache (%s); "
            "falling back to in-memory credential. "
            "On headless Linux without a secret store, set "
            "AZDO_EPHEMERAL_TOKEN=true to suppress this warning.",
            exc,
        )
        return _in_memory_credential()

    logger.info(
        "Interactive token cache persistence enabled (encrypted=True, sidecar=%s)",
        record_path,
    )

    if auth_record is not None:
        # A sidecar already exists; nothing needs to be captured.
        return credential

    # No sidecar yet: capture one right after the first successful get_token.
    _original_get_token = credential.get_token

    def _get_token_and_record(*args, **kw):
        # Obtain the token through the real method first.
        token = _original_get_token(*args, **kw)
        # Uninstall the wrapper BEFORE authenticate() so that any get_token
        # call authenticate() makes does not re-enter this function.
        credential.get_token = _original_get_token  # type: ignore[method-assign]
        try:
            record = credential.authenticate(scopes=list(args))
            _save_auth_record(record, record_path)
        except Exception as exc:
            # Best effort: the token is still valid even if the record is not saved.
            logger.warning(
                "Could not obtain AuthenticationRecord after sign-in (%s); "
                "restart re-prompts will still occur",
                exc,
            )
        return token

    credential.get_token = _get_token_and_record  # type: ignore[method-assign]
    return credential
776
+
777
+
778
+ def _build_credential(auth_type: str):
779
+ """Instantiate an azure-identity credential based on AZDO_AUTH_TYPE."""
780
+ _ensure_az_cli_on_path()
781
+ if auth_type == "azure_cli":
782
+ return AzureCliCredential()
783
+ if auth_type == "interactive":
784
+ return _build_interactive_credential()
785
+ if auth_type == "client_secret":
786
+ tenant_id = os.environ.get("AZDO_TENANT_ID", "")
787
+ client_id = os.environ.get("AZDO_CLIENT_ID", "")
788
+ client_secret = os.environ.get("AZDO_CLIENT_SECRET", "")
789
+ missing = [
790
+ name
791
+ for name, val in (
792
+ ("AZDO_TENANT_ID", tenant_id),
793
+ ("AZDO_CLIENT_ID", client_id),
794
+ ("AZDO_CLIENT_SECRET", client_secret),
795
+ )
796
+ if not val
797
+ ]
798
+ if missing:
799
+ raise ValueError(
800
+ f"AZDO_AUTH_TYPE=client_secret requires: {', '.join(missing)}"
801
+ )
802
+ return ClientSecretCredential(tenant_id, client_id, client_secret)
803
+ if auth_type == "managed_identity":
804
+ return ManagedIdentityCredential()
805
+ if auth_type == "default":
806
+ return DefaultAzureCredential()
807
+ raise ValueError(
808
+ f"Unknown AZDO_AUTH_TYPE '{auth_type}'. "
809
+ "Valid values: azure_cli, interactive, client_secret, managed_identity, default"
810
+ )
811
+
812
+
813
async def _log_request(request: httpx.Request) -> None:
    """Request hook: debug-log method, URL and whether an Authorization header is set.

    Only the header's presence and length are logged, never its value.
    """
    authorization = request.headers.get("authorization", "")
    presence = "present" if authorization else "MISSING"
    logger.debug(
        "HTTP %s %s | Authorization header: %s (length=%d)",
        request.method,
        request.url,
        presence,
        len(authorization),
    )
822
+
823
+
824
async def _log_response(response: httpx.Response) -> None:
    """Response hook: debug-log every response and warn on redirect statuses."""
    status = response.status_code
    originating = response.request
    logger.debug(
        "HTTP response %d for %s %s",
        status,
        originating.method,
        originating.url,
    )
    if status not in (301, 302, 303, 307, 308):
        return
    # Redirects are surfaced at WARNING level together with their target.
    logger.warning(
        "Redirect %d -> %s",
        status,
        response.headers.get("location", "<no location>"),
    )
832
+
833
+
834
@asynccontextmanager
async def devops_lifespan(server) -> AsyncIterator[AppContext]:
    """FastMCP lifespan that initializes shared Azure DevOps auth state.

    Reads configuration from environment variables:
    - AZDO_AUTH_TYPE: Credential type (default: default). Matching ignores
      case and surrounding whitespace; an unset, empty or blank value selects
      the default instead of failing as an unknown type.
        default          — DefaultAzureCredential (tries all methods in order) [recommended]
        azure_cli        — Azure CLI credential (az login)
        interactive      — Interactive browser login
        client_secret    — Service principal with client secret
                           (requires AZDO_TENANT_ID, AZDO_CLIENT_ID, AZDO_CLIENT_SECRET)
        managed_identity — Managed identity (Azure-hosted workloads)
    - AZDO_TENANT_ID: Entra ID tenant ID (required for client_secret)
    - AZDO_CLIENT_ID: Service principal client ID (required for client_secret)
    - AZDO_CLIENT_SECRET: Service principal client secret (required for client_secret)
    - AZDO_ORGANIZATION: Default organization name (optional; can be supplied per-tool)
    - AZDO_PROJECT: Default project name (optional; can be supplied per-tool)

    Args:
        server: the server instance passed by the framework; not used here.

    Yields:
        AppContext containing the credential, HTTP client, and optional defaults.

    Raises:
        ValueError: from credential construction when AZDO_AUTH_TYPE is not a
            supported value or required client_secret settings are missing.
    """
    # ``or "default"`` on both sides covers unset, empty and whitespace-only.
    auth_type = (os.environ.get("AZDO_AUTH_TYPE") or "").strip().lower() or "default"
    organization = os.environ.get("AZDO_ORGANIZATION")
    project = os.environ.get("AZDO_PROJECT")

    credential = _build_credential(auth_type)
    logger.info("Azure DevOps auth type: %s", auth_type)

    if organization:
        logger.info("Default Azure DevOps organization: %s", organization)
    else:
        logger.info("No AZDO_ORGANIZATION set; tools must supply 'organization'")

    if project:
        logger.info("Default Azure DevOps project: %s", project)
    else:
        logger.info("No AZDO_PROJECT set; tools must supply 'project'")

    http_client = httpx.AsyncClient(
        timeout=httpx.Timeout(connect=10.0, read=30.0, write=60.0, pool=5.0),
        limits=httpx.Limits(max_connections=20, max_keepalive_connections=10),
        event_hooks={"request": [_log_request], "response": [_log_response]},
    )

    app_ctx = AppContext(
        organization=organization,
        project=project,
        credential=credential,
        http_client=http_client,
    )
    logger.info("Azure DevOps MCP server initialized")

    try:
        yield app_ctx
    finally:
        try:
            await http_client.aclose()
        finally:
            # Nested finally: the credential is released even when closing the
            # HTTP client raises.
            close_fn = getattr(credential, "close", None)
            if callable(close_fn):
                close_fn()
            logger.info("Azure DevOps MCP server shutting down")