microsoft-agents-a365-observability-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- microsoft_agents_a365/observability/core/__init__.py +61 -0
- microsoft_agents_a365/observability/core/agent_details.py +42 -0
- microsoft_agents_a365/observability/core/config.py +246 -0
- microsoft_agents_a365/observability/core/constants.py +107 -0
- microsoft_agents_a365/observability/core/execute_tool_scope.py +88 -0
- microsoft_agents_a365/observability/core/execution_type.py +13 -0
- microsoft_agents_a365/observability/core/exporters/agent365_exporter.py +310 -0
- microsoft_agents_a365/observability/core/exporters/utils.py +72 -0
- microsoft_agents_a365/observability/core/inference_call_details.py +18 -0
- microsoft_agents_a365/observability/core/inference_operation_type.py +11 -0
- microsoft_agents_a365/observability/core/inference_scope.py +140 -0
- microsoft_agents_a365/observability/core/invoke_agent_details.py +17 -0
- microsoft_agents_a365/observability/core/invoke_agent_scope.py +166 -0
- microsoft_agents_a365/observability/core/middleware/__init__.py +7 -0
- microsoft_agents_a365/observability/core/middleware/baggage_builder.py +319 -0
- microsoft_agents_a365/observability/core/middleware/turn_context_baggage.py +193 -0
- microsoft_agents_a365/observability/core/models/__init__.py +2 -0
- microsoft_agents_a365/observability/core/models/agent_type.py +25 -0
- microsoft_agents_a365/observability/core/models/caller_details.py +25 -0
- microsoft_agents_a365/observability/core/opentelemetry_scope.py +250 -0
- microsoft_agents_a365/observability/core/request.py +19 -0
- microsoft_agents_a365/observability/core/source_metadata.py +15 -0
- microsoft_agents_a365/observability/core/tenant_details.py +11 -0
- microsoft_agents_a365/observability/core/tool_call_details.py +18 -0
- microsoft_agents_a365/observability/core/tool_type.py +13 -0
- microsoft_agents_a365/observability/core/trace_processor/__init__.py +13 -0
- microsoft_agents_a365/observability/core/trace_processor/span_processor.py +75 -0
- microsoft_agents_a365/observability/core/trace_processor/util.py +44 -0
- microsoft_agents_a365/observability/core/utils.py +151 -0
- microsoft_agents_a365_observability_core-0.1.0.dist-info/METADATA +78 -0
- microsoft_agents_a365_observability_core-0.1.0.dist-info/RECORD +33 -0
- microsoft_agents_a365_observability_core-0.1.0.dist-info/WHEEL +5 -0
- microsoft_agents_a365_observability_core-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# pip install opentelemetry-sdk opentelemetry-api requests
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import threading
|
|
10
|
+
import time
|
|
11
|
+
from collections.abc import Callable, Sequence
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import requests
|
|
15
|
+
from microsoft_agents_a365.runtime.power_platform_api_discovery import PowerPlatformApiDiscovery
|
|
16
|
+
from opentelemetry.sdk.trace import ReadableSpan
|
|
17
|
+
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
|
|
18
|
+
from opentelemetry.trace import StatusCode
|
|
19
|
+
|
|
20
|
+
from .utils import (
|
|
21
|
+
hex_span_id,
|
|
22
|
+
hex_trace_id,
|
|
23
|
+
kind_name,
|
|
24
|
+
partition_by_identity,
|
|
25
|
+
status_name,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# ---- Exporter ---------------------------------------------------------------

# Hardcoded constants - not configurable
# Per-request socket timeout (seconds) passed to every HTTP POST.
DEFAULT_HTTP_TIMEOUT_SECONDS = 30.0
# Retries after the initial attempt (total attempts = DEFAULT_MAX_RETRIES + 1).
DEFAULT_MAX_RETRIES = 3

# Create logger for this module - inherits from 'microsoft_agents_a365.observability.core'
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Agent365Exporter(SpanExporter):
    """
    Agent 365 span exporter for Agent 365:

    * Partitions spans by (tenantId, agentId)
    * Builds OTLP-like JSON: resourceSpans -> scopeSpans -> spans
    * POSTs per group to https://{endpoint}/maven/agent365/agents/{agentId}/traces?api-version=1
    * Adds Bearer token via token_resolver(agentId, tenantId)
    """

    def __init__(
        self,
        token_resolver: Callable[[str, str], str | None],
        cluster_category: str = "prod",
        use_s2s_endpoint: bool = False,
    ):
        """Initialize the exporter.

        Args:
            token_resolver: Callable ``(agent_id, tenant_id) -> token`` returning a
                bearer token string, or None when no token is available.
            cluster_category: Cluster category passed to PowerPlatformApiDiscovery.
            use_s2s_endpoint: When True, POST to the service-to-service traces route.

        Raises:
            ValueError: If token_resolver is None.
        """
        if token_resolver is None:
            raise ValueError("token_resolver must be provided.")
        self._session = requests.Session()
        self._closed = False
        self._lock = threading.Lock()
        self._token_resolver = token_resolver
        self._cluster_category = cluster_category
        self._use_s2s_endpoint = use_s2s_endpoint

    # ------------- SpanExporter API -----------------

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        """Export a batch of spans, one HTTP POST per (tenantId, agentId) group.

        Returns FAILURE when the exporter is shut down or any group fails
        (token resolution or HTTP); SUCCESS otherwise, including when no span
        carries identity attributes.
        """
        if self._closed:
            return SpanExportResult.FAILURE

        try:
            groups = partition_by_identity(spans)
            if not groups:
                # No spans with identity; treat as success
                logger.info("No spans with tenant/agent identity found; nothing exported.")
                return SpanExportResult.SUCCESS

            # Debug: Log number of groups and total span count
            total_spans = sum(len(activities) for activities in groups.values())
            logger.info(
                "Found %d identity groups with %d total spans to export",
                len(groups),
                total_spans,
            )

            # Discovery depends only on the cluster category, so build it once
            # per batch instead of once per identity group (loop-invariant hoist).
            discovery = PowerPlatformApiDiscovery(self._cluster_category)

            any_failure = False
            for (tenant_id, agent_id), activities in groups.items():
                payload = self._build_export_request(activities)
                body = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)

                # Resolve the tenant-specific endpoint
                endpoint = discovery.get_tenant_island_cluster_endpoint(tenant_id)
                endpoint_path = (
                    f"/maven/agent365/service/agents/{agent_id}/traces"
                    if self._use_s2s_endpoint
                    else f"/maven/agent365/agents/{agent_id}/traces"
                )
                url = f"https://{endpoint}{endpoint_path}?api-version=1"

                # Debug: Log endpoint being used
                logger.info(
                    "Exporting %d spans to endpoint: %s (tenant: %s, agent: %s)",
                    len(activities),
                    url,
                    tenant_id,
                    agent_id,
                )

                headers = {"content-type": "application/json"}
                try:
                    token = self._token_resolver(agent_id, tenant_id)
                    if token:
                        headers["authorization"] = f"Bearer {token}"
                        logger.info("Token resolved successfully for agent %s", agent_id)
                    else:
                        logger.info("No token returned for agent %s", agent_id)
                except Exception as e:
                    # If token resolution fails, treat as failure for this group
                    logger.error(
                        "Token resolution failed for agent %s, tenant %s: %s",
                        agent_id,
                        tenant_id,
                        e,
                    )
                    any_failure = True
                    continue

                # Basic retry loop
                if not self._post_with_retries(url, body, headers):
                    any_failure = True

            return SpanExportResult.FAILURE if any_failure else SpanExportResult.SUCCESS

        except Exception as e:
            # Exporters should not raise; signal failure.
            logger.error("Export failed with exception: %s", e)
            return SpanExportResult.FAILURE

    def shutdown(self) -> None:
        """Mark the exporter closed and close the HTTP session (idempotent)."""
        with self._lock:
            if self._closed:
                return
            self._closed = True
        try:
            self._session.close()
        except Exception:
            # Best-effort close; nothing useful to do on failure.
            pass

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Nothing is buffered locally, so there is nothing to flush."""
        return True

    # ------------- HTTP helper ----------------------

    @staticmethod
    def _truncate_text(text: str, max_length: int) -> str:
        """Truncate text to a maximum length, adding '...' if truncated."""
        if len(text) > max_length:
            return text[:max_length] + "..."
        return text

    def _post_with_retries(self, url: str, body: str, headers: dict[str, str]) -> bool:
        """POST *body* to *url*, retrying transient failures.

        Retries 408/429/5xx responses and requests exceptions up to
        DEFAULT_MAX_RETRIES additional times with a linearly increasing
        backoff. Returns True on a 2xx response, False otherwise.
        """
        for attempt in range(DEFAULT_MAX_RETRIES + 1):
            try:
                resp = self._session.post(
                    url,
                    data=body.encode("utf-8"),
                    headers=headers,
                    timeout=DEFAULT_HTTP_TIMEOUT_SECONDS,
                )

                # Extract correlation ID from response headers for logging
                correlation_id = (
                    resp.headers.get("x-ms-correlation-id")
                    or resp.headers.get("request-id")
                    or "N/A"
                )

                # 2xx => success
                if 200 <= resp.status_code < 300:
                    logger.info(
                        "HTTP %d success on attempt %d. Correlation ID: %s. Response: %s",
                        resp.status_code,
                        attempt + 1,
                        correlation_id,
                        self._truncate_text(resp.text, 200),
                    )
                    return True

                # Log non-success responses
                response_text = self._truncate_text(resp.text, 500)

                # Retry transient
                if resp.status_code in (408, 429) or 500 <= resp.status_code < 600:
                    if attempt < DEFAULT_MAX_RETRIES:
                        time.sleep(0.2 * (attempt + 1))
                        continue
                    # Final attempt failed
                    logger.error(
                        "HTTP %d final failure after %d attempts. Correlation ID: %s. Response: %s",
                        resp.status_code,
                        DEFAULT_MAX_RETRIES + 1,
                        correlation_id,
                        response_text,
                    )
                else:
                    # Non-retryable error
                    logger.error(
                        "HTTP %d non-retryable error. Correlation ID: %s. Response: %s",
                        resp.status_code,
                        correlation_id,
                        response_text,
                    )
                return False

            except requests.RequestException as e:
                if attempt < DEFAULT_MAX_RETRIES:
                    time.sleep(0.2 * (attempt + 1))
                    continue
                # Final attempt failed
                logger.error(
                    "Request failed after %d attempts with exception: %s",
                    DEFAULT_MAX_RETRIES + 1,
                    e,
                )
                return False
        return False

    # ------------- Payload mapping ------------------

    def _build_export_request(self, spans: Sequence[ReadableSpan]) -> dict[str, Any]:
        """Build the OTLP-like JSON payload for one identity group of spans."""
        # Group by instrumentation scope (name, version)
        scope_map: dict[tuple[str, str | None], list[dict[str, Any]]] = {}

        for sp in spans:
            scope = sp.instrumentation_scope
            scope_key = (scope.name, scope.version)
            scope_map.setdefault(scope_key, []).append(self._map_span(sp))

        scope_spans: list[dict[str, Any]] = []
        for (name, version), mapped_spans in scope_map.items():
            scope_spans.append(
                {
                    "scope": {
                        "name": name,
                        "version": version,
                    },
                    "spans": mapped_spans,
                }
            )

        # Resource attributes (from the first span – all spans in a batch usually share resource)
        # If you need to merge across spans, adapt accordingly.
        resource_attrs = {}
        if spans:
            resource_attrs = dict(getattr(spans[0].resource, "attributes", {}) or {})

        return {
            "resourceSpans": [
                {
                    "resource": {"attributes": resource_attrs or None},
                    "scopeSpans": scope_spans,
                }
            ]
        }

    def _map_span(self, sp: ReadableSpan) -> dict[str, Any]:
        """Map a single ReadableSpan into its JSON dict representation."""
        ctx = sp.context

        parent_span_id = None
        if sp.parent is not None and sp.parent.span_id != 0:
            parent_span_id = hex_span_id(sp.parent.span_id)

        # attributes
        attrs = dict(sp.attributes or {})
        # events
        events = []
        for ev in sp.events:
            ev_attrs = dict(ev.attributes or {}) if ev.attributes else None
            events.append(
                {
                    "timeUnixNano": ev.timestamp,  # already ns
                    "name": ev.name,
                    "attributes": ev_attrs,
                }
            )
        if not events:
            events = None

        # links
        links = []
        for ln in sp.links or []:
            ln_attrs = dict(ln.attributes or {}) if ln.attributes else None
            links.append(
                {
                    "traceId": hex_trace_id(ln.context.trace_id),
                    "spanId": hex_span_id(ln.context.span_id),
                    "attributes": ln_attrs,
                }
            )
        if not links:
            links = None

        # status
        status_code = sp.status.status_code if sp.status else StatusCode.UNSET
        status = {
            "code": status_name(status_code),
            "message": getattr(sp.status, "description", "") or "",
        }

        # times are ns in ReadableSpan
        start_ns = sp.start_time
        end_ns = sp.end_time

        return {
            "traceId": hex_trace_id(ctx.trace_id),
            "spanId": hex_span_id(ctx.span_id),
            "parentSpanId": parent_span_id,
            "name": sp.name,
            "kind": kind_name(sp.kind),
            "startTimeUnixNano": start_ns,
            "endTimeUnixNano": end_ns,
            "attributes": attrs or None,
            "events": events,
            "links": links,
            "status": status,
        }
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from opentelemetry.sdk.trace import ReadableSpan
|
|
8
|
+
from opentelemetry.trace import SpanKind, StatusCode
|
|
9
|
+
|
|
10
|
+
from ..constants import (
|
|
11
|
+
ENABLE_A365_OBSERVABILITY_EXPORTER,
|
|
12
|
+
GEN_AI_AGENT_ID_KEY,
|
|
13
|
+
TENANT_ID_KEY,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def hex_trace_id(value: int) -> str:
    """Render a 128-bit trace id as a 32-character, zero-padded hex string."""
    return format(value, "032x")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def hex_span_id(value: int) -> str:
    """Render a 64-bit span id as a 16-character, zero-padded hex string."""
    return format(value, "016x")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def as_str(v: Any) -> str | None:
|
|
28
|
+
if v is None:
|
|
29
|
+
return None
|
|
30
|
+
s = str(v)
|
|
31
|
+
return s if s.strip() else None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def kind_name(kind: SpanKind) -> str:
    """Return the span kind's enum name; fall back to str() for plain values."""
    try:
        name = kind.name  # Enum in otel 1.27+
    except Exception:
        name = str(kind)
    return name
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def status_name(code: StatusCode) -> str:
    """Return the status code's enum name; fall back to str() for plain values."""
    try:
        name = code.name
    except Exception:
        name = str(code)
    return name
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def partition_by_identity(
    spans: Sequence[ReadableSpan],
) -> dict[tuple[str, str], list[ReadableSpan]]:
    """
    Extract (tenantId, agentId). Prefer attributes; if you also stamp baggage
    into attributes via a processor, they'll be here already.

    Spans lacking either identity attribute are silently dropped.
    """
    groups: dict[tuple[str, str], list[ReadableSpan]] = {}
    for span in spans:
        attributes = span.attributes or {}
        tenant_id = as_str(attributes.get(TENANT_ID_KEY))
        agent_id = as_str(attributes.get(GEN_AI_AGENT_ID_KEY))
        if tenant_id and agent_id:
            groups.setdefault((tenant_id, agent_id), []).append(span)
    return groups
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def is_agent365_exporter_enabled() -> bool:
    """Check if Agent 365 exporter is enabled via its environment variable."""
    flag = os.getenv(ENABLE_A365_OBSERVABILITY_EXPORTER, "").lower()
    return flag in {"true", "1", "yes", "on"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from .inference_operation_type import InferenceOperationType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class InferenceCallDetails:
    """Details of an inference call for generative AI operations."""

    # Kind of inference operation performed (chat, text completion, ...).
    operationName: InferenceOperationType
    # Name of the model the request targets.
    model: str
    # Name of the model provider.
    providerName: str
    # Token usage counts, when reported by the provider.
    inputTokens: int | None = None
    outputTokens: int | None = None
    # Finish reasons returned for the response, if any.
    finishReasons: list[str] | None = None
    # Provider-assigned identifier of the response, if any.
    responseId: str | None = None
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class InferenceOperationType(Enum):
    """Supported inference operation types for generative AI."""

    # Chat-completion style request.
    CHAT = "Chat"
    # Plain text completion request.
    TEXT_COMPLETION = "TextCompletion"
    # Content generation request.
    GENERATE_CONTENT = "GenerateContent"
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
from .agent_details import AgentDetails
|
|
7
|
+
from .constants import (
|
|
8
|
+
GEN_AI_INPUT_MESSAGES_KEY,
|
|
9
|
+
GEN_AI_OPERATION_NAME_KEY,
|
|
10
|
+
GEN_AI_OUTPUT_MESSAGES_KEY,
|
|
11
|
+
GEN_AI_PROVIDER_NAME_KEY,
|
|
12
|
+
GEN_AI_REQUEST_MODEL_KEY,
|
|
13
|
+
GEN_AI_RESPONSE_FINISH_REASONS_KEY,
|
|
14
|
+
GEN_AI_RESPONSE_ID_KEY,
|
|
15
|
+
GEN_AI_THOUGHT_PROCESS_KEY,
|
|
16
|
+
GEN_AI_USAGE_INPUT_TOKENS_KEY,
|
|
17
|
+
GEN_AI_USAGE_OUTPUT_TOKENS_KEY,
|
|
18
|
+
)
|
|
19
|
+
from .inference_call_details import InferenceCallDetails
|
|
20
|
+
from .opentelemetry_scope import OpenTelemetryScope
|
|
21
|
+
from .request import Request
|
|
22
|
+
from .tenant_details import TenantDetails
|
|
23
|
+
from .utils import safe_json_dumps
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class InferenceScope(OpenTelemetryScope):
    """OpenTelemetry tracing scope for generative AI inference operations."""

    @staticmethod
    def start(
        details: InferenceCallDetails,
        agent_details: AgentDetails,
        tenant_details: TenantDetails,
        request: Request | None = None,
    ) -> "InferenceScope":
        """Create and start a new scope for inference tracing.

        Args:
            details: The details of the inference call
            agent_details: The details of the agent making the call
            tenant_details: The details of the tenant
            request: Optional request details for additional context

        Returns:
            A new InferenceScope instance
        """
        return InferenceScope(details, agent_details, tenant_details, request)

    def __init__(
        self,
        details: InferenceCallDetails,
        agent_details: AgentDetails,
        tenant_details: TenantDetails,
        request: Request | None = None,
    ):
        """Initialize the inference scope and stamp the gen_ai.* tags.

        Args:
            details: The details of the inference call
            agent_details: The details of the agent making the call
            tenant_details: The details of the tenant
            request: Optional request details for additional context
        """
        operation = details.operationName.value

        super().__init__(
            kind="Client",
            operation_name=operation,
            activity_name=f"{operation} {details.model}",
            agent_details=agent_details,
            tenant_details=tenant_details,
        )

        if request:
            self.set_tag_maybe(GEN_AI_INPUT_MESSAGES_KEY, request.content)

        # Pre-compute optional values so each tag call stays a one-liner.
        input_tokens = None if details.inputTokens is None else str(details.inputTokens)
        output_tokens = None if details.outputTokens is None else str(details.outputTokens)
        finish_reasons = (
            safe_json_dumps(details.finishReasons) if details.finishReasons else None
        )

        self.set_tag_maybe(GEN_AI_OPERATION_NAME_KEY, operation)
        self.set_tag_maybe(GEN_AI_REQUEST_MODEL_KEY, details.model)
        self.set_tag_maybe(GEN_AI_PROVIDER_NAME_KEY, details.providerName)
        self.set_tag_maybe(GEN_AI_USAGE_INPUT_TOKENS_KEY, input_tokens)
        self.set_tag_maybe(GEN_AI_USAGE_OUTPUT_TOKENS_KEY, output_tokens)
        self.set_tag_maybe(GEN_AI_RESPONSE_FINISH_REASONS_KEY, finish_reasons)
        self.set_tag_maybe(GEN_AI_RESPONSE_ID_KEY, details.responseId)

    def record_input_messages(self, messages: List[str]) -> None:
        """Record the input messages for telemetry tracking.

        Args:
            messages: List of input messages
        """
        self.set_tag_maybe(GEN_AI_INPUT_MESSAGES_KEY, safe_json_dumps(messages))

    def record_output_messages(self, messages: List[str]) -> None:
        """Record the output messages for telemetry tracking.

        Args:
            messages: List of output messages
        """
        self.set_tag_maybe(GEN_AI_OUTPUT_MESSAGES_KEY, safe_json_dumps(messages))

    def record_input_tokens(self, input_tokens: int) -> None:
        """Record the number of input tokens for telemetry tracking.

        Args:
            input_tokens: Number of input tokens
        """
        self.set_tag_maybe(GEN_AI_USAGE_INPUT_TOKENS_KEY, str(input_tokens))

    def record_output_tokens(self, output_tokens: int) -> None:
        """Record the number of output tokens for telemetry tracking.

        Args:
            output_tokens: Number of output tokens
        """
        self.set_tag_maybe(GEN_AI_USAGE_OUTPUT_TOKENS_KEY, str(output_tokens))

    def record_finish_reasons(self, finish_reasons: List[str]) -> None:
        """Record the finish reasons for telemetry tracking.

        Args:
            finish_reasons: List of finish reasons
        """
        # Skip the tag entirely for an empty/None list.
        if finish_reasons:
            self.set_tag_maybe(
                GEN_AI_RESPONSE_FINISH_REASONS_KEY, safe_json_dumps(finish_reasons)
            )

    def record_thought_process(self, thought_process: str) -> None:
        """Record the thought process.

        Args:
            thought_process: The thought process to record
        """
        self.set_tag_maybe(GEN_AI_THOUGHT_PROCESS_KEY, thought_process)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Data class for invoke agent details.
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from urllib.parse import ParseResult
|
|
7
|
+
|
|
8
|
+
from .agent_details import AgentDetails
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class InvokeAgentDetails:
    """Details for agent invocation tracing."""

    # Details of the agent being invoked.
    details: AgentDetails
    # Parsed endpoint URL of the invoked agent, when known.
    endpoint: ParseResult | None = None
    # Session identifier associated with the invocation, when known.
    session_id: str | None = None
|