netra-zen 1.2.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,59 +1,60 @@
1
- """Runtime loader for telemetry credentials from environment variables."""
2
-
3
- from __future__ import annotations
4
-
5
- import base64
6
- import json
7
- import os
8
- from pathlib import Path
9
- from typing import Optional
10
-
11
- from google.oauth2 import service_account
12
-
13
- _ENV_B64 = "COMMUNITY_CREDENTIALS"
14
- _ENV_PATH = "ZEN_COMMUNITY_TELEMETRY_FILE"
15
- _ENV_PROJECT = "ZEN_COMMUNITY_TELEMETRY_PROJECT"
16
- _DEFAULT_PROJECT = "netra-telemetry-public"
17
-
18
-
19
- def _load_service_account_dict() -> Optional[dict]:
20
- """Load service account JSON from environment variables."""
21
- encoded = os.getenv(_ENV_B64)
22
- if encoded:
23
- try:
24
- raw = base64.b64decode(encoded)
25
- return json.loads(raw)
26
- except (ValueError, json.JSONDecodeError):
27
- return None
28
-
29
- path = os.getenv(_ENV_PATH)
30
- if path:
31
- candidate = Path(path).expanduser()
32
- if candidate.exists():
33
- try:
34
- return json.loads(candidate.read_text())
35
- except json.JSONDecodeError:
36
- return None
37
- return None
38
-
39
-
40
- def get_embedded_credentials():
41
- """Return service account credentials or None."""
42
- info = _load_service_account_dict()
43
- if not info:
44
- return None
45
- try:
46
- return service_account.Credentials.from_service_account_info(
47
- info,
48
- scopes=["https://www.googleapis.com/auth/trace.append"],
49
- )
50
- except Exception:
51
- return None
52
-
53
-
54
- def get_project_id() -> str:
55
- """Return GCP project ID for telemetry."""
56
- info = _load_service_account_dict()
57
- if info and "project_id" in info:
58
- return info["project_id"]
59
- return os.getenv(_ENV_PROJECT, _DEFAULT_PROJECT)
1
+ """Runtime loader for telemetry credentials from environment variables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import json
7
+ import os
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ from google.oauth2 import service_account
12
+
13
# Names of the environment variables consulted for telemetry credentials.
#
# SECURITY: these constants hold variable *names* only. A previous revision
# assigned a base64-encoded service-account JSON (private key included) to
# ``_ENV_B64``; that published the key and also made ``os.getenv`` look up a
# variable named after the blob, so credentials could never be loaded. Key
# material must be supplied at runtime and never committed to the package.
_ENV_B64 = "COMMUNITY_CREDENTIALS"
_ENV_PATH = "ZEN_COMMUNITY_TELEMETRY_FILE"
_ENV_PROJECT = "ZEN_COMMUNITY_TELEMETRY_PROJECT"
_DEFAULT_PROJECT = "netra-telemetry-public"


def _load_service_account_dict() -> Optional[dict]:
    """Load service account JSON from environment variables.

    Lookup order:

    1. ``COMMUNITY_CREDENTIALS`` - base64-encoded JSON document.
    2. ``ZEN_COMMUNITY_TELEMETRY_FILE`` - path to a JSON file (``~`` expanded).

    When the first variable is set but cannot be decoded, ``None`` is
    returned without consulting the file path.

    Returns:
        The parsed JSON object, or ``None`` when nothing is configured, the
        data cannot be read or parsed, or the document is not a JSON object.
    """
    encoded = os.getenv(_ENV_B64)
    if encoded:
        try:
            info = json.loads(base64.b64decode(encoded))
        except ValueError:
            # binascii.Error, UnicodeDecodeError and json.JSONDecodeError are
            # all ValueError subclasses.
            return None
        return info if isinstance(info, dict) else None

    path = os.getenv(_ENV_PATH)
    if path:
        try:
            # EAFP: a missing, unreadable or directory path raises OSError,
            # which is treated the same as "no credentials".
            text = Path(path).expanduser().read_text(encoding="utf-8")
            info = json.loads(text)
        except (OSError, ValueError):
            return None
        return info if isinstance(info, dict) else None
    return None


def get_embedded_credentials():
    """Return service account credentials or None.

    Returns ``None`` when no service account info is available or when the
    Google auth library rejects it.
    """
    info = _load_service_account_dict()
    if not info:
        return None
    try:
        return service_account.Credentials.from_service_account_info(
            info,
            scopes=["https://www.googleapis.com/auth/trace.append"],
        )
    except Exception:
        # Deliberately best-effort: any failure to build credentials simply
        # means no credentials are returned.
        return None


def get_project_id() -> str:
    """Return GCP project ID for telemetry.

    Prefers the ``project_id`` of the loaded service account info, then the
    ``ZEN_COMMUNITY_TELEMETRY_PROJECT`` environment variable, then the
    built-in default project.
    """
    info = _load_service_account_dict()
    if info and "project_id" in info:
        return info["project_id"]
    return os.getenv(_ENV_PROJECT, _DEFAULT_PROJECT)
zen/telemetry/manager.py CHANGED
@@ -1,249 +1,249 @@
1
- """Telemetry manager for Zen orchestrator.
2
-
3
- Provides minimal OpenTelemetry integration that records anonymous spans with
4
- token usage and cost metadata. If OpenTelemetry or Google Cloud libraries are
5
- missing, the manager silently degrades to a no-op implementation.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import hashlib
11
- import logging
12
- import os
13
- import re
14
- from dataclasses import asdict
15
- from typing import Any, Dict, Optional
16
-
17
- try:
18
- from opentelemetry import trace
19
- from opentelemetry.sdk.resources import Resource
20
- from opentelemetry.sdk.trace import TracerProvider
21
- from opentelemetry.sdk.trace.export import BatchSpanProcessor
22
- from opentelemetry.trace import SpanKind
23
-
24
- OPENTELEMETRY_AVAILABLE = True
25
- except ImportError: # pragma: no cover - optional dependency
26
- OPENTELEMETRY_AVAILABLE = False
27
-
28
- try:
29
- from google.cloud.trace_v2 import TraceServiceClient
30
- from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
31
- from google.api_core.exceptions import GoogleAPICallError # type: ignore
32
-
33
- GCP_EXPORT_AVAILABLE = True
34
- except ImportError: # pragma: no cover - optional dependency
35
- GCP_EXPORT_AVAILABLE = False
36
-
37
- class GoogleAPICallError(Exception): # type: ignore
38
- """Fallback exception used when google-api-core is unavailable."""
39
-
40
- pass
41
-
42
- from .embedded_credentials import get_embedded_credentials, get_project_id
43
-
44
- logger = logging.getLogger(__name__)
45
-
46
-
47
- def _sanitize_tool_name(tool: str) -> str:
48
- """Convert tool names to telemetry-safe attribute suffixes."""
49
- safe = re.sub(r"[^a-z0-9_]+", "_", tool.lower()).strip("_")
50
- return safe or "tool"
51
-
52
-
53
- class _NoOpTelemetryManager:
54
- """Fallback manager when telemetry dependencies are unavailable."""
55
-
56
- def is_enabled(self) -> bool:
57
- return False
58
-
59
- def record_instance_span(self, *_, **__): # pragma: no cover - trivial
60
- return
61
-
62
- def shutdown(self) -> None: # pragma: no cover - trivial
63
- return
64
-
65
-
66
- class TelemetryManager:
67
- """Manage OpenTelemetry setup and span emission for Zen."""
68
-
69
- def __init__(self) -> None:
70
- self._enabled = False
71
- self._provider: Optional[TracerProvider] = None
72
- self._tracer = None
73
- self._initialize()
74
-
75
- def _initialize(self) -> None:
76
- if os.getenv("ZEN_TELEMETRY_DISABLED", "").lower() in {"1", "true", "yes"}:
77
- logger.debug("Telemetry disabled via ZEN_TELEMETRY_DISABLED")
78
- return
79
-
80
- if not (OPENTELEMETRY_AVAILABLE and GCP_EXPORT_AVAILABLE):
81
- logger.debug("OpenTelemetry or Google Cloud exporter not available; telemetry disabled")
82
- return
83
-
84
- credentials = get_embedded_credentials()
85
- if credentials is None:
86
- logger.debug("No telemetry credentials detected; telemetry disabled")
87
- return
88
-
89
- try:
90
- project_id = get_project_id()
91
- client = TraceServiceClient(credentials=credentials)
92
- exporter = CloudTraceSpanExporter(project_id=project_id, client=client)
93
-
94
- resource_attrs = {
95
- "service.name": "zen-orchestrator",
96
- "service.version": os.getenv("ZEN_VERSION", "1.0.3"),
97
- "telemetry.sdk.language": "python",
98
- "telemetry.sdk.name": "opentelemetry",
99
- "zen.analytics.type": "community",
100
- }
101
-
102
- resource = Resource.create(resource_attrs)
103
- provider = TracerProvider(resource=resource)
104
- provider.add_span_processor(BatchSpanProcessor(exporter))
105
-
106
- trace.set_tracer_provider(provider)
107
- self._provider = provider
108
- self._tracer = trace.get_tracer("zen.telemetry")
109
- self._enabled = True
110
- logger.info("Telemetry initialized with community credentials")
111
- except Exception as exc: # pragma: no cover - defensive guard
112
- logger.warning(f"Failed to initialize telemetry: {exc}")
113
- self._enabled = False
114
- self._provider = None
115
- self._tracer = None
116
-
117
- # Public API -----------------------------------------------------
118
-
119
- def is_enabled(self) -> bool:
120
- return self._enabled and self._tracer is not None
121
-
122
- def record_instance_span(
123
- self,
124
- batch_id: str,
125
- instance_name: str,
126
- status: Any,
127
- config: Any,
128
- cost_usd: Optional[float] = None,
129
- workspace: Optional[str] = None,
130
- ) -> None:
131
- if not self.is_enabled():
132
- return
133
-
134
- assert self._tracer is not None # mypy hint
135
-
136
- attributes: Dict[str, Any] = {
137
- "zen.batch.id": batch_id,
138
- "zen.instance.name": instance_name,
139
- "zen.instance.status": getattr(status, "status", "unknown"),
140
- "zen.instance.success": getattr(status, "status", "") == "completed",
141
- "zen.instance.permission_mode": getattr(config, "permission_mode", "unknown"),
142
- "zen.instance.tool_calls": getattr(status, "tool_calls", 0),
143
- "zen.tokens.total": getattr(status, "total_tokens", 0),
144
- "zen.tokens.input": getattr(status, "input_tokens", 0),
145
- "zen.tokens.output": getattr(status, "output_tokens", 0),
146
- "zen.tokens.cache.read": getattr(status, "cache_read_tokens", 0),
147
- "zen.tokens.cache.creation": getattr(status, "cache_creation_tokens", 0),
148
- "zen.tokens.cached_total": getattr(status, "cached_tokens", 0),
149
- }
150
-
151
- start_time = getattr(status, "start_time", None)
152
- end_time = getattr(status, "end_time", None)
153
- if start_time and end_time:
154
- attributes["zen.instance.duration_ms"] = int((end_time - start_time) * 1000)
155
-
156
- command = getattr(config, "command", None) or getattr(config, "prompt", None)
157
- if isinstance(command, str) and command.startswith("/"):
158
- attributes["zen.instance.command_type"] = "slash"
159
- attributes["zen.instance.command"] = command
160
- elif isinstance(command, str):
161
- attributes["zen.instance.command_type"] = "prompt"
162
- else:
163
- attributes["zen.instance.command_type"] = "unknown"
164
-
165
- session_id = getattr(config, "session_id", None)
166
- if session_id:
167
- session_hash = hashlib.sha256(session_id.encode("utf-8")).hexdigest()[:16]
168
- attributes["zen.session.hash"] = session_hash
169
-
170
- if workspace:
171
- workspace_hash = hashlib.sha256(workspace.encode("utf-8")).hexdigest()[:16]
172
- attributes["zen.workspace.hash"] = workspace_hash
173
-
174
- # Tool metadata
175
- tool_tokens = getattr(status, "tool_tokens", {}) or {}
176
- attributes["zen.tools.unique"] = len(tool_tokens)
177
- total_tool_tokens = 0
178
- for tool_name, tokens in tool_tokens.items():
179
- sanitized = _sanitize_tool_name(tool_name)
180
- attributes[f"zen.tools.tokens.{sanitized}"] = int(tokens)
181
- total_tool_tokens += int(tokens)
182
- attributes["zen.tokens.tools_total"] = total_tool_tokens
183
-
184
- tool_details = getattr(status, "tool_details", {}) or {}
185
- for tool_name, count in tool_details.items():
186
- sanitized = _sanitize_tool_name(tool_name)
187
- attributes[f"zen.tools.invocations.{sanitized}"] = int(count)
188
-
189
- # Cost metadata
190
- if cost_usd is not None:
191
- attributes["zen.cost.usd_total"] = round(float(cost_usd), 6)
192
-
193
- reported_cost = getattr(status, "total_cost_usd", None)
194
- if reported_cost is not None:
195
- attributes["zen.cost.usd_reported"] = round(float(reported_cost), 6)
196
-
197
- # Derive cost components using fallback pricing (USD per million tokens)
198
- input_tokens = getattr(status, "input_tokens", 0)
199
- output_tokens = getattr(status, "output_tokens", 0)
200
- cache_read_tokens = getattr(status, "cache_read_tokens", 0)
201
- cache_creation_tokens = getattr(status, "cache_creation_tokens", 0)
202
-
203
- input_cost = (input_tokens / 1_000_000) * 3.00
204
- output_cost = (output_tokens / 1_000_000) * 15.00
205
- cache_read_cost = (cache_read_tokens / 1_000_000) * (3.00 * 0.1)
206
- cache_creation_cost = (cache_creation_tokens / 1_000_000) * (3.00 * 1.25)
207
- tool_cost = (total_tool_tokens / 1_000_000) * 3.00
208
-
209
- attributes.update(
210
- {
211
- "zen.cost.usd_input": round(input_cost, 6),
212
- "zen.cost.usd_output": round(output_cost, 6),
213
- "zen.cost.usd_cache_read": round(cache_read_cost, 6),
214
- "zen.cost.usd_cache_creation": round(cache_creation_cost, 6),
215
- "zen.cost.usd_tools": round(tool_cost, 6),
216
- }
217
- )
218
-
219
- # Emit span
220
- try:
221
- with self._tracer.start_as_current_span(
222
- "zen.instance", kind=SpanKind.INTERNAL
223
- ) as span:
224
- for key, value in attributes.items():
225
- span.set_attribute(key, value)
226
- except GoogleAPICallError as exc: # pragma: no cover - network failure safety
227
- logger.warning(f"Failed to export telemetry span: {exc}")
228
-
229
- def shutdown(self) -> None:
230
- if not self._provider:
231
- return
232
- try:
233
- if hasattr(self._provider, "force_flush"):
234
- self._provider.force_flush()
235
- if hasattr(self._provider, "shutdown"):
236
- self._provider.shutdown()
237
- except Exception as exc: # pragma: no cover
238
- logger.debug(f"Telemetry shutdown warning: {exc}")
239
-
240
-
241
- def _build_manager() -> TelemetryManager | _NoOpTelemetryManager:
242
- if not (OPENTELEMETRY_AVAILABLE and GCP_EXPORT_AVAILABLE):
243
- return _NoOpTelemetryManager()
244
- return TelemetryManager()
245
-
246
-
247
- telemetry_manager = _build_manager()
248
-
249
- __all__ = ["TelemetryManager", "telemetry_manager"]
1
+ """Telemetry manager for Zen orchestrator.
2
+
3
+ Provides minimal OpenTelemetry integration that records anonymous spans with
4
+ token usage and cost metadata. If OpenTelemetry or Google Cloud libraries are
5
+ missing, the manager silently degrades to a no-op implementation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import logging
12
+ import os
13
+ import re
14
+ from dataclasses import asdict
15
+ from typing import Any, Dict, Optional
16
+
17
+ try:
18
+ from opentelemetry import trace
19
+ from opentelemetry.sdk.resources import Resource
20
+ from opentelemetry.sdk.trace import TracerProvider
21
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
22
+ from opentelemetry.trace import SpanKind
23
+
24
+ OPENTELEMETRY_AVAILABLE = True
25
+ except ImportError: # pragma: no cover - optional dependency
26
+ OPENTELEMETRY_AVAILABLE = False
27
+
28
+ try:
29
+ from google.cloud.trace_v2 import TraceServiceClient
30
+ from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
31
+ from google.api_core.exceptions import GoogleAPICallError # type: ignore
32
+
33
+ GCP_EXPORT_AVAILABLE = True
34
+ except ImportError: # pragma: no cover - optional dependency
35
+ GCP_EXPORT_AVAILABLE = False
36
+
37
+ class GoogleAPICallError(Exception): # type: ignore
38
+ """Fallback exception used when google-api-core is unavailable."""
39
+
40
+ pass
41
+
42
+ from .embedded_credentials import get_embedded_credentials, get_project_id
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
def _sanitize_tool_name(tool: str) -> str:
    """Turn a tool name into a suffix usable inside a telemetry attribute key.

    The name is lower-cased, every run of characters outside ``a-z``,
    ``0-9`` and ``_`` becomes a single underscore, and leading/trailing
    underscores are removed. An empty result falls back to ``"tool"``.
    """
    collapsed = re.sub(r"[^a-z0-9_]+", "_", tool.lower())
    trimmed = collapsed.strip("_")
    if trimmed:
        return trimmed
    return "tool"
51
+
52
+
53
class _NoOpTelemetryManager:
    """Stand-in manager whose methods do nothing.

    Offers the same three methods as ``TelemetryManager`` so callers need no
    special casing when telemetry dependencies are unavailable.
    """

    def is_enabled(self) -> bool:
        """Report that telemetry is off."""
        return False

    def record_instance_span(self, *_args, **_kwargs):  # pragma: no cover - trivial
        """Accept any arguments and record nothing."""
        return None

    def shutdown(self) -> None:  # pragma: no cover - trivial
        """Nothing to flush or close."""
        return None
64
+
65
+
66
class TelemetryManager:
    """Manage OpenTelemetry setup and span emission for Zen.

    On construction the manager tries to attach a Cloud Trace exporter to a
    new tracer provider. If telemetry is disabled through the environment,
    the optional dependencies are missing, no credentials are found, or
    setup raises, the manager stays disabled and ``record_instance_span``
    does nothing.
    """

    # Fallback pricing in USD per million tokens, used only to derive the
    # per-category cost attributes attached to each span.
    _USD_PER_MTOK_INPUT = 3.00
    _USD_PER_MTOK_OUTPUT = 15.00
    _USD_PER_MTOK_CACHE_READ = 3.00 * 0.1
    _USD_PER_MTOK_CACHE_CREATION = 3.00 * 1.25
    _USD_PER_MTOK_TOOLS = 3.00

    def __init__(self) -> None:
        self._enabled = False
        self._provider: Optional[TracerProvider] = None
        self._tracer = None
        self._initialize()

    def _initialize(self) -> None:
        """Configure the tracer provider, leaving the manager disabled on any failure."""
        if os.getenv("ZEN_TELEMETRY_DISABLED", "").lower() in {"1", "true", "yes"}:
            logger.debug("Telemetry disabled via ZEN_TELEMETRY_DISABLED")
            return

        if not (OPENTELEMETRY_AVAILABLE and GCP_EXPORT_AVAILABLE):
            logger.debug("OpenTelemetry or Google Cloud exporter not available; telemetry disabled")
            return

        credentials = get_embedded_credentials()
        if credentials is None:
            logger.debug("No telemetry credentials detected; telemetry disabled")
            return

        try:
            project_id = get_project_id()
            client = TraceServiceClient(credentials=credentials)
            exporter = CloudTraceSpanExporter(project_id=project_id, client=client)

            resource_attrs = {
                "service.name": "zen-orchestrator",
                # NOTE(review): this default is older than the package version
                # being released - confirm whether it should track it.
                "service.version": os.getenv("ZEN_VERSION", "1.0.3"),
                "telemetry.sdk.language": "python",
                "telemetry.sdk.name": "opentelemetry",
                "zen.analytics.type": "community",
            }

            resource = Resource.create(resource_attrs)
            provider = TracerProvider(resource=resource)
            provider.add_span_processor(BatchSpanProcessor(exporter))

            trace.set_tracer_provider(provider)
            self._provider = provider
            self._tracer = trace.get_tracer("zen.telemetry")
            self._enabled = True
            logger.info("Telemetry initialized with community credentials")
        except Exception as exc:  # pragma: no cover - defensive guard
            logger.warning("Failed to initialize telemetry: %s", exc)
            self._enabled = False
            self._provider = None
            self._tracer = None

    # Public API -----------------------------------------------------

    def is_enabled(self) -> bool:
        """Return True only when setup succeeded and a tracer is available."""
        return self._enabled and self._tracer is not None

    def record_instance_span(
        self,
        batch_id: str,
        instance_name: str,
        status: Any,
        config: Any,
        cost_usd: Optional[float] = None,
        workspace: Optional[str] = None,
    ) -> None:
        """Emit one ``zen.instance`` span describing a finished instance.

        Args:
            batch_id: Identifier recorded as ``zen.batch.id``.
            instance_name: Name recorded as ``zen.instance.name``.
            status: Object read with ``getattr`` for status, token counts,
                timing, tool usage and reported cost; missing attributes
                fall back to defaults.
            config: Object read with ``getattr`` for ``permission_mode``,
                ``command``/``prompt`` and ``session_id``.
            cost_usd: Optional total cost, recorded when not ``None``.
            workspace: Optional workspace string; only a truncated SHA-256
                digest of it is recorded.

        Does nothing when the manager is disabled.
        """
        if not self.is_enabled():
            return

        assert self._tracer is not None  # mypy hint

        # Helpers are applied in a fixed order so the attribute dict keeps
        # the same insertion order as a single inline build would.
        attributes = self._usage_attributes(batch_id, instance_name, status, config)
        attributes.update(self._command_attributes(config))
        attributes.update(self._identity_attributes(config, workspace))
        attributes.update(self._tool_attributes(status))
        attributes.update(
            self._cost_attributes(status, cost_usd, attributes["zen.tokens.tools_total"])
        )

        # Emit span
        try:
            with self._tracer.start_as_current_span(
                "zen.instance", kind=SpanKind.INTERNAL
            ) as span:
                for key, value in attributes.items():
                    span.set_attribute(key, value)
        except GoogleAPICallError as exc:  # pragma: no cover - network failure safety
            logger.warning("Failed to export telemetry span: %s", exc)

    def shutdown(self) -> None:
        """Flush and shut down the tracer provider, if one was created."""
        if not self._provider:
            return
        try:
            if hasattr(self._provider, "force_flush"):
                self._provider.force_flush()
            if hasattr(self._provider, "shutdown"):
                self._provider.shutdown()
        except Exception as exc:  # pragma: no cover
            logger.debug("Telemetry shutdown warning: %s", exc)

    # Attribute builders ---------------------------------------------

    @staticmethod
    def _usage_attributes(
        batch_id: str, instance_name: str, status: Any, config: Any
    ) -> Dict[str, Any]:
        """Build identity, status, token-count and duration attributes."""
        attributes: Dict[str, Any] = {
            "zen.batch.id": batch_id,
            "zen.instance.name": instance_name,
            "zen.instance.status": getattr(status, "status", "unknown"),
            "zen.instance.success": getattr(status, "status", "") == "completed",
            "zen.instance.permission_mode": getattr(config, "permission_mode", "unknown"),
            "zen.instance.tool_calls": getattr(status, "tool_calls", 0),
            "zen.tokens.total": getattr(status, "total_tokens", 0),
            "zen.tokens.input": getattr(status, "input_tokens", 0),
            "zen.tokens.output": getattr(status, "output_tokens", 0),
            "zen.tokens.cache.read": getattr(status, "cache_read_tokens", 0),
            "zen.tokens.cache.creation": getattr(status, "cache_creation_tokens", 0),
            "zen.tokens.cached_total": getattr(status, "cached_tokens", 0),
        }

        start_time = getattr(status, "start_time", None)
        end_time = getattr(status, "end_time", None)
        if start_time and end_time:
            attributes["zen.instance.duration_ms"] = int((end_time - start_time) * 1000)
        return attributes

    @staticmethod
    def _command_attributes(config: Any) -> Dict[str, Any]:
        """Classify the command as slash/prompt/unknown; only slash commands are recorded verbatim."""
        command = getattr(config, "command", None) or getattr(config, "prompt", None)
        if not isinstance(command, str):
            return {"zen.instance.command_type": "unknown"}
        if command.startswith("/"):
            return {
                "zen.instance.command_type": "slash",
                "zen.instance.command": command,
            }
        return {"zen.instance.command_type": "prompt"}

    @staticmethod
    def _identity_attributes(config: Any, workspace: Optional[str]) -> Dict[str, Any]:
        """Build 16-hex-character SHA-256 prefixes for the session id and workspace."""
        attributes: Dict[str, Any] = {}
        session_id = getattr(config, "session_id", None)
        if session_id:
            attributes["zen.session.hash"] = hashlib.sha256(
                session_id.encode("utf-8")
            ).hexdigest()[:16]
        if workspace:
            attributes["zen.workspace.hash"] = hashlib.sha256(
                workspace.encode("utf-8")
            ).hexdigest()[:16]
        return attributes

    @staticmethod
    def _tool_attributes(status: Any) -> Dict[str, Any]:
        """Build per-tool token and invocation attributes plus their totals."""
        tool_tokens = getattr(status, "tool_tokens", {}) or {}
        attributes: Dict[str, Any] = {"zen.tools.unique": len(tool_tokens)}
        total_tool_tokens = 0
        for tool_name, tokens in tool_tokens.items():
            sanitized = _sanitize_tool_name(tool_name)
            attributes[f"zen.tools.tokens.{sanitized}"] = int(tokens)
            total_tool_tokens += int(tokens)
        attributes["zen.tokens.tools_total"] = total_tool_tokens

        tool_details = getattr(status, "tool_details", {}) or {}
        for tool_name, count in tool_details.items():
            sanitized = _sanitize_tool_name(tool_name)
            attributes[f"zen.tools.invocations.{sanitized}"] = int(count)
        return attributes

    @staticmethod
    def _cost_attributes(
        status: Any, cost_usd: Optional[float], total_tool_tokens: int
    ) -> Dict[str, Any]:
        """Build supplied/reported totals and per-category costs from fallback pricing."""
        attributes: Dict[str, Any] = {}
        if cost_usd is not None:
            attributes["zen.cost.usd_total"] = round(float(cost_usd), 6)

        reported_cost = getattr(status, "total_cost_usd", None)
        if reported_cost is not None:
            attributes["zen.cost.usd_reported"] = round(float(reported_cost), 6)

        # Derive cost components using fallback pricing (USD per million tokens)
        input_tokens = getattr(status, "input_tokens", 0)
        output_tokens = getattr(status, "output_tokens", 0)
        cache_read_tokens = getattr(status, "cache_read_tokens", 0)
        cache_creation_tokens = getattr(status, "cache_creation_tokens", 0)

        pricing = TelemetryManager
        input_cost = (input_tokens / 1_000_000) * pricing._USD_PER_MTOK_INPUT
        output_cost = (output_tokens / 1_000_000) * pricing._USD_PER_MTOK_OUTPUT
        cache_read_cost = (cache_read_tokens / 1_000_000) * pricing._USD_PER_MTOK_CACHE_READ
        cache_creation_cost = (
            cache_creation_tokens / 1_000_000
        ) * pricing._USD_PER_MTOK_CACHE_CREATION
        tool_cost = (total_tool_tokens / 1_000_000) * pricing._USD_PER_MTOK_TOOLS

        attributes.update(
            {
                "zen.cost.usd_input": round(input_cost, 6),
                "zen.cost.usd_output": round(output_cost, 6),
                "zen.cost.usd_cache_read": round(cache_read_cost, 6),
                "zen.cost.usd_cache_creation": round(cache_creation_cost, 6),
                "zen.cost.usd_tools": round(tool_cost, 6),
            }
        )
        return attributes
239
+
240
+
241
def _build_manager() -> TelemetryManager | _NoOpTelemetryManager:
    """Choose the telemetry manager implementation.

    Returns a real ``TelemetryManager`` when both the OpenTelemetry SDK and
    the Google Cloud exporter imported successfully, otherwise the no-op
    fallback.
    """
    dependencies_ready = OPENTELEMETRY_AVAILABLE and GCP_EXPORT_AVAILABLE
    if dependencies_ready:
        return TelemetryManager()
    return _NoOpTelemetryManager()
245
+
246
+
247
+ telemetry_manager = _build_manager()
248
+
249
+ __all__ = ["TelemetryManager", "telemetry_manager"]