netra-zen 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {netra_zen-1.0.10.dist-info → netra_zen-1.0.11.dist-info}/METADATA +992 -971
- netra_zen-1.0.11.dist-info/RECORD +30 -0
- {netra_zen-1.0.10.dist-info → netra_zen-1.0.11.dist-info}/licenses/LICENSE.md +1 -1
- scripts/__init__.py +1 -1
- scripts/__main__.py +5 -5
- scripts/agent_cli.py +7179 -6948
- scripts/agent_logs.py +327 -327
- scripts/demo_log_collection.py +146 -144
- scripts/embed_release_credentials.py +75 -75
- scripts/test_apex_telemetry_debug.py +221 -0
- scripts/verify_log_transmission.py +140 -140
- zen/__init__.py +7 -7
- zen/__main__.py +11 -11
- zen/telemetry/__init__.py +14 -11
- zen/telemetry/apex_telemetry.py +259 -0
- zen/telemetry/embedded_credentials.py +59 -26
- zen/telemetry/manager.py +249 -249
- zen_orchestrator.py +3058 -3008
- netra_zen-1.0.10.dist-info/RECORD +0 -28
- {netra_zen-1.0.10.dist-info → netra_zen-1.0.11.dist-info}/WHEEL +0 -0
- {netra_zen-1.0.10.dist-info → netra_zen-1.0.11.dist-info}/entry_points.txt +0 -0
- {netra_zen-1.0.10.dist-info → netra_zen-1.0.11.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,59 @@
|
|
1
|
-
"""
|
2
|
-
|
3
|
-
import
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
1
|
+
"""Runtime loader for telemetry credentials from environment variables."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import base64
|
6
|
+
import json
|
7
|
+
import os
|
8
|
+
from pathlib import Path
|
9
|
+
from typing import Optional
|
10
|
+
|
11
|
+
from google.oauth2 import service_account
|
12
|
+
|
13
|
+
# Environment variable holding base64-encoded service-account JSON.
_ENV_B64 = "COMMUNITY_CREDENTIALS"
# Environment variable holding a filesystem path to a service-account JSON file.
_ENV_PATH = "ZEN_COMMUNITY_TELEMETRY_FILE"
# Environment variable consulted for the project ID when the key has none.
_ENV_PROJECT = "ZEN_COMMUNITY_TELEMETRY_PROJECT"
# Project ID used when neither the key nor the environment provides one.
_DEFAULT_PROJECT = "netra-telemetry-public"


def _load_service_account_dict() -> Optional[dict]:
    """Load service account JSON from environment variables.

    The base64 variable (``COMMUNITY_CREDENTIALS``) takes precedence: when it
    is set, the file variable is not consulted, even if decoding fails.
    Otherwise ``ZEN_COMMUNITY_TELEMETRY_FILE`` is treated as a path (with
    ``~`` expanded) to a JSON file.

    Returns:
        The parsed JSON object, or None when nothing is configured, the data
        cannot be read or decoded, or the JSON is not an object.
    """
    encoded = os.getenv(_ENV_B64)
    if encoded:
        try:
            # ValueError covers binascii.Error, UnicodeDecodeError and
            # json.JSONDecodeError, which are all subclasses of it.
            payload = json.loads(base64.b64decode(encoded))
        except ValueError:
            return None
        # Callers index the result by key, so reject strings, lists, etc.
        return payload if isinstance(payload, dict) else None

    path = os.getenv(_ENV_PATH)
    if path:
        try:
            # Read bytes so json.loads detects the encoding itself instead of
            # depending on the platform default text encoding.
            payload = json.loads(Path(path).expanduser().read_bytes())
        except (OSError, RuntimeError, ValueError):
            # Missing file, directory, permission problem, unresolvable home
            # directory, or undecodable content: telemetry is optional, so
            # treat all of these as "no credentials".
            return None
        return payload if isinstance(payload, dict) else None
    return None
|
38
|
+
|
39
|
+
|
40
|
+
def get_embedded_credentials():
    """Build service-account credentials scoped for trace uploads.

    Returns None when no service-account data is available or when the
    credentials object cannot be constructed from it.
    """
    account_info = _load_service_account_dict()
    if not account_info:
        return None

    trace_scopes = ["https://www.googleapis.com/auth/trace.append"]
    try:
        credentials = service_account.Credentials.from_service_account_info(
            account_info, scopes=trace_scopes
        )
    except Exception:
        # Best-effort: any construction failure means "no credentials".
        return None
    return credentials
|
52
|
+
|
53
|
+
|
54
|
+
def get_project_id() -> str:
    """Resolve the GCP project ID that telemetry is sent to.

    The ``project_id`` entry of the loaded service-account data wins; without
    one, the project environment variable is used, then the built-in default.
    """
    account_info = _load_service_account_dict()
    if not account_info or "project_id" not in account_info:
        return os.getenv(_ENV_PROJECT, _DEFAULT_PROJECT)
    return account_info["project_id"]
|
zen/telemetry/manager.py
CHANGED
@@ -1,249 +1,249 @@
|
|
1
|
-
"""Telemetry manager for Zen orchestrator.
|
2
|
-
|
3
|
-
Provides minimal OpenTelemetry integration that records anonymous spans with
|
4
|
-
token usage and cost metadata. If OpenTelemetry or Google Cloud libraries are
|
5
|
-
missing, the manager silently degrades to a no-op implementation.
|
6
|
-
"""
|
7
|
-
|
8
|
-
from __future__ import annotations
|
9
|
-
|
10
|
-
import hashlib
|
11
|
-
import logging
|
12
|
-
import os
|
13
|
-
import re
|
14
|
-
from dataclasses import asdict
|
15
|
-
from typing import Any, Dict, Optional
|
16
|
-
|
17
|
-
try:
|
18
|
-
from opentelemetry import trace
|
19
|
-
from opentelemetry.sdk.resources import Resource
|
20
|
-
from opentelemetry.sdk.trace import TracerProvider
|
21
|
-
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
22
|
-
from opentelemetry.trace import SpanKind
|
23
|
-
|
24
|
-
OPENTELEMETRY_AVAILABLE = True
|
25
|
-
except ImportError: # pragma: no cover - optional dependency
|
26
|
-
OPENTELEMETRY_AVAILABLE = False
|
27
|
-
|
28
|
-
try:
|
29
|
-
from google.cloud.trace_v2 import TraceServiceClient
|
30
|
-
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
|
31
|
-
from google.api_core.exceptions import GoogleAPICallError # type: ignore
|
32
|
-
|
33
|
-
GCP_EXPORT_AVAILABLE = True
|
34
|
-
except ImportError: # pragma: no cover - optional dependency
|
35
|
-
GCP_EXPORT_AVAILABLE = False
|
36
|
-
|
37
|
-
class GoogleAPICallError(Exception): # type: ignore
|
38
|
-
"""Fallback exception used when google-api-core is unavailable."""
|
39
|
-
|
40
|
-
pass
|
41
|
-
|
42
|
-
from .embedded_credentials import get_embedded_credentials, get_project_id
|
43
|
-
|
44
|
-
logger = logging.getLogger(__name__)
|
45
|
-
|
46
|
-
|
47
|
-
def _sanitize_tool_name(tool: str) -> str:
|
48
|
-
"""Convert tool names to telemetry-safe attribute suffixes."""
|
49
|
-
safe = re.sub(r"[^a-z0-9_]+", "_", tool.lower()).strip("_")
|
50
|
-
return safe or "tool"
|
51
|
-
|
52
|
-
|
53
|
-
class _NoOpTelemetryManager:
|
54
|
-
"""Fallback manager when telemetry dependencies are unavailable."""
|
55
|
-
|
56
|
-
def is_enabled(self) -> bool:
|
57
|
-
return False
|
58
|
-
|
59
|
-
def record_instance_span(self, *_, **__): # pragma: no cover - trivial
|
60
|
-
return
|
61
|
-
|
62
|
-
def shutdown(self) -> None: # pragma: no cover - trivial
|
63
|
-
return
|
64
|
-
|
65
|
-
|
66
|
-
class TelemetryManager:
|
67
|
-
"""Manage OpenTelemetry setup and span emission for Zen."""
|
68
|
-
|
69
|
-
def __init__(self) -> None:
|
70
|
-
self._enabled = False
|
71
|
-
self._provider: Optional[TracerProvider] = None
|
72
|
-
self._tracer = None
|
73
|
-
self._initialize()
|
74
|
-
|
75
|
-
def _initialize(self) -> None:
|
76
|
-
if os.getenv("ZEN_TELEMETRY_DISABLED", "").lower() in {"1", "true", "yes"}:
|
77
|
-
logger.debug("Telemetry disabled via ZEN_TELEMETRY_DISABLED")
|
78
|
-
return
|
79
|
-
|
80
|
-
if not (OPENTELEMETRY_AVAILABLE and GCP_EXPORT_AVAILABLE):
|
81
|
-
logger.debug("OpenTelemetry or Google Cloud exporter not available; telemetry disabled")
|
82
|
-
return
|
83
|
-
|
84
|
-
credentials = get_embedded_credentials()
|
85
|
-
if credentials is None:
|
86
|
-
logger.debug("No telemetry credentials detected; telemetry disabled")
|
87
|
-
return
|
88
|
-
|
89
|
-
try:
|
90
|
-
project_id = get_project_id()
|
91
|
-
client = TraceServiceClient(credentials=credentials)
|
92
|
-
exporter = CloudTraceSpanExporter(project_id=project_id, client=client)
|
93
|
-
|
94
|
-
resource_attrs = {
|
95
|
-
"service.name": "zen-orchestrator",
|
96
|
-
"service.version": os.getenv("ZEN_VERSION", "1.0.3"),
|
97
|
-
"telemetry.sdk.language": "python",
|
98
|
-
"telemetry.sdk.name": "opentelemetry",
|
99
|
-
"zen.analytics.type": "community",
|
100
|
-
}
|
101
|
-
|
102
|
-
resource = Resource.create(resource_attrs)
|
103
|
-
provider = TracerProvider(resource=resource)
|
104
|
-
provider.add_span_processor(BatchSpanProcessor(exporter))
|
105
|
-
|
106
|
-
trace.set_tracer_provider(provider)
|
107
|
-
self._provider = provider
|
108
|
-
self._tracer = trace.get_tracer("zen.telemetry")
|
109
|
-
self._enabled = True
|
110
|
-
logger.info("Telemetry initialized with community credentials")
|
111
|
-
except Exception as exc: # pragma: no cover - defensive guard
|
112
|
-
logger.warning(f"Failed to initialize telemetry: {exc}")
|
113
|
-
self._enabled = False
|
114
|
-
self._provider = None
|
115
|
-
self._tracer = None
|
116
|
-
|
117
|
-
# Public API -----------------------------------------------------
|
118
|
-
|
119
|
-
def is_enabled(self) -> bool:
|
120
|
-
return self._enabled and self._tracer is not None
|
121
|
-
|
122
|
-
def record_instance_span(
|
123
|
-
self,
|
124
|
-
batch_id: str,
|
125
|
-
instance_name: str,
|
126
|
-
status: Any,
|
127
|
-
config: Any,
|
128
|
-
cost_usd: Optional[float] = None,
|
129
|
-
workspace: Optional[str] = None,
|
130
|
-
) -> None:
|
131
|
-
if not self.is_enabled():
|
132
|
-
return
|
133
|
-
|
134
|
-
assert self._tracer is not None # mypy hint
|
135
|
-
|
136
|
-
attributes: Dict[str, Any] = {
|
137
|
-
"zen.batch.id": batch_id,
|
138
|
-
"zen.instance.name": instance_name,
|
139
|
-
"zen.instance.status": getattr(status, "status", "unknown"),
|
140
|
-
"zen.instance.success": getattr(status, "status", "") == "completed",
|
141
|
-
"zen.instance.permission_mode": getattr(config, "permission_mode", "unknown"),
|
142
|
-
"zen.instance.tool_calls": getattr(status, "tool_calls", 0),
|
143
|
-
"zen.tokens.total": getattr(status, "total_tokens", 0),
|
144
|
-
"zen.tokens.input": getattr(status, "input_tokens", 0),
|
145
|
-
"zen.tokens.output": getattr(status, "output_tokens", 0),
|
146
|
-
"zen.tokens.cache.read": getattr(status, "cache_read_tokens", 0),
|
147
|
-
"zen.tokens.cache.creation": getattr(status, "cache_creation_tokens", 0),
|
148
|
-
"zen.tokens.cached_total": getattr(status, "cached_tokens", 0),
|
149
|
-
}
|
150
|
-
|
151
|
-
start_time = getattr(status, "start_time", None)
|
152
|
-
end_time = getattr(status, "end_time", None)
|
153
|
-
if start_time and end_time:
|
154
|
-
attributes["zen.instance.duration_ms"] = int((end_time - start_time) * 1000)
|
155
|
-
|
156
|
-
command = getattr(config, "command", None) or getattr(config, "prompt", None)
|
157
|
-
if isinstance(command, str) and command.startswith("/"):
|
158
|
-
attributes["zen.instance.command_type"] = "slash"
|
159
|
-
attributes["zen.instance.command"] = command
|
160
|
-
elif isinstance(command, str):
|
161
|
-
attributes["zen.instance.command_type"] = "prompt"
|
162
|
-
else:
|
163
|
-
attributes["zen.instance.command_type"] = "unknown"
|
164
|
-
|
165
|
-
session_id = getattr(config, "session_id", None)
|
166
|
-
if session_id:
|
167
|
-
session_hash = hashlib.sha256(session_id.encode("utf-8")).hexdigest()[:16]
|
168
|
-
attributes["zen.session.hash"] = session_hash
|
169
|
-
|
170
|
-
if workspace:
|
171
|
-
workspace_hash = hashlib.sha256(workspace.encode("utf-8")).hexdigest()[:16]
|
172
|
-
attributes["zen.workspace.hash"] = workspace_hash
|
173
|
-
|
174
|
-
# Tool metadata
|
175
|
-
tool_tokens = getattr(status, "tool_tokens", {}) or {}
|
176
|
-
attributes["zen.tools.unique"] = len(tool_tokens)
|
177
|
-
total_tool_tokens = 0
|
178
|
-
for tool_name, tokens in tool_tokens.items():
|
179
|
-
sanitized = _sanitize_tool_name(tool_name)
|
180
|
-
attributes[f"zen.tools.tokens.{sanitized}"] = int(tokens)
|
181
|
-
total_tool_tokens += int(tokens)
|
182
|
-
attributes["zen.tokens.tools_total"] = total_tool_tokens
|
183
|
-
|
184
|
-
tool_details = getattr(status, "tool_details", {}) or {}
|
185
|
-
for tool_name, count in tool_details.items():
|
186
|
-
sanitized = _sanitize_tool_name(tool_name)
|
187
|
-
attributes[f"zen.tools.invocations.{sanitized}"] = int(count)
|
188
|
-
|
189
|
-
# Cost metadata
|
190
|
-
if cost_usd is not None:
|
191
|
-
attributes["zen.cost.usd_total"] = round(float(cost_usd), 6)
|
192
|
-
|
193
|
-
reported_cost = getattr(status, "total_cost_usd", None)
|
194
|
-
if reported_cost is not None:
|
195
|
-
attributes["zen.cost.usd_reported"] = round(float(reported_cost), 6)
|
196
|
-
|
197
|
-
# Derive cost components using fallback pricing (USD per million tokens)
|
198
|
-
input_tokens = getattr(status, "input_tokens", 0)
|
199
|
-
output_tokens = getattr(status, "output_tokens", 0)
|
200
|
-
cache_read_tokens = getattr(status, "cache_read_tokens", 0)
|
201
|
-
cache_creation_tokens = getattr(status, "cache_creation_tokens", 0)
|
202
|
-
|
203
|
-
input_cost = (input_tokens / 1_000_000) * 3.00
|
204
|
-
output_cost = (output_tokens / 1_000_000) * 15.00
|
205
|
-
cache_read_cost = (cache_read_tokens / 1_000_000) * (3.00 * 0.1)
|
206
|
-
cache_creation_cost = (cache_creation_tokens / 1_000_000) * (3.00 * 1.25)
|
207
|
-
tool_cost = (total_tool_tokens / 1_000_000) * 3.00
|
208
|
-
|
209
|
-
attributes.update(
|
210
|
-
{
|
211
|
-
"zen.cost.usd_input": round(input_cost, 6),
|
212
|
-
"zen.cost.usd_output": round(output_cost, 6),
|
213
|
-
"zen.cost.usd_cache_read": round(cache_read_cost, 6),
|
214
|
-
"zen.cost.usd_cache_creation": round(cache_creation_cost, 6),
|
215
|
-
"zen.cost.usd_tools": round(tool_cost, 6),
|
216
|
-
}
|
217
|
-
)
|
218
|
-
|
219
|
-
# Emit span
|
220
|
-
try:
|
221
|
-
with self._tracer.start_as_current_span(
|
222
|
-
"zen.instance", kind=SpanKind.INTERNAL
|
223
|
-
) as span:
|
224
|
-
for key, value in attributes.items():
|
225
|
-
span.set_attribute(key, value)
|
226
|
-
except GoogleAPICallError as exc: # pragma: no cover - network failure safety
|
227
|
-
logger.warning(f"Failed to export telemetry span: {exc}")
|
228
|
-
|
229
|
-
def shutdown(self) -> None:
|
230
|
-
if not self._provider:
|
231
|
-
return
|
232
|
-
try:
|
233
|
-
if hasattr(self._provider, "force_flush"):
|
234
|
-
self._provider.force_flush()
|
235
|
-
if hasattr(self._provider, "shutdown"):
|
236
|
-
self._provider.shutdown()
|
237
|
-
except Exception as exc: # pragma: no cover
|
238
|
-
logger.debug(f"Telemetry shutdown warning: {exc}")
|
239
|
-
|
240
|
-
|
241
|
-
def _build_manager() -> TelemetryManager | _NoOpTelemetryManager:
|
242
|
-
if not (OPENTELEMETRY_AVAILABLE and GCP_EXPORT_AVAILABLE):
|
243
|
-
return _NoOpTelemetryManager()
|
244
|
-
return TelemetryManager()
|
245
|
-
|
246
|
-
|
247
|
-
telemetry_manager = _build_manager()
|
248
|
-
|
249
|
-
__all__ = ["TelemetryManager", "telemetry_manager"]
|
1
|
+
"""Telemetry manager for Zen orchestrator.
|
2
|
+
|
3
|
+
Provides minimal OpenTelemetry integration that records anonymous spans with
|
4
|
+
token usage and cost metadata. If OpenTelemetry or Google Cloud libraries are
|
5
|
+
missing, the manager silently degrades to a no-op implementation.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from __future__ import annotations
|
9
|
+
|
10
|
+
import hashlib
|
11
|
+
import logging
|
12
|
+
import os
|
13
|
+
import re
|
14
|
+
from dataclasses import asdict
|
15
|
+
from typing import Any, Dict, Optional
|
16
|
+
|
17
|
+
try:
|
18
|
+
from opentelemetry import trace
|
19
|
+
from opentelemetry.sdk.resources import Resource
|
20
|
+
from opentelemetry.sdk.trace import TracerProvider
|
21
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
22
|
+
from opentelemetry.trace import SpanKind
|
23
|
+
|
24
|
+
OPENTELEMETRY_AVAILABLE = True
|
25
|
+
except ImportError: # pragma: no cover - optional dependency
|
26
|
+
OPENTELEMETRY_AVAILABLE = False
|
27
|
+
|
28
|
+
try:
|
29
|
+
from google.cloud.trace_v2 import TraceServiceClient
|
30
|
+
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
|
31
|
+
from google.api_core.exceptions import GoogleAPICallError # type: ignore
|
32
|
+
|
33
|
+
GCP_EXPORT_AVAILABLE = True
|
34
|
+
except ImportError: # pragma: no cover - optional dependency
|
35
|
+
GCP_EXPORT_AVAILABLE = False
|
36
|
+
|
37
|
+
class GoogleAPICallError(Exception): # type: ignore
|
38
|
+
"""Fallback exception used when google-api-core is unavailable."""
|
39
|
+
|
40
|
+
pass
|
41
|
+
|
42
|
+
from .embedded_credentials import get_embedded_credentials, get_project_id
|
43
|
+
|
44
|
+
logger = logging.getLogger(__name__)
|
45
|
+
|
46
|
+
|
47
|
+
def _sanitize_tool_name(tool: str) -> str:
    """Turn a tool name into a suffix usable inside an attribute key.

    The name is lower-cased, every run of characters outside ``a-z``, ``0-9``
    and ``_`` becomes a single underscore, and leading/trailing underscores
    are removed. If nothing is left, ``"tool"`` is returned.
    """
    collapsed = re.sub(r"[^a-z0-9_]+", "_", tool.lower())
    trimmed = collapsed.strip("_")
    if trimmed:
        return trimmed
    return "tool"
|
51
|
+
|
52
|
+
|
53
|
+
class _NoOpTelemetryManager:
    """Inert stand-in used when telemetry dependencies are unavailable.

    Offers the same three methods as ``TelemetryManager`` so callers can use
    either object without checking which one they hold.
    """

    def is_enabled(self) -> bool:
        """Report that telemetry is off."""
        return False

    def record_instance_span(self, *_, **__):  # pragma: no cover - trivial
        """Accept any arguments and discard them."""
        return None

    def shutdown(self) -> None:  # pragma: no cover - trivial
        """Do nothing; there is no provider to flush or stop."""
        return None
|
64
|
+
|
65
|
+
|
66
|
+
class TelemetryManager:
    """Manage OpenTelemetry setup and span emission for Zen.

    Setup happens in the constructor. When telemetry is disabled via the
    environment, optional dependencies are missing, no credentials are found,
    or setup fails, the instance stays disabled and
    ``record_instance_span`` returns without doing anything.
    """

    def __init__(self) -> None:
        self._enabled = False
        self._provider: Optional[TracerProvider] = None
        self._tracer = None
        self._initialize()

    def _initialize(self) -> None:
        """Create the Cloud Trace exporter, tracer provider and tracer.

        Any exception raised while loading credentials or building the
        exporter is logged and leaves the manager disabled.
        """
        if os.getenv("ZEN_TELEMETRY_DISABLED", "").lower() in {"1", "true", "yes"}:
            logger.debug("Telemetry disabled via ZEN_TELEMETRY_DISABLED")
            return

        if not (OPENTELEMETRY_AVAILABLE and GCP_EXPORT_AVAILABLE):
            logger.debug("OpenTelemetry or Google Cloud exporter not available; telemetry disabled")
            return

        try:
            # Credential loading is inside the guard so that a failure here
            # cannot propagate out of the constructor.
            credentials = get_embedded_credentials()
            if credentials is None:
                logger.debug("No telemetry credentials detected; telemetry disabled")
                return

            project_id = get_project_id()
            client = TraceServiceClient(credentials=credentials)
            exporter = CloudTraceSpanExporter(project_id=project_id, client=client)

            resource_attrs = {
                "service.name": "zen-orchestrator",
                # NOTE(review): the fallback version is hard-coded; confirm
                # whether it should track the installed package version.
                "service.version": os.getenv("ZEN_VERSION", "1.0.3"),
                "telemetry.sdk.language": "python",
                "telemetry.sdk.name": "opentelemetry",
                "zen.analytics.type": "community",
            }

            resource = Resource.create(resource_attrs)
            provider = TracerProvider(resource=resource)
            provider.add_span_processor(BatchSpanProcessor(exporter))

            trace.set_tracer_provider(provider)
            self._provider = provider
            # Take the tracer from our own provider rather than the global
            # one, so spans reach this exporter even if another provider was
            # registered globally first.
            self._tracer = provider.get_tracer("zen.telemetry")
            self._enabled = True
            logger.info("Telemetry initialized with community credentials")
        except Exception as exc:  # pragma: no cover - defensive guard
            logger.warning("Failed to initialize telemetry: %s", exc)
            self._enabled = False
            self._provider = None
            self._tracer = None

    # Internal helpers -----------------------------------------------

    @staticmethod
    def _to_int(value: Any, default: int = 0) -> int:
        """Return ``int(value)``, or ``default`` when it cannot be converted."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _to_float(value: Any) -> Optional[float]:
        """Return ``float(value)``, or None when it cannot be converted."""
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def _collect_attributes(
        self,
        batch_id: str,
        instance_name: str,
        status: Any,
        config: Any,
        cost_usd: Optional[float],
        workspace: Optional[str],
    ) -> Dict[str, Any]:
        """Build the span attribute dictionary from ``status`` and ``config``.

        Fields are read with ``getattr`` so objects lacking an attribute fall
        back to a default. Numeric fields that are missing, None or not
        convertible are recorded as 0 instead of raising.
        """
        to_int = self._to_int

        input_tokens = to_int(getattr(status, "input_tokens", 0))
        output_tokens = to_int(getattr(status, "output_tokens", 0))
        cache_read_tokens = to_int(getattr(status, "cache_read_tokens", 0))
        cache_creation_tokens = to_int(getattr(status, "cache_creation_tokens", 0))

        attributes: Dict[str, Any] = {
            "zen.batch.id": batch_id,
            "zen.instance.name": instance_name,
            "zen.instance.status": getattr(status, "status", "unknown"),
            "zen.instance.success": getattr(status, "status", "") == "completed",
            "zen.instance.permission_mode": getattr(config, "permission_mode", "unknown"),
            "zen.instance.tool_calls": to_int(getattr(status, "tool_calls", 0)),
            "zen.tokens.total": to_int(getattr(status, "total_tokens", 0)),
            "zen.tokens.input": input_tokens,
            "zen.tokens.output": output_tokens,
            "zen.tokens.cache.read": cache_read_tokens,
            "zen.tokens.cache.creation": cache_creation_tokens,
            "zen.tokens.cached_total": to_int(getattr(status, "cached_tokens", 0)),
        }

        start_time = getattr(status, "start_time", None)
        end_time = getattr(status, "end_time", None)
        if start_time and end_time:
            try:
                attributes["zen.instance.duration_ms"] = int((end_time - start_time) * 1000)
            except (TypeError, ValueError, OverflowError):
                # Timestamps that do not subtract to a number: omit duration.
                pass

        command = getattr(config, "command", None) or getattr(config, "prompt", None)
        if isinstance(command, str) and command.startswith("/"):
            attributes["zen.instance.command_type"] = "slash"
            # NOTE(review): the full slash command text is recorded here;
            # confirm it cannot carry user-specific arguments.
            attributes["zen.instance.command"] = command
        elif isinstance(command, str):
            attributes["zen.instance.command_type"] = "prompt"
        else:
            attributes["zen.instance.command_type"] = "unknown"

        # Identifiers are recorded only as truncated SHA-256 digests.
        session_id = getattr(config, "session_id", None)
        if session_id:
            session_hash = hashlib.sha256(str(session_id).encode("utf-8")).hexdigest()[:16]
            attributes["zen.session.hash"] = session_hash

        if workspace:
            workspace_hash = hashlib.sha256(str(workspace).encode("utf-8")).hexdigest()[:16]
            attributes["zen.workspace.hash"] = workspace_hash

        # Tool metadata
        tool_tokens = getattr(status, "tool_tokens", {}) or {}
        attributes["zen.tools.unique"] = len(tool_tokens)
        total_tool_tokens = 0
        for tool_name, tokens in tool_tokens.items():
            sanitized = _sanitize_tool_name(str(tool_name))
            token_count = to_int(tokens)
            attributes[f"zen.tools.tokens.{sanitized}"] = token_count
            total_tool_tokens += token_count
        attributes["zen.tokens.tools_total"] = total_tool_tokens

        tool_details = getattr(status, "tool_details", {}) or {}
        for tool_name, count in tool_details.items():
            sanitized = _sanitize_tool_name(str(tool_name))
            attributes[f"zen.tools.invocations.{sanitized}"] = to_int(count)

        # Cost metadata
        total_cost = self._to_float(cost_usd)
        if total_cost is not None:
            attributes["zen.cost.usd_total"] = round(total_cost, 6)

        reported_cost = self._to_float(getattr(status, "total_cost_usd", None))
        if reported_cost is not None:
            attributes["zen.cost.usd_reported"] = round(reported_cost, 6)

        # Derive cost components using fallback pricing (USD per million tokens)
        input_cost = (input_tokens / 1_000_000) * 3.00
        output_cost = (output_tokens / 1_000_000) * 15.00
        cache_read_cost = (cache_read_tokens / 1_000_000) * (3.00 * 0.1)
        cache_creation_cost = (cache_creation_tokens / 1_000_000) * (3.00 * 1.25)
        tool_cost = (total_tool_tokens / 1_000_000) * 3.00

        attributes.update(
            {
                "zen.cost.usd_input": round(input_cost, 6),
                "zen.cost.usd_output": round(output_cost, 6),
                "zen.cost.usd_cache_read": round(cache_read_cost, 6),
                "zen.cost.usd_cache_creation": round(cache_creation_cost, 6),
                "zen.cost.usd_tools": round(tool_cost, 6),
            }
        )
        return attributes

    # Public API -----------------------------------------------------

    def is_enabled(self) -> bool:
        """Return True only when setup succeeded and a tracer is available."""
        return self._enabled and self._tracer is not None

    def record_instance_span(
        self,
        batch_id: str,
        instance_name: str,
        status: Any,
        config: Any,
        cost_usd: Optional[float] = None,
        workspace: Optional[str] = None,
    ) -> None:
        """Emit one ``zen.instance`` span describing a finished instance.

        Args:
            batch_id: Identifier stored as ``zen.batch.id``.
            instance_name: Name stored as ``zen.instance.name``.
            status: Object read via ``getattr`` for status, token counts,
                timing, tool usage and reported cost.
            config: Object read via ``getattr`` for permission mode,
                command/prompt and session id.
            cost_usd: Optional total cost, rounded to six decimals.
            workspace: Optional workspace identifier; only a truncated
                SHA-256 digest of it is recorded.

        Does nothing when telemetry is not enabled.
        """
        if not self.is_enabled():
            return

        assert self._tracer is not None  # mypy hint

        attributes = self._collect_attributes(
            batch_id, instance_name, status, config, cost_usd, workspace
        )

        # Emit span
        try:
            with self._tracer.start_as_current_span(
                "zen.instance", kind=SpanKind.INTERNAL
            ) as span:
                for key, value in attributes.items():
                    # None is not a valid span attribute value; leave it out.
                    if value is not None:
                        span.set_attribute(key, value)
        except GoogleAPICallError as exc:  # pragma: no cover - network failure safety
            logger.warning("Failed to export telemetry span: %s", exc)

    def shutdown(self) -> None:
        """Flush pending spans and shut the provider down, ignoring errors."""
        provider = self._provider
        if not provider:
            return
        try:
            if hasattr(provider, "force_flush"):
                provider.force_flush()
            if hasattr(provider, "shutdown"):
                provider.shutdown()
        except Exception as exc:  # pragma: no cover
            logger.debug("Telemetry shutdown warning: %s", exc)
|
239
|
+
|
240
|
+
|
241
|
+
def _build_manager() -> TelemetryManager | _NoOpTelemetryManager:
    """Pick the manager implementation based on which imports succeeded.

    The real manager is created only when both the OpenTelemetry SDK and the
    Google Cloud exporter were importable; otherwise the no-op one is used.
    """
    dependencies_ready = OPENTELEMETRY_AVAILABLE and GCP_EXPORT_AVAILABLE
    return TelemetryManager() if dependencies_ready else _NoOpTelemetryManager()
|
245
|
+
|
246
|
+
|
247
|
+
telemetry_manager = _build_manager()
|
248
|
+
|
249
|
+
__all__ = ["TelemetryManager", "telemetry_manager"]
|