agentmetrics-shared 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentmetrics_shared/__init__.py +17 -0
- agentmetrics_shared/events.py +283 -0
- agentmetrics_shared/pricing.py +307 -0
- agentmetrics_shared/redact.py +161 -0
- agentmetrics_shared-0.2.0.dist-info/METADATA +5 -0
- agentmetrics_shared-0.2.0.dist-info/RECORD +7 -0
- agentmetrics_shared-0.2.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .events import (
|
|
2
|
+
AgentEndEvent,
|
|
3
|
+
LlmOutputEvent,
|
|
4
|
+
ToolEndEvent,
|
|
5
|
+
SessionStartEvent,
|
|
6
|
+
SessionMetricsEvent,
|
|
7
|
+
AuditEvent,
|
|
8
|
+
)
|
|
9
|
+
from .pricing import estimate_cost, MODEL_PRICING
|
|
10
|
+
from .redact import scrub_event, RedactionMode
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"AgentEndEvent", "LlmOutputEvent", "ToolEndEvent",
|
|
14
|
+
"SessionStartEvent", "SessionMetricsEvent", "AuditEvent",
|
|
15
|
+
"estimate_cost", "MODEL_PRICING",
|
|
16
|
+
"scrub_event", "RedactionMode",
|
|
17
|
+
]
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import socket
|
|
4
|
+
import time
|
|
5
|
+
import uuid
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _now_ms() -> int:
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    seconds = time.time()
    return int(seconds * 1000)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _new_id() -> str:
    """Return a newly generated random (version 4) UUID as a string."""
    fresh = uuid.uuid4()
    return str(fresh)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _hostname() -> str:
    """Return the local host name, or "" when it cannot be determined."""
    try:
        detected = socket.gethostname()
    except Exception:
        # Best effort: a failed host lookup must not propagate to the caller.
        detected = ""
    return detected
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class AgentEndEvent:
    """Canonical agent_end event. Call to_payload() to get the wire dict.

    Every field has a default, so an instance can be built incrementally.
    ``event_id`` and ``trace_id`` default to fresh UUID strings, ``ts`` to the
    current time in milliseconds, and ``host_id`` to the local host name.
    """

    event_id: str = field(default_factory=_new_id)
    trace_id: str = field(default_factory=_new_id)
    ts: int = field(default_factory=_now_ms)
    event_name: str = "agent_end"
    redaction_policy_version: str = "v1-strict"

    # Caller must supply these
    agent_id: str = ""
    platform: str = ""

    # Auto-detected
    host_id: str | None = field(default_factory=_hostname)

    status: str = "success"
    duration_ms: float = 0.0

    model: str | None = None
    model_provider: str | None = None

    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0

    llm_calls: int = 0
    tool_calls: int = 0
    tool_errors: int = 0
    tool_names: list[str] = field(default_factory=list)

    estimated_cost_usd: float | None = None

    step_count: int = 0
    loop_count: int = 0

    skills_loaded_count: int = 0
    skill_names_hash: str | None = None
    memory_writes_count: int = 0
    session_search_calls: int = 0
    delegation_depth: int = 0

    run_id: str | None = None
    session_id: str | None = None
    span_id: str | None = None
    parent_span_id: str | None = None
    parent_trace_id: str | None = None

    cronjob_id: str | None = None
    cron_run_id: str | None = None

    error: str | None = None

    secrets_blocked_count: int = 0
    pii_detected_count: int = 0

    workflow_id: str | None = None
    skill_name: str | None = None
    toolset: str | None = None
    sdk_version: str | None = None

    metadata: dict[str, Any] = field(default_factory=dict)

    def to_payload(self) -> dict[str, Any]:
        """Return a wire-format dict for this event.

        Always-included fields: event_id, trace_id, ts, event_name, agent_id,
        platform, status, redaction_policy_version, tool_calls, tool_errors,
        tool_names, input_tokens, output_tokens, duration_ms.

        Optional fields are included only when non-None / non-empty / non-zero.
        The exception is estimated_cost_usd, which is included whenever it is
        not None, so a computed cost of 0.0 is still reported.
        total_tokens is computed when the sum of the four token counts is > 0.

        The returned dict owns its containers: tool_names and metadata are
        shallow copies, so mutating the payload never mutates the event.
        """
        payload: dict[str, Any] = {
            "event_id": self.event_id,
            "trace_id": self.trace_id,
            "ts": self.ts,
            "event_name": self.event_name,
            "agent_id": self.agent_id,
            "platform": self.platform,
            "status": self.status,
            "redaction_policy_version": self.redaction_policy_version,
            "tool_calls": self.tool_calls,
            "tool_errors": self.tool_errors,
            "tool_names": list(self.tool_names),
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "duration_ms": self.duration_ms,
        }

        # Compute total_tokens when any token count is positive
        total = (
            self.input_tokens
            + self.output_tokens
            + self.cache_read_tokens
            + self.cache_write_tokens
        )
        if total > 0:
            payload["total_tokens"] = total

        # Optional fields, listed in the order they are emitted on the wire.
        optional_in_order = (
            "model",
            "model_provider",
            "error",
            "cache_read_tokens",
            "cache_write_tokens",
            "llm_calls",
            "step_count",
            "loop_count",
            "estimated_cost_usd",
            "host_id",
            "workflow_id",
            "skill_name",
            "toolset",
            "secrets_blocked_count",
            "pii_detected_count",
            "run_id",
            "session_id",
            "span_id",
            "parent_span_id",
            "parent_trace_id",
            "skills_loaded_count",
            "skill_names_hash",
            "memory_writes_count",
            "session_search_calls",
            "delegation_depth",
            "cronjob_id",
            "cron_run_id",
            "sdk_version",
        )
        for name in optional_in_order:
            value = getattr(self, name)
            if name == "estimated_cost_usd":
                # 0.0 is a real cost; only "unknown" (None) is dropped.
                keep = value is not None
            else:
                keep = bool(value)
            if keep:
                payload[name] = value

        if self.metadata:
            # Copy, like tool_names above, so the payload does not alias the
            # event's own mutable dict.
            payload["metadata"] = dict(self.metadata)

        return payload
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@dataclass
class SessionStartEvent:
    """Event record whose event_name defaults to "session_start".

    All fields have defaults. The identifier and context fields default to
    empty strings, so callers are expected to fill in the ones they need.
    """

    # Unique id for this event; defaults to a fresh id from _new_id().
    event_id: str = field(default_factory=_new_id)
    # Correlation / context identifiers (empty unless supplied by the caller).
    trace_id: str = ""
    session_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "session_start"
    # Creation time; defaults to _now_ms().
    ts: int = field(default_factory=_now_ms)
    redaction_policy_version: str = "v1-strict"
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@dataclass
class SessionMetricsEvent:
    """Fired once at session end — aggregates all runs in the session."""

    # Unique id for this event; defaults to a fresh id from _new_id().
    event_id: str = field(default_factory=_new_id)
    # Correlation / context identifiers (empty unless supplied by the caller).
    trace_id: str = ""
    session_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "session_metrics"
    # Creation time; defaults to _now_ms().
    ts: int = field(default_factory=_now_ms)
    redaction_policy_version: str = "v1-strict"

    # Session-level totals; every counter starts at zero.
    # NOTE(review): duration_ms is an int here but a float on AgentEndEvent.
    duration_ms: int = 0
    run_count: int = 0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cache_read_tokens: int = 0
    total_cache_write_tokens: int = 0
    total_tool_calls: int = 0
    total_estimated_cost_usd: float = 0.0
    compactions: int = 0
    resets: int = 0
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@dataclass
class LlmOutputEvent:
    """Event record whose event_name defaults to "llm_output".

    Carries the model, provider, token counts, finish reason and estimated
    cost. Presumably one instance describes a single LLM response — confirm
    against the emitting code.
    """

    # Unique id for this event; defaults to a fresh id from _new_id().
    event_id: str = field(default_factory=_new_id)
    # Correlation / context identifiers (empty unless supplied by the caller).
    trace_id: str = ""
    session_id: str = ""
    run_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "llm_output"
    # Creation time; defaults to _now_ms().
    ts: int = field(default_factory=_now_ms)
    redaction_policy_version: str = "v1-strict"

    # NOTE(review): this class names the field "provider", while
    # AgentEndEvent uses "model_provider".
    model: str = ""
    provider: str = ""
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0
    finish_reason: str = ""
    # Defaults to 0.0 here, unlike AgentEndEvent where "unknown" is None.
    estimated_cost_usd: float = 0.0
    span_id: str = ""
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@dataclass
class ToolEndEvent:
    """Event record whose event_name defaults to "tool_end".

    Carries the tool name, call id, duration, status and error text.
    Presumably one instance describes one finished tool call — confirm
    against the emitting code.
    """

    # Unique id for this event; defaults to a fresh id from _new_id().
    event_id: str = field(default_factory=_new_id)
    # Correlation / context identifiers (empty unless supplied by the caller).
    trace_id: str = ""
    session_id: str = ""
    run_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "tool_end"
    # Creation time; defaults to _now_ms().
    ts: int = field(default_factory=_now_ms)
    redaction_policy_version: str = "v1-strict"

    tool_name: str = ""
    tool_call_id: str = ""
    duration_ms: int = 0
    status: str = "success"
    # Empty string (not None) when there is no error.
    error: str = ""
    span_id: str = ""
    parent_span_id: str = ""
|
|
268
|
+
parent_span_id: str = ""
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
@dataclass
class AuditEvent:
    """Security/config audit events — redaction changes, access denials, WAL recovery."""

    # Unique id for this event; defaults to a fresh id from _new_id().
    event_id: str = field(default_factory=_new_id)
    # Correlation / context identifiers (empty unless supplied by the caller).
    trace_id: str = ""
    session_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "audit"
    # Creation time; defaults to _now_ms().
    ts: int = field(default_factory=_now_ms)
    status: str = "success"
    # Free-form details; each instance gets its own empty dict by default.
    # NOTE(review): unlike the other event classes in this module, this one
    # has no redaction_policy_version field — confirm that is intentional.
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import urllib.request
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
# ── Types ─────────────────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
# (input_per_M, output_per_M, cache_read_per_M | None, cache_write_per_M | None)
|
|
13
|
+
_T = tuple[float, float, float | None, float | None]
|
|
14
|
+
|
|
15
|
+
# ── Method 1: Static table — official provider docs ───────────────────────────
|
|
16
|
+
#
|
|
17
|
+
# Prefix matching: "claude-opus-4-20250514" matches "claude-opus-4".
|
|
18
|
+
# Keys sorted by length (longest first) so "gpt-4o-mini" matches before "gpt-4o".
|
|
19
|
+
#
|
|
20
|
+
# Sources:
|
|
21
|
+
# Anthropic : https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
|
|
22
|
+
# OpenAI : https://openai.com/api/pricing/
|
|
23
|
+
# Google : https://ai.google.dev/pricing
|
|
24
|
+
# DeepSeek : https://api-docs.deepseek.com/quick_start/pricing
|
|
25
|
+
# AWS Bedrock: https://aws.amazon.com/bedrock/pricing/
|
|
26
|
+
|
|
27
|
+
# Static price table keyed by lowercase model-ID prefix.
# Each value is (input, output, cache_read | None, cache_write | None),
# expressed per one million tokens; None means no price is recorded for
# that token class.
MODEL_PRICING: dict[str, _T] = {
    # ── Anthropic ────────────────────────────────────────────────────────────
    "claude-opus-4": (15.00, 75.00, 1.50, 18.75),
    "claude-sonnet-4": ( 3.00, 15.00, 0.30, 3.75),
    "claude-haiku-4": ( 0.80, 4.00, 0.08, 1.00),
    "claude-3-7-sonnet": ( 3.00, 15.00, 0.30, 3.75),
    "claude-3-5-sonnet": ( 3.00, 15.00, 0.30, 3.75),
    "claude-3-5-haiku": ( 0.80, 4.00, 0.08, 1.00),
    "claude-3-opus": (15.00, 75.00, 1.50, 18.75),
    "claude-3-haiku": ( 0.25, 1.25, 0.03, 0.30),
    "claude-3-sonnet": ( 3.00, 15.00, None, None),
    # ── OpenAI ───────────────────────────────────────────────────────────────
    "gpt-4.1-nano": ( 0.10, 0.40, 0.025, None),
    "gpt-4.1-mini": ( 0.40, 1.60, 0.10, None),
    "gpt-4.1": ( 2.00, 8.00, 0.50, None),
    "gpt-4o-mini": ( 0.15, 0.60, 0.075, None),
    "gpt-4o": ( 2.50, 10.00, 1.25, None),
    "gpt-4-turbo": (10.00, 30.00, None, None),
    "gpt-4": (30.00, 60.00, None, None),
    "gpt-3.5-turbo": ( 0.50, 1.50, None, None),
    "o3-mini": ( 1.10, 4.40, 0.55, None),
    "o3": (10.00, 40.00, 2.50, None),
    "o1-mini": ( 1.10, 4.40, 0.55, None),
    "o1": (15.00, 60.00, 7.50, None),
    # ── Google Gemini ─────────────────────────────────────────────────────────
    "gemini-2.5-pro": ( 1.25, 10.00, None, None),
    "gemini-2.5-flash": ( 0.15, 0.60, None, None),
    "gemini-2.0-flash": ( 0.10, 0.40, None, None),
    "gemini-1.5-pro": ( 1.25, 5.00, None, None),
    "gemini-1.5-flash": ( 0.075, 0.30, None, None),
    # ── DeepSeek ─────────────────────────────────────────────────────────────
    "deepseek-reasoner": ( 0.55, 2.19, None, None),
    "deepseek-chat": ( 0.14, 0.28, None, None),
    "deepseek-coder": ( 0.14, 0.28, None, None),
    # ── Meta / Llama ─────────────────────────────────────────────────────────
    "llama-4-maverick": ( 0.27, 0.85, None, None),
    "llama-4-scout": ( 0.18, 0.59, None, None),
    "llama-3.3-70b": ( 0.88, 0.88, None, None),
    "llama-3-70b": ( 0.65, 2.75, None, None),
    "llama-3-8b": ( 0.05, 0.20, None, None),
    # ── Alibaba / Qwen ───────────────────────────────────────────────────────
    "qwen3-235b": ( 4.00, 16.00, None, None),
    "qwen3-32b": ( 0.30, 1.20, None, None),
    "qwen3-4b": ( 0.02, 0.08, None, None),
    # ── Arcee ────────────────────────────────────────────────────────────────
    "trinity-large": ( 0.25, 1.00, 0.25, 0.25),
    "trinity-mini": ( 0.045, 0.15, 0.045, 0.045),
    # ── Together AI / HuggingFace (namespace-stripped prefix keys) ────────────
    "kimi-k2": ( 0.50, 2.80, None, None),
    "deepseek-v3": ( 0.60, 1.25, None, None),
    "deepseek-r1": ( 3.00, 7.00, None, None),
    # ── Vercel AI Gateway ────────────────────────────────────────────────────
    "gpt-5.4-pro": (30.00, 180.00, None, None),
    "gpt-5.4": ( 2.50, 15.00, None, None),
    # ── AWS Bedrock ──────────────────────────────────────────────────────────
    "anthropic.claude-opus-4": (15.00, 75.00, None, None),
    "anthropic.claude-sonnet-4": ( 3.00, 15.00, None, None),
    "anthropic.claude-haiku-4": ( 0.80, 4.00, None, None),
    "anthropic.claude-3-5-sonnet": ( 3.00, 15.00, None, None),
    "anthropic.claude-3-5-haiku": ( 0.80, 4.00, None, None),
    "amazon.nova-pro": ( 0.80, 3.20, None, None),
    "amazon.nova-lite": ( 0.06, 0.24, None, None),
    "amazon.nova-micro": ( 0.035, 0.14, None, None),
}
|
|
91
|
+
|
|
92
|
+
_SORTED_KEYS: list[str] = sorted(MODEL_PRICING, key=len, reverse=True)
|
|
93
|
+
|
|
94
|
+
# ── Method 2: Runtime registry — platforms register their own catalog ─────────
|
|
95
|
+
|
|
96
|
+
_RUNTIME_REGISTRY: dict[str, _T] = {}
|
|
97
|
+
_SORTED_RUNTIME: list[str] = []
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def register_prices(
    catalog: dict[str, dict[str, float | None] | _T],
) -> None:
    """Add platform-specific model prices to the runtime registry.

    ``catalog`` maps a model ID (or ID prefix) to either a mapping::

        { "model-id": {"input": 1.25, "output": 5.00, "cache_read": 0.12, "cache_write": None} }

    or a 4-tuple ``(input, output, cache_read, cache_write)``::

        { "model-id": (1.25, 5.00, 0.12, None) }

    Keys are stored lowercased. Mapping entries are converted to a tuple:
    a missing or falsy "input" / "output" becomes 0.0, and a missing or None
    cache price stays None. Tuple entries are stored as given.

    Registered prices take precedence over the static table but are overridden
    by cost_overrides passed directly to estimate_cost().
    """
    global _SORTED_RUNTIME

    def _optional_price(raw: Any) -> float | None:
        return None if raw is None else float(raw)

    for model_id, spec in catalog.items():
        registry_key = model_id.lower()
        if not isinstance(spec, dict):
            normalized: _T = spec
        else:
            normalized = (
                float(spec.get("input") or 0),
                float(spec.get("output") or 0),
                _optional_price(spec.get("cache_read")),
                _optional_price(spec.get("cache_write")),
            )
        _RUNTIME_REGISTRY[registry_key] = normalized

    # Longest keys first so the most specific prefix wins during lookup.
    _SORTED_RUNTIME = sorted(_RUNTIME_REGISTRY, key=len, reverse=True)
    logger.debug("agentmetrics: registered %d model price entries", len(_RUNTIME_REGISTRY))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ── Method 3: LiteLLM — optional import, 300+ models ─────────────────────────
|
|
130
|
+
|
|
131
|
+
def _try_litellm(
    model_lower: str,
    input_tokens: int,
    output_tokens: int,
    cache_read_tokens: int,
    cache_write_tokens: int,
) -> float | None:
    """Price the call using litellm's ``model_cost`` table, if available.

    Returns None when litellm cannot be imported, its cost table is empty,
    the model is not in the table, or anything else goes wrong. This lookup
    is strictly best effort and never raises.
    """
    try:
        import re

        import litellm  # type: ignore[import-not-found]

        table: dict[str, Any] = getattr(litellm, "model_cost", None) or {}
        if not table:
            return None

        # litellm uses exact model IDs; try exact, then strip date suffix
        record = table.get(model_lower)
        if not record:
            undated = re.sub(r"-\d{8}$", "", model_lower)
            record = table.get(undated)
        if not record:
            return None

        # litellm stores per-token prices; _compute expects per-million.
        input_rate = (record.get("input_cost_per_token") or 0) * 1_000_000
        output_rate = (record.get("output_cost_per_token") or 0) * 1_000_000

        raw_read = record.get("cache_read_input_token_cost")
        raw_write = record.get("cache_creation_input_token_cost")
        read_rate: float | None = None
        if raw_read is not None:
            read_rate = float(raw_read) * 1_000_000
        write_rate: float | None = None
        if raw_write is not None:
            write_rate = float(raw_write) * 1_000_000

        rates = (input_rate, output_rate, read_rate, write_rate)
        return _compute(rates, input_tokens, output_tokens, cache_read_tokens, cache_write_tokens)
    except Exception:
        return None
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# ── Method 4: OpenRouter API — explicit opt-in, covers everything OR routes ───
|
|
168
|
+
|
|
169
|
+
_OPENROUTER_CACHE: dict[str, _T] = {}
|
|
170
|
+
_SORTED_OPENROUTER: list[str] = []
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def populate_from_openrouter(
    api_key: str,
    base_url: str = "https://openrouter.ai",
    timeout: int = 15,
) -> int:
    """Fetch live model pricing from OpenRouter's /api/v1/models endpoint.

    Call once at application startup. Results are cached in memory for the
    process lifetime. Returns the number of models successfully loaded.

    This function never raises for network or payload problems. A failed
    request or an unexpected response shape logs a warning and returns 0.
    Individual model entries that are malformed, or that advertise a negative
    prompt/completion price, are skipped.

    Requires network access and a valid OpenRouter API key.

    Args:
        api_key: Sent as a Bearer token in the Authorization header.
        base_url: Service root; "/api/v1/models" is appended to it.
        timeout: Timeout passed to ``urllib.request.urlopen``.
    """
    global _SORTED_OPENROUTER
    url = f"{base_url.rstrip('/')}/api/v1/models"
    req = urllib.request.Request(
        url,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read())
    except Exception as exc:
        logger.warning("agentmetrics: populate_from_openrouter failed: %s", exc)
        return 0

    # Valid JSON is not necessarily the expected {"data": [...]} envelope.
    models = data.get("data", []) if isinstance(data, dict) else None
    if not isinstance(models, list):
        logger.warning("agentmetrics: populate_from_openrouter got an unexpected response shape")
        return 0

    loaded = 0
    for m in models:
        if not isinstance(m, dict):
            continue
        raw_id = m.get("id")
        if not isinstance(raw_id, str) or not raw_id:
            continue
        model_id = raw_id.lower()

        p = m.get("pricing") or {}
        if not isinstance(p, dict):
            continue
        prompt = p.get("prompt")
        completion = p.get("completion")
        if prompt is None and completion is None:
            continue

        cr_raw = p.get("cache_read") or p.get("cached_prompt") or p.get("input_cache_read")
        cw_raw = p.get("cache_write") or p.get("cache_creation") or p.get("input_cache_write")
        try:
            in_per_m = float(prompt or 0) * 1_000_000
            out_per_m = float(completion or 0) * 1_000_000
            cr_per_m: float | None = float(cr_raw) * 1_000_000 if cr_raw is not None else None
            cw_per_m: float | None = float(cw_raw) * 1_000_000 if cw_raw is not None else None
        except (TypeError, ValueError):
            # One unparsable entry must not abort the whole catalog load.
            logger.debug("agentmetrics: skipping OpenRouter model %r with unparsable pricing", model_id)
            continue

        # A negative price is not a real price (presumably a "no fixed price"
        # sentinel on router-style entries — confirm against the API docs);
        # storing it would produce negative cost estimates.
        if in_per_m < 0 or out_per_m < 0:
            continue
        if cr_per_m is not None and cr_per_m < 0:
            cr_per_m = None
        if cw_per_m is not None and cw_per_m < 0:
            cw_per_m = None

        # Store with stripped namespace so lookups work (estimate_cost strips before lookup)
        key = model_id.split("/", 1)[1] if "/" in model_id else model_id
        _OPENROUTER_CACHE[key] = (in_per_m, out_per_m, cr_per_m, cw_per_m)
        loaded += 1

    # Longest keys first so the most specific prefix wins during lookup.
    _SORTED_OPENROUTER = sorted(_OPENROUTER_CACHE, key=len, reverse=True)
    logger.info("agentmetrics: loaded %d model prices from OpenRouter", loaded)
    return loaded
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# ── Core estimation — tries all methods in priority order ─────────────────────
|
|
228
|
+
|
|
229
|
+
def estimate_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
    cache_read_tokens: int = 0,
    cache_write_tokens: int = 0,
    cost_overrides: dict[str, _T] | None = None,
) -> float | None:
    """Return USD cost for the given model and token counts, or None if unknown.

    Returns 0.0 without any lookup when all four token counts are zero.

    Resolution order:
      1. cost_overrides — caller-supplied explicit prices (highest priority)
      2. Runtime registry — registered via register_prices() at startup
      3. Static table — official provider docs (40+ known models)
      4. LiteLLM — if litellm is installed (300+ models)
      5. OpenRouter cache — if populate_from_openrouter() was called
      6. None — unknown model, never guesses

    Strips provider namespace: "openai/gpt-4o" → "gpt-4o".
    Prefix matches versioned IDs: "claude-opus-4-20250514" → "claude-opus-4".

    Keys in cost_overrides and the runtime registry are caller-supplied and
    may carry the namespace (for example "openai/gpt-4o"). They are therefore
    matched against both the full and the namespace-stripped model name.
    Within each source the longest matching key wins.
    """
    if not any([input_tokens, output_tokens, cache_read_tokens, cache_write_tokens]):
        return 0.0

    full_name = (model or "").lower().strip()
    if not full_name:
        return None

    # Strip provider namespace: "openai/gpt-4o" → "gpt-4o"
    model_lower = full_name.split("/", 1)[1] if "/" in full_name else full_name

    def _matches_caller_key(key: str) -> bool:
        # Without the full-name check a namespaced caller key could never match.
        return model_lower.startswith(key) or full_name.startswith(key)

    # 1. Explicit overrides
    if cost_overrides:
        for key in sorted(cost_overrides, key=len, reverse=True):
            if _matches_caller_key(key.lower()):
                return _compute(cost_overrides[key], input_tokens, output_tokens, cache_read_tokens, cache_write_tokens)

    # 2. Runtime registry (platforms register at startup)
    for key in _SORTED_RUNTIME:
        if _matches_caller_key(key):
            return _compute(_RUNTIME_REGISTRY[key], input_tokens, output_tokens, cache_read_tokens, cache_write_tokens)

    # 3. Static table (official docs, pre-sorted by length)
    for key in _SORTED_KEYS:
        if model_lower.startswith(key):
            return _compute(MODEL_PRICING[key], input_tokens, output_tokens, cache_read_tokens, cache_write_tokens)

    # 4. LiteLLM (optional import)
    litellm_result = _try_litellm(model_lower, input_tokens, output_tokens, cache_read_tokens, cache_write_tokens)
    if litellm_result is not None:
        return litellm_result

    # 5. OpenRouter cache (if populated at startup; keys are stored stripped)
    for key in _SORTED_OPENROUTER:
        if model_lower.startswith(key):
            return _compute(_OPENROUTER_CACHE[key], input_tokens, output_tokens, cache_read_tokens, cache_write_tokens)

    return None  # unknown model — never guesses
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _compute(
    prices: _T,
    in_t: int,
    out_t: int,
    cache_read: int,
    cache_write: int,
) -> float:
    """Turn per-million-token prices and token counts into a cost.

    ``prices`` is (input, output[, cache_read[, cache_write]]). A missing or
    None cache price means those tokens add nothing to the total. A falsy
    input or output price is treated as 0.0. The result is rounded to six
    decimal places.
    """
    per_million = 1_000_000.0

    input_rate: float = prices[0] or 0.0
    output_rate: float = prices[1] or 0.0
    # Shorter price tuples are tolerated: absent slots behave like None.
    read_rate: float | None = None if len(prices) <= 2 else prices[2]
    write_rate: float | None = None if len(prices) <= 3 else prices[3]

    total = (in_t / per_million) * input_rate + (out_t / per_million) * output_rate
    if cache_read and read_rate is not None:
        total += (cache_read / per_million) * read_rate
    if cache_write and write_rate is not None:
        total += (cache_write / per_million) * write_rate
    return round(total, 6)
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
import time
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from .config import AgentMetricsConfig
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
# Patterns that identify secrets in free-form text.
|
|
15
|
+
# Order matters — more specific patterns first.
|
|
16
|
+
# (regex, replacement) pairs, applied in this order to free-form text.
_SECRET_PATTERNS: list[tuple[str, str]] = [
    (r"sk-[A-Za-z0-9\-_]{20,}", "[REDACTED]"),
    (r"am_[A-Za-z0-9\-_]{16,}", "[REDACTED]"),
    # JWT-like tokens (two base64 segments separated by a dot)
    (r"\bey[A-Za-z0-9\-_]{20,}\.[A-Za-z0-9\-_]{20,}", "[REDACTED]"),
    # Key-value patterns: api_key=<value>, password: <value>, etc.
    #
    # Group 1 must be the KEY NAME. The replacement writes "\1=[REDACTED]",
    # so capturing the value instead would echo the secret into the output.
    # The value itself is matched but not captured.
    #
    # The negative lookahead skips values that an earlier pattern already
    # replaced (e.g. "api_key=sk-..." -> "api_key=[REDACTED]"). Without it the
    # placeholder would be re-matched, mangled and counted a second time.
    (
        r"(?i)(api[-_]?key|apikey|api[-_]?token|access[-_]?token|"
        r"secret|password|passwd|auth)[=:\s\"']+"
        r"(?!\[REDACTED\])[^\s\"'&,\]\}\n]{8,}",
        r"\1=[REDACTED]",
    ),
]

# Compiled once at import time; scrubbing runs these for every string field.
_COMPILED_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    (re.compile(p), r) for p, r in _SECRET_PATTERNS
]
|
|
32
|
+
|
|
33
|
+
# FNV-1a 32-bit constants for tool name hashing (matches npm package implementation).
|
|
34
|
+
_FNV_OFFSET = 2166136261
|
|
35
|
+
_FNV_PRIME = 16777619
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class RedactionMode(str, Enum):
    """Redaction modes; members are also ``str`` instances equal to their value."""

    STRICT = "strict"
    MODERATE = "moderate"
    # Secret scrubbing is skipped entirely in this mode (see scrub_secrets_and_count).
    DEBUG = "debug"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def active_mode(cfg: AgentMetricsConfig) -> RedactionMode:
    """Resolve the effective redaction mode for ``cfg``.

    ``cfg.redaction_mode`` is parsed as a RedactionMode; a value that is not a
    valid mode falls back to STRICT. DEBUG has an optional expiry: once
    ``time.time()`` is past ``cfg.debug_expires_at`` the mode reverts to
    STRICT and the reversion is logged.
    """
    try:
        configured = RedactionMode(cfg.redaction_mode)
    except ValueError:
        return RedactionMode.STRICT

    if configured != RedactionMode.DEBUG:
        return configured

    expiry = cfg.debug_expires_at
    if expiry is not None and time.time() > expiry:
        logger.info("agentmetrics: debug mode expired — reverting to strict")
        return RedactionMode.STRICT
    return configured
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def scrub_secrets(text: str, mode: RedactionMode) -> str:
    """Return ``text`` with API keys, tokens and passwords redacted.

    Thin wrapper over scrub_secrets_and_count() that drops the count.
    """
    return scrub_secrets_and_count(text, mode)[0]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def scrub_secrets_and_count(text: str, mode: RedactionMode) -> tuple[str, int]:
    """Redact secrets in ``text`` and report how many replacements were made.

    Returns ``(scrubbed_text, replacement_count)``. The count feeds
    secrets_blocked_count on agent_end events.

    Empty (falsy) text is returned unchanged with a count of 0. Scrubbing
    runs in every mode except DEBUG, which also returns the text unchanged.
    """
    if not text:
        return text, 0
    if mode == RedactionMode.DEBUG:
        return text, 0

    current = text
    hits = 0
    # Patterns are applied sequentially, each on the output of the previous.
    for regex, substitute in _COMPILED_PATTERNS:
        current, replaced = regex.subn(substitute, current)
        hits += replaced
    return current, hits
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def redact_tool_name(name: str, cfg: AgentMetricsConfig) -> str | None:
    """Apply the tool-name export policy in ``cfg.exported_tool_names``.

    Policies:
      "off"        never export; returns None.
      "hash"       always export the hashed name.
      "blocklist"  names listed in ``cfg.redact_tool_names`` are hashed,
                   every other name is exported as-is.
      "allowlist"  only names listed in ``cfg.redact_tool_names`` are
                   exported (as-is); every other name returns None.
      other        the name is exported unchanged.

    Returns None when the name must not be sent.
    """
    policy = cfg.exported_tool_names

    if policy == "off":
        return None
    if policy == "hash":
        return _hash_name(name)

    if policy in ("blocklist", "allowlist"):
        # The same config list serves as block list or allow list, by policy.
        listed = name in (cfg.redact_tool_names or [])
        if policy == "blocklist":
            return _hash_name(name) if listed else name
        return name if listed else None

    return name
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def scrub_event(payload: dict[str, Any], mode: RedactionMode) -> dict[str, Any]:
    """Return ``payload`` with secrets scrubbed from every string value.

    In DEBUG mode the same payload object is returned untouched. Otherwise a
    new structure is built, with nested dicts and lists rebuilt recursively.
    """
    if mode != RedactionMode.DEBUG:
        scrubbed, _count = _scrub_dict_and_count(payload, mode)
        return scrubbed  # type: ignore[return-value]
    return payload
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def scrub_event_and_count(payload: dict[str, Any], mode: RedactionMode) -> tuple[dict[str, Any], int]:
    """Scrub every string value in ``payload`` and count the redactions.

    Returns ``(scrubbed_payload, total_blocked)``. In DEBUG mode the payload
    is returned as-is with a count of 0.

    Used by _enqueue() to populate secrets_blocked_count on agent_end events.
    """
    if mode != RedactionMode.DEBUG:
        scrubbed, blocked = _scrub_dict_and_count(payload, mode)
        return scrubbed, blocked  # type: ignore[return-value]
    return payload, 0
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _scrub_dict(obj: Any, mode: RedactionMode) -> Any:  # noqa: ANN401 — recursive Any is intentional
    """Return the scrubbed copy of ``obj``, discarding the redaction count."""
    return _scrub_dict_and_count(obj, mode)[0]
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _scrub_dict_and_count(obj: Any, mode: RedactionMode) -> tuple[Any, int]:  # noqa: ANN401
    """Recursively scrub secrets from ``obj`` and count the redactions.

    Strings are scrubbed. Dicts, lists and tuples are rebuilt with their
    values scrubbed; dict keys are kept as-is. Any other value is returned
    unchanged. Tuples come back as tuples (named tuples keep their type), so
    a secret stored inside a tuple is scrubbed rather than passed through.

    Returns ``(scrubbed_obj, replacement_count)``.
    """
    if isinstance(obj, str):
        return scrub_secrets_and_count(obj, mode)

    if isinstance(obj, dict):
        result_dict: dict[str, Any] = {}
        total = 0
        for k, v in obj.items():
            scrubbed_v, n = _scrub_dict_and_count(v, mode)
            result_dict[k] = scrubbed_v
            total += n
        return result_dict, total

    if isinstance(obj, (list, tuple)):
        items: list[Any] = []
        total = 0
        for v in obj:
            scrubbed_v, n = _scrub_dict_and_count(v, mode)
            items.append(scrubbed_v)
            total += n
        if isinstance(obj, list):
            return items, total
        if hasattr(obj, "_fields"):
            # Named tuples take positional fields, not a single iterable.
            return type(obj)(*items), total
        return tuple(items), total

    return obj, 0
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _hash_name(name: str) -> str:
    """Pseudonymise a tool name with a 32-bit FNV-1a hash.

    The name is hashed over the bytes of ``name.encode()`` and returned as
    "t_" followed by eight lowercase hex digits. Intended to use the same
    algorithm as the npm package.
    """
    digest = _FNV_OFFSET
    for octet in name.encode():
        digest ^= octet
        # Multiply, then truncate back to 32 bits.
        digest = (digest * _FNV_PRIME) & 0xFFFFFFFF
    return f"t_{digest:08x}"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def redaction_policy_version(mode: RedactionMode) -> str:
    """Return the policy version label for ``mode``: "v1-" plus the mode's value."""
    return "v1-{}".format(mode.value)
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
agentmetrics_shared/__init__.py,sha256=ccB0r1KVmx090UUAY4P17T7Hn7sMTbJaY_zdhP95YYw,448
|
|
2
|
+
agentmetrics_shared/events.py,sha256=8MMH8sTFc8gJDZvCZdANuPssYBPr7-NFMWtwkaI-Hu0,8804
|
|
3
|
+
agentmetrics_shared/pricing.py,sha256=dCwfbwTGoJo_wS5ldiWoaKnn-1RbHp0ZtH7yQ6bKeHo,15097
|
|
4
|
+
agentmetrics_shared/redact.py,sha256=BVgrR68ZnkOoo4Ai-GIPoTf9I51hdYyGZ8zLYj5Or7E,5164
|
|
5
|
+
agentmetrics_shared-0.2.0.dist-info/METADATA,sha256=NkKglAc41DgMkAAZgNwB0ETRQjW91cCZCy4LW0cyE5Q,174
|
|
6
|
+
agentmetrics_shared-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
7
|
+
agentmetrics_shared-0.2.0.dist-info/RECORD,,
|