agentmetrics-shared 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ # Generated — dashboard SPA copied here during server build
2
+ api/app/static/
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *.pyo
8
+ .venv/
9
+ .env
10
+ *.egg-info/
11
+ dist/
12
+ build/
13
+ .mypy_cache/
14
+ .ruff_cache/
15
+ .pytest_cache/
16
+ htmlcov/
17
+ .coverage
18
+ coverage.xml
19
+
20
+ # Node / JS
21
+ node_modules/
22
+ .next/
23
+ .turbo/
24
+ dist/
25
+ build/
26
+ *.tsbuildinfo
27
+ .pnpm-store/
28
+
29
+ # Env files
30
+ .env
31
+ .env.local
32
+ .env.production
33
+ .env.*.local
34
+ api/.env.local
35
+ dashboard/.env.local
36
+
37
+ # Build artifacts inside packages
38
+ packages/python/dist/
39
+ packages/python/*.egg-info/
40
+ packages/js/dist/
41
+ packages/js/node_modules/
42
+
43
+ # Internal docs — never public
44
+ .internal/
45
+ PLAN.md
46
+ CODE.md
47
+
48
+ # OS
49
+ .DS_Store
50
+ Thumbs.db
51
+
52
+ # IDE
53
+ .vscode/
54
+ .idea/
55
+ *.swp
56
+
57
+ # Docker
58
+ *.log
59
+ .internal
60
+
61
+ # Local data (SQLite DB when running without Docker)
62
+ data/
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentmetrics-shared
3
+ Version: 0.2.0
4
+ Summary: Shared event schema, pricing, and redaction for all agentmetrics integrations
5
+ Requires-Python: >=3.11
@@ -0,0 +1,17 @@
1
+ from .events import (
2
+ AgentEndEvent,
3
+ LlmOutputEvent,
4
+ ToolEndEvent,
5
+ SessionStartEvent,
6
+ SessionMetricsEvent,
7
+ AuditEvent,
8
+ )
9
+ from .pricing import estimate_cost, MODEL_PRICING
10
+ from .redact import scrub_event, RedactionMode
11
+
12
+ __all__ = [
13
+ "AgentEndEvent", "LlmOutputEvent", "ToolEndEvent",
14
+ "SessionStartEvent", "SessionMetricsEvent", "AuditEvent",
15
+ "estimate_cost", "MODEL_PRICING",
16
+ "scrub_event", "RedactionMode",
17
+ ]
@@ -0,0 +1,283 @@
1
+ from __future__ import annotations
2
+
3
+ import socket
4
+ import time
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+
10
def _now_ms() -> int:
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    seconds_since_epoch = time.time()
    return int(seconds_since_epoch * 1000)
12
+
13
+
14
def _new_id() -> str:
    """Return a freshly generated random UUID (version 4) as a string."""
    return f"{uuid.uuid4()}"
16
+
17
+
18
def _hostname() -> str:
    """Return this machine's host name, or an empty string if the lookup fails."""
    try:
        name = socket.gethostname()
    except Exception:
        # Best-effort: a failed lookup must not prevent an event from being built.
        return ""
    return name
23
+
24
+
25
@dataclass
class AgentEndEvent:
    """Canonical agent_end event. Call to_payload() to get the wire dict."""

    # Identity / envelope — generated automatically unless overridden.
    event_id: str = field(default_factory=_new_id)
    trace_id: str = field(default_factory=_new_id)
    ts: int = field(default_factory=_now_ms)
    event_name: str = "agent_end"
    redaction_policy_version: str = "v1-strict"

    # Caller must supply these
    agent_id: str = ""
    platform: str = ""

    # Auto-detected
    host_id: str | None = field(default_factory=_hostname)

    status: str = "success"
    duration_ms: float = 0.0

    model: str | None = None
    model_provider: str | None = None

    # Token accounting
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0

    # Call counters
    llm_calls: int = 0
    tool_calls: int = 0
    tool_errors: int = 0
    tool_names: list[str] = field(default_factory=list)

    estimated_cost_usd: float | None = None

    step_count: int = 0
    loop_count: int = 0

    skills_loaded_count: int = 0
    skill_names_hash: str | None = None
    memory_writes_count: int = 0
    session_search_calls: int = 0
    delegation_depth: int = 0

    # Correlation identifiers
    run_id: str | None = None
    session_id: str | None = None
    span_id: str | None = None
    parent_span_id: str | None = None
    parent_trace_id: str | None = None

    cronjob_id: str | None = None
    cron_run_id: str | None = None

    error: str | None = None

    secrets_blocked_count: int = 0
    pii_detected_count: int = 0

    workflow_id: str | None = None
    skill_name: str | None = None
    toolset: str | None = None
    sdk_version: str | None = None

    metadata: dict[str, Any] = field(default_factory=dict)

    def to_payload(self) -> dict[str, Any]:
        """Return a wire-format dict for this event.

        Always-included fields: event_id, trace_id, ts, event_name, agent_id,
        platform, status, redaction_policy_version, tool_calls, tool_errors,
        tool_names, input_tokens, output_tokens, duration_ms.

        Optional fields are included only when non-None / non-empty / non-zero
        (estimated_cost_usd is included whenever it is not None, so an explicit
        0.0 cost is preserved). total_tokens is computed when the sum of all
        token counts is positive.

        The mutable containers (tool_names, metadata) are shallow-copied so
        that mutating the returned payload never alters this event.
        """
        payload: dict[str, Any] = {
            "event_id": self.event_id,
            "trace_id": self.trace_id,
            "ts": self.ts,
            "event_name": self.event_name,
            "agent_id": self.agent_id,
            "platform": self.platform,
            "status": self.status,
            "redaction_policy_version": self.redaction_policy_version,
            "tool_calls": self.tool_calls,
            "tool_errors": self.tool_errors,
            "tool_names": list(self.tool_names),
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "duration_ms": self.duration_ms,
        }

        # Compute total_tokens when any token count is positive
        total = (
            self.input_tokens
            + self.output_tokens
            + self.cache_read_tokens
            + self.cache_write_tokens
        )
        if total > 0:
            payload["total_tokens"] = total

        def _include_when_truthy(*attr_names: str) -> None:
            # Copy each attribute into the payload only when it carries a value.
            for attr_name in attr_names:
                value = getattr(self, attr_name)
                if value:
                    payload[attr_name] = value

        # Optional fields — include only when meaningful. Key order is kept
        # stable so serialized payloads do not change shape between releases.
        _include_when_truthy(
            "model",
            "model_provider",
            "error",
            "cache_read_tokens",
            "cache_write_tokens",
            "llm_calls",
            "step_count",
            "loop_count",
        )
        if self.estimated_cost_usd is not None:
            payload["estimated_cost_usd"] = self.estimated_cost_usd
        _include_when_truthy(
            "host_id",
            "workflow_id",
            "skill_name",
            "toolset",
            "secrets_blocked_count",
            "pii_detected_count",
            "run_id",
            "session_id",
            "span_id",
            "parent_span_id",
            "parent_trace_id",
            "skills_loaded_count",
            "skill_names_hash",
            "memory_writes_count",
            "session_search_calls",
            "delegation_depth",
            "cronjob_id",
            "cron_run_id",
            "sdk_version",
        )
        if self.metadata:
            # Shallow copy, mirroring tool_names above: the payload must not
            # share the event's own metadata dict.
            payload["metadata"] = dict(self.metadata)

        return payload
188
+
189
+
190
@dataclass
class SessionStartEvent:
    """Event record with event_name "session_start"."""

    # A fresh id is generated per instance; the remaining identifiers default
    # to empty strings.
    event_id: str = field(default_factory=_new_id)
    trace_id: str = ""
    session_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "session_start"
    # Creation time in epoch milliseconds.
    ts: int = field(default_factory=_now_ms)
    redaction_policy_version: str = "v1-strict"
200
+
201
+
202
@dataclass
class SessionMetricsEvent:
    """Session-level roll-up, emitted once when a session ends.

    Carries totals aggregated over every run in the session.
    """

    # Envelope
    event_id: str = field(default_factory=_new_id)
    trace_id: str = ""
    session_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "session_metrics"
    ts: int = field(default_factory=_now_ms)
    redaction_policy_version: str = "v1-strict"

    # Aggregates
    duration_ms: int = 0
    run_count: int = 0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cache_read_tokens: int = 0
    total_cache_write_tokens: int = 0
    total_tool_calls: int = 0
    total_estimated_cost_usd: float = 0.0
    compactions: int = 0
    resets: int = 0
225
+
226
+
227
@dataclass
class LlmOutputEvent:
    """Event record with event_name "llm_output": model, token counts and cost."""

    # Envelope
    event_id: str = field(default_factory=_new_id)
    trace_id: str = ""
    session_id: str = ""
    run_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "llm_output"
    ts: int = field(default_factory=_now_ms)
    redaction_policy_version: str = "v1-strict"

    # Model identification and usage
    model: str = ""
    provider: str = ""
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0
    finish_reason: str = ""
    estimated_cost_usd: float = 0.0
    span_id: str = ""
248
+
249
+
250
@dataclass
class ToolEndEvent:
    """Event record with event_name "tool_end": one tool call's outcome."""

    # Envelope
    event_id: str = field(default_factory=_new_id)
    trace_id: str = ""
    session_id: str = ""
    run_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "tool_end"
    ts: int = field(default_factory=_now_ms)
    redaction_policy_version: str = "v1-strict"

    # Tool call details
    tool_name: str = ""
    tool_call_id: str = ""
    duration_ms: int = 0
    status: str = "success"
    error: str = ""
    span_id: str = ""
    parent_span_id: str = ""
269
+
270
+
271
@dataclass
class AuditEvent:
    """Audit record for security/config activity.

    Examples: redaction changes, access denials, WAL recovery.
    """

    event_id: str = field(default_factory=_new_id)
    trace_id: str = ""
    session_id: str = ""
    agent_id: str = ""
    platform: str = ""
    event_name: str = "audit"
    ts: int = field(default_factory=_now_ms)
    status: str = "success"
    # Free-form details; each instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,307 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import urllib.request
6
+ from typing import Any
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ # ── Types ─────────────────────────────────────────────────────────────────────
11
+
12
+ # (input_per_M, output_per_M, cache_read_per_M | None, cache_write_per_M | None)
13
+ _T = tuple[float, float, float | None, float | None]
14
+
15
+ # ── Method 1: Static table — official provider docs ───────────────────────────
16
+ #
17
+ # Prefix matching: "claude-opus-4-20250514" matches "claude-opus-4".
18
+ # Keys sorted by length (longest first) so "gpt-4o-mini" matches before "gpt-4o".
19
+ #
20
+ # Sources:
21
+ # Anthropic : https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
22
+ # OpenAI : https://openai.com/api/pricing/
23
+ # Google : https://ai.google.dev/pricing
24
+ # DeepSeek : https://api-docs.deepseek.com/quick_start/pricing
25
+ # AWS Bedrock: https://aws.amazon.com/bedrock/pricing/
26
+
27
+ MODEL_PRICING: dict[str, _T] = {
28
+ # ── Anthropic ────────────────────────────────────────────────────────────
29
+ "claude-opus-4": (15.00, 75.00, 1.50, 18.75),
30
+ "claude-sonnet-4": ( 3.00, 15.00, 0.30, 3.75),
31
+ "claude-haiku-4": ( 0.80, 4.00, 0.08, 1.00),
32
+ "claude-3-7-sonnet": ( 3.00, 15.00, 0.30, 3.75),
33
+ "claude-3-5-sonnet": ( 3.00, 15.00, 0.30, 3.75),
34
+ "claude-3-5-haiku": ( 0.80, 4.00, 0.08, 1.00),
35
+ "claude-3-opus": (15.00, 75.00, 1.50, 18.75),
36
+ "claude-3-haiku": ( 0.25, 1.25, 0.03, 0.30),
37
+ "claude-3-sonnet": ( 3.00, 15.00, None, None),
38
+ # ── OpenAI ───────────────────────────────────────────────────────────────
39
+ "gpt-4.1-nano": ( 0.10, 0.40, 0.025, None),
40
+ "gpt-4.1-mini": ( 0.40, 1.60, 0.10, None),
41
+ "gpt-4.1": ( 2.00, 8.00, 0.50, None),
42
+ "gpt-4o-mini": ( 0.15, 0.60, 0.075, None),
43
+ "gpt-4o": ( 2.50, 10.00, 1.25, None),
44
+ "gpt-4-turbo": (10.00, 30.00, None, None),
45
+ "gpt-4": (30.00, 60.00, None, None),
46
+ "gpt-3.5-turbo": ( 0.50, 1.50, None, None),
47
+ "o3-mini": ( 1.10, 4.40, 0.55, None),
48
+ "o3": (10.00, 40.00, 2.50, None),
49
+ "o1-mini": ( 1.10, 4.40, 0.55, None),
50
+ "o1": (15.00, 60.00, 7.50, None),
51
+ # ── Google Gemini ─────────────────────────────────────────────────────────
52
+ "gemini-2.5-pro": ( 1.25, 10.00, None, None),
53
+ "gemini-2.5-flash": ( 0.15, 0.60, None, None),
54
+ "gemini-2.0-flash": ( 0.10, 0.40, None, None),
55
+ "gemini-1.5-pro": ( 1.25, 5.00, None, None),
56
+ "gemini-1.5-flash": ( 0.075, 0.30, None, None),
57
+ # ── DeepSeek ─────────────────────────────────────────────────────────────
58
+ "deepseek-reasoner": ( 0.55, 2.19, None, None),
59
+ "deepseek-chat": ( 0.14, 0.28, None, None),
60
+ "deepseek-coder": ( 0.14, 0.28, None, None),
61
+ # ── Meta / Llama ─────────────────────────────────────────────────────────
62
+ "llama-4-maverick": ( 0.27, 0.85, None, None),
63
+ "llama-4-scout": ( 0.18, 0.59, None, None),
64
+ "llama-3.3-70b": ( 0.88, 0.88, None, None),
65
+ "llama-3-70b": ( 0.65, 2.75, None, None),
66
+ "llama-3-8b": ( 0.05, 0.20, None, None),
67
+ # ── Alibaba / Qwen ───────────────────────────────────────────────────────
68
+ "qwen3-235b": ( 4.00, 16.00, None, None),
69
+ "qwen3-32b": ( 0.30, 1.20, None, None),
70
+ "qwen3-4b": ( 0.02, 0.08, None, None),
71
+ # ── Arcee ────────────────────────────────────────────────────────────────
72
+ "trinity-large": ( 0.25, 1.00, 0.25, 0.25),
73
+ "trinity-mini": ( 0.045, 0.15, 0.045, 0.045),
74
+ # ── Together AI / HuggingFace (namespace-stripped prefix keys) ────────────
75
+ "kimi-k2": ( 0.50, 2.80, None, None),
76
+ "deepseek-v3": ( 0.60, 1.25, None, None),
77
+ "deepseek-r1": ( 3.00, 7.00, None, None),
78
+ # ── Vercel AI Gateway ────────────────────────────────────────────────────
79
+ "gpt-5.4-pro": (30.00, 180.00, None, None),
80
+ "gpt-5.4": ( 2.50, 15.00, None, None),
81
+ # ── AWS Bedrock ──────────────────────────────────────────────────────────
82
+ "anthropic.claude-opus-4": (15.00, 75.00, None, None),
83
+ "anthropic.claude-sonnet-4": ( 3.00, 15.00, None, None),
84
+ "anthropic.claude-haiku-4": ( 0.80, 4.00, None, None),
85
+ "anthropic.claude-3-5-sonnet": ( 3.00, 15.00, None, None),
86
+ "anthropic.claude-3-5-haiku": ( 0.80, 4.00, None, None),
87
+ "amazon.nova-pro": ( 0.80, 3.20, None, None),
88
+ "amazon.nova-lite": ( 0.06, 0.24, None, None),
89
+ "amazon.nova-micro": ( 0.035, 0.14, None, None),
90
+ }
91
+
92
+ _SORTED_KEYS: list[str] = sorted(MODEL_PRICING, key=len, reverse=True)
93
+
94
+ # ── Method 2: Runtime registry — platforms register their own catalog ─────────
95
+
96
_RUNTIME_REGISTRY: dict[str, _T] = {}
_SORTED_RUNTIME: list[str] = []


def register_prices(
    catalog: dict[str, dict[str, float | None] | _T],
) -> None:
    """Add platform-specific model prices to the runtime registry.

    ``catalog`` maps a model ID (or ID prefix) to either a mapping such as
    ``{"input": 1.25, "output": 5.00, "cache_read": 0.12, "cache_write": None}``
    or a ready-made 4-tuple ``(1.25, 5.00, 0.12, None)``. Keys are stored
    lower-cased; mapping values are converted to floats, with a missing or
    falsy input/output price stored as 0.0 and a missing cache price as None.
    Tuples are stored unchanged.

    estimate_cost() consults these entries before the static table, but after
    any cost_overrides passed to it directly.
    """
    global _SORTED_RUNTIME

    def _optional_rate(spec: dict, field_name: str) -> float | None:
        # Cache prices stay None when absent so no cache cost is ever invented.
        raw = spec.get(field_name)
        return None if raw is None else float(raw)

    for model_id, spec in catalog.items():
        registry_key = model_id.lower()
        if isinstance(spec, dict):
            normalized: _T = (
                float(spec.get("input") or 0),
                float(spec.get("output") or 0),
                _optional_rate(spec, "cache_read"),
                _optional_rate(spec, "cache_write"),
            )
        else:
            normalized = spec
        _RUNTIME_REGISTRY[registry_key] = normalized

    # Re-derive the longest-first key order used for prefix matching.
    _SORTED_RUNTIME = sorted(_RUNTIME_REGISTRY, key=len, reverse=True)
    logger.debug("agentmetrics: registered %d model price entries", len(_RUNTIME_REGISTRY))
127
+
128
+
129
+ # ── Method 3: LiteLLM — optional import, 300+ models ─────────────────────────
130
+
131
def _try_litellm(
    model_lower: str,
    input_tokens: int,
    output_tokens: int,
    cache_read_tokens: int,
    cache_write_tokens: int,
) -> float | None:
    """Price the call from litellm's ``model_cost`` table, if possible.

    Returns None when litellm cannot be imported, when the model is not in
    its table, or when anything goes wrong during the lookup.
    """
    try:
        import re

        import litellm  # type: ignore[import-not-found]

        known_costs: dict[str, Any] = getattr(litellm, "model_cost", None) or {}
        if not known_costs:
            return None

        # litellm uses exact model IDs: try the ID as given first, then with a
        # trailing "-YYYYMMDD"-style date suffix removed.
        record = known_costs.get(model_lower)
        if not record:
            record = known_costs.get(re.sub(r"-\d{8}$", "", model_lower))
        if not record:
            return None

        # litellm stores per-token prices; _compute expects per-million.
        cache_read_raw = record.get("cache_read_input_token_cost")
        cache_write_raw = record.get("cache_creation_input_token_cost")
        rates = (
            (record.get("input_cost_per_token") or 0) * 1_000_000,
            (record.get("output_cost_per_token") or 0) * 1_000_000,
            None if cache_read_raw is None else float(cache_read_raw) * 1_000_000,
            None if cache_write_raw is None else float(cache_write_raw) * 1_000_000,
        )
        return _compute(rates, input_tokens, output_tokens, cache_read_tokens, cache_write_tokens)
    except Exception:
        # Best-effort optional source: any failure means "no price from here".
        return None
165
+
166
+
167
+ # ── Method 4: OpenRouter API — explicit opt-in, covers everything OR routes ───
168
+
169
_OPENROUTER_CACHE: dict[str, _T] = {}
_SORTED_OPENROUTER: list[str] = []


def populate_from_openrouter(
    api_key: str,
    base_url: str = "https://openrouter.ai",
    timeout: int = 15,
) -> int:
    """Fetch live model pricing from OpenRouter's /api/v1/models endpoint.

    Call once at application startup. Results are cached in memory for the
    process lifetime. Requires network access and a valid OpenRouter API key.

    Args:
        api_key: Sent as a bearer token in the Authorization header.
        base_url: Service root; "/api/v1/models" is appended to it.
        timeout: Timeout in seconds passed to the HTTP request.

    Returns:
        The number of models successfully loaded. 0 is returned (and a warning
        logged) when the request or JSON decoding fails.

    Entries that cannot be priced are skipped rather than aborting the whole
    load: a missing ID, no prompt/completion price, a non-numeric price, or a
    negative price.
    """
    global _SORTED_OPENROUTER
    url = f"{base_url.rstrip('/')}/api/v1/models"
    req = urllib.request.Request(
        url,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data: Any = json.loads(resp.read())
    except Exception as exc:
        logger.warning("agentmetrics: populate_from_openrouter failed: %s", exc)
        return 0

    # Tolerate an unexpected response shape instead of raising on .get()/iteration.
    models = data.get("data", []) if isinstance(data, dict) else []
    if not isinstance(models, list):
        models = []

    loaded = 0
    for m in models:
        if not isinstance(m, dict):
            continue
        raw_id = m.get("id")
        if not isinstance(raw_id, str) or not raw_id:
            continue
        model_id = raw_id.lower()

        p = m.get("pricing") or {}
        if not isinstance(p, dict):
            continue
        prompt = p.get("prompt")
        completion = p.get("completion")
        if prompt is None and completion is None:
            continue

        cr_raw = p.get("cache_read") or p.get("cached_prompt") or p.get("input_cache_read")
        cw_raw = p.get("cache_write") or p.get("cache_creation") or p.get("input_cache_write")
        try:
            # The API reports per-token prices; the cache stores per-million.
            in_per_m = float(prompt or 0) * 1_000_000
            out_per_m = float(completion or 0) * 1_000_000
            cr_per_m: float | None = float(cr_raw) * 1_000_000 if cr_raw is not None else None
            cw_per_m: float | None = float(cw_raw) * 1_000_000 if cw_raw is not None else None
        except (TypeError, ValueError):
            logger.debug("agentmetrics: skipping %s: unparseable pricing", model_id)
            continue

        # NOTE(review): negative prices look like a "variable pricing" sentinel
        # (e.g. "-1" on router models) rather than a real price; confirm against
        # the OpenRouter API docs. Storing one would produce a negative cost.
        if in_per_m < 0 or out_per_m < 0:
            logger.debug("agentmetrics: skipping %s: negative pricing", model_id)
            continue
        if cr_per_m is not None and cr_per_m < 0:
            cr_per_m = None
        if cw_per_m is not None and cw_per_m < 0:
            cw_per_m = None

        # Store with stripped namespace so lookups work (estimate_cost strips before lookup)
        key = model_id.split("/", 1)[1] if "/" in model_id else model_id
        _OPENROUTER_CACHE[key] = (in_per_m, out_per_m, cr_per_m, cw_per_m)
        loaded += 1

    _SORTED_OPENROUTER = sorted(_OPENROUTER_CACHE, key=len, reverse=True)
    logger.info("agentmetrics: loaded %d model prices from OpenRouter", loaded)
    return loaded
225
+
226
+
227
+ # ── Core estimation — tries all methods in priority order ─────────────────────
228
+
229
def estimate_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
    cache_read_tokens: int = 0,
    cache_write_tokens: int = 0,
    cost_overrides: dict[str, _T] | None = None,
) -> float | None:
    """Estimate the USD cost of a call, or return None for an unknown model.

    Price sources are consulted in this order and the first hit wins:
      1. cost_overrides   — explicit prices supplied by the caller
      2. Runtime registry — entries added through register_prices()
      3. Static table     — MODEL_PRICING
      4. LiteLLM          — only if litellm is importable
      5. OpenRouter cache — only if populate_from_openrouter() was called
      6. None             — the model is unknown; no price is guessed

    A call with no tokens at all costs 0.0 regardless of the model. A provider
    namespace is dropped before matching ("openai/gpt-4o" → "gpt-4o"), and
    table keys match as prefixes, longest key first
    ("claude-opus-4-20250514" → "claude-opus-4").
    """
    token_counts = (input_tokens, output_tokens, cache_read_tokens, cache_write_tokens)
    if not any(token_counts):
        return 0.0

    name = (model or "").lower().strip()
    if not name:
        return None

    # Drop everything up to and including the first "/" (the provider namespace).
    _, slash, remainder = name.partition("/")
    if slash:
        name = remainder

    # 1. Explicit overrides (keys compared case-insensitively, longest first)
    if cost_overrides:
        override_key = next(
            (k for k in sorted(cost_overrides, key=len, reverse=True) if name.startswith(k.lower())),
            None,
        )
        if override_key is not None:
            return _compute(cost_overrides[override_key], *token_counts)

    # 2 + 3. Runtime registry, then the static table (both pre-sorted by length)
    for ordered_keys, table in (
        (_SORTED_RUNTIME, _RUNTIME_REGISTRY),
        (_SORTED_KEYS, MODEL_PRICING),
    ):
        matched = next((k for k in ordered_keys if name.startswith(k)), None)
        if matched is not None:
            return _compute(table[matched], *token_counts)

    # 4. LiteLLM (optional import)
    via_litellm = _try_litellm(name, *token_counts)
    if via_litellm is not None:
        return via_litellm

    # 5. OpenRouter cache (if populated at startup); an exact match is also a
    #    prefix match, so one startswith test covers both.
    cached_key = next((k for k in _SORTED_OPENROUTER if name.startswith(k)), None)
    if cached_key is not None:
        return _compute(_OPENROUTER_CACHE[cached_key], *token_counts)

    return None  # unknown model — never guesses
288
+
289
+
290
def _compute(
    prices: _T,
    in_t: int,
    out_t: int,
    cache_read: int,
    cache_write: int,
) -> float:
    """Turn per-million-token prices and token counts into a USD cost.

    ``prices`` is (input, output, cache_read, cache_write) in USD per million
    tokens. The cache entries may be None or absent altogether, in which case
    the matching cache tokens add nothing. The result is rounded to 6 decimals.
    """
    per_million = 1_000_000.0
    rate_in: float = prices[0] or 0.0
    rate_out: float = prices[1] or 0.0
    # Shorter sequences are accepted: missing cache prices behave like None.
    rate_cache_read: float | None = prices[2] if len(prices) > 2 else None
    rate_cache_write: float | None = prices[3] if len(prices) > 3 else None

    total = (in_t / per_million) * rate_in + (out_t / per_million) * rate_out
    for tokens, rate in ((cache_read, rate_cache_read), (cache_write, rate_cache_write)):
        if tokens and rate is not None:
            total += (tokens / per_million) * rate
    return round(total, 6)
@@ -0,0 +1,161 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import re
5
+ import time
6
+ from enum import Enum
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ if TYPE_CHECKING:
10
+ from .config import AgentMetricsConfig
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Patterns that identify secrets in free-form text.
15
+ # Order matters — more specific patterns first.
16
_SECRET_PATTERNS: list[tuple[str, str]] = [
    (r"sk-[A-Za-z0-9\-_]{20,}", "[REDACTED]"),
    (r"am_[A-Za-z0-9\-_]{16,}", "[REDACTED]"),
    # JWT-like tokens: two base64url segments separated by a dot, plus the
    # optional third (signature) segment so that no part of the token is left
    # behind in the scrubbed text.
    (
        r"\bey[A-Za-z0-9\-_]{20,}\.[A-Za-z0-9\-_]{20,}(?:\.[A-Za-z0-9\-_]+)?",
        "[REDACTED]",
    ),
    # Key-value patterns: api_key=<value>, password: <value>, etc.
    # Group 1 captures the KEY NAME, never the value, so the "\1=[REDACTED]"
    # replacement keeps the key and drops the secret. The negative lookahead
    # skips values already replaced by an earlier pattern, which would
    # otherwise be redacted (and counted) a second time.
    (
        r"(?i)((?:api[-_]?key|apikey|api[-_]?token|access[-_]?token|"
        r"secret|password|passwd|auth))[=:\s\"']+(?!\[REDACTED\])"
        r"[^\s\"'&,\]\}\n]{8,}",
        r"\1=[REDACTED]",
    ),
]

# Compiled once at import time; applied in list order by the scrubbers.
_COMPILED_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    (re.compile(p), r) for p, r in _SECRET_PATTERNS
]
32
+
33
+ # FNV-1a 32-bit constants for tool name hashing (matches npm package implementation).
34
_FNV_OFFSET = 0x811C9DC5  # 2166136261, the 32-bit FNV offset basis
_FNV_PRIME = 0x01000193  # 16777619, the 32-bit FNV prime
36
+
37
+
38
class RedactionMode(str, Enum):
    """Redaction levels; each member's value is its lower-case name."""

    STRICT = "strict"
    MODERATE = "moderate"
    DEBUG = "debug"
42
+
43
+
44
def active_mode(cfg: AgentMetricsConfig) -> RedactionMode:
    """Resolve the redaction mode currently in effect for ``cfg``.

    An unrecognised ``cfg.redaction_mode`` falls back to STRICT. DEBUG mode
    also reverts to STRICT once ``cfg.debug_expires_at`` (compared against
    ``time.time()``) has passed; a DEBUG mode with no expiry set stays DEBUG.
    """
    try:
        configured = RedactionMode(cfg.redaction_mode)
    except ValueError:
        configured = RedactionMode.STRICT

    # Only debug mode can expire; everything else is returned as configured.
    if configured != RedactionMode.DEBUG:
        return configured

    expiry = cfg.debug_expires_at
    if expiry is not None and time.time() > expiry:
        logger.info("agentmetrics: debug mode expired — reverting to strict")
        return RedactionMode.STRICT
    return configured
57
+
58
+
59
def scrub_secrets(text: str, mode: RedactionMode) -> str:
    """Return ``text`` with API keys, tokens, and passwords redacted.

    Thin wrapper over scrub_secrets_and_count() that drops the count.
    """
    return scrub_secrets_and_count(text, mode)[0]
63
+
64
+
65
def scrub_secrets_and_count(text: str, mode: RedactionMode) -> tuple[str, int]:
    """Redact secrets in ``text`` and report how many replacements were made.

    Returns ``(scrubbed_text, replacement_count)``. The count feeds
    secrets_blocked_count on agent_end events. Scrubbing runs in strict and
    moderate modes; empty input and debug mode return the text unchanged with
    a count of 0.
    """
    if not text:
        return text, 0
    if mode == RedactionMode.DEBUG:
        return text, 0

    cleaned = text
    redactions = 0
    # Patterns run in list order, each over the output of the previous one.
    for compiled, substitute in _COMPILED_PATTERNS:
        cleaned, hits = compiled.subn(substitute, cleaned)
        redactions += hits
    return cleaned, redactions
80
+
81
+
82
+ def redact_tool_name(name: str, cfg: AgentMetricsConfig) -> str | None:
83
+ """Apply tool name export policy. Returns None when the name must not be sent."""
84
+ policy = cfg.exported_tool_names
85
+
86
+ if policy == "off":
87
+ return None
88
+
89
+ if policy == "blocklist":
90
+ if name in (cfg.redact_tool_names or []):
91
+ return _hash_name(name)
92
+ return name
93
+
94
+ if policy == "allowlist":
95
+ # Only names explicitly in redact_tool_names are exported.
96
+ if name in (cfg.redact_tool_names or []):
97
+ return name
98
+ return None
99
+
100
+ if policy == "hash":
101
+ return _hash_name(name)
102
+
103
+ return name
104
+
105
+
106
def scrub_event(payload: dict[str, Any], mode: RedactionMode) -> dict[str, Any]:
    """Return ``payload`` with secrets scrubbed from its string values.

    In debug mode the payload object is handed back untouched.
    """
    if mode == RedactionMode.DEBUG:
        return payload
    return _scrub_dict_and_count(payload, mode)[0]  # type: ignore[return-value]
112
+
113
+
114
def scrub_event_and_count(payload: dict[str, Any], mode: RedactionMode) -> tuple[dict[str, Any], int]:
    """Scrub secrets from an event payload and count the redactions.

    Returns ``(scrubbed_payload, total_blocked)``; in debug mode that is the
    untouched payload and 0. Used by _enqueue() to populate
    secrets_blocked_count on agent_end events.
    """
    if mode != RedactionMode.DEBUG:
        scrubbed, blocked = _scrub_dict_and_count(payload, mode)
        return scrubbed, blocked  # type: ignore[return-value]
    return payload, 0
123
+
124
+
125
def _scrub_dict(obj: Any, mode: RedactionMode) -> Any:  # noqa: ANN401 — recursive Any is intentional
    """Scrub ``obj`` recursively and return only the scrubbed value."""
    return _scrub_dict_and_count(obj, mode)[0]
128
+
129
+
130
def _scrub_dict_and_count(obj: Any, mode: RedactionMode) -> tuple[Any, int]:  # noqa: ANN401
    """Recursively scrub every string inside ``obj``.

    Returns ``(scrubbed, replacement_count)``. Strings are passed to
    scrub_secrets_and_count(); dicts, lists and tuples are rebuilt with their
    values scrubbed (dict keys are left as they are); any other value is
    returned unchanged with a count of 0. The input is never mutated.
    """
    if isinstance(obj, str):
        return scrub_secrets_and_count(obj, mode)

    if isinstance(obj, dict):
        result_dict: dict[str, Any] = {}
        total = 0
        for k, v in obj.items():
            scrubbed_v, n = _scrub_dict_and_count(v, mode)
            result_dict[k] = scrubbed_v
            total += n
        return result_dict, total

    if isinstance(obj, (list, tuple)):
        # Tuples are walked like lists so that a secret nested inside a tuple
        # value cannot slip through unscrubbed.
        result_items: list[Any] = []
        total = 0
        for v in obj:
            scrubbed_v, n = _scrub_dict_and_count(v, mode)
            result_items.append(scrubbed_v)
            total += n
        if isinstance(obj, list):
            return result_items, total
        if hasattr(obj, "_fields"):
            # namedtuple: rebuild the same type from positional values.
            return type(obj)(*result_items), total
        return tuple(result_items), total

    return obj, 0
150
+
151
+
152
def _hash_name(name: str) -> str:
    """Pseudonymise a tool name as ``t_`` plus its 32-bit FNV-1a hash in hex.

    The npm package is stated to use the same algorithm.
    """
    digest = _FNV_OFFSET
    for octet in name.encode():
        # FNV-1a: XOR the byte in first, then multiply, truncating to 32 bits.
        digest ^= octet
        digest = (digest * _FNV_PRIME) & 0xFFFFFFFF
    return "t_" + format(digest, "08x")
158
+
159
+
160
def redaction_policy_version(mode: RedactionMode) -> str:
    """Return the policy version tag for ``mode``: "v1-" plus the mode's value."""
    return "v1-{}".format(mode.value)
@@ -0,0 +1,10 @@
1
+ [project]
2
+ name = "agentmetrics-shared"
3
+ version = "0.2.0"
4
+ requires-python = ">=3.11"
5
+ description = "Shared event schema, pricing, and redaction for all agentmetrics integrations"
6
+ dependencies = []
7
+
8
+ [build-system]
9
+ requires = ["hatchling"]
10
+ build-backend = "hatchling.build"