elltri 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cost_platform/__init__.py +3 -0
- cost_platform/db/__init__.py +0 -0
- cost_platform/db/engine.py +40 -0
- cost_platform/db/models.py +186 -0
- cost_platform/emitter.py +206 -0
- cost_platform/event_assembler.py +68 -0
- cost_platform/extractor.py +148 -0
- cost_platform/models.py +90 -0
- cost_platform/rate_card.py +127 -0
- cost_platform/wrapper.py +238 -0
- elltri-0.1.0.dist-info/METADATA +42 -0
- elltri-0.1.0.dist-info/RECORD +14 -0
- elltri-0.1.0.dist-info/WHEEL +5 -0
- elltri-0.1.0.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import Engine, create_engine
|
|
5
|
+
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
|
|
6
|
+
|
|
7
|
+
# Fallback used when neither DATABASE_URL nor DB_SECRET_JSON is set.
# NOTE(review): presumably a local-development default — confirm it is never
# relied on in a deployed environment.
_DEFAULT_URL = "postgresql://cost_platform:changeme@localhost:5433/cost_platform"


def _database_url() -> str:
    """Resolve the PostgreSQL connection URL from the environment.

    Resolution order:

    1. ``DATABASE_URL`` — returned verbatim when set and non-empty.
    2. ``DB_SECRET_JSON`` — a JSON object with ``username``, ``password``,
       ``host``, ``dbname`` and, optionally, ``port`` (5432 when absent).
    3. ``_DEFAULT_URL``.

    Returns:
        A ``postgresql://`` URL string (or whatever ``DATABASE_URL`` holds).

    Raises:
        json.JSONDecodeError: if ``DB_SECRET_JSON`` is not valid JSON.
        KeyError: if the secret lacks ``username``, ``password``, ``host`` or
            ``dbname``.
    """
    from urllib.parse import quote

    if url := os.environ.get("DATABASE_URL"):
        return url
    # ECS injects the Secrets Manager value as DB_SECRET_JSON containing
    # {username, password, host, port, dbname}.
    if secret := os.environ.get("DB_SECRET_JSON"):
        s = json.loads(secret)
        # Credentials must be percent-encoded: a raw "@", "/", ":" or "%" in a
        # generated password would otherwise be parsed as URL structure.
        # safe="" also encodes "/" (quote() leaves it alone by default).
        username = quote(str(s["username"]), safe="")
        password = quote(str(s["password"]), safe="")
        return (
            f"postgresql://{username}:{password}"
            f"@{s['host']}:{s.get('port', 5432)}/{s['dbname']}"
        )
    return _DEFAULT_URL
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _async_url(url: str) -> str:
    """Rewrite a PostgreSQL URL so that it selects the asyncpg driver.

    URLs starting with ``postgresql://`` or ``postgres://`` have that scheme
    prefix swapped for ``postgresql+asyncpg://``. Any other URL (including one
    that already names a driver) is returned unchanged.
    """
    # asyncpg requires the postgresql+asyncpg:// scheme
    async_scheme = "postgresql+asyncpg://"
    for sync_scheme in ("postgresql://", "postgres://"):
        if url.startswith(sync_scheme):
            # Only the leading scheme is replaced; the rest is kept verbatim.
            return async_scheme + url[len(sync_scheme):]
    return url
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_sync_engine() -> Engine:
    """Return a synchronous engine (psycopg2). Used by Alembic and batch jobs.

    The URL comes from ``_database_url()``. A legacy ``postgres://`` scheme is
    rewritten to ``postgresql://`` first, mirroring what ``_async_url`` already
    does for the async engine, because SQLAlchemy 1.4+ does not accept
    ``postgres`` as a dialect name.
    """
    url = _database_url()
    legacy_scheme = "postgres://"
    if url.startswith(legacy_scheme):
        url = "postgresql://" + url[len(legacy_scheme):]
    return create_engine(url)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_async_engine() -> AsyncEngine:
    """Return an async engine (asyncpg). Used by FastAPI services.

    The URL resolved by ``_database_url()`` is passed through ``_async_url()``
    so that it carries the ``postgresql+asyncpg://`` scheme.
    """
    resolved_url = _database_url()
    return create_async_engine(_async_url(resolved_url))
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import enum
|
|
2
|
+
import uuid
|
|
3
|
+
from datetime import UTC, date, datetime
|
|
4
|
+
from decimal import Decimal
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import (
|
|
7
|
+
Date,
|
|
8
|
+
DateTime,
|
|
9
|
+
ForeignKey,
|
|
10
|
+
Index,
|
|
11
|
+
Integer,
|
|
12
|
+
Numeric,
|
|
13
|
+
String,
|
|
14
|
+
UniqueConstraint,
|
|
15
|
+
func,
|
|
16
|
+
)
|
|
17
|
+
from sqlalchemy import (
|
|
18
|
+
Enum as SAEnum,
|
|
19
|
+
)
|
|
20
|
+
from sqlalchemy.dialects.postgresql import UUID
|
|
21
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class OrgStatus(enum.StrEnum):
|
|
25
|
+
active = "active"
|
|
26
|
+
suspended = "suspended"
|
|
27
|
+
churned = "churned"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ApiKeyRole(enum.StrEnum):
|
|
31
|
+
admin = "admin"
|
|
32
|
+
ingest = "ingest"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class Base(DeclarativeBase):
    """Declarative base class shared by every ORM model in this module."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Organisation(Base):
    """ORM mapping for the ``organisations`` table.

    ``org_id`` is a client-generated UUID primary key and ``org_slug`` is
    unique. ``provisioned_at`` is filled in both by the ORM (UTC ``now``) and
    by the database (``now()``) when no value is supplied.
    """

    __tablename__ = "organisations"

    org_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    org_name: Mapped[str] = mapped_column(String(255), nullable=False)
    org_slug: Mapped[str] = mapped_column(String(100), unique=True, nullable=False)
    status: Mapped[OrgStatus] = mapped_column(
        SAEnum(OrgStatus, name="org_status"),
        default=OrgStatus.active,
        nullable=False,
    )
    plan_type: Mapped[str | None] = mapped_column(String(50), nullable=True)
    provisioned_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(tz=UTC),
        server_default=func.now(),
        nullable=False,
    )
    anthropic_workspace_id: Mapped[str | None] = mapped_column(
        String(255),
        nullable=True,
    )
    # NOTE(review): by its name this holds a secret ARN rather than the key
    # itself — confirm against the code that reads it.
    anthropic_admin_api_key_secret_arn: Mapped[str | None] = mapped_column(
        String(2048),
        nullable=True,
    )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class ApiKey(Base):
    """ORM mapping for the ``api_keys`` table.

    Each row references one organisation (``ON DELETE RESTRICT``).
    ``key_hash`` is covered by the unique index ``ix_api_keys_key_hash``.
    """

    __tablename__ = "api_keys"
    __table_args__ = (Index("ix_api_keys_key_hash", "key_hash", unique=True),)

    key_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    org_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("organisations.org_id", ondelete="RESTRICT"),
        nullable=False,
    )
    # NOTE(review): 64 characters fits a hex-encoded SHA-256 digest — confirm
    # against the code that produces the hash.
    key_hash: Mapped[str] = mapped_column(String(64), nullable=False)
    role: Mapped[ApiKeyRole] = mapped_column(
        SAEnum(ApiKeyRole, name="api_key_role"),
        nullable=False,
    )
    label: Mapped[str] = mapped_column(String(255), nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(tz=UTC),
        server_default=func.now(),
        nullable=False,
    )
    revoked_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class OrgRateOverride(Base):
    """ORM mapping for the ``org_rate_overrides`` table.

    At most one row may exist per (``org_id``, ``provider``, ``model_id``,
    ``effective_from``), enforced by ``uq_org_rate_overrides_key``.
    """

    __tablename__ = "org_rate_overrides"
    __table_args__ = (
        UniqueConstraint(
            "org_id",
            "provider",
            "model_id",
            "effective_from",
            name="uq_org_rate_overrides_key",
        ),
    )

    override_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    org_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("organisations.org_id", ondelete="RESTRICT"),
        nullable=False,
    )
    provider: Mapped[str] = mapped_column(String(50), nullable=False)
    model_id: Mapped[str] = mapped_column(String(100), nullable=False)
    effective_from: Mapped[date] = mapped_column(Date(), nullable=False)
    # Precision 5, scale 4.
    # NOTE(review): presumably a fraction such as 0.1500 — confirm the unit.
    discount_pct: Mapped[Decimal] = mapped_column(Numeric(5, 4), nullable=False)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ReviewerRate(Base):
    """ORM mapping for the ``reviewer_rates`` table.

    At most one row may exist per (``org_id``, ``owner_team``,
    ``effective_from``), enforced by ``uq_reviewer_rates_key``.
    """

    __tablename__ = "reviewer_rates"
    __table_args__ = (
        UniqueConstraint(
            "org_id",
            "owner_team",
            "effective_from",
            name="uq_reviewer_rates_key",
        ),
    )

    rate_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    org_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("organisations.org_id", ondelete="RESTRICT"),
        nullable=False,
    )
    owner_team: Mapped[str] = mapped_column(String(255), nullable=False)
    hourly_rate: Mapped[Decimal] = mapped_column(Numeric(10, 4), nullable=False)
    # Three-character code; the ORM supplies "USD" when none is given.
    currency: Mapped[str] = mapped_column(String(3), default="USD", nullable=False)
    effective_from: Mapped[date] = mapped_column(Date(), nullable=False)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class AgentStatus(enum.StrEnum):
|
|
141
|
+
active = "active"
|
|
142
|
+
deprecated = "deprecated"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class AgentRegistry(Base):
    """ORM mapping for the ``agent_registry`` table.

    The primary key is the composite (``org_id``, ``agent_id``). ``org_id``
    references ``organisations.org_id`` with ``ON DELETE RESTRICT``.
    """

    __tablename__ = "agent_registry"

    # --- composite primary key ---
    org_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("organisations.org_id", ondelete="RESTRICT"),
        primary_key=True,
    )
    agent_id: Mapped[str] = mapped_column(String(255), primary_key=True)

    # --- descriptive attributes (only agent_name is mandatory) ---
    agent_name: Mapped[str] = mapped_column(String(255), nullable=False)
    description: Mapped[str | None] = mapped_column(String, nullable=True)
    owner_team: Mapped[str | None] = mapped_column(String(255), nullable=True)
    department: Mapped[str | None] = mapped_column(String(255), nullable=True)
    cost_centre: Mapped[str | None] = mapped_column(String(100), nullable=True)
    product: Mapped[str | None] = mapped_column(String(255), nullable=True)
    category: Mapped[str | None] = mapped_column(String(100), nullable=True)

    # --- lifecycle ---
    status: Mapped[AgentStatus] = mapped_column(
        SAEnum(AgentStatus, name="agent_status"),
        default=AgentStatus.active,
        nullable=False,
    )
    onboarded_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(tz=UTC),
        server_default=func.now(),
        nullable=False,
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class TransformationCheckpoint(Base):
    """ORM mapping for the ``transformation_checkpoints`` table.

    ``job_name`` is unique. ``updated_at`` is populated by the database on
    insert (``server_default``) and refreshed with ``now()`` on ORM updates
    (``onupdate``).
    """

    __tablename__ = "transformation_checkpoints"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    job_name: Mapped[str] = mapped_column(String, unique=True, nullable=False)
    last_processed_prefix: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
    )
    last_run_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
|
cost_platform/emitter.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OTLPEmitter — serialises TelemetryEvent and OutcomeEvent as OTel spans and exports
|
|
3
|
+
them to the collector via OTLP HTTP.
|
|
4
|
+
|
|
5
|
+
Collector endpoint: OTEL_EXPORTER_OTLP_ENDPOINT env var (default http://localhost:4318).
|
|
6
|
+
Auth: COST_PLATFORM_API_KEY is sent as x-api-key in the OTLP export headers.
|
|
7
|
+
This is how the collector's key-validation sidecar resolves org_id.
|
|
8
|
+
|
|
9
|
+
If COST_PLATFORM_API_KEY is not set at init time, the emitter operates in no-op mode:
|
|
10
|
+
emit_event and emit_outcome do nothing.
|
|
11
|
+
|
|
12
|
+
Both emit methods are async and must never raise. Catch all exceptions, log to stderr.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
from typing import TYPE_CHECKING
|
|
21
|
+
|
|
22
|
+
from opentelemetry import trace
|
|
23
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
24
|
+
from opentelemetry.sdk.resources import Resource
|
|
25
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
26
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from cost_platform.models import OutcomeEvent, TelemetryEvent
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class OTLPEmitter:
    """Export telemetry and outcome events as OpenTelemetry spans over OTLP/HTTP.

    If ``COST_PLATFORM_API_KEY`` is unset or empty at construction time no
    tracer is created and every emit method returns without doing anything.
    The four public emit methods catch ``Exception`` and report it on stderr
    instead of propagating it.
    """

    # NOTE(review): event.trace_id / event.span_id and event.cost_usd_runtime
    # are not copied onto the span — confirm that this is intended.

    def __init__(self) -> None:
        """Create the tracer, or enter no-op mode when no API key is set."""
        api_key = os.environ.get("COST_PLATFORM_API_KEY", "")
        self._tracer: trace.Tracer | None = None
        if api_key:
            self._tracer = self._build_tracer(api_key)
            return
        print(
            "cost_platform: COST_PLATFORM_API_KEY not set — running in no-op mode",
            file=sys.stderr,
        )

    @staticmethod
    def _build_tracer(api_key: str) -> trace.Tracer:
        """Return a tracer whose spans are batched to ``<endpoint>/v1/traces``."""
        endpoint_root = os.environ.get(
            "OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318"
        ).rstrip("/")
        span_exporter = OTLPSpanExporter(
            endpoint=f"{endpoint_root}/v1/traces",
            # The API key travels as an export header.
            headers={"x-api-key": api_key},
        )
        tracer_provider = TracerProvider(
            resource=Resource.create({"service.name": "cost-platform-wrapper"})
        )
        tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter))
        return tracer_provider.get_tracer("cost_platform", schema_url="")

    # ------------------------------------------------------------------
    # Public async interface (fire-and-forget; never raises)
    # ------------------------------------------------------------------

    async def emit_event(self, event: TelemetryEvent) -> None:
        """Emit a telemetry span; failures are printed to stderr, not raised."""
        self._safely(
            self._emit_telemetry_sync, event, "cost_platform: telemetry emit failed"
        )

    async def emit_outcome(self, event: OutcomeEvent) -> None:
        """Emit an outcome span; failures are printed to stderr, not raised."""
        self._safely(
            self._emit_outcome_sync, event, "cost_platform: outcome emit failed"
        )

    # ------------------------------------------------------------------
    # Synchronous entry points (used by the wrapper's sync path)
    # ------------------------------------------------------------------

    def emit_event_sync(self, event: TelemetryEvent) -> None:
        """Synchronous entry-point for the wrapper's sync messages.create() path."""
        self._safely(
            self._emit_telemetry_sync, event, "cost_platform: telemetry emit failed"
        )

    def emit_outcome_sync(self, event: OutcomeEvent) -> None:
        """Synchronous entry-point for the wrapper's sync complete() path."""
        self._safely(
            self._emit_outcome_sync, event, "cost_platform: outcome emit failed"
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _safely(emit, event, failure_prefix: str) -> None:
        """Call ``emit(event)``; print ``"<failure_prefix>: <exc>"`` on error."""
        try:
            emit(event)
        except Exception as exc:  # deliberate: emitting must never raise
            print(f"{failure_prefix}: {exc}", file=sys.stderr)

    @staticmethod
    def _copy_required(span, event, fields: tuple[str, ...]) -> None:
        """Set ``cp.<field>`` from ``event.<field>`` for each field, in order."""
        for field in fields:
            span.set_attribute(f"cp.{field}", getattr(event, field))

    @staticmethod
    def _copy_optional(span, event, fields: tuple[str, ...]) -> None:
        """Like ``_copy_required`` but skip fields whose value is ``None``."""
        for field in fields:
            value = getattr(event, field)
            if value is not None:
                span.set_attribute(f"cp.{field}", value)

    def _emit_telemetry_sync(self, event: TelemetryEvent) -> None:
        """Record one ``cost_platform.action`` span for ``event`` (no-op without tracer)."""
        if self._tracer is None:
            return
        required = self._copy_required
        optional = self._copy_optional
        with self._tracer.start_as_current_span("cost_platform.action") as span:
            span.set_attribute("event_type", "telemetry")

            # Group 1 — Trace Identity
            required(
                span,
                event,
                ("org_id", "workflow_id", "agent_id", "agent_version", "environment"),
            )
            optional(span, event, ("parent_span_id", "step_name", "step_index"))

            # Group 2 — Call Metadata (provider/model are also mirrored under gen_ai.*)
            required(span, event, ("provider",))
            span.set_attribute("gen_ai.system", event.provider)
            span.set_attribute("gen_ai.request.model", event.model_id)
            required(
                span,
                event,
                (
                    "model_id",
                    "request_id",
                    "latency_ms_total",
                    "stop_reason",
                    "service_tier",
                    "batch_mode",
                    "retry_attempt",
                ),
            )
            optional(span, event, ("latency_ms_ttfb", "error_code"))

            # Group 3 — Token Counts
            span.set_attribute("gen_ai.usage.input_tokens", event.tokens_input_fresh)
            span.set_attribute("gen_ai.usage.output_tokens", event.tokens_output_total)
            required(
                span,
                event,
                ("tokens_input_fresh", "tokens_input_total", "tokens_output_total"),
            )
            optional(
                span,
                event,
                (
                    "tokens_input_cache_write_5m",
                    "tokens_input_cache_write_1h",
                    "tokens_input_cache_read",
                    "tokens_output_thinking_est",
                ),
            )

            # Group 4 — Cost Fields (None when emitted from wrapper; set by transformation job)
            optional(
                span,
                event,
                (
                    "cost_usd_input_fresh",
                    "cost_usd_output",
                    "cost_usd_tools",
                    "cost_usd_total",
                    "rate_card_version",
                    "cost_usd_input_cache_write",
                    "cost_usd_input_cache_read",
                ),
            )

            # Group 5 — SDK Metadata
            optional(span, event, ("sdk_language", "sdk_version"))

            # Group 6 — Tool Usage (detail list is serialised to a JSON string)
            required(
                span,
                event,
                ("tool_calls_total", "tool_calls_web_search", "tool_calls_code_exec"),
            )
            span.set_attribute("cp.tool_calls_detail", json.dumps(event.tool_calls_detail))

            span.set_attribute("cp.event_timestamp", event.event_timestamp.isoformat())

    def _emit_outcome_sync(self, event: OutcomeEvent) -> None:
        """Record one ``cost_platform.outcome`` span for ``event`` (no-op without tracer)."""
        if self._tracer is None:
            return
        with self._tracer.start_as_current_span("cost_platform.outcome") as span:
            span.set_attribute("event_type", "outcome")
            self._copy_required(
                span, event, ("org_id", "workflow_id", "agent_id", "outcome_status")
            )
            # Datetimes are exported as ISO-8601 strings.
            span.set_attribute("cp.started_at", event.started_at.isoformat())
            span.set_attribute("cp.completed_at", event.completed_at.isoformat())
            self._copy_required(span, event, ("duration_ms", "human_review_required"))
            self._copy_optional(
                span,
                event,
                (
                    "error_code",
                    "human_review_duration_mins",
                    "session_id",
                    "external_reference_id",
                    "sdk_language",
                    "sdk_version",
                ),
            )
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Assemble a TelemetryEvent from extracted token counts, trace context, and call metadata.
|
|
3
|
+
|
|
4
|
+
Pure field assembly — no rate card, no cost computation. All cost_usd_* fields and
|
|
5
|
+
rate_card_version are set to None; the transformation job is the sole owner of those fields.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import uuid
|
|
11
|
+
from datetime import UTC, datetime
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from cost_platform.models import TelemetryEvent
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EventAssembler:
    """Build ``TelemetryEvent`` objects from extractor output and caller context."""

    def assemble(
        self,
        token_counts: dict[str, Any],
        trace_fields: dict[str, Any],
        call_metadata: dict[str, Any],
    ) -> TelemetryEvent:
        """Combine the three input mappings into one ``TelemetryEvent``.

        Args:
            token_counts: must contain ``tokens_input_fresh``,
                ``tokens_input_total`` and ``tokens_output_total``; the cache
                and thinking counts are optional.
            trace_fields: must contain ``workflow_id``, ``agent_id``,
                ``agent_version`` and ``environment``; ``step_name`` and
                ``step_index`` are optional.
            call_metadata: must contain ``provider``, ``model_id`` and
                ``request_id``; every other key is optional.

        Returns:
            The assembled event. All ``cost_usd_*`` fields and
            ``rate_card_version`` are ``None``.

        Raises:
            KeyError: if one of the required keys listed above is missing.
        """
        meta = call_metadata.get
        tokens = token_counts.get
        trace = trace_fields.get

        def meta_int(key: str) -> int:
            # Integer-valued metadata defaults to 0 when the key is absent.
            return int(meta(key, 0))

        # A missing or falsy timestamp / id is replaced by a fresh value.
        stamp: datetime = meta("event_timestamp") or datetime.now(UTC)

        return TelemetryEvent(
            trace_id=meta("trace_id") or str(uuid.uuid4()),
            span_id=meta("span_id") or str(uuid.uuid4()),
            workflow_id=trace_fields["workflow_id"],
            agent_id=trace_fields["agent_id"],
            agent_version=trace_fields["agent_version"],
            environment=trace_fields["environment"],
            provider=call_metadata["provider"],
            model_id=call_metadata["model_id"],
            request_id=call_metadata["request_id"],
            latency_ms_total=meta_int("latency_ms_total"),
            stop_reason=meta("stop_reason", "end_turn"),
            service_tier=meta("service_tier", "standard"),
            batch_mode=bool(meta("batch_mode", False)),
            retry_attempt=meta_int("retry_attempt"),
            tokens_input_fresh=token_counts["tokens_input_fresh"],
            tokens_input_total=token_counts["tokens_input_total"],
            tokens_output_total=token_counts["tokens_output_total"],
            cost_usd_input_fresh=None,
            cost_usd_output=None,
            cost_usd_tools=None,
            cost_usd_total=None,
            cost_usd_input_cache_write=None,
            cost_usd_input_cache_read=None,
            rate_card_version=None,
            tool_calls_total=meta_int("tool_calls_total"),
            tool_calls_web_search=meta_int("tool_calls_web_search"),
            tool_calls_code_exec=meta_int("tool_calls_code_exec"),
            tool_calls_detail=meta("tool_calls_detail", []),
            event_timestamp=stamp,
            parent_span_id=meta("parent_span_id"),
            step_name=trace("step_name"),
            step_index=trace("step_index"),
            latency_ms_ttfb=meta("latency_ms_ttfb"),
            error_code=meta("error_code"),
            tokens_input_cache_write_5m=tokens("tokens_input_cache_write_5m"),
            tokens_input_cache_write_1h=tokens("tokens_input_cache_write_1h"),
            tokens_input_cache_read=tokens("tokens_input_cache_read"),
            tokens_output_thinking_est=tokens("tokens_output_thinking_est"),
            cost_usd_runtime=None,
            sdk_language=meta("sdk_language"),
            sdk_version=meta("sdk_version"),
        )
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Extract token counts and call metadata from an Anthropic Message response.
|
|
3
|
+
|
|
4
|
+
Critical invariant (Technical Invariant #1):
|
|
5
|
+
Anthropic's `usage.input_tokens` = FRESH (uncached) tokens only.
|
|
6
|
+
It is NOT the total input. This is non-standard versus other providers.
|
|
7
|
+
Total = input_tokens + cache_creation_input_tokens + cache_read_input_tokens.
|
|
8
|
+
Never use input_tokens alone as total billable input for Anthropic calls.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
# Translates the stop_reason reported on an Anthropic response into the value
# stored on the telemetry event. Reasons missing from this table are handled
# by the caller's ``.get(..., "end_turn")`` default.
_STOP_REASON_MAP: dict[str, str] = {
    "end_turn": "end_turn",
    "tool_use": "tool_use",
    "max_tokens": "max_tokens",
    # normal completion via stop sequence
    "stop_sequence": "end_turn",
    "error": "error",
}

# Tool names counted as code execution by _extract_tool_usage.
_CODE_EXEC_TOOL_NAMES: frozenset[str] = frozenset(("bash", "computer"))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def extract_token_counts(response: Any) -> dict[str, Any]:
    """Return token counts, tool usage, and response metadata from an Anthropic Message.

    ``usage.input_tokens`` is treated as fresh (uncached) input only; the total
    is fresh + cache writes + cache reads.

    Cache writes are split into 5-minute and 1-hour buckets. The breakdown is
    read from the structured ``usage.cache_creation`` object when present. If
    it is absent or empty, the integer ``usage.cache_creation_input_tokens``
    total is used and attributed entirely to the 5-minute bucket.

    Args:
        response: an Anthropic Message-like object exposing ``usage``,
            ``model``, ``id`` and optionally ``content`` / ``stop_reason``.

    Returns:
        A dict with the ``tokens_*`` counts, ``stop_reason``, ``model_id``,
        ``request_id`` and the ``tool_calls_*`` fields. Optional token counts
        that are zero are reported as ``None``.
    """
    usage = response.usage

    fresh = usage.input_tokens
    cache_read = getattr(usage, "cache_read_input_tokens", None) or 0

    # ``cache_creation_input_tokens`` is only the combined integer total; the
    # per-TTL split lives on ``cache_creation``. Reading just the integer
    # would book every 1h write as a 5m write.
    cache_write_5m, cache_write_1h = _split_cache_creation(
        getattr(usage, "cache_creation", None)
    )
    if cache_write_5m + cache_write_1h == 0:
        # No usable breakdown: fall back to the legacy total.
        cache_write_5m, cache_write_1h = _split_cache_creation(
            getattr(usage, "cache_creation_input_tokens", None)
        )

    tokens_input_total = fresh + cache_write_5m + cache_write_1h + cache_read

    thinking_est = _estimate_thinking_tokens(response)

    tool_calls_total, tool_calls_web_search, tool_calls_code_exec, tool_calls_detail = (
        _extract_tool_usage(response)
    )

    # Missing or unrecognised stop reasons are reported as "end_turn".
    raw_stop = getattr(response, "stop_reason", None) or "end_turn"
    stop_reason = _STOP_REASON_MAP.get(str(raw_stop), "end_turn")

    return {
        "tokens_input_fresh": fresh,
        "tokens_input_cache_write_5m": cache_write_5m if cache_write_5m else None,
        "tokens_input_cache_write_1h": cache_write_1h if cache_write_1h else None,
        "tokens_input_cache_read": cache_read if cache_read else None,
        "tokens_input_total": tokens_input_total,
        "tokens_output_total": usage.output_tokens,
        "tokens_output_thinking_est": thinking_est,
        "stop_reason": stop_reason,
        "model_id": response.model,
        "request_id": response.id,
        "tool_calls_total": tool_calls_total,
        "tool_calls_web_search": tool_calls_web_search,
        "tool_calls_code_exec": tool_calls_code_exec,
        "tool_calls_detail": tool_calls_detail,
    }
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _split_cache_creation(cache_creation: Any) -> tuple[int, int]:
    """Split a cache-creation value into ``(tokens_5m, tokens_1h)``.

    Accepted inputs:
      - ``None``: yields ``(0, 0)``.
      - a plain ``int`` (legacy format): counted entirely as a 5m write.
      - any other object: its ``ephemeral_5m_input_tokens`` and
        ``ephemeral_1h_input_tokens`` attributes are read, with a missing or
        falsy attribute counting as 0.
    """
    if cache_creation is None:
        return (0, 0)
    if isinstance(cache_creation, int):
        return (cache_creation, 0)
    # Structured CacheCreation object
    attr_names = ("ephemeral_5m_input_tokens", "ephemeral_1h_input_tokens")
    raw_5m, raw_1h = [getattr(cache_creation, attr, 0) or 0 for attr in attr_names]
    return int(raw_5m), int(raw_1h)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _estimate_thinking_tokens(response: Any) -> int | None:
    """Estimate thinking output tokens as ``len(text) // 4`` per thinking block.

    Only content blocks whose ``type`` is ``"thinking"`` are considered, and
    the per-block estimates are summed. Returns ``None`` when the response has
    no such block, which is distinct from an estimate of 0.
    """
    blocks = getattr(response, "content", None) or []
    per_block = [
        len(getattr(item, "thinking", "") or "") // 4
        for item in blocks
        if getattr(item, "type", None) == "thinking"
    ]
    return sum(per_block) if per_block else None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# OpenAI ``finish_reason`` values mapped to the stop_reason vocabulary used by
# the Anthropic extractor (end_turn | tool_use | max_tokens | error).
_OPENAI_FINISH_REASON_MAP: dict[str, str] = {
    "stop": "end_turn",
    "length": "max_tokens",
    "tool_calls": "tool_use",
    "function_call": "tool_use",
}


def extract_token_counts_openai(response: Any) -> dict[str, Any]:
    """Return token counts and call metadata from an OpenAI ChatCompletion response.

    ``usage.prompt_tokens`` is taken as total input. Cached tokens
    (``usage.prompt_tokens_details.cached_tokens``, when present) are
    subtracted to obtain the fresh count. Only the first choice is inspected.

    ``stop_reason`` is normalised through ``_OPENAI_FINISH_REASON_MAP`` so both
    providers report the same vocabulary. A missing ``finish_reason`` becomes
    ``"end_turn"``; unmapped values (e.g. ``"content_filter"``) are passed
    through unchanged so they stay visible downstream.

    Returns:
        A dict with the same keys as ``extract_token_counts``. Cache-write and
        thinking counts are always ``None``, and the web-search / code-exec
        counters and ``tool_calls_detail`` are always empty.

    Raises:
        IndexError: if ``response.choices`` is empty.
    """
    usage = response.usage
    prompt_tokens_details = getattr(usage, "prompt_tokens_details", None)
    cache_read = (
        getattr(prompt_tokens_details, "cached_tokens", None) or 0
        if prompt_tokens_details is not None
        else 0
    )
    fresh = usage.prompt_tokens - cache_read

    choice = response.choices[0]
    raw_stop = getattr(choice, "finish_reason", None) or "end_turn"
    stop_reason = _OPENAI_FINISH_REASON_MAP.get(str(raw_stop), raw_stop)
    tool_calls = getattr(choice.message, "tool_calls", None) or []

    return {
        "tokens_input_fresh": fresh,
        "tokens_input_cache_write_5m": None,
        "tokens_input_cache_write_1h": None,
        "tokens_input_cache_read": cache_read if cache_read else None,
        "tokens_input_total": usage.prompt_tokens,
        "tokens_output_total": usage.completion_tokens,
        "tokens_output_thinking_est": None,
        "stop_reason": stop_reason,
        "model_id": response.model,
        "request_id": response.id,
        "tool_calls_total": len(tool_calls),
        "tool_calls_web_search": 0,
        "tool_calls_code_exec": 0,
        "tool_calls_detail": [],
    }
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _extract_tool_usage(
    response: Any,
) -> tuple[int, int, int, list[dict[str, Any]]]:
    """Return (total, web_search_count, code_exec_count, detail_list).

    Both client tool calls (``tool_use`` blocks) and server-executed tool calls
    (``server_tool_use`` blocks, the form used by Anthropic-hosted tools such
    as web search) are counted. ``detail_list`` holds one
    ``{"name": ..., "count": ...}`` entry per distinct tool name, in
    first-seen order.
    """
    content = getattr(response, "content", None) or []
    # Counting only "tool_use" would miss every server-side tool invocation.
    tool_block_types = ("tool_use", "server_tool_use")
    tool_blocks = [b for b in content if getattr(b, "type", None) in tool_block_types]

    web_search = 0
    code_exec = 0
    name_counts: dict[str, int] = {}

    for block in tool_blocks:
        name = getattr(block, "name", "") or ""
        name_counts[name] = name_counts.get(name, 0) + 1
        if name == "web_search":
            web_search += 1
        elif name in _CODE_EXEC_TOOL_NAMES:
            code_exec += 1

    detail = [{"name": n, "count": c} for n, c in name_counts.items()]
    return len(tool_blocks), web_search, code_exec, detail
|
cost_platform/models.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
|
|
9
|
+
class TelemetryEvent:
|
|
10
|
+
# Group 1 — Trace Identity
|
|
11
|
+
trace_id: str
|
|
12
|
+
span_id: str
|
|
13
|
+
workflow_id: str
|
|
14
|
+
agent_id: str
|
|
15
|
+
agent_version: str
|
|
16
|
+
environment: str # production | staging | development
|
|
17
|
+
|
|
18
|
+
# Group 2 — Call Metadata
|
|
19
|
+
provider: str # anthropic | openai | google
|
|
20
|
+
model_id: str
|
|
21
|
+
request_id: str
|
|
22
|
+
latency_ms_total: int
|
|
23
|
+
stop_reason: str # end_turn | tool_use | max_tokens | error
|
|
24
|
+
service_tier: str # standard | priority
|
|
25
|
+
batch_mode: bool
|
|
26
|
+
retry_attempt: int
|
|
27
|
+
|
|
28
|
+
# Group 3 — Token Counts
|
|
29
|
+
tokens_input_fresh: int
|
|
30
|
+
tokens_input_total: int
|
|
31
|
+
tokens_output_total: int
|
|
32
|
+
|
|
33
|
+
# Group 6 — Tool Usage
|
|
34
|
+
tool_calls_total: int
|
|
35
|
+
tool_calls_web_search: int
|
|
36
|
+
tool_calls_code_exec: int
|
|
37
|
+
tool_calls_detail: list[dict[str, Any]]
|
|
38
|
+
|
|
39
|
+
# Timestamp — required for rate card resolution and event ordering
|
|
40
|
+
event_timestamp: datetime
|
|
41
|
+
|
|
42
|
+
# Optional fields (defaults last, per dataclass ordering rule)
|
|
43
|
+
# Group 1 — Trace Identity (optional)
|
|
44
|
+
org_id: str = "" # resolved by OTel Collector from the ingest API key
|
|
45
|
+
parent_span_id: str | None = None
|
|
46
|
+
step_name: str | None = None
|
|
47
|
+
step_index: int | None = None
|
|
48
|
+
|
|
49
|
+
# Group 2 — Call Metadata (optional)
|
|
50
|
+
latency_ms_ttfb: int | None = None
|
|
51
|
+
error_code: str | None = None
|
|
52
|
+
|
|
53
|
+
# Group 3 — Token Counts (optional)
|
|
54
|
+
tokens_input_cache_write_5m: int | None = None
|
|
55
|
+
tokens_input_cache_write_1h: int | None = None
|
|
56
|
+
tokens_input_cache_read: int | None = None
|
|
57
|
+
tokens_output_thinking_est: int | None = None
|
|
58
|
+
|
|
59
|
+
# Group 4 — Cost Fields (set by transformation job; None from wrapper)
|
|
60
|
+
cost_usd_input_fresh: float | None = None
|
|
61
|
+
cost_usd_output: float | None = None
|
|
62
|
+
cost_usd_tools: float | None = None
|
|
63
|
+
cost_usd_total: float | None = None
|
|
64
|
+
cost_usd_input_cache_write: float | None = None
|
|
65
|
+
cost_usd_input_cache_read: float | None = None
|
|
66
|
+
cost_usd_runtime: float | None = None # added at workflow level by transformation job
|
|
67
|
+
rate_card_version: str | None = None # stamped by transformation job
|
|
68
|
+
|
|
69
|
+
# Group 5 — SDK Metadata
|
|
70
|
+
sdk_language: str | None = None
|
|
71
|
+
sdk_version: str | None = None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass(frozen=True)
|
|
75
|
+
class OutcomeEvent:
|
|
76
|
+
workflow_id: str
|
|
77
|
+
agent_id: str
|
|
78
|
+
outcome_status: str # success | failure | partial
|
|
79
|
+
started_at: datetime
|
|
80
|
+
completed_at: datetime
|
|
81
|
+
duration_ms: int
|
|
82
|
+
human_review_required: bool
|
|
83
|
+
|
|
84
|
+
org_id: str = "" # resolved by OTel Collector from the ingest API key
|
|
85
|
+
error_code: str | None = None
|
|
86
|
+
human_review_duration_mins: int | None = None
|
|
87
|
+
session_id: str | None = None
|
|
88
|
+
external_reference_id: str | None = None
|
|
89
|
+
sdk_language: str | None = None
|
|
90
|
+
sdk_version: str | None = None
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Rate card loader for the AI Agent Cost Intelligence Platform.
|
|
3
|
+
|
|
4
|
+
Resolves the correct LLM pricing entry for a given provider, model, and event timestamp,
|
|
5
|
+
then applies any org-level discount to token rates.
|
|
6
|
+
|
|
7
|
+
Batch discount note
|
|
8
|
+
-------------------
|
|
9
|
+
The 50% batch discount is NOT part of the rate card and is NOT applied here.
|
|
10
|
+
It is applied by the transformation job after rates are resolved, so that the
|
|
11
|
+
rate card always returns list (non-batch) prices.
|
|
12
|
+
|
|
13
|
+
Tool-call flat fees
|
|
14
|
+
-------------------
|
|
15
|
+
`web_search_per_call`, `agent_runtime_per_hour`, and `code_exec_per_hour` are flat
|
|
16
|
+
fees, not per-token rates. They are never discounted by `discount_pct` and are never
|
|
17
|
+
subject to the batch multiplier. The caller must handle them separately.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import hashlib
|
|
23
|
+
import re
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
from datetime import date, datetime
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
import yaml
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class ResolvedRates:
    """Immutable pricing snapshot for one provider/model pair.

    Only the provider, model and the two per-million-token rates are
    required; every other field defaults to zero (or an empty version).
    """

    provider: str
    model_id: str
    input_per_mtok: float
    output_per_mtok: float

    # Cache multipliers.
    cache_write_5m_multiplier: float = 0.0
    cache_write_1h_multiplier: float = 0.0
    cache_read_multiplier: float = 0.0

    # Flat fees (not per-token rates).
    web_search_per_call: float = 0.0
    agent_runtime_per_hour: float = 0.0
    code_exec_per_hour: float = 0.0

    discount_pct: float = 0.0
    rate_card_version: str = ""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class RateCardLoader:
    """Load rate cards from YAML and resolve pricing for a model at a date.

    ``path`` may be either:

    * a directory, in which case every ``*_rate_card.yaml`` file in it is
      loaded in sorted order; each file supplies top-level ``provider`` and
      ``models`` keys; or
    * a single file with a top-level ``providers`` mapping whose values each
      contain a ``models`` key.

    The rate card version is the SHA-256 hex digest of the raw bytes read
    (all files combined, in sorted order, for a directory).
    """

    def __init__(self, path: Path | str) -> None:
        path = Path(path)
        if path.is_dir():
            files = sorted(path.glob("*_rate_card.yaml"))
            combined = hashlib.sha256()
            # provider -> model_id -> list of dated pricing entries
            self._providers: dict[str, dict[str, list[dict[str, Any]]]] = {}
            for f in files:
                raw = f.read_bytes()
                combined.update(raw)
                data: dict[str, Any] = yaml.safe_load(raw)
                self._providers[data["provider"]] = data["models"]
            self._version: str = combined.hexdigest()
        else:
            raw = path.read_bytes()
            self._version = hashlib.sha256(raw).hexdigest()
            data = yaml.safe_load(raw)
            self._providers = {
                provider: provider_data["models"]
                for provider, provider_data in data["providers"].items()
            }

    @property
    def rate_card_version(self) -> str:
        """SHA-256 hex digest identifying the loaded rate card content."""
        return self._version

    def resolve(
        self,
        provider: str,
        model_id: str,
        event_timestamp: datetime | date,
        discount_pct: float = 0.0,
    ) -> ResolvedRates:
        """Return the rates in force for ``provider``/``model_id`` at ``event_timestamp``.

        The entry with the latest ``effective_from`` on or before the event
        date is selected. ``discount_pct`` is a fraction (``0.2`` means 20%
        off) applied to the input and output token rates only; multipliers
        and flat fees are passed through unchanged.

        If ``model_id`` is not listed, a trailing ``-YYYYMMDD`` and/or
        ``-YYYY-MM-DD`` suffix is stripped and the lookup retried.

        Raises:
            ValueError: if ``discount_pct`` is outside ``[0, 1]`` (or NaN),
                the provider or model is unknown, or no entry is effective
                on or before the event date.
        """
        # Written as ``not (lo <= x <= hi)`` so that NaN is rejected as well.
        # Out-of-range values would otherwise yield negative or inflated rates.
        if not 0.0 <= discount_pct <= 1.0:
            raise ValueError(
                f"discount_pct must be a fraction between 0.0 and 1.0, got {discount_pct!r}"
            )

        if provider not in self._providers:
            raise ValueError(f"Unknown provider '{provider}'")
        models = self._providers[provider]
        if model_id not in models:
            # Strip trailing date suffixes and retry:
            #   Anthropic format: claude-haiku-4-5-20251001 → claude-haiku-4-5
            #   OpenAI format:    gpt-4o-2024-11-20 → gpt-4o
            base_id = re.sub(r"-\d{8}$", "", model_id)
            base_id = re.sub(r"-\d{4}-\d{2}-\d{2}$", "", base_id)
            if base_id != model_id and base_id in models:
                model_id = base_id
            else:
                raise ValueError(f"Unknown model '{model_id}' for provider '{provider}'")

        # datetime is a subclass of date, so it must be tested for first.
        event_date = (
            event_timestamp.date()
            if isinstance(event_timestamp, datetime)
            else event_timestamp
        )

        # Parse each effective date once; str() covers values YAML already
        # decoded into date objects as well as plain strings.
        dated_entries = [
            (date.fromisoformat(str(e["effective_from"])), e) for e in models[model_id]
        ]
        valid = [pair for pair in dated_entries if pair[0] <= event_date]
        if not valid:
            raise ValueError(
                f"No rate card entry for '{provider}/{model_id}' "
                f"effective on or before {event_date}"
            )

        # max() keeps the first of equally-dated entries.
        _, entry = max(valid, key=lambda pair: pair[0])
        multiplier = 1.0 - discount_pct

        return ResolvedRates(
            provider=provider,
            model_id=model_id,
            input_per_mtok=entry["input_per_mtok"] * multiplier,
            output_per_mtok=entry["output_per_mtok"] * multiplier,
            cache_write_5m_multiplier=entry.get("cache_write_5m_multiplier", 0.0),
            cache_write_1h_multiplier=entry.get("cache_write_1h_multiplier", 0.0),
            cache_read_multiplier=entry.get("cache_read_multiplier", 0.0),
            web_search_per_call=entry.get("web_search_per_call", 0.0),
            agent_runtime_per_hour=entry.get("agent_runtime_per_hour", 0.0),
            code_exec_per_hour=entry.get("code_exec_per_hour", 0.0),
            discount_pct=discount_pct,
            rate_card_version=self._version,
        )
|
cost_platform/wrapper.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AgentTelemetry — the public interface imported by agent teams.
|
|
3
|
+
|
|
4
|
+
Usage (from the handoff spec):
|
|
5
|
+
|
|
6
|
+
from cost_platform import AgentTelemetry
|
|
7
|
+
import anthropic, uuid
|
|
8
|
+
|
|
9
|
+
telemetry = AgentTelemetry(
|
|
10
|
+
agent_id="invoice-extractor",
|
|
11
|
+
agent_version="2.1.0",
|
|
12
|
+
workflow_id=str(uuid.uuid4()),
|
|
13
|
+
environment="production",
|
|
14
|
+
)
|
|
15
|
+
client = telemetry.wrap(anthropic.Anthropic())
|
|
16
|
+
response = client.messages.create(
|
|
17
|
+
model="claude-sonnet-4-6",
|
|
18
|
+
max_tokens=1024,
|
|
19
|
+
messages=[{"role": "user", "content": "..."}],
|
|
20
|
+
_step_name="extract_line_items", # stripped before Anthropic call
|
|
21
|
+
_step_index=1, # stripped before Anthropic call
|
|
22
|
+
)
|
|
23
|
+
telemetry.complete(outcome_status="success", human_review_required=False)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import importlib.metadata as _importlib_metadata
|
|
29
|
+
import sys
|
|
30
|
+
import time
|
|
31
|
+
from datetime import UTC, datetime
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
from cost_platform.emitter import OTLPEmitter
|
|
35
|
+
from cost_platform.event_assembler import EventAssembler
|
|
36
|
+
from cost_platform.extractor import extract_token_counts, extract_token_counts_openai
|
|
37
|
+
from cost_platform.models import OutcomeEvent
|
|
38
|
+
|
|
39
|
+
# Resolve the installed SDK version: try the published distribution name
# first, then the alternate name, and fall back to "unknown" if neither
# lookup succeeds.
_SDK_VERSION = "unknown"
for _dist_name in ("elltri", "cost-platform"):
    try:
        _SDK_VERSION = _importlib_metadata.version(_dist_name)
    except Exception:
        continue
    break
del _dist_name  # keep the module namespace the same as before

_SDK_LANGUAGE = "python"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class AgentTelemetry:
|
|
51
|
+
def __init__(
|
|
52
|
+
self,
|
|
53
|
+
agent_id: str,
|
|
54
|
+
agent_version: str,
|
|
55
|
+
workflow_id: str,
|
|
56
|
+
environment: str,
|
|
57
|
+
) -> None:
|
|
58
|
+
self._agent_id = agent_id
|
|
59
|
+
self._agent_version = agent_version
|
|
60
|
+
self._workflow_id = workflow_id
|
|
61
|
+
self._environment = environment
|
|
62
|
+
self._started_at = datetime.now(UTC)
|
|
63
|
+
|
|
64
|
+
self._assembler = EventAssembler()
|
|
65
|
+
self._emitter = OTLPEmitter()
|
|
66
|
+
|
|
67
|
+
def wrap(self, client: Any) -> Any:
|
|
68
|
+
"""Return a wrapped client with transparent telemetry. Supports Anthropic and OpenAI."""
|
|
69
|
+
if hasattr(client, "messages"): # Anthropic
|
|
70
|
+
return _WrappedClient(client, self)
|
|
71
|
+
elif hasattr(client, "chat"): # OpenAI
|
|
72
|
+
return _WrappedOpenAIClient(client, self)
|
|
73
|
+
raise ValueError(
|
|
74
|
+
f"cost_platform: unsupported client type {type(client).__name__}"
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
def complete(
|
|
78
|
+
self,
|
|
79
|
+
outcome_status: str,
|
|
80
|
+
human_review_required: bool = False,
|
|
81
|
+
human_review_duration_mins: int | None = None,
|
|
82
|
+
session_id: str | None = None,
|
|
83
|
+
external_reference_id: str | None = None,
|
|
84
|
+
) -> None:
|
|
85
|
+
"""Emit an OutcomeEvent. Call once at the end of each agent workflow run."""
|
|
86
|
+
completed_at = datetime.now(UTC)
|
|
87
|
+
duration_ms = int((completed_at - self._started_at).total_seconds() * 1000)
|
|
88
|
+
event = OutcomeEvent(
|
|
89
|
+
workflow_id=self._workflow_id,
|
|
90
|
+
agent_id=self._agent_id,
|
|
91
|
+
outcome_status=outcome_status,
|
|
92
|
+
started_at=self._started_at,
|
|
93
|
+
completed_at=completed_at,
|
|
94
|
+
duration_ms=duration_ms,
|
|
95
|
+
human_review_required=human_review_required,
|
|
96
|
+
human_review_duration_mins=human_review_duration_mins,
|
|
97
|
+
session_id=session_id,
|
|
98
|
+
external_reference_id=external_reference_id,
|
|
99
|
+
sdk_language=_SDK_LANGUAGE,
|
|
100
|
+
sdk_version=_SDK_VERSION,
|
|
101
|
+
)
|
|
102
|
+
self._emitter.emit_outcome_sync(event)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class _WrappedClient:
    """Proxy for a native Anthropic client with an instrumented ``messages``.

    ``messages`` is replaced by a telemetry-emitting wrapper; every other
    attribute is forwarded to the native client.
    """

    def __init__(self, native: Any, telemetry: AgentTelemetry) -> None:
        self._native = native
        self.messages = _WrappedMessages(native.messages, telemetry)

    def __getattr__(self, name: str) -> Any:
        # __getattr__ only runs after normal lookup has failed. If ``_native``
        # itself is missing (an instance created without __init__, as copy
        # and pickle do), delegating would re-enter this method until
        # RecursionError, so report the attribute as absent instead.
        if name == "_native":
            raise AttributeError(name)
        return getattr(self._native, name)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class _WrappedMessages:
|
|
115
|
+
def __init__(self, native_messages: Any, telemetry: AgentTelemetry) -> None:
|
|
116
|
+
self._native = native_messages
|
|
117
|
+
self._telemetry = telemetry
|
|
118
|
+
|
|
119
|
+
def create(self, **kwargs: Any) -> Any:
|
|
120
|
+
# Strip wrapper-only parameters before forwarding to Anthropic
|
|
121
|
+
step_name: str | None = kwargs.pop("_step_name", None)
|
|
122
|
+
step_index: int | None = kwargs.pop("_step_index", None)
|
|
123
|
+
|
|
124
|
+
t0 = time.monotonic()
|
|
125
|
+
response = self._native.create(**kwargs)
|
|
126
|
+
latency_ms = int((time.monotonic() - t0) * 1000)
|
|
127
|
+
|
|
128
|
+
try:
|
|
129
|
+
extracted = extract_token_counts(response)
|
|
130
|
+
|
|
131
|
+
trace_fields: dict[str, Any] = {
|
|
132
|
+
"workflow_id": self._telemetry._workflow_id,
|
|
133
|
+
"agent_id": self._telemetry._agent_id,
|
|
134
|
+
"agent_version": self._telemetry._agent_version,
|
|
135
|
+
"step_name": step_name,
|
|
136
|
+
"step_index": step_index,
|
|
137
|
+
"environment": self._telemetry._environment,
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
call_metadata: dict[str, Any] = {
|
|
141
|
+
"provider": "anthropic",
|
|
142
|
+
"model_id": extracted.pop("model_id"),
|
|
143
|
+
"request_id": extracted.pop("request_id"),
|
|
144
|
+
"event_timestamp": datetime.now(UTC),
|
|
145
|
+
"latency_ms_total": latency_ms,
|
|
146
|
+
"stop_reason": extracted.pop("stop_reason"),
|
|
147
|
+
"service_tier": "standard",
|
|
148
|
+
"batch_mode": False,
|
|
149
|
+
"retry_attempt": 0,
|
|
150
|
+
"tool_calls_total": extracted.pop("tool_calls_total"),
|
|
151
|
+
"tool_calls_web_search": extracted.pop("tool_calls_web_search"),
|
|
152
|
+
"tool_calls_code_exec": extracted.pop("tool_calls_code_exec"),
|
|
153
|
+
"tool_calls_detail": extracted.pop("tool_calls_detail"),
|
|
154
|
+
"sdk_language": _SDK_LANGUAGE,
|
|
155
|
+
"sdk_version": _SDK_VERSION,
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
event = self._telemetry._assembler.assemble(extracted, trace_fields, call_metadata)
|
|
159
|
+
self._telemetry._emitter.emit_event_sync(event)
|
|
160
|
+
except Exception as exc:
|
|
161
|
+
print(f"cost_platform: telemetry failed: {exc}", file=sys.stderr)
|
|
162
|
+
|
|
163
|
+
return response
|
|
164
|
+
|
|
165
|
+
def __getattr__(self, name: str) -> Any:
|
|
166
|
+
return getattr(self._native, name)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class _WrappedOpenAIClient:
    """Proxy for a native OpenAI client with an instrumented ``chat``.

    ``chat`` is replaced by a wrapper; every other attribute is forwarded
    to the native client.
    """

    def __init__(self, native: Any, telemetry: AgentTelemetry) -> None:
        self._native = native
        self.chat = _WrappedOpenAIChat(native.chat, telemetry)

    def __getattr__(self, name: str) -> Any:
        # __getattr__ only runs after normal lookup has failed. If ``_native``
        # itself is missing (an instance created without __init__, as copy
        # and pickle do), delegating would re-enter this method until
        # RecursionError, so report the attribute as absent instead.
        if name == "_native":
            raise AttributeError(name)
        return getattr(self._native, name)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class _WrappedOpenAIChat:
    """Proxy for OpenAI's ``chat`` namespace with an instrumented ``completions``.

    ``completions`` is replaced by a telemetry-emitting wrapper; every other
    attribute is forwarded to the native ``chat`` object.
    """

    def __init__(self, native_chat: Any, telemetry: AgentTelemetry) -> None:
        self._native = native_chat
        self.completions = _WrappedOpenAICompletions(native_chat.completions, telemetry)

    def __getattr__(self, name: str) -> Any:
        # __getattr__ only runs after normal lookup has failed. If ``_native``
        # itself is missing (an instance created without __init__, as copy
        # and pickle do), delegating would re-enter this method until
        # RecursionError, so report the attribute as absent instead.
        if name == "_native":
            raise AttributeError(name)
        return getattr(self._native, name)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class _WrappedOpenAICompletions:
|
|
188
|
+
def __init__(self, native_completions: Any, telemetry: AgentTelemetry) -> None:
|
|
189
|
+
self._native = native_completions
|
|
190
|
+
self._telemetry = telemetry
|
|
191
|
+
|
|
192
|
+
def create(self, **kwargs: Any) -> Any:
|
|
193
|
+
step_name: str | None = kwargs.pop("_step_name", None)
|
|
194
|
+
step_index: int | None = kwargs.pop("_step_index", None)
|
|
195
|
+
|
|
196
|
+
t0 = time.monotonic()
|
|
197
|
+
response = self._native.create(**kwargs)
|
|
198
|
+
latency_ms = int((time.monotonic() - t0) * 1000)
|
|
199
|
+
|
|
200
|
+
try:
|
|
201
|
+
extracted = extract_token_counts_openai(response)
|
|
202
|
+
|
|
203
|
+
trace_fields: dict[str, Any] = {
|
|
204
|
+
"workflow_id": self._telemetry._workflow_id,
|
|
205
|
+
"agent_id": self._telemetry._agent_id,
|
|
206
|
+
"agent_version": self._telemetry._agent_version,
|
|
207
|
+
"step_name": step_name,
|
|
208
|
+
"step_index": step_index,
|
|
209
|
+
"environment": self._telemetry._environment,
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
call_metadata: dict[str, Any] = {
|
|
213
|
+
"provider": "openai",
|
|
214
|
+
"model_id": extracted.pop("model_id"),
|
|
215
|
+
"request_id": extracted.pop("request_id"),
|
|
216
|
+
"event_timestamp": datetime.now(UTC),
|
|
217
|
+
"latency_ms_total": latency_ms,
|
|
218
|
+
"stop_reason": extracted.pop("stop_reason"),
|
|
219
|
+
"service_tier": "standard",
|
|
220
|
+
"batch_mode": False,
|
|
221
|
+
"retry_attempt": 0,
|
|
222
|
+
"tool_calls_total": extracted.pop("tool_calls_total"),
|
|
223
|
+
"tool_calls_web_search": extracted.pop("tool_calls_web_search"),
|
|
224
|
+
"tool_calls_code_exec": extracted.pop("tool_calls_code_exec"),
|
|
225
|
+
"tool_calls_detail": extracted.pop("tool_calls_detail"),
|
|
226
|
+
"sdk_language": _SDK_LANGUAGE,
|
|
227
|
+
"sdk_version": _SDK_VERSION,
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
event = self._telemetry._assembler.assemble(extracted, trace_fields, call_metadata)
|
|
231
|
+
self._telemetry._emitter.emit_event_sync(event)
|
|
232
|
+
except Exception as exc:
|
|
233
|
+
print(f"cost_platform: telemetry failed: {exc}", file=sys.stderr)
|
|
234
|
+
|
|
235
|
+
return response
|
|
236
|
+
|
|
237
|
+
def __getattr__(self, name: str) -> Any:
|
|
238
|
+
return getattr(self._native, name)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: elltri
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI Total Cost of Ownership — instrumented agent metering and first-party Claude usage
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/Elltri/Cost-Explorer
|
|
7
|
+
Project-URL: Repository, https://github.com/Elltri/Cost-Explorer
|
|
8
|
+
Keywords: llm,cost,observability,opentelemetry,anthropic,openai
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Requires-Dist: pyyaml>=6.0
|
|
19
|
+
Requires-Dist: sqlalchemy[asyncio]>=2.0
|
|
20
|
+
Requires-Dist: alembic>=1.13
|
|
21
|
+
Requires-Dist: asyncpg>=0.29
|
|
22
|
+
Requires-Dist: fastapi>=0.111
|
|
23
|
+
Requires-Dist: uvicorn[standard]>=0.29
|
|
24
|
+
Requires-Dist: cachetools>=5.3
|
|
25
|
+
Requires-Dist: opentelemetry-sdk>=1.24
|
|
26
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.24
|
|
27
|
+
Requires-Dist: opentelemetry-proto>=1.24
|
|
28
|
+
Requires-Dist: httpx>=0.27
|
|
29
|
+
Requires-Dist: anthropic>=0.26
|
|
30
|
+
Requires-Dist: pyiceberg[glue,pyiceberg-core,s3fs]>=0.7
|
|
31
|
+
Requires-Dist: pyarrow>=15.0
|
|
32
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
33
|
+
Requires-Dist: psycopg2-binary>=2.9
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
37
|
+
Requires-Dist: black>=24.0; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
39
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
40
|
+
Requires-Dist: httpx2>=2.0; extra == "dev"
|
|
41
|
+
Requires-Dist: boto3>=1.34; extra == "dev"
|
|
42
|
+
Requires-Dist: botocore>=1.34; extra == "dev"
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
cost_platform/__init__.py,sha256=0b4cW-8k66zEQuLWg8-iaGeaeIMx6jLPrIFfbCwQSEA,79
|
|
2
|
+
cost_platform/emitter.py,sha256=NFYcIDXhV6KdHrh6uMFUaN54U1CY2C2P1EtiHWxGVIs,10366
|
|
3
|
+
cost_platform/event_assembler.py,sha256=pABOxbzBy07QTrOwsdHbWHdS6jkW1NLjGIKpEGX4EWg,3248
|
|
4
|
+
cost_platform/extractor.py,sha256=Q4TvBQ99OyzvaymH6iHRyqMg3OYQpK9aK2ELjKW10Ys,5555
|
|
5
|
+
cost_platform/models.py,sha256=KSMHQINkLP7RsvCkQApPHHr1BQgIx7ieSWjg13mcwSc,2864
|
|
6
|
+
cost_platform/rate_card.py,sha256=hBVTCS--LEOOwTktFO0Vr9J6j6XysplzSY4YySe0eao,4693
|
|
7
|
+
cost_platform/wrapper.py,sha256=uAjmhkwz6ePaRoMQapSWIWnE9BhydVMV9CBKLWWJOXk,8867
|
|
8
|
+
cost_platform/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
cost_platform/db/engine.py,sha256=b3AUjyAhLlfypPQzabuXG0ssvs4zlhb4mGESG6J8w3w,1348
|
|
10
|
+
cost_platform/db/models.py,sha256=ulefy965lLFcqkZ15L2wLjW2kcu957evVm5REIZaP7Y,6119
|
|
11
|
+
elltri-0.1.0.dist-info/METADATA,sha256=MUvH9cbmOs9bmijLno0MMPEq0yQf6TINQtaLG_UTlFM,1711
|
|
12
|
+
elltri-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
13
|
+
elltri-0.1.0.dist-info/top_level.txt,sha256=5IuorqrMkRIDumJjUr_jI5JSsfJid8B7OwnejgenM_E,14
|
|
14
|
+
elltri-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cost_platform
|