a2a-llm-tracker 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- a2a_llm_tracker-0.0.1/PKG-INFO +22 -0
- a2a_llm_tracker-0.0.1/README.md +1 -0
- a2a_llm_tracker-0.0.1/pyproject.toml +60 -0
- a2a_llm_tracker-0.0.1/setup.cfg +4 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/__init__.py +6 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/__main__.py +95 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/context.py +56 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/core.py +2 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/events.py +41 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/integrations/__init__.py +3 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/integrations/litellm.py +324 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/meter.py +50 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/pricing.py +29 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/sinks/__init__.py +5 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/sinks/base.py +11 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/sinks/jsonl.py +22 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/sinks/sqlite.py +98 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/PKG-INFO +22 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/SOURCES.txt +21 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/dependency_links.txt +1 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/requires.txt +11 -0
- a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/top_level.txt +1 -0
- a2a_llm_tracker-0.0.1/tests/test_core.py +14 -0
a2a_llm_tracker-0.0.1/PKG-INFO

```diff
@@ -0,0 +1,22 @@
+Metadata-Version: 2.4
+Name: a2a-llm-tracker
+Version: 0.0.1
+Summary: A short description of your package
+Author-email: Nischal Bhandari <nischal@boomconsole.com>
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
+Requires-Dist: mftsccs
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0; extra == "dev"
+Requires-Dist: black>=23.0; extra == "dev"
+Requires-Dist: ruff>=0.1.0; extra == "dev"
+Requires-Dist: mypy>=1.0; extra == "dev"
+Requires-Dist: twine; extra == "dev"
+Requires-Dist: python-dotenv; extra == "dev"
+Requires-Dist: litellm; extra == "dev"
+
+# a2a-tracker
```
a2a_llm_tracker-0.0.1/README.md

```diff
@@ -0,0 +1 @@
+# a2a-tracker
```
a2a_llm_tracker-0.0.1/pyproject.toml

```diff
@@ -0,0 +1,60 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "a2a-llm-tracker"  # PyPI name uses hyphens; the import name uses underscores
+version = "0.0.1"
+authors = [
+    { name = "Nischal Bhandari", email = "nischal@boomconsole.com" }
+]
+description = "A short description of your package"
+readme = "README.md"
+requires-python = ">=3.6"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "mftsccs"
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0",
+    "pytest-cov>=4.0",
+    "black>=23.0",
+    "ruff>=0.1.0",
+    "mypy>=1.0",
+    "twine",
+    "python-dotenv",
+    "litellm"
+]
+
+[tool.setuptools]
+package-dir = {"" = "src"}
+
+[tool.setuptools.packages.find]
+where = ["src"]
+
+
+[tool.black]
+line-length = 88
+target-version = ["py38", "py39", "py310", "py311", "py312"]
+
+[tool.ruff]
+line-length = 88
+select = ["E", "F", "I", "N", "W", "UP"]
+
+[tool.mypy]
+python_version = "3.8"
+warn_return_any = true
+warn_unused_configs = true
+ignore_missing_imports = true
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_functions = ["test_*"]
+addopts = "-v --tb=short"
```
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/__main__.py

```diff
@@ -0,0 +1,95 @@
+"""
+Manual smoke-test runner for a2a_llm_tracker.
+
+Usage:
+    OPENAI_API_KEY=... python -m a2a_llm_tracker
+"""
+
+import asyncio
+import os
+
+from a2a_llm_tracker import Meter, PricingRegistry, meter_context
+from a2a_llm_tracker.integrations.litellm import LiteLLM
+from a2a_llm_tracker.sinks.jsonl import JSONLSink
+from dotenv import load_dotenv
+load_dotenv()
+
+def setup_meter() -> LiteLLM:
+    pricing = PricingRegistry()
+
+    # Example prices — adjust as needed
+    pricing.set_price(
+        provider="openai",
+        model="openai/gpt-4.1",
+        input_per_million=2.0,
+        output_per_million=8.0,
+    )
+
+    meter = Meter(
+        pricing=pricing,
+        sinks=[JSONLSink("debug_usage.jsonl")],
+        project="agent-meter-smoke-test",
+    )
+
+    return LiteLLM(meter=meter)
+
+
+def run_sync(llm: LiteLLM) -> None:
+    print("\n=== SYNC NON-STREAMING ===")
+    resp = llm.completion(
+        model="openai/gpt-4.1",
+        messages=[{"role": "user", "content": "Say hello in one sentence."}],
+    )
+    print(resp)
+
+
+def run_stream(llm: LiteLLM) -> None:
+    print("\n=== SYNC STREAMING ===")
+    for chunk in llm.completion(
+        model="openai/gpt-4.1",
+        messages=[{"role": "user", "content": "Write a short haiku about testing."}],
+        stream=True,
+    ):
+        print(chunk, end="", flush=True)
+    print()
+
+
+async def run_async(llm: LiteLLM) -> None:
+    print("\n=== ASYNC NON-STREAMING ===")
+    resp = await llm.acompletion(
+        model="openai/gpt-4.1",
+        messages=[{"role": "user", "content": "Async hello!"}],
+    )
+    print(resp)
+
+
+async def run_async_stream(llm: LiteLLM) -> None:
+    print("\n=== ASYNC STREAMING ===")
+    stream = await llm.acompletion(
+        model="openai/gpt-4.1",
+        messages=[{"role": "user", "content": "Stream something async."}],
+        stream=True,
+    )
+    async for chunk in stream:
+        print(chunk, end="", flush=True)
+    print()
+
+
+def main() -> None:
+    if not os.getenv("OPENAI_API_KEY"):
+        print("⚠️ OPENAI_API_KEY is not set. Exiting.")
+        return
+
+    llm = setup_meter()
+
+    with meter_context(agent_id="smoke-test-agent", session_id="local"):
+        run_sync(llm)
+        run_stream(llm)
+        asyncio.run(run_async(llm))
+        asyncio.run(run_async_stream(llm))
+
+    print("\n✔ Smoke test complete. Check debug_usage.jsonl")
+
+
+if __name__ == "__main__":
+    main()
```
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/context.py

```diff
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from contextvars import ContextVar
+from dataclasses import dataclass
+from typing import Iterator, Optional
+
+
+@dataclass(frozen=True)
+class MeterContext:
+    agent_id: Optional[str] = None
+    user_id: Optional[str] = None
+    session_id: Optional[str] = None
+    trace_id: Optional[str] = None
+
+
+_CTX: ContextVar[MeterContext] = ContextVar("agent_meter_context", default=MeterContext())
+
+
+def get_context() -> MeterContext:
+    return _CTX.get()
+
+
+def set_context(
+    *,
+    agent_id: Optional[str] = None,
+    user_id: Optional[str] = None,
+    session_id: Optional[str] = None,
+    trace_id: Optional[str] = None,
+) -> None:
+    current = _CTX.get()
+    _CTX.set(
+        MeterContext(
+            agent_id=agent_id if agent_id is not None else current.agent_id,
+            user_id=user_id if user_id is not None else current.user_id,
+            session_id=session_id if session_id is not None else current.session_id,
+            trace_id=trace_id if trace_id is not None else current.trace_id,
+        )
+    )
+
+
+@contextmanager
+def meter_context(
+    *,
+    agent_id: Optional[str] = None,
+    user_id: Optional[str] = None,
+    session_id: Optional[str] = None,
+    trace_id: Optional[str] = None,
+) -> Iterator[None]:
+    token = _CTX.set(
+        MeterContext(agent_id=agent_id, user_id=user_id, session_id=session_id, trace_id=trace_id)
+    )
+    try:
+        yield
+    finally:
+        _CTX.reset(token)
```
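Worth noting how these helpers compose: `meter_context` installs a fresh `MeterContext` rather than merging with the enclosing one (merging is what `set_context` does), and the `ContextVar` token restores the previous context on exit. A minimal sketch, assuming the package is installed:

```python
from a2a_llm_tracker import meter_context
from a2a_llm_tracker.context import get_context

with meter_context(agent_id="agent-a", user_id="u1"):
    assert get_context().agent_id == "agent-a"
    with meter_context(agent_id="agent-b"):
        # meter_context replaces the whole context, so user_id is not inherited
        assert get_context().agent_id == "agent-b"
        assert get_context().user_id is None
    # the ContextVar token restores the outer context on exit
    assert get_context().user_id == "u1"
```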
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/events.py

```diff
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+
+def utcnow() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+@dataclass
+class UsageEvent:
+    provider: str
+    model: str
+
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    total_tokens: Optional[int] = None
+
+    input_cost_usd: Optional[float] = None
+    output_cost_usd: Optional[float] = None
+    cost_usd: Optional[float] = None
+
+    accuracy: str = "unknown"  # "exact" | "estimated" | "unknown"
+
+    request_id: Optional[str] = None
+    latency_ms: Optional[int] = None
+    status: str = "ok"  # "ok" | "error"
+    error_type: Optional[str] = None
+    error_message: Optional[str] = None
+
+    ts: datetime = field(default_factory=utcnow)
+
+    # attribution
+    agent_id: Optional[str] = None
+    user_id: Optional[str] = None
+    session_id: Optional[str] = None
+    trace_id: Optional[str] = None
+
+    metadata: dict[str, Any] = field(default_factory=dict)
```
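Only `provider` and `model` are required; everything else defaults, so events can be built from partial information. A minimal sketch of the defaults:

```python
from a2a_llm_tracker.events import UsageEvent

event = UsageEvent(provider="openai", model="openai/gpt-4.1", input_tokens=10, output_tokens=5)
assert event.status == "ok"
assert event.accuracy == "unknown"   # no accuracy claim until costs are computed
assert event.ts.tzinfo is not None   # utcnow() stamps a timezone-aware UTC time
assert event.metadata == {}          # fresh dict per instance (default_factory)
```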
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/integrations/litellm.py

```diff
@@ -0,0 +1,324 @@
+from __future__ import annotations
+
+import time
+from typing import Any, AsyncIterator, Iterator, Optional, Tuple
+
+from ..events import UsageEvent
+from ..meter import Meter
+
+
+def _get_attr(obj: Any, name: str, default: Any = None) -> Any:
+    # works for dicts and objects
+    if isinstance(obj, dict):
+        return obj.get(name, default)
+    return getattr(obj, name, default)
+
+
+def _extract_usage(resp_or_chunk: Any) -> Tuple[Optional[int], Optional[int], Optional[int]]:
+    """
+    LiteLLM often returns a response with:
+      - resp.usage.prompt_tokens / completion_tokens / total_tokens
+    For streaming, some configs return usage in the final chunk.
+    """
+    usage = _get_attr(resp_or_chunk, "usage", None)
+    if usage is None:
+        # sometimes nested differently; keep this minimal for v1
+        return None, None, None
+
+    in_tok = _get_attr(usage, "prompt_tokens", None)
+    out_tok = _get_attr(usage, "completion_tokens", None)
+    total = _get_attr(usage, "total_tokens", None)
+
+    # Sometimes LiteLLM usage might be dict-like
+    if isinstance(usage, dict):
+        in_tok = usage.get("prompt_tokens", in_tok)
+        out_tok = usage.get("completion_tokens", out_tok)
+        total = usage.get("total_tokens", total)
+
+    return in_tok, out_tok, total
+
+
+def _guess_provider_from_model(model: str) -> str:
+    """
+    LiteLLM supports prefixes like 'openai/gpt-4o-mini', 'anthropic/claude-...'.
+    If absent, we default to 'litellm'.
+    """
+    if "/" in model:
+        return model.split("/", 1)[0].lower()
+    if ":" in model:
+        return model.split(":", 1)[0].lower()
+    return "litellm"
+
+
+class _StreamWrapper(Iterator[Any]):
+    def __init__(self, inner: Iterator[Any], finalize) -> None:
+        self._inner = inner
+        self._finalize = finalize
+        self._done = False
+
+    def __iter__(self) -> "_StreamWrapper":
+        return self
+
+    def __next__(self) -> Any:
+        try:
+            return next(self._inner)
+        except StopIteration:
+            if not self._done:
+                self._done = True
+                self._finalize()
+            raise
+        except Exception:
+            if not self._done:
+                self._done = True
+                self._finalize(error=True)
+            raise
+
+
+class _AsyncStreamWrapper(AsyncIterator[Any]):
+    def __init__(self, inner: AsyncIterator[Any], finalize) -> None:
+        self._inner = inner
+        self._finalize = finalize
+        self._done = False
+
+    def __aiter__(self) -> "_AsyncStreamWrapper":
+        return self
+
+    async def __anext__(self) -> Any:
+        try:
+            return await self._inner.__anext__()
+        except StopAsyncIteration:
+            if not self._done:
+                self._done = True
+                self._finalize()
+            raise
+        except Exception:
+            if not self._done:
+                self._done = True
+                self._finalize(error=True)
+            raise
+
+
+class LiteLLM:
+    """
+    LiteLLM-first wrapper.
+      - completion(): sync
+      - acompletion(): async
+    Supports stream=True and stream=False.
+    """
+
+    def __init__(self, *, meter: Meter) -> None:
+        self.meter = meter
+
+        try:
+            import litellm  # type: ignore
+        except Exception as e:
+            raise RuntimeError(
+                "litellm is not installed. Install with: pip install litellm"
+            ) from e
+
+        self._litellm = litellm
+
+    def completion(self, **kwargs: Any):
+        """
+        Mirrors litellm.completion(**kwargs).
+        If stream=True, returns an iterator you can for-loop; the event is recorded when the stream ends.
+        """
+        model = kwargs.get("model", "unknown")
+        provider = _guess_provider_from_model(model)
+
+        stream = bool(kwargs.get("stream", False))
+
+        # Best effort: ask for usage in streaming if supported by downstream.
+        # Not all providers respect this; we still handle missing usage.
+        kwargs.setdefault("stream_options", {"include_usage": True})
+
+        t0 = time.time()
+        last_chunk_holder: dict[str, Any] = {"last": None}
+        status = "ok"
+        err_type = None
+        err_msg = None
+        request_id: Optional[str] = None
+        final_model: str = model
+
+        def finalize(error: bool = False) -> None:
+            nonlocal request_id, final_model
+            latency_ms = int((time.time() - t0) * 1000)
+
+            resp_or_chunk = last_chunk_holder["last"]
+            in_tok = out_tok = total = None
+
+            if resp_or_chunk is not None:
+                in_tok, out_tok, total = _extract_usage(resp_or_chunk)
+                request_id_local = _get_attr(resp_or_chunk, "id", None)
+                if request_id is None:
+                    request_id = request_id_local
+                final_model = _get_attr(resp_or_chunk, "model", final_model) or final_model
+
+            accuracy = "exact" if (in_tok is not None and out_tok is not None) else "unknown"
+            in_cost, out_cost, cost, _price = self.meter.compute_cost(provider, final_model, in_tok, out_tok)
+
+            event = UsageEvent(
+                provider=provider,
+                model=final_model,
+                input_tokens=in_tok,
+                output_tokens=out_tok,
+                total_tokens=total,
+                input_cost_usd=in_cost,
+                output_cost_usd=out_cost,
+                cost_usd=cost,
+                accuracy=accuracy,
+                request_id=request_id,
+                latency_ms=latency_ms,
+                status="error" if error else status,
+                error_type=err_type,
+                error_message=err_msg,
+                metadata={"integration": "litellm", "stream": stream},
+            )
+            self.meter.record(event)
+
+        try:
+            resp = self._litellm.completion(**kwargs)
+            if not stream:
+                latency_ms = int((time.time() - t0) * 1000)
+
+                request_id = _get_attr(resp, "id", None)
+                final_model = _get_attr(resp, "model", model) or model
+                in_tok, out_tok, total = _extract_usage(resp)
+
+                accuracy = "exact" if (in_tok is not None and out_tok is not None) else "unknown"
+                in_cost, out_cost, cost, _price = self.meter.compute_cost(provider, final_model, in_tok, out_tok)
+
+                self.meter.record(
+                    UsageEvent(
+                        provider=provider,
+                        model=final_model,
+                        input_tokens=in_tok,
+                        output_tokens=out_tok,
+                        total_tokens=total,
+                        input_cost_usd=in_cost,
+                        output_cost_usd=out_cost,
+                        cost_usd=cost,
+                        accuracy=accuracy,
+                        request_id=request_id,
+                        latency_ms=latency_ms,
+                        status="ok",
+                        metadata={"integration": "litellm", "stream": False},
+                    )
+                )
+                return resp
+
+            # stream=True
+            def gen():
+                for chunk in resp:
+                    last_chunk_holder["last"] = chunk
+                    yield chunk
+
+            return _StreamWrapper(gen(), finalize)
+
+        except Exception as e:
+            status = "error"
+            err_type = type(e).__name__
+            err_msg = str(e)
+            finalize(error=True)
+            raise
+
+    async def acompletion(self, **kwargs: Any):
+        """
+        Mirrors litellm.acompletion(**kwargs).
+        If stream=True, returns an async iterator; the event is recorded when the stream ends.
+        """
+        model = kwargs.get("model", "unknown")
+        provider = _guess_provider_from_model(model)
+        stream = bool(kwargs.get("stream", False))
+
+        kwargs.setdefault("stream_options", {"include_usage": True})
+
+        t0 = time.time()
+        last_chunk_holder: dict[str, Any] = {"last": None}
+        status = "ok"
+        err_type = None
+        err_msg = None
+        request_id: Optional[str] = None
+        final_model: str = model
+
+        def finalize(error: bool = False) -> None:
+            nonlocal request_id, final_model
+            latency_ms = int((time.time() - t0) * 1000)
+
+            resp_or_chunk = last_chunk_holder["last"]
+            in_tok = out_tok = total = None
+
+            if resp_or_chunk is not None:
+                in_tok, out_tok, total = _extract_usage(resp_or_chunk)
+                request_id_local = _get_attr(resp_or_chunk, "id", None)
+                if request_id is None:
+                    request_id = request_id_local
+                final_model = _get_attr(resp_or_chunk, "model", final_model) or final_model
+
+            accuracy = "exact" if (in_tok is not None and out_tok is not None) else "unknown"
+            in_cost, out_cost, cost, _price = self.meter.compute_cost(provider, final_model, in_tok, out_tok)
+
+            event = UsageEvent(
+                provider=provider,
+                model=final_model,
+                input_tokens=in_tok,
+                output_tokens=out_tok,
+                total_tokens=total,
+                input_cost_usd=in_cost,
+                output_cost_usd=out_cost,
+                cost_usd=cost,
+                accuracy=accuracy,
+                request_id=request_id,
+                latency_ms=latency_ms,
+                status="error" if error else status,
+                error_type=err_type,
+                error_message=err_msg,
+                metadata={"integration": "litellm", "stream": stream, "async": True},
+            )
+            self.meter.record(event)
+
+        try:
+            resp = await self._litellm.acompletion(**kwargs)
+            if not stream:
+                latency_ms = int((time.time() - t0) * 1000)
+
+                request_id = _get_attr(resp, "id", None)
+                final_model = _get_attr(resp, "model", model) or model
+                in_tok, out_tok, total = _extract_usage(resp)
+
+                accuracy = "exact" if (in_tok is not None and out_tok is not None) else "unknown"
+                in_cost, out_cost, cost, _price = self.meter.compute_cost(provider, final_model, in_tok, out_tok)
+
+                self.meter.record(
+                    UsageEvent(
+                        provider=provider,
+                        model=final_model,
+                        input_tokens=in_tok,
+                        output_tokens=out_tok,
+                        total_tokens=total,
+                        input_cost_usd=in_cost,
+                        output_cost_usd=out_cost,
+                        cost_usd=cost,
+                        accuracy=accuracy,
+                        request_id=request_id,
+                        latency_ms=latency_ms,
+                        status="ok",
+                        metadata={"integration": "litellm", "stream": False, "async": True},
+                    )
+                )
+                return resp
+
+            # stream=True (async iterator)
+            async def agen():
+                async for chunk in resp:
+                    last_chunk_holder["last"] = chunk
+                    yield chunk
+
+            return _AsyncStreamWrapper(agen(), finalize)
+
+        except Exception as e:
+            status = "error"
+            err_type = type(e).__name__
+            err_msg = str(e)
+            finalize(error=True)
+            raise
```
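Two behaviors of the helpers above are worth pinning down: provider inference looks only at the `provider/` (or `provider:`) prefix of the model string, and usage extraction tolerates both dict-style and attribute-style payloads, returning `(None, None, None)` when usage is absent. A sketch (these are module-private helpers, so the imports are for illustration only):

```python
from a2a_llm_tracker.integrations.litellm import (
    _extract_usage,
    _guess_provider_from_model,
)

assert _guess_provider_from_model("openai/gpt-4.1") == "openai"
assert _guess_provider_from_model("anthropic/claude-3-5-sonnet") == "anthropic"
assert _guess_provider_from_model("gpt-4.1") == "litellm"  # no prefix -> fallback

payload = {"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}
assert _extract_usage(payload) == (10, 5, 15)
assert _extract_usage({}) == (None, None, None)  # missing usage is tolerated, not an error
```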
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/meter.py

```diff
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from typing import Iterable, Optional
+
+from .context import get_context
+from .events import UsageEvent
+from .pricing import ModelPrice, PricingRegistry
+from .sinks.base import Sink
+
+
+class Meter:
+    def __init__(
+        self,
+        *,
+        pricing: Optional[PricingRegistry] = None,
+        sinks: Optional[Iterable[Sink]] = None,
+        project: Optional[str] = None,
+    ) -> None:
+        self.pricing = pricing or PricingRegistry()
+        self.sinks = list(sinks or [])
+        self.project = project
+
+    def compute_cost(
+        self,
+        provider: str,
+        model: str,
+        input_tokens: Optional[int],
+        output_tokens: Optional[int],
+    ) -> tuple[Optional[float], Optional[float], Optional[float], Optional[ModelPrice]]:
+        price = self.pricing.get_price(provider, model)
+        if price is None or input_tokens is None or output_tokens is None:
+            return None, None, None, price
+
+        in_cost = (input_tokens / 1_000_000) * price.input_per_million
+        out_cost = (output_tokens / 1_000_000) * price.output_per_million
+        return in_cost, out_cost, (in_cost + out_cost), price
+
+    def record(self, event: UsageEvent) -> None:
+        # enrich with context if missing
+        ctx = get_context()
+        event.agent_id = event.agent_id or ctx.agent_id
+        event.user_id = event.user_id or ctx.user_id
+        event.session_id = event.session_id or ctx.session_id
+        event.trace_id = event.trace_id or ctx.trace_id
+
+        if self.project:
+            event.metadata.setdefault("project", self.project)
+
+        for sink in self.sinks:
+            sink.write(event)
```
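`compute_cost` is plain per-million arithmetic, and it degrades to all-`None` rather than raising when the price or a token count is missing. A worked sketch using the example prices from `__main__.py` ($2/M input, $8/M output):

```python
from a2a_llm_tracker import Meter, PricingRegistry

pricing = PricingRegistry()
pricing.set_price("openai", "openai/gpt-4.1", input_per_million=2.0, output_per_million=8.0)
meter = Meter(pricing=pricing)

# 1,000 input tokens and 500 output tokens:
in_cost, out_cost, total, price = meter.compute_cost("openai", "openai/gpt-4.1", 1000, 500)
assert in_cost == 0.002            # 1_000 / 1_000_000 * 2.0
assert out_cost == 0.004           # 500 / 1_000_000 * 8.0
assert abs(total - 0.006) < 1e-12  # sum, modulo float rounding

# Unpriced model -> no cost, no exception:
assert meter.compute_cost("openai", "gpt-unknown", 1000, 500) == (None, None, None, None)
```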
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/pricing.py

```diff
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass(frozen=True)
+class ModelPrice:
+    input_per_million: float
+    output_per_million: float
+
+
+class PricingRegistry:
+    """
+    Simple in-memory pricing table.
+    Users can override by calling `set_price`.
+    """
+
+    def __init__(self) -> None:
+        self._prices: dict[tuple[str, str], ModelPrice] = {}
+
+    def set_price(self, provider: str, model: str, *, input_per_million: float, output_per_million: float) -> None:
+        self._prices[(provider.lower(), model)] = ModelPrice(
+            input_per_million=float(input_per_million),
+            output_per_million=float(output_per_million),
+        )
+
+    def get_price(self, provider: str, model: str) -> Optional[ModelPrice]:
+        return self._prices.get((provider.lower(), model))
```
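One lookup nuance in the registry above: the provider key is lowercased on both `set_price` and `get_price`, but the model string must match exactly. A short sketch:

```python
from a2a_llm_tracker.pricing import PricingRegistry

pricing = PricingRegistry()
pricing.set_price("OpenAI", "openai/gpt-4.1", input_per_million=2.0, output_per_million=8.0)

assert pricing.get_price("openai", "openai/gpt-4.1") is not None  # provider: case-insensitive
assert pricing.get_price("openai", "OPENAI/GPT-4.1") is None      # model: case-sensitive
```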
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/sinks/jsonl.py

```diff
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+import json
+from dataclasses import asdict
+from pathlib import Path
+from typing import Any
+
+from .base import Sink
+from ..events import UsageEvent
+
+
+class JSONLSink(Sink):
+    def __init__(self, path: str) -> None:
+        self.path = Path(path)
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+
+    def write(self, event: UsageEvent) -> None:
+        d: dict[str, Any] = asdict(event)
+        # datetime -> iso
+        d["ts"] = event.ts.isoformat()
+        with self.path.open("a", encoding="utf-8") as f:
+            f.write(json.dumps(d, ensure_ascii=False) + "\n")
```
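What lands on disk is one JSON object per line, with `ts` serialized to ISO-8601. A sketch of writing and reading back an event (the file name is illustrative):

```python
import json

from a2a_llm_tracker.events import UsageEvent
from a2a_llm_tracker.sinks.jsonl import JSONLSink

sink = JSONLSink("usage.jsonl")
sink.write(UsageEvent(provider="openai", model="openai/gpt-4.1", input_tokens=10, output_tokens=5))

with open("usage.jsonl", encoding="utf-8") as f:
    record = json.loads(f.readlines()[-1])  # last appended line

assert record["model"] == "openai/gpt-4.1"
assert record["input_tokens"] == 10
assert "T" in record["ts"]  # ISO-8601 timestamp string
```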
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker/sinks/sqlite.py

```diff
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+import sqlite3
+from dataclasses import asdict
+from pathlib import Path
+from typing import Any, Optional
+
+from .base import Sink
+from ..events import UsageEvent
+
+
+class SQLiteSink(Sink):
+    def __init__(self, path: str = "usage.db") -> None:
+        self.path = Path(path)
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+        self._init_db()
+
+    def _connect(self) -> sqlite3.Connection:
+        return sqlite3.connect(str(self.path))
+
+    def _init_db(self) -> None:
+        with self._connect() as conn:
+            conn.execute(
+                """
+                CREATE TABLE IF NOT EXISTS usage_events (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    ts TEXT,
+                    provider TEXT,
+                    model TEXT,
+                    input_tokens INTEGER,
+                    output_tokens INTEGER,
+                    total_tokens INTEGER,
+                    input_cost_usd REAL,
+                    output_cost_usd REAL,
+                    cost_usd REAL,
+                    accuracy TEXT,
+                    request_id TEXT,
+                    latency_ms INTEGER,
+                    status TEXT,
+                    error_type TEXT,
+                    error_message TEXT,
+                    agent_id TEXT,
+                    user_id TEXT,
+                    session_id TEXT,
+                    trace_id TEXT,
+                    metadata_json TEXT
+                )
+                """
+            )
+            conn.commit()
+
+    def write(self, event: UsageEvent) -> None:
+        d: dict[str, Any] = asdict(event)
+        metadata_json = None
+        try:
+            import json
+
+            metadata_json = json.dumps(d.get("metadata", {}), ensure_ascii=False)
+        except Exception:
+            metadata_json = "{}"
+
+        with self._connect() as conn:
+            conn.execute(
+                """
+                INSERT INTO usage_events (
+                    ts, provider, model,
+                    input_tokens, output_tokens, total_tokens,
+                    input_cost_usd, output_cost_usd, cost_usd,
+                    accuracy, request_id, latency_ms,
+                    status, error_type, error_message,
+                    agent_id, user_id, session_id, trace_id,
+                    metadata_json
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    event.ts.isoformat(),
+                    event.provider,
+                    event.model,
+                    event.input_tokens,
+                    event.output_tokens,
+                    event.total_tokens,
+                    event.input_cost_usd,
+                    event.output_cost_usd,
+                    event.cost_usd,
+                    event.accuracy,
+                    event.request_id,
+                    event.latency_ms,
+                    event.status,
+                    event.error_type,
+                    event.error_message,
+                    event.agent_id,
+                    event.user_id,
+                    event.session_id,
+                    event.trace_id,
+                    metadata_json,
+                ),
+            )
+            conn.commit()
```
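Because events land in a plain `usage_events` table, reporting is ordinary SQL. A sketch of a per-model cost rollup against the schema created by `_init_db` above (the database path is illustrative):

```python
import sqlite3

with sqlite3.connect("usage.db") as conn:
    rows = conn.execute(
        """
        SELECT model, COUNT(*) AS calls, SUM(cost_usd) AS total_cost_usd
        FROM usage_events
        GROUP BY model
        ORDER BY total_cost_usd DESC
        """
    ).fetchall()

for model, calls, total_cost_usd in rows:
    print(f"{model}: {calls} calls, ${total_cost_usd or 0:.4f}")
```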
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/PKG-INFO

```diff
@@ -0,0 +1,22 @@
+Metadata-Version: 2.4
+Name: a2a-llm-tracker
+Version: 0.0.1
+Summary: A short description of your package
+Author-email: Nischal Bhandari <nischal@boomconsole.com>
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
+Requires-Dist: mftsccs
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0; extra == "dev"
+Requires-Dist: black>=23.0; extra == "dev"
+Requires-Dist: ruff>=0.1.0; extra == "dev"
+Requires-Dist: mypy>=1.0; extra == "dev"
+Requires-Dist: twine; extra == "dev"
+Requires-Dist: python-dotenv; extra == "dev"
+Requires-Dist: litellm; extra == "dev"
+
+# a2a-tracker
```
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/SOURCES.txt

```diff
@@ -0,0 +1,21 @@
+README.md
+pyproject.toml
+src/a2a_llm_tracker/__init__.py
+src/a2a_llm_tracker/__main__.py
+src/a2a_llm_tracker/context.py
+src/a2a_llm_tracker/core.py
+src/a2a_llm_tracker/events.py
+src/a2a_llm_tracker/meter.py
+src/a2a_llm_tracker/pricing.py
+src/a2a_llm_tracker.egg-info/PKG-INFO
+src/a2a_llm_tracker.egg-info/SOURCES.txt
+src/a2a_llm_tracker.egg-info/dependency_links.txt
+src/a2a_llm_tracker.egg-info/requires.txt
+src/a2a_llm_tracker.egg-info/top_level.txt
+src/a2a_llm_tracker/integrations/__init__.py
+src/a2a_llm_tracker/integrations/litellm.py
+src/a2a_llm_tracker/sinks/__init__.py
+src/a2a_llm_tracker/sinks/base.py
+src/a2a_llm_tracker/sinks/jsonl.py
+src/a2a_llm_tracker/sinks/sqlite.py
+tests/test_core.py
```
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/dependency_links.txt

```diff
@@ -0,0 +1 @@
+
```
a2a_llm_tracker-0.0.1/src/a2a_llm_tracker.egg-info/top_level.txt

```diff
@@ -0,0 +1 @@
+a2a_llm_tracker
```
a2a_llm_tracker-0.0.1/tests/test_core.py

```diff
@@ -0,0 +1,14 @@
+
+from a2a_llm_tracker import Meter, PricingRegistry
+from a2a_llm_tracker.sinks.sqlite import SQLiteSink
+from a2a_llm_tracker.integrations.litellm import LiteLLM
+
+
+def testing_registry():
+    pricing = PricingRegistry()
+    pricing.set_price("openai", "openai/gpt-4.1", input_per_million=2.0, output_per_million=8.0)
+    # build the Meter the wrapper needs; the imported SQLiteSink receives the events
+    meter = Meter(pricing=pricing, sinks=[SQLiteSink("usage.db")])
+    llm = LiteLLM(meter=meter)
+    assert llm.meter.pricing.get_price("openai", "openai/gpt-4.1") is not None
+
```