agentgraf 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentgraf-0.1.0/.gitignore +27 -0
- agentgraf-0.1.0/PKG-INFO +41 -0
- agentgraf-0.1.0/README.md +8 -0
- agentgraf-0.1.0/pyproject.toml +70 -0
- agentgraf-0.1.0/src/agentgraf/__init__.py +40 -0
- agentgraf-0.1.0/src/agentgraf/client.py +208 -0
- agentgraf-0.1.0/src/agentgraf/models.py +165 -0
- agentgraf-0.1.0/src/agentgraf/processor.py +157 -0
- agentgraf-0.1.0/src/agentgraf/py.typed +0 -0
- agentgraf-0.1.0/src/agentgraf/tracer.py +424 -0
- agentgraf-0.1.0/tests/conftest.py +1 -0
- agentgraf-0.1.0/tests/test_client.py +147 -0
- agentgraf-0.1.0/tests/test_models.py +149 -0
- agentgraf-0.1.0/tests/test_processor.py +212 -0
- agentgraf-0.1.0/tests/test_tracer.py +366 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
|
|
10
|
+
# Node
|
|
11
|
+
node_modules/
|
|
12
|
+
.output/
|
|
13
|
+
/public/build/
|
|
14
|
+
|
|
15
|
+
# IDE
|
|
16
|
+
.vscode/
|
|
17
|
+
.idea/
|
|
18
|
+
*.swp
|
|
19
|
+
*.swo
|
|
20
|
+
|
|
21
|
+
# OS
|
|
22
|
+
.DS_Store
|
|
23
|
+
Thumbs.db
|
|
24
|
+
|
|
25
|
+
# Env
|
|
26
|
+
.env
|
|
27
|
+
.env.local
|
agentgraf-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentgraf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Zero-infrastructure AI agent tracing for Grafana + Loki
|
|
5
|
+
Project-URL: Homepage, https://github.com/Berg-it/agentgraf
|
|
6
|
+
Project-URL: Documentation, https://github.com/Berg-it/agentgraf#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/Berg-it/agentgraf
|
|
8
|
+
Project-URL: Issues, https://github.com/Berg-it/agentgraf/issues
|
|
9
|
+
Author-email: Mohamed Amine Berguiga <m.a.berguiga@gmail.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
Keywords: ai-agent,grafana,langchain,llm,loki,observability,tracing
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: Topic :: System :: Monitoring
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: httpx>=0.27.0
|
|
23
|
+
Requires-Dist: pydantic>=2.0.0
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
29
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
30
|
+
Provides-Extra: langchain
|
|
31
|
+
Requires-Dist: langchain-core>=0.2.0; extra == 'langchain'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# agentgraf — Python Tracer
|
|
35
|
+
|
|
36
|
+
Zero-infrastructure AI agent tracing for Grafana + Loki.
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install agentgraf # core only
|
|
40
|
+
pip install agentgraf[langchain] # with LangChain callback
|
|
41
|
+
```
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "agentgraf"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Zero-infrastructure AI agent tracing for Grafana + Loki"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Mohamed Amine Berguiga", email = "m.a.berguiga@gmail.com" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["tracing", "llm", "langchain", "grafana", "loki", "observability", "ai-agent"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
25
|
+
"Topic :: System :: Monitoring",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
dependencies = [
|
|
29
|
+
"httpx>=0.27.0",
|
|
30
|
+
"pydantic>=2.0.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
langchain = [
|
|
35
|
+
"langchain-core>=0.2.0",
|
|
36
|
+
]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=8.0",
|
|
39
|
+
"pytest-asyncio>=0.23",
|
|
40
|
+
"mypy>=1.10",
|
|
41
|
+
"ruff>=0.4",
|
|
42
|
+
"respx>=0.21",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.urls]
|
|
46
|
+
Homepage = "https://github.com/Berg-it/agentgraf"
|
|
47
|
+
Documentation = "https://github.com/Berg-it/agentgraf#readme"
|
|
48
|
+
Repository = "https://github.com/Berg-it/agentgraf"
|
|
49
|
+
Issues = "https://github.com/Berg-it/agentgraf/issues"
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.wheel]
|
|
52
|
+
packages = ["src/agentgraf"]
|
|
53
|
+
|
|
54
|
+
[tool.ruff.lint]
|
|
55
|
+
select = ["E", "F", "I", "N", "W", "UP", "B", "C4", "SIM"]
|
|
56
|
+
ignore = ["E501"]
|
|
57
|
+
|
|
58
|
+
[tool.ruff.lint.pydocstyle]
|
|
59
|
+
convention = "google"
|
|
60
|
+
|
|
61
|
+
[tool.mypy]
|
|
62
|
+
python_version = "3.10"
|
|
63
|
+
strict = true
|
|
64
|
+
warn_return_any = true
|
|
65
|
+
warn_unused_ignores = true
|
|
66
|
+
show_error_codes = true
|
|
67
|
+
|
|
68
|
+
[tool.pytest.ini_options]
|
|
69
|
+
asyncio_mode = "auto"
|
|
70
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""AgentGraf — Zero-infrastructure AI agent tracing for Grafana + Loki.

Usage::

    from agentgraf import LokiClient, BatchSpanProcessor, TraceSpan

    client = LokiClient(loki_url="http://loki:3100/loki/api/v1/push")
    processor = BatchSpanProcessor(exporter=client.send_spans_sync)
    processor.start()

    # ... add spans manually or use AgentGrafTracer (LangChain) ...

    processor.shutdown()

Optional LangChain integration (``pip install agentgraf[langchain]``)::

    from agentgraf import AgentGrafTracer
    tracer = AgentGrafTracer(processor=processor)
    graph.astream(state, config={"callbacks": [tracer]})
"""

# Core public API — unconditionally importable (httpx and pydantic are
# hard dependencies declared in pyproject.toml).
from .models import SpanKind, SpanStatus, TraceSpan
from .client import LokiClient
from .processor import BatchSpanProcessor

__all__ = [
    "TraceSpan",
    "SpanKind",
    "SpanStatus",
    "LokiClient",
    "BatchSpanProcessor",
]

# Lazy import for LangChain tracer — only available if langchain-core is installed.
try:
    from .tracer import AgentGrafTracer  # noqa: F401

    __all__.append("AgentGrafTracer")
except ImportError:
    # langchain-core is an optional extra; without it the core tracing
    # API above still works, only the LangChain callback is unavailable.
    pass
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Loki-direct HTTP client — dual sync/async, auto-detects execution context.
|
|
2
|
+
|
|
3
|
+
Push spans directly to Loki's push API. No Gateway required in v0.1.0.
|
|
4
|
+
|
|
5
|
+
Loki API reference: POST /loki/api/v1/push
|
|
6
|
+
Payload: {"streams": [{"stream": {...labels...}, "values": [[ts_ns, line, metadata]]}]}
|
|
7
|
+
|
|
8
|
+
Timestamps must be **string nanosecond-epoch** or Loki returns 400.
|
|
9
|
+
Structured metadata (3rd tuple element) is a Loki >=3.0 feature — older Loki
|
|
10
|
+
versions silently ignore it, so the fallback is the JSON body parsed via ``| json``
|
|
11
|
+
in LogQL.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import logging
|
|
17
|
+
import time
|
|
18
|
+
from typing import Any, Dict, List, Optional
|
|
19
|
+
|
|
20
|
+
import httpx
|
|
21
|
+
|
|
22
|
+
from .models import TraceSpan
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger("agentgraf.client")
|
|
25
|
+
|
|
26
|
+
# Loki labels kept intentionally low-cardinality — never put run_id/trace_id here.
# Per-span IDs travel in structured metadata / the JSON log line instead
# (see _build_loki_payload below).
_STATIC_LABELS = {"job": "agentgraf"}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class LokiClient:
    """Push spans to Loki HTTP API. Dual sync/async, auto-detects context.

    Args:
        loki_url: Full Loki push endpoint (e.g. ``http://loki:3100/loki/api/v1/push``).
        max_retries: Number of retry attempts on transient failures (5xx, timeouts).
        retry_delay: Base delay in seconds before first retry (default 1.0).
        timeout: HTTP request timeout in seconds (default 10).
    """

    def __init__(
        self,
        loki_url: str,
        max_retries: int = 3,
        retry_delay: float = 1.0,
        timeout: float = 10.0,
    ):
        self._url = loki_url.rstrip("/")
        self._max_retries = max_retries
        self._retry_delay = retry_delay
        self._timeout = timeout
        # HTTP clients are created lazily on first send so constructing a
        # LokiClient never opens sockets or touches an event loop.
        self._sync_client: Optional[httpx.Client] = None
        self._async_client: Optional[httpx.AsyncClient] = None

    # ------------------------------------------------------------------
    # Public API — sync
    # ------------------------------------------------------------------
    def send_spans_sync(self, spans: List[TraceSpan]) -> bool:
        """Push a batch of spans to Loki from a synchronous context.

        Retries on 5xx and network errors with exponential backoff.
        Does NOT retry 4xx (client errors will not resolve on their own).

        Returns:
            True when Loki accepted the batch, False otherwise.
        """
        if self._sync_client is None:
            self._sync_client = httpx.Client(timeout=self._timeout)
        payload = _build_loki_payload(spans)
        for attempt in range(1, self._max_retries + 1):
            try:
                resp = self._sync_client.post(
                    self._url,
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )
                outcome = self._classify_response(resp, attempt)
                if outcome is not None:
                    return outcome
            except httpx.HTTPError as exc:
                self._log_transport_error(exc, attempt)
            if attempt < self._max_retries:
                time.sleep(self._backoff(attempt))
        self._log_exhausted(len(spans))
        return False

    # ------------------------------------------------------------------
    # Public API — async
    # ------------------------------------------------------------------
    async def send_spans_async(self, spans: List[TraceSpan]) -> bool:
        """Push a batch of spans to Loki from an async context.

        Retries on 5xx and network errors with exponential backoff.
        Does NOT retry 4xx.

        Returns:
            True when Loki accepted the batch, False otherwise.
        """
        if self._async_client is None:
            self._async_client = httpx.AsyncClient(timeout=self._timeout)
        payload = _build_loki_payload(spans)
        for attempt in range(1, self._max_retries + 1):
            try:
                resp = await self._async_client.post(
                    self._url,
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )
                outcome = self._classify_response(resp, attempt)
                if outcome is not None:
                    return outcome
            except httpx.HTTPError as exc:
                self._log_transport_error(exc, attempt)
            if attempt < self._max_retries:
                await asyncio.sleep(self._backoff(attempt))
        self._log_exhausted(len(spans))
        return False

    # ------------------------------------------------------------------
    # Shared retry logic (identical for sync and async paths)
    # ------------------------------------------------------------------
    def _classify_response(
        self, resp: httpx.Response, attempt: int
    ) -> Optional[bool]:
        """Map one HTTP response to a final outcome or a retry signal.

        Returns:
            True  — accepted (status < 300).
            False — non-retryable client error (4xx); logged, give up.
            None  — retryable server error (5xx); logged, caller backs off.
        """
        if resp.status_code < 300:
            return True
        if 400 <= resp.status_code < 500:
            # Client error — not retryable
            logger.warning(
                "Loki push returned %d (client error, not retrying): %s",
                resp.status_code,
                resp.text[:200],
            )
            return False
        # Server error (5xx) — retryable
        logger.warning(
            "Loki push returned %d (attempt %d/%d): %s",
            resp.status_code,
            attempt,
            self._max_retries,
            resp.text[:200],
        )
        return None

    def _log_transport_error(self, exc: httpx.HTTPError, attempt: int) -> None:
        """Log a network-level failure (timeout, connect/protocol error)."""
        logger.warning(
            "Loki push failed (attempt %d/%d): %s",
            attempt,
            self._max_retries,
            exc,
        )

    def _backoff(self, attempt: int) -> float:
        """Exponential backoff delay: retry_delay * 2**(attempt - 1) seconds."""
        return self._retry_delay * (2 ** (attempt - 1))

    def _log_exhausted(self, n_spans: int) -> None:
        """Log final failure after all retry attempts are spent."""
        logger.error(
            "Failed to push %d spans to Loki after %d attempts", n_spans, self._max_retries
        )

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def close_sync(self) -> None:
        """Close the synchronous HTTP client."""
        if self._sync_client is not None:
            self._sync_client.close()
            self._sync_client = None

    async def close_async(self) -> None:
        """Close the asynchronous HTTP client."""
        if self._async_client is not None:
            await self._async_client.aclose()
            self._async_client = None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ======================================================================
|
|
174
|
+
# Internal helpers
|
|
175
|
+
# ======================================================================
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _build_loki_payload(spans: List[TraceSpan]) -> Dict[str, Any]:
    """Build a Loki push-API payload from a batch of spans.

    Spans are grouped by (project, kind) so the stream count stays small.
    Timestamps are string nanoseconds (Loki requirement).
    Structured metadata carries trace_id/span_id/run_id for Loki >=3.0;
    the same data is in the JSON body for users on older Loki with ``| json``.
    """
    by_stream: Dict[tuple, List[tuple]] = {}
    for sp in spans:
        metadata = {
            "trace_id": sp.trace_id,
            "span_id": sp.span_id,
            "run_id": sp.run_id,
        }
        if sp.parent_span_id:
            metadata["parent_span_id"] = sp.parent_span_id
        # Loki rejects non-string timestamps — convert epoch seconds to ns string.
        entry = (str(int(sp.start_time * 1_000_000_000)), sp.to_json(), metadata)
        by_stream.setdefault((sp.project, sp.kind.value), []).append(entry)

    streams = [
        {
            "stream": {**_STATIC_LABELS, "project": project, "kind": kind},
            "values": entries,
        }
        for (project, kind), entries in by_stream.items()
    ]
    return {"streams": streams}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""AgentGraf data models — Pydantic v2 span representation for LLM agent traces.
|
|
2
|
+
|
|
3
|
+
All timestamps are float unix-epoch seconds (compatible with Loki nanosecond push).
|
|
4
|
+
The model mirrors OpenTelemetry conventions where possible so that future
|
|
5
|
+
exporters (OTLP, Jaeger, Zipkin) are trivial to add.
|
|
6
|
+
|
|
7
|
+
contract-version: 1
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
import uuid
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SpanKind(str, Enum):
    """OpenTelemetry SpanKind adapted for LLM workloads.

    Inherits ``str`` so members serialize as their plain string values in
    JSON payloads and Loki stream labels (``.value`` is used as the
    ``kind`` label when building the push payload).
    """

    LLM = "llm"
    TOOL = "tool"
    CHAIN = "chain"
    AGENT = "agent"
    RETRIEVER = "retriever"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SpanStatus(str, Enum):
    """Terminal status of a span: ``ok`` on success, ``error`` on failure.

    When a span is ``ERROR``, ``TraceSpan.error`` carries the message.
    """

    OK = "ok"
    ERROR = "error"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TraceSpan(BaseModel):
    """A single span in an AI-agent trace.

    This is the stable data contract — backward compatible for life.
    Fields mirror OpenTelemetry conventions where possible.

    Attributes:
        trace_id: 32-hex-char UUID, shared by all spans in one logical run.
        span_id: 16-hex-char UUID, unique per span.
        parent_span_id: 16-hex-char UUID or ``None`` for root spans.
        run_id: Stable ID across a full agent invocation (LangChain ``run_id``).
        run_name: Optional human-readable label for the run.
        project: Logical grouping (e.g. ``k-fix``, ``support-bot``).
        kind: Semantic classification of the span.
        name: Human-readable operation name (``gpt-4o``, ``kubectl_get_pods``).
        start_time: Unix-epoch seconds.
        end_time: Unix-epoch seconds (``None`` until span is closed).
        latency_ms: Computed from (end_time - start_time) * 1000.
        model: LLM model name (OpenAI, Anthropic, etc.).
        input_tokens: Token count consumed by the prompt.
        output_tokens: Token count produced by the completion.
        total_tokens: ``input_tokens + output_tokens``.
        input_data: Truncated/sanitized input payload (JSON string).
        output_data: Truncated/sanitized output payload (JSON string).
        status: ``"ok"`` or ``"error"``.
        error: Error message when status is ``"error"``.
        tags: Free-form key/value labels.
        metadata: Structured metadata (extensible).
        agentgraf_version: Protocol version (current = 1).
    """

    # ── OTel-compatible IDs ──
    trace_id: str = Field(
        default_factory=lambda: uuid.uuid4().hex,
        description="32-char hex (OTel format)",
    )
    span_id: str = Field(
        default_factory=lambda: uuid.uuid4().hex[:16],
        description="16-char hex",
    )
    parent_span_id: Optional[str] = Field(
        default=None,
        description="16-char hex or None (root span)",
    )

    # ── Identification ──
    run_id: str = Field(
        default_factory=lambda: uuid.uuid4().hex[:16],
        description="Stable across a full agent invocation",
    )
    run_name: Optional[str] = None
    project: str = "default"

    # ── Span metadata ──
    kind: SpanKind = SpanKind.CHAIN
    name: str = "unnamed"
    start_time: float = Field(default_factory=time.time)
    end_time: Optional[float] = None
    latency_ms: Optional[int] = None

    # ── LLM-specific ──
    model: Optional[str] = None
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0

    # ── I/O (sanitized, truncated) ──
    input_data: Optional[str] = None
    output_data: Optional[str] = None

    # ── Status ──
    status: SpanStatus = SpanStatus.OK
    error: Optional[str] = None

    # ── Extensibility ──
    tags: Dict[str, Any] = Field(default_factory=dict)
    metadata: Dict[str, Any] = Field(default_factory=dict)

    # ── AgentGraf protocol version ──
    agentgraf_version: int = 1

    # ------------------------------------------------------------------
    # Life-cycle helpers
    # ------------------------------------------------------------------
    def finish(
        self,
        end_time: Optional[float] = None,
        status: Optional[SpanStatus] = None,
        error: Optional[str] = None,
    ) -> "TraceSpan":
        """Close the span, recording end_time, latency, and optional status."""
        # Only None means "not supplied": an explicit (falsy) 0.0 timestamp
        # must be honoured, so avoid the `end_time or time.time()` idiom.
        self.end_time = time.time() if end_time is None else end_time
        self.latency_ms = int((self.end_time - self.start_time) * 1000)
        if status is not None:
            self.status = status
        if error is not None:
            self.error = error
        return self

    def set_tag(self, key: str, value: Any) -> "TraceSpan":
        """Fluent helper to add a single tag."""
        self.tags[key] = value
        return self

    def set_metadata(self, key: str, value: Any) -> "TraceSpan":
        """Fluent helper to add a single metadata entry."""
        self.metadata[key] = value
        return self

    def set_input(self, data: str, truncate: int = 10_000) -> "TraceSpan":
        """Set input_data, truncated to at most ``truncate`` characters."""
        # Slicing is a no-op when data is already short enough.
        self.input_data = data[:truncate]
        return self

    def set_output(self, data: str, truncate: int = 10_000) -> "TraceSpan":
        """Set output_data, truncated to at most ``truncate`` characters."""
        self.output_data = data[:truncate]
        return self

    def set_tokens(self, input_tokens: int, output_tokens: int) -> "TraceSpan":
        """Record token usage; total_tokens is derived as the sum."""
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens
        self.total_tokens = input_tokens + output_tokens
        return self

    # ------------------------------------------------------------------
    # Serialization
    # ------------------------------------------------------------------
    def to_json(self) -> str:
        """Compact JSON string (one line → friendly for Loki / stdout)."""
        return self.model_dump_json(exclude_none=True)
|