decimalai 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- decimalai/__init__.py +298 -0
- decimalai/_client.py +334 -0
- decimalai/_config.py +149 -0
- decimalai/autogen.py +40 -0
- decimalai/cli/__init__.py +0 -0
- decimalai/cli/dataset_cmd.py +0 -0
- decimalai/cli/main.py +75 -0
- decimalai/cli/manifest_cmd.py +0 -0
- decimalai/cli/replay_cmd.py +0 -0
- decimalai/evals/__init__.py +557 -0
- decimalai/evals/adapters.py +208 -0
- decimalai/evals/builtin.py +84 -0
- decimalai/export/__init__.py +0 -0
- decimalai/export/jsonl.py +0 -0
- decimalai/export/parquet.py +0 -0
- decimalai/generic.py +338 -0
- decimalai/integrations/__init__.py +5 -0
- decimalai/integrations/_lc_compat.py +340 -0
- decimalai/integrations/langgraph.py +0 -0
- decimalai/integrations/openai.py +0 -0
- decimalai/integrations/otel.py +297 -0
- decimalai/langchain.py +842 -0
- decimalai/llamaindex.py +40 -0
- decimalai/manifest/__init__.py +0 -0
- decimalai/manifest/detector.py +0 -0
- decimalai/manifest/extractor.py +0 -0
- decimalai/manifest/hasher.py +0 -0
- decimalai/openai_agents.py +42 -0
- decimalai/replay/__init__.py +0 -0
- decimalai/replay/tasks.py +0 -0
- decimalai/schema/__init__.py +16 -0
- decimalai/schema/common.py +55 -0
- decimalai/schema/dataset.py +0 -0
- decimalai/schema/manifest.py +306 -0
- decimalai/schema/trace.py +110 -0
- decimalai-0.3.0.dist-info/METADATA +78 -0
- decimalai-0.3.0.dist-info/RECORD +40 -0
- decimalai-0.3.0.dist-info/WHEEL +4 -0
- decimalai-0.3.0.dist-info/entry_points.txt +2 -0
- decimalai-0.3.0.dist-info/licenses/LICENSE +0 -0
decimalai/__init__.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""DecimalAI SDK — Agent dataset lifecycle platform.
|
|
2
|
+
|
|
3
|
+
Quick start::
|
|
4
|
+
|
|
5
|
+
import decimalai
|
|
6
|
+
decimalai.init() # reads DECIMAL_API_KEY from env
|
|
7
|
+
|
|
8
|
+
# LangChain — one-liner
|
|
9
|
+
decimalai.init(langchain=True)
|
|
10
|
+
|
|
11
|
+
# Or manual install
|
|
12
|
+
from decimalai.langchain import install
|
|
13
|
+
install()
|
|
14
|
+
|
|
15
|
+
# Generic / any framework
|
|
16
|
+
@decimalai.trace(agent_name="my-agent")
|
|
17
|
+
def run_agent(query):
|
|
18
|
+
decimalai.log_llm_call(model="gpt-4o", input=msgs, output=resp)
|
|
19
|
+
return resp.choices[0].message.content
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
__version__ = "0.3.0"
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
import os
|
|
26
|
+
from typing import Optional
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger("decimalai")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def init(
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    project: Optional[str] = None,
    enabled: bool = True,
    langchain: bool = False,
    agent_name: Optional[str] = None,
) -> None:
    """Configure the DecimalAI SDK and publish the shared config/client.

    Each setting is resolved as: explicit argument, then environment
    variable, then built-in default. The resulting ``DecimalConfig`` is
    stored on ``decimalai._config._config``; the shared client is stored on
    ``decimalai._config._client`` (``None`` when ``enabled`` is false).

    Args:
        api_key: API key. Falls back to the ``DECIMAL_API_KEY`` env var.
        base_url: Backend URL. Falls back to ``DECIMAL_BASE_URL``, then
            ``https://api.decimal.ai``. A trailing ``/`` is stripped.
        project: Optional project grouping, passed through to the client.
        enabled: When ``False`` no client is created and a missing API key
            is tolerated.
        langchain: When ``True``, ``decimalai.langchain.install()`` is called
            after configuration.
        agent_name: Forwarded as ``agent_name`` to the langchain install.

    Raises:
        DecimalConfigError: If ``enabled`` is true and no API key was passed
            or found in the environment.
    """
    from ._config import DecimalConfig, DecimalConfigError
    from ._client import DecimalAIClient

    import decimalai._config as _cfg

    # The environment is only consulted when no explicit key was given.
    key = api_key if api_key else os.environ.get("DECIMAL_API_KEY", "")
    if enabled and not key:
        raise DecimalConfigError(
            "No API key provided. Pass api_key= to decimalai.init() "
            "or set the DECIMAL_API_KEY environment variable."
        )

    # Argument -> env var -> hard-coded default, first non-empty value wins.
    url = base_url
    if not url:
        url = os.environ.get("DECIMAL_BASE_URL", "")
    if not url:
        url = "https://api.decimal.ai"

    settings = DecimalConfig(
        api_key=key,
        base_url=url.rstrip("/"),
        project=project,
        enabled=enabled,
    )
    _cfg._config = settings

    if not enabled:
        _cfg._client = None
        logger.info("DecimalAI SDK initialized in disabled mode (no-op)")
    else:
        _cfg._client = DecimalAIClient(
            api_key=settings.api_key,
            base_url=settings.base_url,
            project=settings.project,
        )
        logger.info(
            "DecimalAI SDK initialized: base_url=%s project=%s",
            settings.base_url,
            settings.project,
        )

    # Optional LangChain hook; imported lazily so it is only loaded on request.
    if langchain:
        from .langchain import install as _lc_install

        _lc_install(agent_name=agent_name)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def send(trace) -> None:
    """Send one trace to the backend through the shared client.

    Intended for advanced usage when ``auto_send=False``. The client is
    obtained from ``decimalai._config._get_client()`` and the trace is
    handed to its ``ingest_trace`` method; the response is discarded.
    """
    from ._config import _get_client

    _get_client().ingest_trace(trace)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ── Eval convenience functions ─────────────────────────────────
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def eval(
    trace_id: str,
    name: str,
    score: float,
    *,
    source: str = "custom",
    source_label: Optional[str] = None,
    passed: Optional[bool] = None,
    reason: Optional[str] = None,
    category: str = "quality",
) -> dict:
    """Push a single eval score to a trace.

    This is the simplest way to attach evaluation results to a trace.
    All scores are visible in the dashboard's Evaluation Breakdown card
    grouped by source.

    Note: this function intentionally keeps its public name even though it
    shadows the ``eval`` builtin inside this module.

    Args:
        trace_id: The trace to attach the score to.
        name: Metric name (e.g., "factual_accuracy", "coherence").
        score: Score value between 0.0 and 1.0 (not validated here).
        source: Eval source identifier. Defaults to "custom".
        source_label: Human-readable display name (e.g., "My RAG Eval").
            Accepted for API compatibility but currently NOT transmitted:
            ``push_eval_scores`` exposes no field for it.
        passed: Binary pass/fail. Defaults to ``score >= 0.5``.
        reason: Human-readable explanation of the score. Omitted from the
            payload when empty or ``None``.
        category: "quality" (default) or "compatibility". Sent as the
            ``"category"`` field of the score entry.

    Returns:
        API response with stored score details and recomputed verdict.

    Example::

        import decimalai
        decimalai.init()

        decimalai.eval(
            trace_id="abc123",
            name="factual_accuracy",
            score=0.75,
            reason="3/4 facts verified against source docs",
        )
    """
    from ._config import _get_client

    client = _get_client()
    score_entry = {
        "name": name,
        "score": score,
        # An explicit ``passed=False`` must win over the 0.5 threshold.
        "passed": passed if passed is not None else score >= 0.5,
        # Previously accepted but silently dropped; "category" is one of the
        # optional per-score fields documented on ``push_eval_scores``.
        "category": category,
    }
    if reason:
        score_entry["reason"] = reason

    # NOTE(review): ``source_label`` has no destination in
    # ``push_eval_scores(trace_id, source, scores)``. Confirm the backend
    # field for it before wiring it through.

    return client.push_eval_scores(
        trace_id=trace_id,
        source=source,
        scores=[score_entry],
    )
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def score(
    trace_id: str,
    name: str,
    value: float,
    reason: Optional[str] = None,
) -> dict:
    """Attach one eval score to a trace using the module's ``eval`` defaults.

    Thin wrapper: forwards to this module's ``eval`` (not the builtin) with
    ``value`` passed as ``score``; every other ``eval`` option keeps its
    default.

    Args:
        trace_id: The trace to attach the score to.
        name: Metric name.
        value: Score between 0.0 and 1.0.
        reason: Optional explanation.

    Returns:
        Whatever ``eval`` returns (the API response).

    Example::

        decimalai.score("abc123", "factual_accuracy", 0.75)
    """
    forwarded = {
        "trace_id": trace_id,
        "name": name,
        "score": value,
        "reason": reason,
    }
    return eval(**forwarded)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def get_eval_breakdown(trace_id: str) -> dict:
    """Fetch the eval breakdown (with provenance info) for one trace.

    Delegates to ``get_eval_breakdown`` on the shared client returned by
    ``decimalai._config._get_client()``.

    Args:
        trace_id: The trace whose breakdown is requested.

    Returns:
        Dict with eval_verdict, quality_avg, compat_avg, source_groups,
        and decision_reasons, with scores grouped by source (Manifest Diff,
        DeepEval, LangSmith, Custom, etc.).

    Example::

        import decimalai
        decimalai.init()

        bd = decimalai.get_eval_breakdown("abc123")
        print(f"Verdict: {bd['eval_verdict']}")
    """
    from ._config import _get_client

    return _get_client().get_eval_breakdown(trace_id)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# ── Re-export generic tracing API ──────────────────────────────
|
|
237
|
+
|
|
238
|
+
from .generic import ( # noqa: E402, F401
|
|
239
|
+
log_llm_call,
|
|
240
|
+
log_tool_call,
|
|
241
|
+
start_trace,
|
|
242
|
+
trace,
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
# Re-export eval adapters from new location
|
|
246
|
+
from .evals.adapters import ( # noqa: E402, F401
|
|
247
|
+
push_deepeval_results,
|
|
248
|
+
push_langsmith_scores,
|
|
249
|
+
push_custom_scores,
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
from .evals import batch_eval # noqa: E402, F401
|
|
253
|
+
|
|
254
|
+
__all__ = [
|
|
255
|
+
"__version__",
|
|
256
|
+
"init",
|
|
257
|
+
"send",
|
|
258
|
+
"eval",
|
|
259
|
+
"score",
|
|
260
|
+
"get_eval_breakdown",
|
|
261
|
+
"trace",
|
|
262
|
+
"start_trace",
|
|
263
|
+
"log_llm_call",
|
|
264
|
+
"log_tool_call",
|
|
265
|
+
"push_deepeval_results",
|
|
266
|
+
"push_langsmith_scores",
|
|
267
|
+
"push_custom_scores",
|
|
268
|
+
"batch_eval",
|
|
269
|
+
]
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# ── Auto-init from environment variable ────────────────────────
|
|
273
|
+
# Setting DECIMAL_AUTO_TRACE=langchain will auto-init and install tracing.
|
|
274
|
+
|
|
275
|
+
def _auto_init_from_env() -> None:
|
|
276
|
+
"""Auto-initialize from environment variables if configured."""
|
|
277
|
+
auto_trace = os.environ.get("DECIMAL_AUTO_TRACE", "").strip().lower()
|
|
278
|
+
if not auto_trace:
|
|
279
|
+
return
|
|
280
|
+
|
|
281
|
+
api_key = os.environ.get("DECIMAL_API_KEY", "")
|
|
282
|
+
if not api_key:
|
|
283
|
+
logger.debug(
|
|
284
|
+
"DECIMAL_AUTO_TRACE=%s set but no DECIMAL_API_KEY found, skipping",
|
|
285
|
+
auto_trace,
|
|
286
|
+
)
|
|
287
|
+
return
|
|
288
|
+
|
|
289
|
+
try:
|
|
290
|
+
init(
|
|
291
|
+
langchain=(auto_trace == "langchain"),
|
|
292
|
+
)
|
|
293
|
+
logger.info("DecimalAI auto-initialized via DECIMAL_AUTO_TRACE=%s", auto_trace)
|
|
294
|
+
except Exception:
|
|
295
|
+
logger.debug("Auto-init failed", exc_info=True)
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
_auto_init_from_env()
|
decimalai/_client.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""HTTP client for communicating with the Decimal platform."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
from typing import Any, Dict, List, Optional
|
|
8
|
+
from uuid import UUID
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from .schema.trace import RunTrace
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger("decimalai")
|
|
15
|
+
|
|
16
|
+
_MAX_RETRIES = 3
|
|
17
|
+
_DEFAULT_RETRY_DELAY = 1.0 # seconds
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DecimalRateLimitError(Exception):
    """Raised when the Decimal platform returns 429 after all retries are exhausted.

    Attributes are documented inline below; ``str(exc)`` is the supplied
    ``message`` or, when none is given, a default built from ``retry_after``.
    """

    def __init__(self, retry_after: float = 0, message: str = ""):
        if message:
            text = message
        else:
            text = f"Rate limit exceeded. Retry after {retry_after}s"
        super().__init__(text)
        # Delay value supplied by the caller, exposed for callers to inspect.
        self.retry_after = retry_after
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DecimalAIClient:
    """Client for the Decimal platform API.

    Handles authentication, trace ingestion, and manifest registration.
    Can be used standalone or created automatically via ``decimalai.init()``.

    The client can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`, which flushes buffered traces and closes the
    underlying HTTP client.
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.decimal.ai",
        project: Optional[str] = None,
        timeout: float = 30.0,
    ):
        """Create the client and its underlying ``httpx.Client``.

        Args:
            api_key: Sent as a ``Bearer`` token in the ``Authorization`` header.
            base_url: Backend URL; a trailing ``/`` is stripped.
            project: When truthy, sent in the ``X-Decimal-Project`` header.
            timeout: Timeout passed to ``httpx.Client``.
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.project = project

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        if project:
            headers["X-Decimal-Project"] = project

        self._http = httpx.Client(
            base_url=self.base_url,
            headers=headers,
            timeout=timeout,
        )
        # Traces queued by buffer_trace() until the next flush().
        self._trace_buffer: List[RunTrace] = []

    # ── Auth ────────────────────────────────────────────────────

    def verify_auth(self) -> Dict[str, Any]:
        """Verify the API key and return project configuration.

        Raises:
            httpx.HTTPStatusError: On any 4xx/5xx response (no retry).
        """
        resp = self._http.get("/api/v1/auth/verify")
        resp.raise_for_status()
        return resp.json()

    # ── Retry logic ────────────────────────────────────────────

    @staticmethod
    def _parse_retry_after(value: Optional[str]) -> float:
        """Convert a ``Retry-After`` header value to a non-negative delay.

        Accepts both forms the header may take: a number of seconds or an
        HTTP-date. A missing, malformed, negative, NaN or infinite value
        yields ``0.0`` so the caller falls back to its own backoff instead
        of crashing or passing an invalid value to ``time.sleep``.

        Args:
            value: Raw header value, or ``None`` when the header is absent.

        Returns:
            Seconds to wait, always finite and ``>= 0.0``.
        """
        if value is None:
            return 0.0
        try:
            seconds = float(value)
        except (TypeError, ValueError):
            # Not numeric; try the HTTP-date form.
            from datetime import datetime, timezone
            from email.utils import parsedate_to_datetime

            try:
                when = parsedate_to_datetime(value)
            except (TypeError, ValueError):
                return 0.0
            if when.tzinfo is None:
                # A naive result carries no offset; treat it as UTC.
                when = when.replace(tzinfo=timezone.utc)
            seconds = (when - datetime.now(timezone.utc)).total_seconds()
        # NaN fails both comparisons, so it is rejected here as well.
        if 0.0 <= seconds < float("inf"):
            return seconds
        return 0.0

    def _request_with_retry(
        self, method: str, url: str, **kwargs: Any
    ) -> httpx.Response:
        """Make an HTTP request with retry-on-429.

        Retries up to ``_MAX_RETRIES`` times when the server responds with
        HTTP 429. Each wait is the larger of the ``Retry-After`` header (if
        present and parseable) and the exponential backoff (1s, 2s, 4s).

        Args:
            method: HTTP method name.
            url: Request path or URL.
            **kwargs: Passed through to ``httpx.Client.request``.

        Returns:
            The first non-429 response with a successful status.

        Raises:
            httpx.HTTPStatusError: For a non-429 error status (not retried).
            DecimalRateLimitError: When every attempt was answered with 429.
        """
        retry_after = 0.0

        for attempt in range(_MAX_RETRIES + 1):  # initial try + retries
            resp = self._http.request(method, url, **kwargs)

            if resp.status_code != 429:
                resp.raise_for_status()
                return resp

            retry_after = self._parse_retry_after(resp.headers.get("Retry-After"))

            if attempt == _MAX_RETRIES:
                # Out of retries; report the failure below.
                break

            delay = max(retry_after, _DEFAULT_RETRY_DELAY * (2 ** attempt))
            logger.warning(
                "Rate limited (429). Retrying in %.1fs (attempt %d/%d)",
                delay, attempt + 1, _MAX_RETRIES,
            )
            time.sleep(delay)

        raise DecimalRateLimitError(
            retry_after=retry_after,
            message=(
                f"Rate limit exceeded after {_MAX_RETRIES} retries. "
                f"Server says retry after {retry_after}s."
            ),
        )

    # ── Trace ingestion ────────────────────────────────────────

    def ingest_trace(self, trace: RunTrace) -> Dict[str, Any]:
        """Send a single trace to the platform (retried on 429)."""
        payload = trace.model_dump(mode="json")
        resp = self._request_with_retry("POST", "/api/v1/traces", json=payload)
        logger.debug("Ingested trace %s", trace.id)
        return resp.json()

    def ingest_traces_batch(self, traces: List[RunTrace]) -> Dict[str, Any]:
        """Send a batch of traces to the platform (retried on 429)."""
        payload = [t.model_dump(mode="json") for t in traces]
        resp = self._request_with_retry("POST", "/api/v1/traces/batch", json=payload)
        logger.debug("Ingested %d traces", len(traces))
        return resp.json()

    def buffer_trace(self, trace: RunTrace) -> None:
        """Buffer a trace for batched sending.

        Triggers :meth:`flush` once the buffer holds 50 or more traces.
        """
        self._trace_buffer.append(trace)
        if len(self._trace_buffer) >= 50:
            self.flush()

    def flush(self) -> None:
        """Flush all buffered traces to the platform.

        On rate limit errors (429), the buffer is **preserved** so that
        traces are not lost — the next call to ``flush()`` will retry.
        For all other errors the buffer is cleared. No exception derived
        from ``Exception`` propagates to the caller.
        """
        if not self._trace_buffer:
            return
        try:
            self.ingest_traces_batch(self._trace_buffer)
            self._trace_buffer.clear()
        except DecimalRateLimitError:
            logger.warning(
                "Rate limited — preserving %d buffered traces for later flush",
                len(self._trace_buffer),
            )
        except Exception:
            # Logged with traceback, then dropped so the buffer cannot grow
            # without bound on a persistent failure.
            logger.exception("Failed to flush %d traces", len(self._trace_buffer))
            self._trace_buffer.clear()

    # ── Trace queries ──────────────────────────────────────────

    def list_traces(
        self,
        limit: int = 20,
        offset: int = 0,
        status: Optional[str] = None,
        agent_name: Optional[str] = None,
    ) -> Dict[str, Any]:
        """List traces for the current project.

        ``status`` and ``agent_name`` are only sent as query parameters
        when truthy.

        Raises:
            httpx.HTTPStatusError: On any 4xx/5xx response (no retry).
        """
        params: Dict[str, Any] = {"limit": limit, "offset": offset}
        if status:
            params["status"] = status
        if agent_name:
            params["agent_name"] = agent_name
        resp = self._http.get("/api/v1/traces", params=params)
        resp.raise_for_status()
        return resp.json()

    def get_trace(self, trace_id: str | UUID) -> Dict[str, Any]:
        """Get a single trace with its full span tree.

        Raises:
            httpx.HTTPStatusError: On any 4xx/5xx response (no retry).
        """
        resp = self._http.get(f"/api/v1/traces/{trace_id}")
        resp.raise_for_status()
        return resp.json()

    # ── Manifest registration ─────────────────────────────────

    def register_manifest(self, manifest: Any) -> Dict[str, Any]:
        """Register a manifest snapshot with the platform.

        Args:
            manifest: A ManifestSnapshot (from decimalai.schema.manifest).
                Must provide ``model_dump(mode="json")``, ``id`` and
                ``manifest_hash``.

        Returns:
            Registration response with manifest_id and compatibility info.

        Raises:
            httpx.HTTPStatusError: On any 4xx/5xx response (no retry).
        """
        payload = manifest.model_dump(mode="json")
        resp = self._http.post("/api/v1/manifests", json=payload)
        resp.raise_for_status()
        logger.debug("Registered manifest %s (hash=%s)", manifest.id, manifest.manifest_hash)
        return resp.json()

    def list_manifests(
        self,
        limit: int = 20,
        offset: int = 0,
        agent_name: Optional[str] = None,
    ) -> Dict[str, Any]:
        """List manifests from the platform.

        ``agent_name`` is only sent as a query parameter when truthy.

        Raises:
            httpx.HTTPStatusError: On any 4xx/5xx response (no retry).
        """
        params: Dict[str, Any] = {"limit": limit, "offset": offset}
        if agent_name:
            params["agent_name"] = agent_name
        resp = self._http.get("/api/v1/manifests", params=params)
        resp.raise_for_status()
        return resp.json()

    # ── Eval Scores ──────────────────────────────────────────────

    def push_eval_scores(
        self,
        trace_id: str | UUID,
        source: str,
        scores: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Push external evaluation scores to a trace (retried on 429).

        Args:
            trace_id: The trace to attach scores to.
            source: Origin of the scores (e.g., "deepeval", "langsmith", "custom").
            scores: List of score dicts, each with at least "name" and "score".
                Optional fields: "passed", "reason", "category".

        Returns:
            Ingestion response with stored score count.

        Example::

            client.push_eval_scores(
                trace_id="abc123",
                source="deepeval",
                scores=[
                    {"name": "correctness", "score": 0.92, "reason": "Accurate"},
                    {"name": "faithfulness", "score": 0.85},
                ],
            )
        """
        payload = {"source": source, "scores": scores}
        resp = self._request_with_retry(
            "POST", f"/api/v1/traces/{trace_id}/eval-scores", json=payload,
        )
        # Only the first 8 characters of the id are logged.
        logger.debug("Pushed %d eval scores to trace %s", len(scores), str(trace_id)[:8])
        return resp.json()

    def get_eval_scores(self, trace_id: str | UUID) -> Dict[str, Any]:
        """Get all evaluation scores (quality + compatibility) for a trace.

        Returns:
            Dict with quality_scores, compatibility_scores, and aggregates.

        Raises:
            httpx.HTTPStatusError: On any 4xx/5xx response (no retry).
        """
        resp = self._http.get(f"/api/v1/traces/{trace_id}/eval-scores")
        resp.raise_for_status()
        return resp.json()

    def get_eval_breakdown(self, trace_id: str | UUID) -> Dict[str, Any]:
        """Get the full eval breakdown with provenance for a trace.

        Returns scores grouped by source (Manifest Diff, DeepEval, LangSmith,
        Custom, etc.) with icons, labels, badge colors, and decision reasons
        explaining how the final verdict was computed.

        Returns:
            Dict with eval_verdict, quality_avg, compat_avg, source_groups,
            and decision_reasons.

        Raises:
            httpx.HTTPStatusError: On any 4xx/5xx response (no retry).

        Example::

            breakdown = client.get_eval_breakdown("trace-123")
            print(breakdown["eval_verdict"])  # "keep" / "drop" / ...
            for group in breakdown["source_groups"]:
                print(f"{group['source_label']}: {group['source_avg']}")
                for score in group["scores"]:
                    print(f"  {score['name']}: {score['score']}")
        """
        resp = self._http.get(f"/api/v1/traces/{trace_id}/eval-breakdown")
        resp.raise_for_status()
        return resp.json()

    def get_decision(self, trace_id: str | UUID) -> Dict[str, Any]:
        """Compute and get the unified verdict for a trace (retried on 429).

        Returns:
            Dict with verdict (keep/repair/replay/drop), quality_avg,
            compat_avg, and per-score breakdowns.
        """
        resp = self._request_with_retry(
            "POST", f"/api/v1/traces/{trace_id}/decision",
        )
        return resp.json()

    def batch_decision(
        self,
        trace_ids: Optional[List[str]] = None,
        manifest_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Batch compute unified verdicts for multiple traces (retried on 429).

        Args:
            trace_ids: Specific trace IDs to score.
            manifest_id: Score all traces from this manifest (alternative to trace_ids).

        Falsy arguments are left out of the request body, so calling with
        neither argument posts an empty JSON object.

        Returns:
            Dict with decisions list, total count, and verdict_counts breakdown.
        """
        payload: Dict[str, Any] = {}
        if trace_ids:
            payload["trace_ids"] = trace_ids
        if manifest_id:
            payload["manifest_id"] = manifest_id

        resp = self._request_with_retry(
            "POST", "/api/v1/traces/batch-decision", json=payload,
        )
        return resp.json()

    # ── Lifecycle ──────────────────────────────────────────────

    def close(self) -> None:
        """Flush remaining traces and close the HTTP client."""
        self.flush()
        self._http.close()

    def __enter__(self) -> "DecimalAIClient":
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *args: Any) -> None:
        """Close the client on leaving the ``with`` block; exceptions propagate."""
        self.close()
|