prova-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prova_cp/__init__.py +27 -0
- prova_cp/callbacks.py +302 -0
- prova_cp/canonical.py +15 -0
- prova_cp/cli.py +53 -0
- prova_cp/client.py +116 -0
- prova_cp/crewai.py +99 -0
- prova_cp/migrate.py +185 -0
- prova_cp/verify.py +97 -0
- prova_cp/wrap.py +111 -0
- prova_sdk-0.1.0.dist-info/METADATA +165 -0
- prova_sdk-0.1.0.dist-info/RECORD +13 -0
- prova_sdk-0.1.0.dist-info/WHEEL +4 -0
- prova_sdk-0.1.0.dist-info/entry_points.txt +2 -0
prova_cp/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Agent-side SDK for the Prova AI control plane."""
|
|
2
|
+
|
|
3
|
+
from .client import ProvaClient, ProvaApiError, ReceiptVerificationError
|
|
4
|
+
from .verify import verify_receipt
|
|
5
|
+
from .canonical import canonicalize
|
|
6
|
+
from .migrate import migrate, MAPPERS, langsmith_mapper, langfuse_mapper, openai_mapper
|
|
7
|
+
from .callbacks import ProvaCallbackHandler
|
|
8
|
+
from .crewai import ProvaCrewAI
|
|
9
|
+
from .wrap import wrap_openai, wrap_anthropic
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ProvaClient",
|
|
13
|
+
"ProvaApiError",
|
|
14
|
+
"ReceiptVerificationError",
|
|
15
|
+
"verify_receipt",
|
|
16
|
+
"canonicalize",
|
|
17
|
+
"migrate",
|
|
18
|
+
"MAPPERS",
|
|
19
|
+
"langsmith_mapper",
|
|
20
|
+
"langfuse_mapper",
|
|
21
|
+
"openai_mapper",
|
|
22
|
+
"ProvaCallbackHandler",
|
|
23
|
+
"ProvaCrewAI",
|
|
24
|
+
"wrap_openai",
|
|
25
|
+
"wrap_anthropic",
|
|
26
|
+
]
|
|
27
|
+
__version__ = "0.1.0"
|
prova_cp/callbacks.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""LangChain / LangGraph callback handler for automatic Prova instrumentation.
|
|
2
|
+
|
|
3
|
+
Drop ProvaCallbackHandler into any LangGraph graph or LangChain chain and every
|
|
4
|
+
LLM call, chain invocation, and tool call is automatically ingested as a signed
|
|
5
|
+
Prova receipt.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
|
|
9
|
+
from prova_cp import ProvaClient
|
|
10
|
+
from prova_cp.callbacks import ProvaCallbackHandler
|
|
11
|
+
|
|
12
|
+
prova = ProvaClient(api_key=os.environ["PROVA_API_KEY"])
|
|
13
|
+
handler = ProvaCallbackHandler(
|
|
14
|
+
client=prova,
|
|
15
|
+
app_id="my-agent",
|
|
16
|
+
environment="production",
|
|
17
|
+
framework="langgraph",
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# LangGraph:
|
|
21
|
+
graph.invoke(inputs, config={"callbacks": [handler]})
|
|
22
|
+
|
|
23
|
+
# LangChain:
|
|
24
|
+
chain.invoke(inputs, config={"callbacks": [handler]})
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import logging
|
|
30
|
+
import time
|
|
31
|
+
from typing import Any, Dict, List, Optional, Sequence, Union
|
|
32
|
+
from uuid import UUID
|
|
33
|
+
|
|
34
|
+
log = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
# Optional dependency guard: bind the real langchain_core classes when they can
# be imported, otherwise define minimal stand-ins so this module still imports.
try:
    from langchain_core.callbacks import BaseCallbackHandler
    from langchain_core.outputs import LLMResult
except ImportError:
    _LANGCHAIN_AVAILABLE = False

    class BaseCallbackHandler:  # type: ignore[no-redef]
        """Stub so the module is importable even without langchain_core."""

    class LLMResult:  # type: ignore[no-redef]
        """Stub."""

        generations: list = []
        llm_output: dict = {}
else:
    _LANGCHAIN_AVAILABLE = True
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ProvaCallbackHandler(BaseCallbackHandler):
    """LangChain/LangGraph callback handler that ingests AI events into Prova.

    Completed LLM calls are ingested as ``model_call`` events, completed
    non-root chain runs as ``agent_step`` events and completed tool calls as
    ``tool_call`` events, all through ``client.ingest``.

    Timing and prompt state is kept in dicts keyed by ``str(run_id)`` so that
    overlapping runs do not share entries. Failures while building or
    ingesting an event are logged at warning level and swallowed so a Prova
    outage never breaks the agent.
    """

    def __init__(
        self,
        client: Any,
        *,
        app_id: str = "agent",
        environment: str = "production",
        framework: str = "langgraph",
        provider: Optional[str] = None,
    ) -> None:
        """Store the client and the static ``source`` fields sent with every event.

        Args:
            client: Object exposing ``ingest(event)``.
            app_id: Value sent as ``source.app_id``.
            environment: Value sent as ``source.environment``.
            framework: Value sent as ``source.framework``.
            provider: Optional provider name added to the ``model`` block.

        Raises:
            ImportError: If langchain-core could not be imported.
        """
        if not _LANGCHAIN_AVAILABLE:
            raise ImportError(
                "langchain-core is required to use ProvaCallbackHandler. "
                "Install it with: pip install langchain-core"
            )
        super().__init__()
        self._client = client
        self._source = {
            "app_id": app_id,
            "environment": environment,
            "framework": framework,
        }
        self._provider = provider
        self._start_times: Dict[str, float] = {}
        self._prompts: Dict[str, Any] = {}

    # ------------------------------------------------------------------
    # Timing helpers
    # ------------------------------------------------------------------

    def _mark_start(self, key: str) -> None:
        """Record the start of run *key* on the monotonic clock."""
        # monotonic() is immune to wall-clock adjustments, unlike time.time().
        self._start_times[key] = time.monotonic()

    def _elapsed_ms(self, key: str) -> int:
        """Pop the start mark for *key* and return elapsed whole milliseconds.

        Returns 0 when no start was recorded for the run.
        """
        started = self._start_times.pop(key, None)
        if started is None:
            return 0
        return int((time.monotonic() - started) * 1000)

    # ------------------------------------------------------------------
    # LLM callbacks
    # ------------------------------------------------------------------

    def on_llm_start(
        self,
        serialized: Dict[str, Any],
        prompts: List[str],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Remember the start time and prompts of a completion-style LLM run."""
        key = str(run_id)
        self._mark_start(key)
        self._prompts[key] = prompts

    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[Any]],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Remember the start time and flattened messages of a chat-model run."""
        key = str(run_id)
        self._mark_start(key)
        try:
            self._prompts[key] = [
                {"role": m.type, "content": m.content}
                for batch in messages
                for m in batch
            ]
        except Exception:
            # Messages without .type/.content: fall back to their string form.
            self._prompts[key] = str(messages)

    def on_llm_end(
        self,
        response: LLMResult,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Ingest a ``model_call`` event for a finished LLM run."""
        key = str(run_id)
        elapsed_ms = self._elapsed_ms(key)
        prompt = self._prompts.pop(key, None)

        try:
            generations = response.generations
            generation = generations[0][0] if generations and generations[0] else None
            # Prefer plain text; otherwise use the chat message content.
            completion = getattr(generation, "text", None) or (
                getattr(generation, "message", None)
                and getattr(generation.message, "content", None)
            )

            llm_output = response.llm_output or {}
            # `or {}` also covers an explicit invocation_params=None.
            invocation = kwargs.get("invocation_params") or {}
            model_name = (
                llm_output.get("model_name")
                or llm_output.get("model")
                or invocation.get("model_name")
                or invocation.get("model")
            )

            payload: Dict[str, Any] = {"elapsed_ms": elapsed_ms}
            if prompt is not None:
                payload["prompt"] = prompt
            if completion is not None:
                payload["completion"] = completion
            if llm_output:
                payload["llm_output"] = llm_output

            event: Dict[str, Any] = {
                "kind": "model_call",
                "source": {**self._source, "run_id": key},
                "payload": payload,
            }
            if model_name or self._provider:
                # Only include the fields that are actually known.
                event["model"] = {
                    k: v
                    for k, v in {
                        "provider": self._provider,
                        "name": model_name,
                    }.items()
                    if v
                }

            self._client.ingest(event)
        except Exception as exc:
            log.warning("ProvaCallbackHandler.on_llm_end failed: %s", exc)

    def on_llm_error(
        self,
        error: Union[Exception, KeyboardInterrupt],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Discard the state of a failed LLM run; nothing is ingested."""
        key = str(run_id)
        self._start_times.pop(key, None)
        self._prompts.pop(key, None)

    # ------------------------------------------------------------------
    # Chain (agent node) callbacks
    # ------------------------------------------------------------------

    def on_chain_start(
        self,
        serialized: Dict[str, Any],
        inputs: Dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Remember the start time of a chain run."""
        self._mark_start(str(run_id))

    def on_chain_end(
        self,
        outputs: Dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Ingest an ``agent_step`` event for a finished non-root chain run."""
        key = str(run_id)
        elapsed_ms = self._elapsed_ms(key)

        # Runs without a parent are not reported.
        if parent_run_id is None:
            return

        try:
            name = kwargs.get("name")
            if not name:
                serialized = kwargs.get("serialized")
                name = serialized.get("name") if serialized else None

            self._client.ingest({
                "kind": "agent_step",
                "source": {
                    **self._source,
                    "run_id": key,
                    "parent_run_id": str(parent_run_id),
                },
                "payload": {
                    "node": name or "unknown",
                    "outputs": _safe_truncate(outputs),
                    "elapsed_ms": elapsed_ms,
                },
            })
        except Exception as exc:
            log.warning("ProvaCallbackHandler.on_chain_end failed: %s", exc)

    def on_chain_error(
        self,
        error: Union[Exception, KeyboardInterrupt],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Discard the timing state of a failed chain run."""
        self._start_times.pop(str(run_id), None)

    # ------------------------------------------------------------------
    # Tool callbacks
    # ------------------------------------------------------------------

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Remember the start time of a tool run."""
        self._mark_start(str(run_id))

    def on_tool_end(
        self,
        output: Any,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Ingest a ``tool_call`` event for a finished tool run."""
        key = str(run_id)
        elapsed_ms = self._elapsed_ms(key)

        name = kwargs.get("name")
        try:
            self._client.ingest({
                "kind": "tool_call",
                "source": {**self._source, "run_id": key},
                "payload": {
                    "tool": name or "unknown",
                    "output": _safe_truncate(output),
                    "elapsed_ms": elapsed_ms,
                },
            })
        except Exception as exc:
            log.warning("ProvaCallbackHandler.on_tool_end failed: %s", exc)

    def on_tool_error(
        self,
        error: Union[Exception, KeyboardInterrupt],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Discard the timing state of a failed tool run."""
        self._start_times.pop(str(run_id), None)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _safe_truncate(obj: Any, max_len: int = 2000) -> Any:
|
|
295
|
+
"""Truncate large string values to keep receipt payloads reasonable."""
|
|
296
|
+
if isinstance(obj, str):
|
|
297
|
+
return obj[:max_len] + ("..." if len(obj) > max_len else "")
|
|
298
|
+
if isinstance(obj, dict):
|
|
299
|
+
return {k: _safe_truncate(v, max_len) for k, v in obj.items()}
|
|
300
|
+
if isinstance(obj, list):
|
|
301
|
+
return [_safe_truncate(v, max_len) for v in obj[:50]]
|
|
302
|
+
return obj
|
prova_cp/canonical.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Stable JSON canonicalization. Matches lib/receipts/sign.ts:canonicalize."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def canonicalize(value: Any) -> str:
    """Serialize *value* as compact JSON with dict keys emitted in sorted order.

    Dicts and lists are walked recursively; every other value is delegated to
    ``json.dumps`` with non-ASCII characters left unescaped.
    """
    if isinstance(value, dict):
        ordered_keys = sorted(value.keys())
        members = [
            json.dumps(key, ensure_ascii=False) + ":" + canonicalize(value[key])
            for key in ordered_keys
        ]
        return "{" + ",".join(members) + "}"
    if isinstance(value, list):
        items = [canonicalize(item) for item in value]
        return "[" + ",".join(items) + "]"
    # Scalars (including None) and any other type go straight to json.dumps.
    return json.dumps(value, ensure_ascii=False, separators=(",", ":"))
|
prova_cp/cli.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""prova-migrate CLI. Reads NDJSON and bulk-ingests into the Audit Vault."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from .client import ProvaClient
|
|
13
|
+
from .migrate import migrate, read_ndjson, MAPPERS
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def main(argv: Optional[list[str]] = None) -> int:
    """Run the ``prova-migrate`` command line.

    Parses *argv* (``sys.argv[1:]`` when None), reads NDJSON from ``--file``
    (or stdin when it is ``-``), and bulk-ingests it through ``migrate``.
    Per-batch progress goes to stderr and the final summary to stdout as JSON.

    Returns:
        2 when ``PROVA_API_KEY`` is not set, 0 after a completed migration.
    """
    import contextlib

    parser = argparse.ArgumentParser(
        prog="prova-migrate",
        description="Bulk-import LangSmith / Langfuse / OpenAI logs into the Prova Audit Vault.",
    )
    parser.add_argument("--source", required=True, choices=sorted(MAPPERS.keys()))
    parser.add_argument("--file", required=True, help='Path to NDJSON file, or "-" for stdin.')
    parser.add_argument("--batch", type=int, default=200)
    parser.add_argument("--base-url", default=None)
    args = parser.parse_args(argv)

    api_key = os.environ.get("PROVA_API_KEY")
    if not api_key:
        print("PROVA_API_KEY is required", file=sys.stderr)
        return 2

    def report(p: dict) -> None:
        print(
            f"batch {p['batch']}: ingested={p['ingested']} skipped={p['skipped']} total={p['total']}",
            file=sys.stderr,
        )

    # ExitStack closes the input file (when we opened one) and the client even
    # if the migration raises; stdin is never closed by us.
    with contextlib.ExitStack() as stack:
        if args.file == "-":
            stream = sys.stdin
        else:
            stream = stack.enter_context(open(args.file, "r", encoding="utf-8"))
        started = time.monotonic()
        client = stack.enter_context(ProvaClient(api_key, base_url=args.base_url))
        result = migrate(
            client,
            args.source,
            read_ndjson(stream),
            batch_size=args.batch,
            on_progress=report,
        )

    result["elapsed_s"] = round(time.monotonic() - started, 1)
    print(json.dumps(result))
    return 0
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|
prova_cp/client.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""ProvaClient: synchronous httpx wrapper around ingest, gateway-check, register."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import random
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, Iterable, Mapping, Optional, Union
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from .verify import verify_receipt, ReceiptVerificationError # noqa: F401 (re-exported)
|
|
13
|
+
|
|
14
|
+
DEFAULT_BASE_URL = "https://api.prova.cobound.dev"
|
|
15
|
+
DEFAULT_TIMEOUT = 30.0
|
|
16
|
+
RETRYABLE_STATUS = {408, 425, 429, 500, 502, 503, 504}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ProvaApiError(Exception):
    """Error carrying the HTTP status, error code and detail of an API failure.

    The constructor arguments are exposed unchanged as the ``status``,
    ``code`` and ``detail`` attributes.
    """

    def __init__(self, status: int, code: str, detail: Any) -> None:
        self.status, self.code, self.detail = status, code, detail
        message = f"Prova API {status} {code}: {detail}"
        super().__init__(message)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ProvaClient:
    """Synchronous client for the Prova ingest, gateway-check and inventory endpoints.

    Requests are POSTed through a shared ``httpx.Client``. Transport errors and
    responses whose status is in ``RETRYABLE_STATUS`` are retried with
    exponential backoff. Usable as a context manager; ``close()`` releases the
    underlying HTTP client.
    """

    def __init__(
        self,
        api_key: str,
        *,
        base_url: Optional[str] = None,
        timeout: float = DEFAULT_TIMEOUT,
        retry_attempts: int = 4,
        retry_backoff: float = 0.25,
        retry_max_backoff: float = 4.0,
        verify_receipts: bool = False,
        user_agent: str = "prova-sdk python",
        transport: Optional[httpx.BaseTransport] = None,
    ) -> None:
        """Configure the client.

        Args:
            api_key: Bearer token sent with every request; must be non-empty.
            base_url: API root. Falls back to the ``PROVA_BASE_URL`` environment
                variable and then ``DEFAULT_BASE_URL``.
            timeout: Timeout passed to ``httpx.Client``.
            retry_attempts: Total attempts per request. Values below 1 are
                treated as 1 so a request is always sent.
            retry_backoff: Base delay in seconds, doubled after each attempt.
            retry_max_backoff: Upper bound for the backoff delay in seconds.
            verify_receipts: When true, receipts in responses are passed to
                ``verify_receipt``.
            user_agent: Value of the ``User-Agent`` header.
            transport: Optional custom httpx transport.

        Raises:
            ValueError: If *api_key* is empty.
        """
        if not api_key:
            raise ValueError("ProvaClient: api_key is required")
        self._api_key = api_key
        self._base_url = (base_url or os.environ.get("PROVA_BASE_URL") or DEFAULT_BASE_URL).rstrip("/")
        self._timeout = timeout
        # Always make at least one attempt, even if 0 or a negative is passed.
        self._attempts = max(1, retry_attempts)
        self._backoff = retry_backoff
        self._max_backoff = retry_max_backoff
        self._verify_receipts = verify_receipts
        self._http = httpx.Client(
            base_url=self._base_url,
            timeout=timeout,
            headers={"User-Agent": user_agent, "Content-Type": "application/json"},
            transport=transport,
        )

    def __enter__(self) -> "ProvaClient":
        return self

    def __exit__(self, *exc: Any) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._http.close()

    def _request(self, path: str, body: Any, *, idempotency_key: Optional[str] = None) -> Any:
        """POST *body* as JSON to *path* and return the decoded JSON response.

        Raises:
            ProvaApiError: For a status >= 400 that is not retryable, or that is
                still failing on the final attempt.
            httpx.RequestError: If the final attempt fails at transport level.
        """
        headers = {"Authorization": f"Bearer {self._api_key}"}
        if idempotency_key:
            headers["Idempotency-Key"] = idempotency_key

        attempts = max(1, self._attempts)
        for attempt in range(1, attempts + 1):
            is_last = attempt == attempts
            try:
                resp = self._http.post(path, json=body, headers=headers)
            except httpx.RequestError:
                if is_last:
                    raise
            else:
                if resp.status_code < 400:
                    return resp.json()
                status = resp.status_code
                try:
                    parsed = resp.json()
                except ValueError:
                    # Error body is not JSON: keep the raw text as the detail.
                    parsed = resp.text
                code = parsed.get("error") if isinstance(parsed, dict) else "http_error"
                if is_last or status not in RETRYABLE_STATUS:
                    raise ProvaApiError(status, str(code), parsed)
            # Reached only when another attempt follows, so no sleep is wasted
            # after the final failure. Small jitter avoids synchronized retries.
            delay = min(self._max_backoff, self._backoff * (2 ** (attempt - 1)))
            time.sleep(delay + random.random() * 0.05)

        # The loop always returns or raises on its final iteration.
        raise RuntimeError("ProvaClient._request: retry loop exited unexpectedly")

    def ingest(self, events: Union[Mapping[str, Any], Iterable[Mapping[str, Any]]]) -> Mapping[str, Any]:
        """Send one event (a mapping) or many (an iterable) to the ingest endpoint.

        Lists and mappings are sent as given; any other iterable is first
        materialized into a list. When receipt verification is enabled, each
        entry of the response's ``receipts`` is verified before returning.
        """
        body = events if isinstance(events, (list, Mapping)) else list(events)
        res = self._request("/api/v1/audit/ingest", body)
        if self._verify_receipts:
            for r in res.get("receipts", []):
                verify_receipt(r, base_url=self._base_url, client=self._http)
        return res

    def gateway_check(self, event: Mapping[str, Any]) -> Mapping[str, Any]:
        """POST *event* to the gateway-check endpoint and return the response.

        When receipt verification is enabled and the response carries a
        ``receipt``, it is verified before returning.
        """
        res = self._request("/api/v1/gateway/check", event)
        if self._verify_receipts and res.get("receipt"):
            verify_receipt(res["receipt"], base_url=self._base_url, client=self._http)
        return res

    def register(
        self,
        integrations: Union[Mapping[str, Any], Iterable[Mapping[str, Any]]],
    ) -> Mapping[str, Any]:
        """POST one integration (mapping) or many (iterable) to the inventory endpoint."""
        body = integrations if isinstance(integrations, (list, Mapping)) else list(integrations)
        return self._request("/api/v1/inventory", body)

    def verify(self, receipt: Mapping[str, Any]) -> None:
        """Verify *receipt* with ``verify_receipt`` using this client's base URL and HTTP client."""
        verify_receipt(receipt, base_url=self._base_url, client=self._http)
|
prova_cp/crewai.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""CrewAI auto-instrumentation for Prova.
|
|
2
|
+
|
|
3
|
+
CrewAI does not expose LangChain-style callbacks. Its stable, version-tolerant
|
|
4
|
+
hooks are the `step_callback` and `task_callback` parameters on a Crew (and
|
|
5
|
+
`step_callback` on an Agent). This module wires those to Prova ingest.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
|
|
9
|
+
from prova_cp import ProvaClient
|
|
10
|
+
from prova_cp.crewai import ProvaCrewAI
|
|
11
|
+
|
|
12
|
+
prova = ProvaClient(api_key="prv_...")
|
|
13
|
+
tap = ProvaCrewAI(prova, app_id="research-crew", environment="production")
|
|
14
|
+
|
|
15
|
+
crew = Crew(
|
|
16
|
+
agents=[...],
|
|
17
|
+
tasks=[...],
|
|
18
|
+
step_callback=tap.step_callback,
|
|
19
|
+
task_callback=tap.task_callback,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
Both callbacks are fail-silent: a Prova outage logs at warning level and never
|
|
23
|
+
breaks the crew. Agent steps become `agent_step` receipts; completed tasks
|
|
24
|
+
become `agent_run` receipts.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import logging
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
log = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
_MAX_LEN = 2000
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _safe(obj: Any, depth: int = 0) -> Any:
|
|
38
|
+
if isinstance(obj, str):
|
|
39
|
+
return obj[:_MAX_LEN] + ("..." if len(obj) > _MAX_LEN else "")
|
|
40
|
+
if depth > 4:
|
|
41
|
+
return "[truncated]"
|
|
42
|
+
if isinstance(obj, dict):
|
|
43
|
+
return {k: _safe(v, depth + 1) for k, v in obj.items()}
|
|
44
|
+
if isinstance(obj, (list, tuple)):
|
|
45
|
+
return [_safe(v, depth + 1) for v in list(obj)[:50]]
|
|
46
|
+
# CrewAI output objects: pull common attributes without importing crewai.
|
|
47
|
+
for attr in ("raw", "output", "result", "summary", "description"):
|
|
48
|
+
if hasattr(obj, attr):
|
|
49
|
+
return _safe(getattr(obj, attr), depth + 1)
|
|
50
|
+
return str(obj)[:_MAX_LEN]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ProvaCrewAI:
    """Bridge exposing CrewAI-style callbacks that ingest events through a Prova client.

    ``step_callback`` emits ``agent_step`` events and ``task_callback`` emits
    ``agent_run`` events. Any exception raised while building or sending an
    event is logged at warning level and not re-raised.
    """

    def __init__(
        self,
        client: Any,
        *,
        app_id: str = "crew",
        environment: str = "production",
        framework: str = "crewai",
    ) -> None:
        self._client = client
        self._source = dict(
            app_id=app_id,
            environment=environment,
            framework=framework,
        )

    def step_callback(self, step_output: Any) -> None:
        """Pass as Crew(step_callback=...) or Agent(step_callback=...)."""
        try:
            agent = getattr(step_output, "agent", None)
            tool = getattr(step_output, "tool", None)
            payload = {
                "agent": None if not agent else str(agent),
                "tool": None if not tool else str(tool),
                "output": _safe(step_output),
            }
            event = {
                "kind": "agent_step",
                "source": dict(self._source),
                "payload": payload,
            }
            self._client.ingest(event)
        except Exception as exc:
            log.warning("ProvaCrewAI.step_callback failed: %s", exc)

    def task_callback(self, task_output: Any) -> None:
        """Pass as Crew(task_callback=...)."""
        try:
            # Prefer the task description; fall back to its name.
            label = getattr(task_output, "description", None)
            if not label:
                label = getattr(task_output, "name", None)
            event = {
                "kind": "agent_run",
                "source": dict(self._source),
                "payload": {
                    "task": label,
                    "output": _safe(task_output),
                },
            }
            self._client.ingest(event)
        except Exception as exc:
            log.warning("ProvaCrewAI.task_callback failed: %s", exc)
|
prova_cp/migrate.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""One-shot migration of LangSmith / Langfuse / OpenAI logs into the Audit Vault."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from typing import Any, Callable, Dict, Iterable, Iterator, Mapping, Optional, TextIO
|
|
8
|
+
|
|
9
|
+
from .client import ProvaClient
|
|
10
|
+
|
|
11
|
+
Mapper = Callable[[Mapping[str, Any]], Optional[Dict[str, Any]]]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _provider_of(model: Optional[str]) -> str:
|
|
15
|
+
m = (model or "").lower()
|
|
16
|
+
if m.startswith(("gpt-", "o1", "o3")) or "openai" in m:
|
|
17
|
+
return "openai"
|
|
18
|
+
if m.startswith("claude"):
|
|
19
|
+
return "anthropic"
|
|
20
|
+
if m.startswith(("gemini", "text-bison")):
|
|
21
|
+
return "google"
|
|
22
|
+
if m.startswith(("mistral", "codestral")):
|
|
23
|
+
return "mistral"
|
|
24
|
+
if m.startswith(("llama", "meta-")):
|
|
25
|
+
return "meta"
|
|
26
|
+
return "other"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _model_block(model: Optional[str]) -> Optional[Dict[str, Any]]:
|
|
30
|
+
if not model:
|
|
31
|
+
return None
|
|
32
|
+
return {"provider": _provider_of(model), "name": model}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def langsmith_mapper(raw: Mapping[str, Any]) -> Optional[Dict[str, Any]]:
    """Map one LangSmith run record to a Prova ingest event.

    Returns None (row skipped) when ``id`` is not a string.

    - ``run_type`` ``"tool"`` maps to ``tool_call``, ``"chain"``/``"agent"`` to
      ``agent_run``, and anything else to ``model_call``.
    - The model name is taken from ``extra.invocation_params`` (``model``, then
      ``model_name``) and finally from the row's own ``model`` field.
    """
    rid = raw.get("id")
    if not isinstance(rid, str):
        return None

    run_type = raw.get("run_type") or "llm"

    extra = raw.get("extra")
    if not isinstance(extra, Mapping):
        extra = {}
    invocation = extra.get("invocation_params")
    if not isinstance(invocation, Mapping):
        invocation = {}
    # Accept both spellings, as the callback handler does for live runs.
    model = invocation.get("model") or invocation.get("model_name") or raw.get("model")

    session = raw.get("session_name") or raw.get("session_id")

    if run_type == "tool":
        kind = "tool_call"
    elif run_type in ("chain", "agent"):
        kind = "agent_run"
    else:
        kind = "model_call"

    latency = raw.get("latency")
    return {
        "idempotency_key": f"langsmith:{rid}",
        "occurred_at": raw.get("start_time"),
        "kind": kind,
        "source": {"org_id": "", "framework": "langgraph", "app_id": session or raw.get("name")},
        "model": _model_block(model if isinstance(model, str) else None),
        "payload": {
            "_migrated_from": "langsmith",
            "name": raw.get("name"),
            "run_type": run_type,
            "inputs": raw.get("inputs"),
            "outputs": raw.get("outputs"),
            "end_time": raw.get("end_time"),
            # Copied as-is from the row's ``latency`` when numeric.
            "latency_ms": latency if isinstance(latency, (int, float)) else None,
        },
    }
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def langfuse_mapper(raw: Mapping[str, Any]) -> Optional[Dict[str, Any]]:
    """Map one Langfuse observation record to a Prova ingest event.

    Returns None (row skipped) when ``id`` is not a string. ``SPAN`` and
    ``EVENT`` observations become ``agent_step`` events; every other type
    becomes a ``model_call``.
    """
    observation_id = raw.get("id")
    if not isinstance(observation_id, str):
        return None

    obs_type = str(raw.get("type") or "GENERATION").upper()

    model = raw.get("model")
    if not isinstance(model, str):
        model = None

    # Both camelCase and snake_case field names are accepted.
    started = raw.get("startTime") or raw.get("start_time")
    trace_id = raw.get("traceId") or raw.get("trace_id")

    if obs_type in ("SPAN", "EVENT"):
        kind = "agent_step"
    else:
        kind = "model_call"

    payload = {
        "_migrated_from": "langfuse",
        "name": raw.get("name"),
        "type": obs_type,
        "input": raw.get("input"),
        "output": raw.get("output"),
        "metadata": raw.get("metadata"),
        "usage": raw.get("usage"),
    }
    return {
        "idempotency_key": f"langfuse:{observation_id}",
        "occurred_at": started if isinstance(started, str) else None,
        "kind": kind,
        "source": {"org_id": "", "framework": "custom", "app_id": trace_id},
        "model": _model_block(model),
        "payload": payload,
    }
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def openai_mapper(raw: Mapping[str, Any]) -> Optional[Dict[str, Any]]:
|
|
95
|
+
rid = raw.get("id") or raw.get("request_id")
|
|
96
|
+
if not isinstance(rid, str):
|
|
97
|
+
return None
|
|
98
|
+
model = raw.get("model") if isinstance(raw.get("model"), str) else None
|
|
99
|
+
created = raw.get("created") if isinstance(raw.get("created"), (int, float)) else None
|
|
100
|
+
created_at = raw.get("created_at")
|
|
101
|
+
if isinstance(created_at, str):
|
|
102
|
+
occurred = created_at
|
|
103
|
+
elif isinstance(created_at, (int, float)):
|
|
104
|
+
occurred = datetime.fromtimestamp(created_at, tz=timezone.utc).isoformat()
|
|
105
|
+
elif created is not None:
|
|
106
|
+
occurred = datetime.fromtimestamp(created, tz=timezone.utc).isoformat()
|
|
107
|
+
else:
|
|
108
|
+
occurred = None
|
|
109
|
+
usage = raw.get("usage")
|
|
110
|
+
if not isinstance(usage, Mapping):
|
|
111
|
+
usage = {
|
|
112
|
+
"prompt_tokens": raw.get("input_tokens"),
|
|
113
|
+
"completion_tokens": raw.get("output_tokens"),
|
|
114
|
+
}
|
|
115
|
+
return {
|
|
116
|
+
"idempotency_key": f"openai:{rid}",
|
|
117
|
+
"occurred_at": occurred,
|
|
118
|
+
"kind": "model_call",
|
|
119
|
+
"source": {"org_id": "", "framework": "custom"},
|
|
120
|
+
"model": _model_block(model),
|
|
121
|
+
"payload": {
|
|
122
|
+
"_migrated_from": "openai",
|
|
123
|
+
"request_id": rid,
|
|
124
|
+
"choices": raw.get("choices"),
|
|
125
|
+
"messages": raw.get("messages"),
|
|
126
|
+
"usage": usage,
|
|
127
|
+
},
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Registry of source-format name -> row mapper, used by migrate() and the CLI.
MAPPERS: Dict[str, Mapper] = dict(
    langsmith=langsmith_mapper,
    langfuse=langfuse_mapper,
    openai=openai_mapper,
)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def read_ndjson(stream: TextIO) -> Iterator[Dict[str, Any]]:
    """Yield one JSON object per line of *stream*.

    Blank lines, lines that are not valid JSON, and lines whose JSON value is
    not an object (for example a bare number or an array) are skipped, so
    consumers can rely on every yielded row being a dict.
    """
    for line in stream:
        line = line.strip()
        if not line:
            continue
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            # Best-effort import: a malformed line is dropped, not fatal.
            continue
        if isinstance(record, dict):
            yield record
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def migrate(
    client: ProvaClient,
    source: str,
    rows: Iterable[Mapping[str, Any]],
    *,
    batch_size: int = 200,
    on_progress: Optional[Callable[[Dict[str, int]], None]] = None,
) -> Dict[str, int]:
    """Map *rows* with the mapper registered for *source* and ingest them in batches.

    Args:
        client: Client whose ``ingest`` receives each batch as a list.
        source: Key into ``MAPPERS``.
        rows: Raw records; rows the mapper turns into None are counted as skipped.
        batch_size: Events per ingest call, clamped to the range 1..1000.
        on_progress: Optional callback invoked after every ingested batch with
            ``batch``, ``ingested``, ``skipped`` and ``total`` counters.

    Returns:
        Counters ``total``, ``ingested``, ``skipped`` and ``batches``.

    Raises:
        ValueError: If *source* has no registered mapper.
    """
    convert = MAPPERS.get(source)
    if convert is None:
        raise ValueError(f"unknown source format: {source}")

    limit = min(1000, batch_size)
    if limit < 1:
        limit = 1

    counts = {"total": 0, "ingested": 0, "skipped": 0, "batches": 0}

    def send(events: list) -> None:
        # The batch counter is bumped before the call, so a failed ingest
        # still counts as an attempted batch.
        counts["batches"] += 1
        response = client.ingest(events)
        counts["ingested"] += int(response.get("count", 0))
        if on_progress:
            on_progress({
                "batch": counts["batches"],
                "ingested": counts["ingested"],
                "skipped": counts["skipped"],
                "total": counts["total"],
            })

    pending: list = []
    for raw in rows:
        counts["total"] += 1
        event = convert(raw)
        if event is None:
            counts["skipped"] += 1
            continue
        pending.append(event)
        if len(pending) >= limit:
            send(pending)
            # Start a fresh list; the one handed to the client is left untouched.
            pending = []

    if pending:
        send(pending)

    return dict(counts)
|
prova_cp/verify.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Offline receipt verification using cryptography's Ed25519 primitives."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
from typing import Any, Mapping, MutableMapping, Optional
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from cryptography.exceptions import InvalidSignature
|
|
10
|
+
from cryptography.hazmat.primitives import serialization
|
|
11
|
+
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey
|
|
12
|
+
|
|
13
|
+
from .canonical import canonicalize
|
|
14
|
+
|
|
15
|
+
DEFAULT_BASE_URL = "https://api.prova.cobound.dev"
|
|
16
|
+
|
|
17
|
+
_KEY_CACHE: MutableMapping[str, Ed25519PublicKey] = {}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ReceiptVerificationError(Exception):
    """Error raised when a receipt cannot be verified.

    The human-readable reason is the exception message; ``receipt_id`` carries
    the identifier supplied by the raiser, or ``None`` when none was given.
    """

    def __init__(self, message: str, receipt_id: Optional[str] = None) -> None:
        # Keep the identifier on the instance so handlers can report it.
        self.receipt_id = receipt_id
        super().__init__(message)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _load_public_key(
    key_id: str,
    *,
    public_key_pem: Optional[str],
    base_url: str,
    client: Optional[httpx.Client],
) -> Ed25519PublicKey:
    """Return the Ed25519 public key to verify receipts signed with ``key_id``.

    Resolution order:

    1. ``public_key_pem``, when given, is parsed and returned (never cached).
    2. A key previously fetched for ``key_id`` is returned from ``_KEY_CACHE``.
    3. Otherwise the key is fetched with ``GET {base_url}/api/v1/keys/{key_id}``;
       the JSON response must carry the PEM under ``"public_key_pem"``. The
       parsed key is cached under ``key_id`` before being returned.

    Args:
        key_id: Identifier of the signing key.
        public_key_pem: PEM text of the key; skips cache and network if truthy.
        base_url: Root URL of the key registry; a trailing ``/`` is ignored.
        client: HTTP client to use for the fetch. When ``None`` a temporary
            client with a 10 second timeout is created and closed afterwards.

    Raises:
        ReceiptVerificationError: If the registry does not answer with HTTP
            200, returns no PEM, or the PEM (supplied or fetched) is not an
            Ed25519 public key.
    """
    # Function-scope import: standard library only, used to escape the key id.
    from urllib.parse import quote

    if public_key_pem:
        supplied = serialization.load_pem_public_key(public_key_pem.encode("utf-8"))
        # load_pem_public_key accepts several key algorithms; anything other
        # than Ed25519 would fail later inside verify() with an unrelated error.
        if not isinstance(supplied, Ed25519PublicKey):
            raise ReceiptVerificationError(
                f"public key supplied for {key_id} is not an Ed25519 key",
                key_id,
            )
        return supplied

    # NOTE(review): the cache is keyed by key_id only, not by base_url.
    cached = _KEY_CACHE.get(key_id)
    if cached is not None:
        return cached

    base = base_url.rstrip("/")
    # key_id originates from the receipt; escape it so characters such as
    # "/", "?" or "#" cannot alter the request path or add a query string.
    key_path = quote(str(key_id), safe="")
    own_client = client is None
    if own_client:
        client = httpx.Client(timeout=10.0)
    try:
        resp = client.get(f"{base}/api/v1/keys/{key_path}")
        if resp.status_code != 200:
            # The key id is passed in the exception's identifier slot.
            raise ReceiptVerificationError(
                f"failed to fetch public key {key_id}: HTTP {resp.status_code}",
                key_id,
            )
        body = resp.json()
        pem = body.get("public_key_pem")
        if not pem:
            raise ReceiptVerificationError(f"key registry returned no PEM for {key_id}", key_id)
        key = serialization.load_pem_public_key(pem.encode("utf-8"))
        if not isinstance(key, Ed25519PublicKey):
            # Reject before caching so a wrong-typed key is never reused.
            raise ReceiptVerificationError(
                f"key registry returned a non-Ed25519 key for {key_id}",
                key_id,
            )
        _KEY_CACHE[key_id] = key
        return key
    finally:
        # Only close clients created here; a caller-supplied one stays open.
        if own_client:
            client.close()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def verify_receipt(
    receipt: Mapping[str, Any],
    *,
    public_key_pem: Optional[str] = None,
    base_url: str = DEFAULT_BASE_URL,
    client: Optional[httpx.Client] = None,
) -> None:
    """Verify a receipt's hash and Ed25519 signature.

    The receipt's ``findings`` (default ``[]``) and ``payload`` are
    canonicalized; the SHA-256 hex digest of that text must equal
    ``integrity["hash"]`` and ``integrity["signature"]`` (hex) must verify over
    the same bytes with the key named by ``integrity["key_id"]``.

    Args:
        receipt: Mapping with ``payload``, ``findings``, ``integrity`` and,
            optionally, ``event_id`` (used only in raised errors).
        public_key_pem: PEM of the verification key; when omitted the key is
            resolved through ``_load_public_key`` (cache, then ``base_url``).
        base_url: Root URL of the key registry used when no PEM is supplied.
        client: Optional HTTP client for the key fetch.

    Returns:
        ``None`` when the receipt verifies.

    Raises:
        ReceiptVerificationError: On a hash mismatch, a missing ``key_id`` or
            ``signature``, a signature that is not valid hexadecimal, a key
            that cannot be obtained, or a signature that does not verify.
    """
    event_id = receipt.get("event_id")
    integrity = receipt.get("integrity") or {}
    if not isinstance(integrity, Mapping):
        # A malformed integrity block is treated like an absent one, so the
        # hash check below reports it as a verification failure.
        integrity = {}
    payload = receipt.get("payload")
    findings = receipt.get("findings", [])
    canonical = canonicalize({"findings": findings, "payload": payload})
    # Encode once; the same bytes are hashed and signature-checked.
    message = canonical.encode("utf-8")
    expected = hashlib.sha256(message).hexdigest()
    if expected != integrity.get("hash"):
        raise ReceiptVerificationError(
            f"hash mismatch: expected {expected}, got {integrity.get('hash')}",
            event_id,
        )

    # The hash alone can be computed by anyone, so a receipt may reach this
    # point without usable signature fields; report that as a verification
    # failure rather than letting KeyError/ValueError escape.
    key_id = integrity.get("key_id")
    signature_hex = integrity.get("signature")
    if not key_id or not isinstance(signature_hex, str):
        raise ReceiptVerificationError(
            "receipt integrity block is missing key_id or signature",
            event_id,
        )
    try:
        sig = bytes.fromhex(signature_hex)
    except ValueError as e:
        raise ReceiptVerificationError(
            "receipt signature is not valid hexadecimal",
            event_id,
        ) from e

    key = _load_public_key(
        key_id,
        public_key_pem=public_key_pem,
        base_url=base_url,
        client=client,
    )
    try:
        key.verify(sig, message)
    except InvalidSignature as e:
        raise ReceiptVerificationError(
            f"signature does not verify against key {key_id}",
            event_id,
        ) from e
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _reset_key_cache_for_tests() -> None:
    """Empty the module-level public-key cache (test helper)."""
    _KEY_CACHE.clear()
|
prova_cp/wrap.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Drop-in wrappers for raw OpenAI / Anthropic clients.
|
|
2
|
+
|
|
3
|
+
For teams not on LangGraph or CrewAI. Wrap the vendor client once; every
|
|
4
|
+
completion is mirrored to Prova as a signed `model_call` receipt. The wrapper
|
|
5
|
+
returns the vendor response unchanged and never raises on a Prova failure, so
|
|
6
|
+
it is safe to wrap a production client.
|
|
7
|
+
|
|
8
|
+
from openai import OpenAI
|
|
9
|
+
from prova_cp import ProvaClient
|
|
10
|
+
from prova_cp.wrap import wrap_openai
|
|
11
|
+
|
|
12
|
+
prova = ProvaClient(api_key="prv_...")
|
|
13
|
+
client = wrap_openai(OpenAI(), prova, app_id="support-bot", environment="production")
|
|
14
|
+
|
|
15
|
+
# use `client` exactly like the OpenAI client -- calls are auto-ingested
|
|
16
|
+
client.chat.completions.create(model="gpt-4o", messages=[...])
|
|
17
|
+
|
|
18
|
+
`wrap_anthropic` is identical for the Anthropic SDK (`messages.create`).
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
log = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
# Maximum number of characters kept from any single string value.
_MAX_LEN = 4000


def _safe(obj: Any, depth: int = 0) -> Any:
    """Reduce ``obj`` to a bounded structure of plain values.

    * Strings are cut to ``_MAX_LEN`` characters, with ``"..."`` appended when
      something was removed.
    * Any non-string value nested deeper than five levels becomes
      ``"[truncated]"``.
    * Dicts keep their keys and have their values converted recursively.
    * Lists and tuples become lists of their first 50 converted items.
    * Objects exposing ``model_dump`` are converted from its result; if that
      fails they are treated like any other object.
    * ``int``, ``float``, ``bool`` and ``None`` pass through unchanged; every
      other object is replaced by ``str(obj)`` cut to ``_MAX_LEN`` characters.
    """
    if isinstance(obj, str):
        clipped = obj[:_MAX_LEN]
        if len(obj) > _MAX_LEN:
            clipped += "..."
        else:
            clipped += ""
        return clipped

    if depth > 5:
        return "[truncated]"

    deeper = depth + 1

    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            converted[key] = _safe(value, deeper)
        return converted

    if isinstance(obj, (list, tuple)):
        items = []
        for element in list(obj)[:50]:
            items.append(_safe(element, deeper))
        return items

    if hasattr(obj, "model_dump"):
        try:
            return _safe(obj.model_dump(), deeper)
        except Exception:
            # Best effort: fall through to the generic handling below.
            pass

    if obj is None or isinstance(obj, (int, float, bool)):
        return obj
    return str(obj)[:_MAX_LEN]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class _CreateProxy:
    """Callable stand-in for a vendor ``create`` method.

    The call is forwarded to the wrapped callable first. Once it has returned,
    the keyword arguments (minus ``api_key``) and the response are reported
    through ``client.ingest`` as a ``model_call`` event. Positional arguments
    are forwarded but not included in the report. Any failure while building
    or sending the report is logged as a warning and otherwise ignored; an
    exception raised by the wrapped callable propagates and nothing is
    reported.
    """

    def __init__(self, fn: Any, client: Any, source: dict, provider: str) -> None:
        self._fn, self._client = fn, client
        self._source, self._provider = source, provider

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        response = self._fn(*args, **kwargs)
        try:
            model_name = kwargs.get("model")
            send = self._client.ingest
            # Copy the source so the shared dict is never handed out directly.
            origin = dict(self._source)
            request = {}
            for key, value in kwargs.items():
                if key != "api_key":
                    request[key] = value
            event = {
                "kind": "model_call",
                "source": origin,
                "model": {"provider": self._provider, "name": model_name},
                "payload": {
                    "request": _safe(request),
                    "response": _safe(response),
                },
            }
            send(event)
        except Exception as exc:
            # Reporting is best effort; the vendor response is still returned.
            log.warning("prova_cp.wrap: ingest failed (%s): %s", self._provider, exc)
        return response
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class _AttrProxy:
    """Attribute-forwarding wrapper that instruments selected ``create`` methods.

    Reads are forwarded to the wrapped target. While walking an attribute path
    listed in ``_LEAVES`` the intermediate objects are wrapped in further
    ``_AttrProxy`` instances so the path can be tracked, and the callable at
    the end of such a path is returned as a ``_CreateProxy``. Every other
    attribute (callables, primitives, and objects from which no instrumented
    path is reachable) is returned exactly as the target provides it, so
    dicts, lists and vendor objects keep their normal behavior.

    Attribute writes are forwarded to the target, so assigning through the
    proxy changes the wrapped object instead of shadowing it on the proxy.

    Internal state lives directly in the instance ``__dict__`` under ``_t``
    (target), ``_c`` (Prova client), ``_s`` (source dict), ``_p`` (provider)
    and ``_path`` (attribute path walked so far).
    """

    # Attribute paths, relative to the wrapped vendor client, whose callable
    # is replaced by a _CreateProxy.
    _LEAVES = (("chat", "completions", "create"), ("messages", "create"), ("responses", "create"))
    # Values of these types are always returned unwrapped.
    _PRIMITIVES = (str, int, float, bool, type(None))

    def __init__(self, target: Any, client: Any, source: dict, provider: str, path: tuple[str, ...]) -> None:
        # Write through __dict__ so __setattr__ forwarding is bypassed.
        self.__dict__["_t"] = target
        self.__dict__["_c"] = client
        self.__dict__["_s"] = source
        self.__dict__["_p"] = provider
        self.__dict__["_path"] = path

    def __getattr__(self, name: str) -> Any:
        state = self.__dict__
        attr = getattr(state["_t"], name)
        new_path = state["_path"] + (name,)

        if new_path in self._LEAVES and callable(attr):
            return _CreateProxy(attr, state["_c"], state["_s"], state["_p"])

        # Keep proxying only while an instrumented leaf is still reachable
        # below this path; anything else is handed back untouched.
        depth = len(new_path)
        on_route = any(len(leaf) > depth and leaf[:depth] == new_path for leaf in self._LEAVES)
        if on_route and not callable(attr) and not isinstance(attr, self._PRIMITIVES):
            return _AttrProxy(attr, state["_c"], state["_s"], state["_p"], new_path)
        return attr

    def __setattr__(self, name: str, value: Any) -> None:
        # Forward writes so the wrapped object, not the proxy, is updated.
        setattr(self.__dict__["_t"], name, value)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _wrap(vendor_client: Any, prova_client: Any, provider: str, app_id: str, environment: str) -> Any:
    """Return ``vendor_client`` wrapped in an ``_AttrProxy`` rooted at an empty path.

    The source description attached to the proxy records ``app_id``,
    ``environment`` and, as ``framework``, the provider name.
    """
    origin = dict(app_id=app_id, environment=environment, framework=provider)
    root_path = ()
    return _AttrProxy(vendor_client, prova_client, origin, provider, root_path)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def wrap_openai(vendor_client: Any, prova_client: Any, *, app_id: str = "agent", environment: str = "production") -> Any:
    """Wrap an `openai.OpenAI` client so that `chat.completions.create` and
    `responses.create` calls are mirrored to Prova.

    `app_id` and `environment` are recorded in the source of every mirrored
    call; the provider is reported as ``"openai"``.
    """
    provider = "openai"
    return _wrap(vendor_client, prova_client, provider, app_id, environment)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def wrap_anthropic(vendor_client: Any, prova_client: Any, *, app_id: str = "agent", environment: str = "production") -> Any:
    """Wrap an `anthropic.Anthropic` client so that `messages.create` calls
    are mirrored to Prova.

    `app_id` and `environment` are recorded in the source of every mirrored
    call; the provider is reported as ``"anthropic"``.
    """
    provider = "anthropic"
    return _wrap(vendor_client, prova_client, provider, app_id, environment)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: prova-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Agent-side SDK for the Prova AI control plane (ingest, gateway-check, register).
|
|
5
|
+
Project-URL: Homepage, https://prova.cobound.dev/docs/sdk
|
|
6
|
+
Project-URL: Documentation, https://prova.cobound.dev/docs/sdk
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: agents,ai,audit,compliance,langgraph,llm,observability
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Requires-Dist: cryptography>=42.0
|
|
19
|
+
Requires-Dist: httpx>=0.27
|
|
20
|
+
Provides-Extra: langgraph
|
|
21
|
+
Requires-Dist: langchain-core>=0.2; extra == 'langgraph'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# prova-sdk (Python)
|
|
25
|
+
|
|
26
|
+
Agent-side SDK for the Prova AI control plane. Thin wrappers around:
|
|
27
|
+
|
|
28
|
+
- `POST /api/v1/audit/ingest`
|
|
29
|
+
- `POST /api/v1/gateway/check`
|
|
30
|
+
- `POST /api/v1/inventory`
|
|
31
|
+
|
|
32
|
+
Plus an Ed25519 receipt verifier and a one-shot migration tool that bulk-imports
|
|
33
|
+
existing LangSmith / Langfuse / OpenAI logs into the Audit Vault.
|
|
34
|
+
|
|
35
|
+
Separate from the legacy `prova` package (the reasoning-chain verifier).
|
|
36
|
+
See https://prova.cobound.dev/docs/sdk for guidance on which one to install.
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
```sh
|
|
41
|
+
pip install prova-sdk
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Requires Python 3.10+.
|
|
45
|
+
|
|
46
|
+
## Quick start
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from prova_cp import ProvaClient
|
|
50
|
+
|
|
51
|
+
prova = ProvaClient(api_key="prv_...")
|
|
52
|
+
|
|
53
|
+
prova.ingest({
|
|
54
|
+
"kind": "model_call",
|
|
55
|
+
"source": {"org_id": "YOUR_ORG", "framework": "langgraph", "app_id": "claims-orchestrator"},
|
|
56
|
+
"model": {"provider": "openai", "name": "gpt-4o"},
|
|
57
|
+
"payload": {"messages": messages, "response": response},
|
|
58
|
+
})
|
|
59
|
+
|
|
60
|
+
check = prova.gateway_check({"kind": "model_call", "payload": {"messages": messages}})
|
|
61
|
+
if check["action"] == "block":
|
|
62
|
+
raise PolicyBlocked(check["findings"])
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Pass `verify_receipts=True` to make the client verify every returned receipt's
|
|
66
|
+
Ed25519 signature against the published public key before returning.
|
|
67
|
+
|
|
68
|
+
## LangGraph / LangChain auto-instrumentation
|
|
69
|
+
|
|
70
|
+
Install the optional extra and drop the callback handler into any graph. Every
|
|
71
|
+
LLM call, node, and tool call is ingested as a signed receipt automatically. No
|
|
72
|
+
per-node code changes.
|
|
73
|
+
|
|
74
|
+
```sh
|
|
75
|
+
pip install "prova-sdk[langgraph]"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from prova_cp import ProvaClient, ProvaCallbackHandler
|
|
80
|
+
|
|
81
|
+
prova = ProvaClient(api_key="prv_...")
|
|
82
|
+
handler = ProvaCallbackHandler(
|
|
83
|
+
prova,
|
|
84
|
+
app_id="claims-orchestrator",
|
|
85
|
+
environment="production",
|
|
86
|
+
framework="langgraph",
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# LangGraph
|
|
90
|
+
graph.invoke(inputs, config={"callbacks": [handler]})
|
|
91
|
+
|
|
92
|
+
# LangChain
|
|
93
|
+
chain.invoke(inputs, config={"callbacks": [handler]})
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
The handler is fail-silent: a Prova outage logs at warning level and never
|
|
97
|
+
breaks the agent. LLM calls become `model_call` receipts, graph nodes become
|
|
98
|
+
`agent_step`, tool calls become `tool_call`.
|
|
99
|
+
|
|
100
|
+
## CrewAI
|
|
101
|
+
|
|
102
|
+
CrewAI has no LangChain-style callbacks; use its `step_callback` /
|
|
103
|
+
`task_callback` hooks instead.
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from prova_cp import ProvaClient, ProvaCrewAI
|
|
107
|
+
|
|
108
|
+
tap = ProvaCrewAI(ProvaClient(api_key="prv_..."), app_id="research-crew")
|
|
109
|
+
crew = Crew(agents=[...], tasks=[...],
|
|
110
|
+
step_callback=tap.step_callback,
|
|
111
|
+
task_callback=tap.task_callback)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Agent steps become `agent_step` receipts; completed tasks become `agent_run`.
|
|
115
|
+
|
|
116
|
+
## Raw OpenAI / Anthropic clients (no framework)
|
|
117
|
+
|
|
118
|
+
Wrap the vendor client once. Every completion is mirrored to a signed receipt.
|
|
119
|
+
The vendor response is returned unchanged and a Prova failure never raises.
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from openai import OpenAI
|
|
123
|
+
from prova_cp import ProvaClient, wrap_openai
|
|
124
|
+
|
|
125
|
+
client = wrap_openai(OpenAI(), ProvaClient(api_key="prv_..."), app_id="support-bot")
|
|
126
|
+
client.chat.completions.create(model="gpt-4o", messages=[...]) # auto-ingested
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
`wrap_anthropic` is identical for the Anthropic SDK (`messages.create`).
|
|
130
|
+
|
|
131
|
+
## Migrate existing logs
|
|
132
|
+
|
|
133
|
+
CLI:
|
|
134
|
+
|
|
135
|
+
```sh
|
|
136
|
+
PROVA_API_KEY=prv_... prova-migrate --source langsmith --file runs.ndjson
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Programmatic:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from prova_cp import ProvaClient, migrate
|
|
143
|
+
from prova_cp.migrate import read_ndjson
|
|
144
|
+
|
|
145
|
+
with ProvaClient(api_key="prv_...") as client, open("observations.ndjson") as f:
|
|
146
|
+
result = migrate(client, "langfuse", read_ndjson(f))
|
|
147
|
+
print(result)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Supported sources: `langsmith`, `langfuse`, `openai`. Idempotency keys are
|
|
151
|
+
derived from the source row id, so re-running the migration is safe.
|
|
152
|
+
|
|
153
|
+
## Verify a receipt offline
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
from prova_cp import verify_receipt
|
|
157
|
+
|
|
158
|
+
verify_receipt(receipt, public_key_pem=PUBLIC_KEY_PEM)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Or fetch the public key from the deployment automatically:
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
verify_receipt(receipt, base_url="https://api.prova.cobound.dev")
|
|
165
|
+
```
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
prova_cp/__init__.py,sha256=ezq5fn7zIU_sS811DNfOtc3jcQd8hXecd4NFZFAQXo4,745
|
|
2
|
+
prova_cp/callbacks.py,sha256=ymiYZ2EIf2YOWD8LavQn_5vQYnWGQOs0jueUAR7aQ7M,9379
|
|
3
|
+
prova_cp/canonical.py,sha256=QLHeDnndJIYt4bvZKCint7Es-fQ3M49_yjJWMb1HE7o,571
|
|
4
|
+
prova_cp/cli.py,sha256=8__mNDoyImKG5Tm3gLUt_j607JtgsPu0bdvO_f9uKVs,1720
|
|
5
|
+
prova_cp/client.py,sha256=Dp0WtuhUgg99jcTVIguXkAYmZQf_N_myVBoAxO8pHQM,4492
|
|
6
|
+
prova_cp/crewai.py,sha256=DFAAMr1sm5H8l-tkWkhUnyJ7dvIRXhqfhJWPKJhsp8A,3258
|
|
7
|
+
prova_cp/migrate.py,sha256=mv4wmbiKHDrFW1Vz3S9DLfOiZ7gN0gIoHENjm6YImBU,6264
|
|
8
|
+
prova_cp/verify.py,sha256=ImeejuFkuK9UIj8MQjh7e9Nin2RH65HELqivpqc0mLc,3232
|
|
9
|
+
prova_cp/wrap.py,sha256=OSM2bdjnOSwQmcgx8WaOQjrMTq7GIGdH5d2yLy2B17o,4495
|
|
10
|
+
prova_sdk-0.1.0.dist-info/METADATA,sha256=zp0TR5IerW8uN1ZjdfW1TjC2gbnkCkDxfiWoryeqvaY,4811
|
|
11
|
+
prova_sdk-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
+
prova_sdk-0.1.0.dist-info/entry_points.txt,sha256=2k2W3-LIfH81nhpY0CC41BRWAPeTr4GkG-zkgsvNPig,52
|
|
13
|
+
prova_sdk-0.1.0.dist-info/RECORD,,
|