opsveritas 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opsveritas-0.1.1/PKG-INFO +11 -0
- opsveritas-0.1.1/pyproject.toml +26 -0
- opsveritas-0.1.1/setup.cfg +4 -0
- opsveritas-0.1.1/src/opsveritas/__init__.py +7 -0
- opsveritas-0.1.1/src/opsveritas/_config.py +23 -0
- opsveritas-0.1.1/src/opsveritas/_context.py +86 -0
- opsveritas-0.1.1/src/opsveritas/_http.py +67 -0
- opsveritas-0.1.1/src/opsveritas/_pricing.py +73 -0
- opsveritas-0.1.1/src/opsveritas/_usage.py +60 -0
- opsveritas-0.1.1/src/opsveritas/trace.py +138 -0
- opsveritas-0.1.1/src/opsveritas/wrap.py +359 -0
- opsveritas-0.1.1/src/opsveritas.egg-info/PKG-INFO +11 -0
- opsveritas-0.1.1/src/opsveritas.egg-info/SOURCES.txt +16 -0
- opsveritas-0.1.1/src/opsveritas.egg-info/dependency_links.txt +1 -0
- opsveritas-0.1.1/src/opsveritas.egg-info/requires.txt +5 -0
- opsveritas-0.1.1/src/opsveritas.egg-info/top_level.txt +1 -0
- opsveritas-0.1.1/tests/test_monitor.py +131 -0
- opsveritas-0.1.1/tests/test_wrap.py +243 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: opsveritas
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Monitor your AI agents with 2 lines of code
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
10
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
11
|
+
Requires-Dist: pytest-mock>=3.12; extra == "dev"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=42", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "opsveritas"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Monitor your AI agents with 2 lines of code"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
dependencies = []
|
|
13
|
+
|
|
14
|
+
[project.optional-dependencies]
|
|
15
|
+
dev = [
|
|
16
|
+
"pytest>=7.0",
|
|
17
|
+
"pytest-asyncio>=0.23",
|
|
18
|
+
"pytest-mock>=3.12",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[tool.setuptools.packages.find]
|
|
22
|
+
where = ["src"]
|
|
23
|
+
|
|
24
|
+
[tool.pytest.ini_options]
|
|
25
|
+
asyncio_mode = "auto"
|
|
26
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
@dataclass
class _Config:
    """Settings captured by ``init`` and read back through ``get_config``."""

    # Key sent with every request to authenticate the caller.
    api_key: str
    # Base URL of the ingestion service (stored without a trailing slash by ``init``).
    endpoint: str


# Process-wide configuration; stays ``None`` until ``init`` has been called.
_config: Optional[_Config] = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def init(api_key: str, *, endpoint: str = "https://agents.opsveritas.com") -> None:
    """Configure the SDK; must be called once before any execution is sent.

    Args:
        api_key: Non-empty API key string.
        endpoint: Base URL of the ingestion service. Trailing slashes are
            stripped so request paths can be appended directly.

    Raises:
        ValueError: If ``api_key`` or ``endpoint`` is empty or not a string.
    """
    global _config
    if not api_key or not isinstance(api_key, str):
        raise ValueError("[OpsVeritas] api_key is required")
    # Reject a bad endpoint here, where the caller can see the problem:
    # ``None`` would otherwise fail with AttributeError on ``.rstrip`` and an
    # empty string would produce a URL with no host.
    if not endpoint or not isinstance(endpoint, str):
        raise ValueError("[OpsVeritas] endpoint must be a non-empty string")
    _config = _Config(api_key=api_key, endpoint=endpoint.rstrip("/"))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_config() -> _Config:
    """Return the active configuration.

    Raises:
        RuntimeError: If ``init`` has not stored a configuration yet.
    """
    active = _config
    if active is not None:
        return active
    raise RuntimeError("[OpsVeritas] Call opsveritas.init(api_key) before using the SDK")
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
# Per-thread slot holding the run that is currently collecting sub-calls.
_local = threading.local()


class run:
    """
    Group multiple LLM calls into one execution record.

    Usage:
        with opsveritas.run('My Agent') as r:
            resp1 = wrapped_openai.chat.completions.create(...)
            resp2 = wrapped_anthropic.messages.create(...)
        # → ONE execution sent with combined cost, tokens, duration

    Runs may be nested: leaving an inner run re-activates the run that was
    active when the inner one was entered, so the outer run keeps collecting
    the calls made after the inner block.

    NOTE(review): the active run lives in a ``threading.local`` slot, so it is
    tracked per thread, not per asyncio task — confirm that is sufficient for
    the async wrappers.
    """

    def __init__(self, agent_name: str, *, user_id: Optional[str] = None) -> None:
        self.agent_name = agent_name
        self.user_id = user_id
        self._start: float = 0.0
        self._executed_at: str = ''
        self._calls: list[dict[str, Any]] = []
        # Run that was active when this one was entered (restored on exit).
        self._previous: Optional['run'] = None

    def __enter__(self) -> 'run':
        self._start = time.monotonic()
        self._executed_at = datetime.now(timezone.utc).isoformat()
        # Remember the enclosing run instead of overwriting it blindly.
        self._previous = getattr(_local, 'current', None)
        _local.current = self
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> bool:
        # Hand control back to the enclosing run (or None at the top level).
        # Previously this was always reset to None, which silently disabled
        # aggregation for the remainder of an outer ``with run(...)`` block.
        _local.current = self._previous
        self._previous = None
        try:
            duration_ms = int((time.monotonic() - self._start) * 1000)
            status = 'failed' if exc_type is not None else 'success'
            error_message = str(exc_val) if exc_val is not None else None

            total_input = sum(c.get('input_tokens') or 0 for c in self._calls)
            total_output = sum(c.get('output_tokens') or 0 for c in self._calls)
            total_cost = sum(c.get('cost_usd') or 0.0 for c in self._calls)

            # Preserve call order, deduplicate (dicts keep insertion order).
            models: list[str] = list(
                dict.fromkeys(c['model'] for c in self._calls if c.get('model'))
            )

            # The first recorded call decides the platform label.
            platform = (self._calls[0].get('platform') or 'custom_webhook') if self._calls else 'custom_webhook'

            payload: dict[str, Any] = {
                'platform': platform,
                'agent_name': self.agent_name,
                'status': status,
                'executed_at': self._executed_at,
                'duration_ms': duration_ms,
                'error_message': error_message,
            }
            # Optional fields are only sent when they carry a value.
            if total_input:
                payload['input_tokens'] = total_input
            if total_output:
                payload['output_tokens'] = total_output
            if total_cost:
                payload['cost_usd'] = round(total_cost, 6)
            if models:
                payload['models'] = models
            if self.user_id:
                payload['user_id'] = self.user_id

            from opsveritas._http import send_execution_sync
            send_execution_sync(payload)
        except Exception:
            # Reporting is best-effort and must never break the user's code.
            pass
        return False  # never suppress exceptions

    def _record(self, sub_call: dict[str, Any]) -> None:
        """Store one sub-call payload for aggregation at exit."""
        self._calls.append(sub_call)


def get_active_run() -> Optional[run]:
    """Return the run currently active on this thread, or None."""
    return getattr(_local, 'current', None)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import atexit
|
|
3
|
+
import json
|
|
4
|
+
import threading
|
|
5
|
+
import urllib.request
|
|
6
|
+
from typing import Any, Optional
|
|
7
|
+
from ._config import get_config
|
|
8
|
+
|
|
9
|
+
# Sender threads started by this module, guarded by ``_lock``.
_pending: list[threading.Thread] = []
_lock = threading.Lock()


def _flush() -> None:
    """Wait for the tracked sender threads, at most 6 seconds each."""
    _lock.acquire()
    try:
        # Snapshot under the lock so the joins below run without holding it.
        snapshot = _pending[:]
    finally:
        _lock.release()
    for worker in snapshot:
        worker.join(timeout=6)


# Registered so the interpreter waits for in-flight sends before exiting.
atexit.register(_flush)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _post_sync(url: str, api_key: str, payload: dict[str, Any]) -> None:
    """POST ``payload`` as JSON to ``url``; every failure is swallowed.

    Args:
        url: Full target URL.
        api_key: Value sent in the ``x-opsveritas-key`` header.
        payload: JSON-serialisable execution record.
    """
    request_headers = {"Content-Type": "application/json", "x-opsveritas-key": api_key}
    try:
        body = json.dumps(payload).encode("utf-8")
        request = urllib.request.Request(
            url, data=body, headers=request_headers, method="POST"
        )
        # The response body is not needed; the context manager just closes it.
        with urllib.request.urlopen(request, timeout=5):
            pass
    except Exception:
        return  # Never crash user code
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def send_execution(payload: dict[str, Any]) -> None:
    """Send ``payload`` on a background daemon thread (fire-and-forget).

    Args:
        payload: JSON-serialisable execution record.

    Raises:
        RuntimeError: Propagated from ``get_config`` when the SDK has not been
            initialised.
    """
    cfg = get_config()
    url = f"{cfg.endpoint}/webhooks/agent-execution"
    t = threading.Thread(target=_post_sync, args=(url, cfg.api_key, payload), daemon=True)
    with _lock:
        # Drop finished senders so the list does not grow for the lifetime of
        # a long-running process. Slice-assign to keep the same list object.
        _pending[:] = [p for p in _pending if p.is_alive()]
        # Start before publishing: ``_flush`` joins every listed thread, and
        # joining a thread that has not been started raises RuntimeError.
        t.start()
        _pending.append(t)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def send_execution_sync(payload: dict[str, Any]) -> None:
    """Blocking send — use when the caller is already blocking (e.g. run.__exit__)."""
    settings = get_config()
    target_url = f"{settings.endpoint}/webhooks/agent-execution"
    # Runs on the calling thread; no worker thread is created here.
    _post_sync(target_url, settings.api_key, payload)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
async def send_execution_async(payload: dict[str, Any]) -> None:
    """Async variant — tries httpx first, falls back to sync thread.

    Delivery is best-effort: no ``Exception`` escapes this coroutine, whether
    it comes from the httpx path or from the thread fallback.

    Args:
        payload: JSON-serialisable execution record.
    """
    try:
        try:
            import httpx  # optional fast path
        except ImportError:
            # The fallback sits inside the outer ``try`` on purpose: a failure
            # here (e.g. SDK not initialised) used to be raised from within
            # the ImportError handler and reach the caller.
            send_execution(payload)  # fallback to sync thread
            return
        cfg = get_config()
        async with httpx.AsyncClient(timeout=5) as client:
            await client.post(
                f"{cfg.endpoint}/webhooks/agent-execution",
                json=payload,
                headers={"x-opsveritas-key": cfg.api_key},
            )
    except Exception:
        pass  # Never crash user code
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
# Cost per 1M tokens (input, output) in USD
# Lookup prefers an exact key, then the LONGEST key contained in the model id,
# so "gpt-4.1-mini-2025-04-14" resolves to "gpt-4.1-mini" rather than "gpt-4".
# Entries are still listed most-specific first for readability.
_PRICING: dict[str, tuple[float, float]] = {
    # ── OpenAI ────────────────────────────────────────────────────────────────
    # GPT-4.1 family (April 2025)
    "gpt-4.1-nano": (0.10, 0.40),
    "gpt-4.1-mini": (0.40, 1.60),
    "gpt-4.1": (2.00, 8.00),
    # GPT-4o
    "gpt-4o-mini": (0.15, 0.60),
    "gpt-4o": (2.50, 10.00),
    # Legacy GPT-4
    "gpt-4-turbo": (10.00, 30.00),
    "gpt-4": (30.00, 60.00),
    "gpt-3.5-turbo": (0.50, 1.50),
    # Reasoning models
    "o4-mini": (1.10, 4.40),
    "o3-mini": (1.10, 4.40),
    "o3": (10.00, 40.00),
    "o1-mini": (3.00, 12.00),
    "o1": (15.00, 60.00),

    # ── Anthropic ─────────────────────────────────────────────────────────────
    # Claude 4 family — matched by substring for versioned IDs
    # e.g. "claude-opus-4-8" matches "claude-opus-4"
    "claude-opus-4": (15.00, 75.00),
    "claude-sonnet-4": (3.00, 15.00),
    "claude-haiku-4": (0.80, 4.00),
    # Claude 3.7
    "claude-3-7-sonnet": (3.00, 15.00),
    # Claude 3.5
    "claude-3-5-sonnet": (3.00, 15.00),
    "claude-3-5-haiku": (0.80, 4.00),
    # Claude 3
    "claude-3-opus": (15.00, 75.00),
    "claude-3-sonnet": (3.00, 15.00),
    "claude-3-haiku": (0.25, 1.25),

    # ── Groq ──────────────────────────────────────────────────────────────────
    # Llama 3.x (new naming: llama-3.x-…)
    "llama-3.3-70b": (0.59, 0.79),
    "llama-3.1-70b": (0.59, 0.79),
    "llama-3.1-8b": (0.05, 0.08),
    # Legacy Groq naming (llama3-…)
    "llama3-70b": (0.59, 0.79),
    "llama3-8b": (0.05, 0.08),
    "mixtral-8x7b": (0.24, 0.24),
    "gemma2-9b": (0.20, 0.20),

    # ── Google Gemini ─────────────────────────────────────────────────────────
    "gemini-2.5-pro": (1.25, 10.00),
    "gemini-2.5-flash": (0.15, 0.60),
    "gemini-2.0-flash": (0.10, 0.40),
    "gemini-1.5-pro": (1.25, 5.00),
    "gemini-1.5-flash": (0.075, 0.30),
}


def _match_pricing_key(model_lower: str) -> Optional[str]:
    """Pick the pricing key for a lower-cased model id, or None if unknown."""
    # 1. An exact name always wins ("gpt-4" must not resolve to "gpt-4.1-nano").
    if model_lower in _PRICING:
        return model_lower
    # 2. Versioned ids ("gpt-4o-2024-08-06"): take the longest contained key.
    contained = [k for k in _PRICING if k in model_lower]
    if contained:
        return max(contained, key=len)
    # 3. Legacy behaviour: a partial name ("claude-opus") maps to the first
    #    table entry that contains it.
    return next((k for k in _PRICING if model_lower in k), None)


def calc_cost(model: str, input_tokens: int, output_tokens: int) -> Optional[float]:
    """Estimate the USD cost of one call from the pricing table.

    Args:
        model: Model identifier; matched case-insensitively.
        input_tokens: Number of prompt tokens.
        output_tokens: Number of completion tokens.

    Returns:
        The cost rounded to 6 decimals, or None when the model is empty or has
        no entry in the table.
    """
    if not model:
        # An empty name is a substring of every key and used to be priced as
        # the first table entry.
        return None
    key = _match_pricing_key(model.lower())
    if key is None:
        return None
    input_rate, output_rate = _PRICING[key]
    raw = (input_tokens * input_rate + output_tokens * output_rate) / 1_000_000
    return round(raw, 6)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Any, Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def extract_usage(result: Any) -> dict[str, Any]:
    """Pull token usage out of an OpenAI-, Anthropic- or Gemini-shaped response.

    Detection is by attribute: ``usage.prompt_tokens`` (OpenAI),
    ``usage.input_tokens`` (Anthropic), then
    ``usage_metadata.prompt_token_count`` (Gemini).

    Returns:
        A dict with ``input_tokens`` and ``output_tokens``; the OpenAI and
        Anthropic shapes also carry ``model`` and, when present, a non-zero
        ``tool_calls`` count. An empty dict when nothing is recognised.
    """
    if result is None:
        return {}

    usage = getattr(result, "usage", None)
    model = getattr(result, "model", None)

    if usage is not None:
        # OpenAI: usage.prompt_tokens / usage.completion_tokens
        prompt_tokens = getattr(usage, "prompt_tokens", None)
        if prompt_tokens is not None:
            extracted: dict[str, Any] = {
                "model": model,
                "input_tokens": prompt_tokens,
                "output_tokens": getattr(usage, "completion_tokens", 0) or 0,
            }
            choices = getattr(result, "choices", None)
            if choices:
                # Only the first choice is inspected for tool calls.
                message = getattr(choices[0], "message", None)
                calls = getattr(message, "tool_calls", None) if message else None
                if calls:
                    n_calls = len(calls)
                    if n_calls:
                        extracted["tool_calls"] = n_calls
            return extracted

        # Anthropic: usage.input_tokens / usage.output_tokens
        input_tokens = getattr(usage, "input_tokens", None)
        if input_tokens is not None:
            extracted = {
                "model": model,
                "input_tokens": input_tokens,
                "output_tokens": getattr(usage, "output_tokens", 0) or 0,
            }
            blocks = getattr(result, "content", None)
            if blocks:
                # Each "tool_use" content block counts as one tool call.
                n_tool_use = len(
                    [b for b in blocks if getattr(b, "type", None) == "tool_use"]
                )
                if n_tool_use:
                    extracted["tool_calls"] = n_tool_use
            return extracted

    # Gemini: result.usage_metadata.prompt_token_count / candidates_token_count
    # (no model name is read from the response here)
    usage_meta = getattr(result, "usage_metadata", None)
    if usage_meta is None:
        return {}
    prompt_token_count = getattr(usage_meta, "prompt_token_count", None)
    if prompt_token_count is None:
        return {}
    return {
        "input_tokens": prompt_token_count,
        "output_tokens": getattr(usage_meta, "candidates_token_count", 0) or 0,
    }
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import functools
|
|
3
|
+
import inspect
|
|
4
|
+
import json
|
|
5
|
+
import time
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from typing import Any, Callable, Optional, TypeVar
|
|
8
|
+
|
|
9
|
+
from ._http import send_execution, send_execution_async
|
|
10
|
+
from ._pricing import calc_cost
|
|
11
|
+
from ._usage import extract_usage
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _summarize(result: Any) -> Optional[str]:
    """Build a short (at most 500 characters) text summary of a return value.

    Strings are stripped, dicts and lists are rendered as JSON (non-JSON
    values stringified), anything else goes through ``str``. The summary lets
    a payload without token counts still carry an ``output_summary``.

    Returns:
        The summary, or None for ``None``, a blank string, or any value whose
        rendering raises.
    """
    if result is None:
        return None
    try:
        if isinstance(result, str):
            stripped = result.strip()
            return stripped[:500] or None
        if isinstance(result, (dict, list)):
            rendered = json.dumps(result, default=str)
        else:
            rendered = str(result)
        return rendered[:500]
    except Exception:
        # A value that cannot be rendered simply has no summary.
        return None
|
|
32
|
+
|
|
33
|
+
F = TypeVar("F", bound=Callable[..., Any])


def _build_monitor_payload(
    agent_name: str,
    platform: str,
    user_id: Optional[str],
    executed_at: str,
    t0: float,
    status: str,
    error_message: Optional[str],
    result: Any,
) -> dict[str, Any]:
    """Assemble the execution payload shared by the sync and async wrappers."""
    duration_ms = int((time.monotonic() - t0) * 1000)
    usage = extract_usage(result)
    cost_usd = (
        calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
        if usage.get("model") and usage.get("input_tokens") is not None
        else None
    )
    payload: dict[str, Any] = {
        "platform": platform,
        "agent_name": agent_name,
        "status": status,
        "executed_at": executed_at,
        "duration_ms": duration_ms,
        "error_message": error_message,
        **{k: v for k, v in usage.items() if v is not None},
    }
    if cost_usd is not None:
        payload["cost_usd"] = cost_usd
    if user_id:
        payload["user_id"] = user_id
    # If no token usage was captured (non-LLM return value), derive a
    # summary so the server doesn't false-positive a silent failure.
    if "output_tokens" not in payload and "output_summary" not in payload:
        s = _summarize(result)
        if s:
            payload["output_summary"] = s
    return payload


def monitor(
    agent_name: str,
    *,
    platform: str = "custom_webhook",
    user_id: Optional[str] = None,
) -> Callable[[F], F]:
    """Decorator that auto-instruments any sync or async function.

    Every call is timed and reported as one execution record. The wrapped
    function's return value and exceptions pass through untouched; errors
    raised while reporting are swallowed.

    Args:
        agent_name: Name reported for every execution of the function.
        platform: Platform label placed in the payload.
        user_id: Optional user identifier added to the payload when set.

    Returns:
        A decorator that preserves the wrapped function's metadata.
    """
    def decorator(fn: F) -> F:
        if inspect.iscoroutinefunction(fn):
            @functools.wraps(fn)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                executed_at = datetime.now(timezone.utc).isoformat()
                t0 = time.monotonic()
                status = "success"
                error_message = None
                result = None
                try:
                    result = await fn(*args, **kwargs)
                    return result
                # NOTE(review): BaseException subclasses such as
                # asyncio.CancelledError bypass this handler, so a cancelled
                # call is reported with status "success" — confirm intended.
                except Exception as exc:
                    status = "failed"
                    error_message = str(exc)
                    raise
                finally:
                    try:
                        payload = _build_monitor_payload(
                            agent_name, platform, user_id,
                            executed_at, t0, status, error_message, result,
                        )
                        await send_execution_async(payload)
                    except Exception:
                        pass  # Never crash user code
            return async_wrapper  # type: ignore[return-value]
        else:
            @functools.wraps(fn)
            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
                executed_at = datetime.now(timezone.utc).isoformat()
                t0 = time.monotonic()
                status = "success"
                error_message = None
                result = None
                try:
                    result = fn(*args, **kwargs)
                    return result
                except Exception as exc:
                    status = "failed"
                    error_message = str(exc)
                    raise
                finally:
                    try:
                        payload = _build_monitor_payload(
                            agent_name, platform, user_id,
                            executed_at, t0, status, error_message, result,
                        )
                        send_execution(payload)
                    except Exception:
                        pass  # Never crash user code
            return sync_wrapper  # type: ignore[return-value]
    return decorator
|
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import time
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from typing import Any, Optional, TypeVar
|
|
5
|
+
|
|
6
|
+
from ._context import get_active_run
|
|
7
|
+
from ._http import send_execution, send_execution_async
|
|
8
|
+
from ._pricing import calc_cost
|
|
9
|
+
from ._usage import extract_usage
|
|
10
|
+
from .trace import _summarize
|
|
11
|
+
|
|
12
|
+
T = TypeVar("T")
|
|
13
|
+
|
|
14
|
+
_PATCHED = "_opsveritas_patched"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _extract_llm_output(result: Any) -> Optional[str]:
    """Return up to 300 characters of text from an LLM response, else None.

    Shapes are tried in order: ``choices[0].message.content`` (OpenAI / Groq),
    the first non-blank ``text`` block in ``content`` (Anthropic), then
    ``response.text`` (older Gemini SDK). Any error yields None.
    """
    def _has_text(value: Any) -> bool:
        # Only non-blank strings count as usable output.
        return isinstance(value, str) and bool(value.strip())

    if result is None:
        return None
    try:
        # OpenAI / Groq style
        choices = getattr(result, "choices", None)
        if choices:
            message = getattr(choices[0], "message", None)
            content = getattr(message, "content", None)
            if _has_text(content):
                return content[:300]

        # Anthropic style
        for block in getattr(result, "content", None) or ():
            if getattr(block, "type", None) != "text":
                continue
            text = getattr(block, "text", None)
            if _has_text(text):
                return text[:300]

        # Gemini old SDK
        response = getattr(result, "response", None)
        if response:
            try:
                text = response.text
                if _has_text(text):
                    return text[:300]
            except Exception:
                pass
    except Exception:
        pass
    return None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _patch_openai_sync(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Wrap ``client.chat.completions.create`` so each call is recorded.

    Does nothing when the client has no ``chat.completions`` or it is already
    patched. A recorded call is appended to the active run if there is one,
    otherwise it is sent on its own.
    """
    chat = getattr(client, "chat", None)
    completions = getattr(chat, "completions", None)
    if completions is None:
        return
    if getattr(completions, _PATCHED, None) is True:
        return

    orig_create = completions.create

    def patched_create(*args: Any, **kwargs: Any) -> Any:
        executed_at = datetime.now(timezone.utc).isoformat()
        started = time.monotonic()
        outcome = "success"
        failure_text = None
        result = None
        try:
            result = orig_create(*args, **kwargs)
            return result
        except Exception as exc:
            outcome = "failed"
            failure_text = str(exc)
            raise
        finally:
            # Telemetry is best-effort; nothing below may reach the caller.
            try:
                elapsed_ms = int((time.monotonic() - started) * 1000)
                usage = extract_usage(result)
                cost_usd = None
                if usage.get("model") and usage.get("input_tokens") is not None:
                    cost_usd = calc_cost(
                        usage["model"], usage["input_tokens"], usage["output_tokens"]
                    )
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": outcome,
                    "executed_at": executed_at,
                    "duration_ms": elapsed_ms,
                    "error_message": failure_text,
                }
                # Usage fields follow the fixed fields; None values are dropped.
                payload.update((k, v) for k, v in usage.items() if v is not None)
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                summary = _extract_llm_output(result)
                if summary:
                    payload["output_summary"] = summary
                active = get_active_run()
                if active is None:
                    send_execution(payload)
                else:
                    active._record(payload)
            except Exception:
                pass  # Never crash user code

    completions.create = patched_create
    setattr(completions, _PATCHED, True)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _patch_openai_async(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Wrap an async client's ``chat.completions.create`` so each call is recorded.

    Does nothing when the client has no ``chat.completions`` or it is already
    patched. A recorded call is appended to the active run if there is one,
    otherwise it is sent on its own. Errors raised while recording are
    swallowed so they can neither crash the caller nor replace the exception
    raised by the underlying API call.
    """
    completions = getattr(getattr(client, "chat", None), "completions", None)
    if completions is None or getattr(completions, _PATCHED, None) is True:
        return

    orig_create = completions.create

    async def patched_create(*args: Any, **kwargs: Any) -> Any:
        executed_at = datetime.now(timezone.utc).isoformat()
        t0 = time.monotonic()
        status = "success"
        error_message = None
        result = None
        try:
            result = await orig_create(*args, **kwargs)
            return result
        except Exception as exc:
            status = "failed"
            error_message = str(exc)
            raise
        finally:
            # Guarded like the sync patcher: an exception escaping a
            # ``finally`` block would override the call's own result/error.
            try:
                duration_ms = int((time.monotonic() - t0) * 1000)
                usage = extract_usage(result)
                cost_usd = (
                    calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
                    if usage.get("model") and usage.get("input_tokens") is not None
                    else None
                )
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": status,
                    "executed_at": executed_at,
                    "duration_ms": duration_ms,
                    "error_message": error_message,
                    **{k: v for k, v in usage.items() if v is not None},
                }
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                s = _extract_llm_output(result)
                if s:
                    payload["output_summary"] = s
                active = get_active_run()
                if active is not None:
                    active._record(payload)
                else:
                    await send_execution_async(payload)
            except Exception:
                pass  # Never crash user code

    completions.create = patched_create
    setattr(completions, _PATCHED, True)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _patch_anthropic_sync(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Wrap ``client.messages.create`` so each call is recorded.

    Does nothing when the client has no ``messages`` attribute or it is
    already patched. A recorded call is appended to the active run if there is
    one, otherwise it is sent on its own. Errors raised while recording are
    swallowed so they can neither crash the caller nor replace the exception
    raised by the underlying API call.
    """
    messages = getattr(client, "messages", None)
    if messages is None or getattr(messages, _PATCHED, None) is True:
        return

    orig_create = messages.create

    def patched_create(*args: Any, **kwargs: Any) -> Any:
        executed_at = datetime.now(timezone.utc).isoformat()
        t0 = time.monotonic()
        status = "success"
        error_message = None
        result = None
        try:
            result = orig_create(*args, **kwargs)
            return result
        except Exception as exc:
            status = "failed"
            error_message = str(exc)
            raise
        finally:
            # Guarded like the OpenAI sync patcher: an exception escaping a
            # ``finally`` block would override the call's own result/error.
            try:
                duration_ms = int((time.monotonic() - t0) * 1000)
                usage = extract_usage(result)
                cost_usd = (
                    calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
                    if usage.get("model") and usage.get("input_tokens") is not None
                    else None
                )
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": status,
                    "executed_at": executed_at,
                    "duration_ms": duration_ms,
                    "error_message": error_message,
                    **{k: v for k, v in usage.items() if v is not None},
                }
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                s = _extract_llm_output(result)
                if s:
                    payload["output_summary"] = s
                active = get_active_run()
                if active is not None:
                    active._record(payload)
                else:
                    send_execution(payload)
            except Exception:
                pass  # Never crash user code

    messages.create = patched_create
    setattr(messages, _PATCHED, True)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _patch_gemini_sync(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Wrap ``client.generate_content`` on a Gemini model object.

    Does nothing when the object is already patched. The model name is read
    once from the object (``model_name`` or ``model``) and added to the usage
    data whenever the response itself supplies none.
    """
    if getattr(client, _PATCHED, None) is True:
        return

    # Strip "models/" prefix (Gemini SDK uses "models/gemini-1.5-flash" format)
    raw_name = getattr(client, "model_name", None) or getattr(client, "model", None)
    model_name: Optional[str] = None
    if raw_name:
        model_name = str(raw_name).replace("models/", "")

    orig_generate = client.generate_content

    def patched_generate(*args: Any, **kwargs: Any) -> Any:
        executed_at = datetime.now(timezone.utc).isoformat()
        started = time.monotonic()
        outcome = "success"
        failure_text = None
        result = None
        try:
            result = orig_generate(*args, **kwargs)
            return result
        except Exception as exc:
            outcome = "failed"
            failure_text = str(exc)
            raise
        finally:
            # Telemetry is best-effort; nothing below may reach the caller.
            try:
                elapsed_ms = int((time.monotonic() - started) * 1000)
                usage = extract_usage(result)
                # Gemini doesn't embed model in response — inject from instance
                if model_name and "model" not in usage:
                    usage = dict(usage, model=model_name)
                cost_usd = None
                if usage.get("model") and usage.get("input_tokens") is not None:
                    cost_usd = calc_cost(
                        usage["model"], usage["input_tokens"], usage["output_tokens"]
                    )
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": outcome,
                    "executed_at": executed_at,
                    "duration_ms": elapsed_ms,
                    "error_message": failure_text,
                }
                # Usage fields follow the fixed fields; None values are dropped.
                payload.update((k, v) for k, v in usage.items() if v is not None)
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                # Only summarise when no output token count was captured.
                if not ("output_tokens" in payload or "output_summary" in payload):
                    summary = _summarize(result)
                    if summary:
                        payload["output_summary"] = summary
                active = get_active_run()
                if active is None:
                    send_execution(payload)
                else:
                    active._record(payload)
            except Exception:
                pass

    client.generate_content = patched_generate
    setattr(client, _PATCHED, True)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _patch_gemini_new_sync(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
|
|
279
|
+
"""Patch the new google-genai SDK: client.models.generate_content(model=..., contents=...)."""
|
|
280
|
+
models = getattr(client, "models", None)
|
|
281
|
+
if models is None or getattr(models, _PATCHED, None) is True:
|
|
282
|
+
return
|
|
283
|
+
|
|
284
|
+
orig_generate = models.generate_content
|
|
285
|
+
|
|
286
|
+
def patched_generate(*args: Any, **kwargs: Any) -> Any:
|
|
287
|
+
executed_at = datetime.now(timezone.utc).isoformat()
|
|
288
|
+
t0 = time.monotonic()
|
|
289
|
+
status = "success"
|
|
290
|
+
error_message = None
|
|
291
|
+
result = None
|
|
292
|
+
# model name is passed as kwarg or first positional arg
|
|
293
|
+
raw_model = kwargs.get("model") or (args[0] if args else None)
|
|
294
|
+
model_name: Optional[str] = str(raw_model).replace("models/", "") if raw_model else None
|
|
295
|
+
try:
|
|
296
|
+
result = orig_generate(*args, **kwargs)
|
|
297
|
+
return result
|
|
298
|
+
except Exception as exc:
|
|
299
|
+
status = "failed"
|
|
300
|
+
error_message = str(exc)
|
|
301
|
+
raise
|
|
302
|
+
finally:
|
|
303
|
+
try:
|
|
304
|
+
duration_ms = int((time.monotonic() - t0) * 1000)
|
|
305
|
+
usage = extract_usage(result)
|
|
306
|
+
if model_name and "model" not in usage:
|
|
307
|
+
usage = {**usage, "model": model_name}
|
|
308
|
+
cost_usd = (
|
|
309
|
+
calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
|
|
310
|
+
if usage.get("model") and usage.get("input_tokens") is not None
|
|
311
|
+
else None
|
|
312
|
+
)
|
|
313
|
+
payload: dict[str, Any] = {
|
|
314
|
+
"platform": platform,
|
|
315
|
+
"agent_name": agent_name,
|
|
316
|
+
"status": status,
|
|
317
|
+
"executed_at": executed_at,
|
|
318
|
+
"duration_ms": duration_ms,
|
|
319
|
+
"error_message": error_message,
|
|
320
|
+
**{k: v for k, v in usage.items() if v is not None},
|
|
321
|
+
}
|
|
322
|
+
if cost_usd is not None:
|
|
323
|
+
payload["cost_usd"] = cost_usd
|
|
324
|
+
if user_id:
|
|
325
|
+
payload["user_id"] = user_id
|
|
326
|
+
if "output_tokens" not in payload and "output_summary" not in payload:
|
|
327
|
+
s = _summarize(result)
|
|
328
|
+
if s:
|
|
329
|
+
payload["output_summary"] = s
|
|
330
|
+
active = get_active_run()
|
|
331
|
+
if active is not None:
|
|
332
|
+
active._record(payload)
|
|
333
|
+
else:
|
|
334
|
+
send_execution(payload)
|
|
335
|
+
except Exception:
|
|
336
|
+
pass
|
|
337
|
+
|
|
338
|
+
models.generate_content = patched_generate
|
|
339
|
+
setattr(models, _PATCHED, True)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def wrap(client: T, *, agent_name: str, platform: str = "sdk", user_id: Optional[str] = None) -> T:
    """Auto-instrument an OpenAI, Anthropic, or Gemini client.

    The client kind is detected first by class name, then by the attributes
    the object exposes; the first matching rule decides which patcher runs.
    A client that matches no rule is returned untouched.

    Args:
        client: The SDK client (or model object) to instrument in place.
        agent_name: Name forwarded to the selected patcher.
        platform: Platform label forwarded to the selected patcher.
        user_id: Optional user identifier forwarded to the selected patcher.

    Returns:
        The same ``client`` object that was passed in.
    """
    kind = type(client).__name__
    patcher = None

    # NOTE(review): a class named "AsyncAnthropic" also satisfies the second
    # rule and is therefore sent to the sync Anthropic patcher — confirm that
    # is intended.
    if "AsyncOpenAI" in kind or "AsyncAzureOpenAI" in kind:
        patcher = _patch_openai_async
    elif "Anthropic" in kind and "OpenAI" not in kind:
        patcher = _patch_anthropic_sync
    elif hasattr(client, "chat") and hasattr(getattr(client, "chat", None), "completions"):
        patcher = _patch_openai_sync
    elif hasattr(client, "messages") and callable(getattr(getattr(client, "messages", None), "create", None)):
        patcher = _patch_anthropic_sync
    elif callable(getattr(client, "generate_content", None)):
        patcher = _patch_gemini_sync
    elif callable(getattr(getattr(client, "models", None), "generate_content", None)):
        patcher = _patch_gemini_new_sync

    if patcher is not None:
        patcher(client, agent_name, platform, user_id)
    return client
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: opsveritas
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Monitor your AI agents with 2 lines of code
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
10
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
11
|
+
Requires-Dist: pytest-mock>=3.12; extra == "dev"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
src/opsveritas/__init__.py
|
|
3
|
+
src/opsveritas/_config.py
|
|
4
|
+
src/opsveritas/_context.py
|
|
5
|
+
src/opsveritas/_http.py
|
|
6
|
+
src/opsveritas/_pricing.py
|
|
7
|
+
src/opsveritas/_usage.py
|
|
8
|
+
src/opsveritas/trace.py
|
|
9
|
+
src/opsveritas/wrap.py
|
|
10
|
+
src/opsveritas.egg-info/PKG-INFO
|
|
11
|
+
src/opsveritas.egg-info/SOURCES.txt
|
|
12
|
+
src/opsveritas.egg-info/dependency_links.txt
|
|
13
|
+
src/opsveritas.egg-info/requires.txt
|
|
14
|
+
src/opsveritas.egg-info/top_level.txt
|
|
15
|
+
tests/test_monitor.py
|
|
16
|
+
tests/test_wrap.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
opsveritas
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
3
|
+
import opsveritas
|
|
4
|
+
from opsveritas.trace import monitor
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@pytest.fixture(autouse=True)
def setup_sdk():
    """Initialise the SDK with a dummy secret and a local endpoint for every test."""
    secret = "test-secret"
    opsveritas.init(secret, endpoint="http://localhost:3001")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Fake OpenAI response
|
|
13
|
+
def fake_openai_response():
    """Return a mock shaped like an OpenAI chat completion (model, usage, empty choices)."""
    token_usage = MagicMock(prompt_tokens=100, completion_tokens=50, total_tokens=150)
    return MagicMock(model="gpt-4o-mini", usage=token_usage, choices=[])
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Fake Anthropic response
|
|
26
|
+
def fake_anthropic_response():
    """Return a mock shaped like an Anthropic message (model, usage, empty content)."""
    token_usage = MagicMock(input_tokens=80, output_tokens=40)
    # No prompt_tokens — distinguishes Anthropic from OpenAI
    del token_usage.prompt_tokens
    return MagicMock(model="claude-3-5-sonnet-20241022", usage=token_usage, content=[])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class TestMonitorSync:
    """``@monitor`` applied to synchronous callables."""

    def test_success_with_openai_shape(self):
        """A successful call posts one payload carrying usage, model and cost."""
        captured = []

        with patch(
            "opsveritas.trace.send_execution",
            side_effect=lambda payload: captured.append(payload),
        ):

            @monitor("sync-openai-agent")
            def run():
                return fake_openai_response()

            result = run()

        assert result is not None
        assert len(captured) == 1
        event = captured[0]
        expected = {
            "agent_name": "sync-openai-agent",
            "status": "success",
            "input_tokens": 100,
            "output_tokens": 50,
            "model": "gpt-4o-mini",
        }
        for field, want in expected.items():
            assert event[field] == want
        assert event["cost_usd"] > 0
        assert event["duration_ms"] >= 0

    def test_failure_sets_status(self):
        """An exception is re-raised and reported as a failed execution."""
        captured = []

        with patch(
            "opsveritas.trace.send_execution",
            side_effect=lambda payload: captured.append(payload),
        ):

            @monitor("failing-agent")
            def run():
                raise ValueError("API down")

            with pytest.raises(ValueError, match="API down"):
                run()

        first = captured[0]
        assert first["status"] == "failed"
        assert first["error_message"] == "API down"

    def test_does_not_crash_if_send_fails(self):
        """A failing telemetry send must not leak into the monitored call."""
        with patch("opsveritas.trace.send_execution", side_effect=Exception("network")):

            @monitor("robust-agent")
            def run():
                return "ok"

            # Should not raise
            outcome = run()
            assert outcome == "ok"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TestMonitorAsync:
    """``@monitor`` applied to coroutine functions."""

    @pytest.mark.asyncio
    async def test_async_success_with_anthropic_shape(self):
        """A successful awaited call posts one payload with Anthropic-style usage."""
        captured = []

        async def record(payload):
            captured.append(payload)

        with patch("opsveritas.trace.send_execution_async", side_effect=record):

            @monitor("async-anthropic-agent")
            async def run():
                return fake_anthropic_response()

            await run()

        assert len(captured) == 1
        event = captured[0]
        expected = {
            "agent_name": "async-anthropic-agent",
            "status": "success",
            "input_tokens": 80,
            "output_tokens": 40,
            "model": "claude-3-5-sonnet-20241022",
        }
        for field, want in expected.items():
            assert event[field] == want
        assert event["cost_usd"] > 0

    @pytest.mark.asyncio
    async def test_async_failure(self):
        """An exception from the coroutine is re-raised and reported as failed."""
        captured = []

        async def record(payload):
            captured.append(payload)

        with patch("opsveritas.trace.send_execution_async", side_effect=record):

            @monitor("async-fail")
            async def run():
                raise RuntimeError("timeout")

            with pytest.raises(RuntimeError):
                await run()

        first = captured[0]
        assert first["status"] == "failed"
        assert "timeout" in first["error_message"]
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
3
|
+
import opsveritas
|
|
4
|
+
from opsveritas.wrap import wrap, _PATCHED
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@pytest.fixture(autouse=True)
def setup_sdk():
    """Initialise the SDK with a dummy secret and a local endpoint for every test."""
    secret = "test-secret"
    opsveritas.init(secret, endpoint="http://localhost:3001")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# ── Client stubs — use plain classes so hasattr() gives exact answers ─────────
|
|
13
|
+
|
|
14
|
+
def make_openai_client(response):
    """Stub with only chat.completions — no messages attribute."""
    # Classes are built bottom-up with type() so each keeps the original name
    # and exposes its member as a class attribute.
    completions_cls = type("Completions", (), {"create": MagicMock(return_value=response)})
    chat_cls = type("Chat", (), {"completions": completions_cls()})
    client_cls = type("FakeOpenAI", (), {"chat": chat_cls()})
    return client_cls()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def make_anthropic_client(response):
    """Stub with only messages — no chat attribute."""
    messages_cls = type("Messages", (), {"create": MagicMock(return_value=response)})
    client_cls = type("FakeAnthropic", (), {"messages": messages_cls()})
    return client_cls()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def make_failing_openai_client():
    """Stub whose create() raises."""
    exploding_create = MagicMock(side_effect=Exception("rate limited"))
    completions_cls = type("Completions", (), {"create": exploding_create})
    chat_cls = type("Chat", (), {"completions": completions_cls()})
    client_cls = type("FakeOpenAI", (), {"chat": chat_cls()})
    return client_cls()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def fake_openai_response():
    """Return a mock shaped like an OpenAI chat completion (model, usage, empty choices)."""
    token_usage = MagicMock(prompt_tokens=120, completion_tokens=60)
    return MagicMock(model="gpt-4o-mini", usage=token_usage, choices=[])
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def fake_anthropic_response():
    """Return a mock shaped like an Anthropic message (model, usage, empty content)."""
    token_usage = MagicMock(input_tokens=90, output_tokens=45)
    # Removing prompt_tokens makes hasattr() report it as absent.
    del token_usage.prompt_tokens
    return MagicMock(model="claude-3-5-sonnet-20241022", usage=token_usage, content=[])
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ── OpenAI tests ──────────────────────────────────────────────────────────────
|
|
83
|
+
|
|
84
|
+
class TestWrapOpenAI:
    """``wrap()`` on a client that exposes ``chat.completions.create``."""

    def test_intercepts_create_and_sends_telemetry(self):
        """The wrapped create() returns the original response and posts one event."""
        captured = []
        response = fake_openai_response()
        client = make_openai_client(response)

        with patch(
            "opsveritas.wrap.send_execution",
            side_effect=lambda payload: captured.append(payload),
        ):
            instrumented = wrap(client, agent_name="wrapped-openai")
            returned = instrumented.chat.completions.create(model="gpt-4o-mini", messages=[])

        assert returned is response
        assert len(captured) == 1
        event = captured[0]
        expected = {
            "agent_name": "wrapped-openai",
            "status": "success",
            "input_tokens": 120,
            "output_tokens": 60,
            "model": "gpt-4o-mini",
        }
        for field, want in expected.items():
            assert event[field] == want
        assert event["cost_usd"] > 0

    def test_failed_call(self):
        """An exception from create() propagates and is reported as failed."""
        captured = []
        client = make_failing_openai_client()

        with patch(
            "opsveritas.wrap.send_execution",
            side_effect=lambda payload: captured.append(payload),
        ):
            instrumented = wrap(client, agent_name="fail-openai")
            with pytest.raises(Exception, match="rate limited"):
                instrumented.chat.completions.create(model="gpt-4o-mini", messages=[])

        first = captured[0]
        assert first["status"] == "failed"
        assert "rate limited" in first["error_message"]

    def test_no_double_patch(self):
        """Wrapping the same client twice must not duplicate telemetry."""
        captured = []
        response = fake_openai_response()
        client = make_openai_client(response)

        with patch(
            "opsveritas.wrap.send_execution",
            side_effect=lambda payload: captured.append(payload),
        ):
            for name in ("agent-a", "agent-b"):  # second call — no-op
                wrap(client, agent_name=name)

            client.chat.completions.create(model="gpt-4o-mini", messages=[])

        assert len(captured) == 1  # only one telemetry event
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ── Anthropic tests ───────────────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
class TestWrapAnthropic:
    """``wrap()`` on a client that exposes ``messages.create``."""

    def test_intercepts_messages_create(self):
        """The wrapped create() returns the original response and reports usage."""
        captured = []
        response = fake_anthropic_response()
        client = make_anthropic_client(response)

        with patch(
            "opsveritas.wrap.send_execution",
            side_effect=lambda payload: captured.append(payload),
        ):
            instrumented = wrap(client, agent_name="wrapped-anthropic")
            returned = instrumented.messages.create(model="claude-3-5-sonnet-20241022", messages=[])

        assert returned is response
        event = captured[0]
        expected = {
            "agent_name": "wrapped-anthropic",
            "input_tokens": 90,
            "output_tokens": 45,
            "model": "claude-3-5-sonnet-20241022",
        }
        for field, want in expected.items():
            assert event[field] == want
        assert event["cost_usd"] > 0
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# ── Gemini tests ─────────────────────────────────────────────────────────────
|
|
168
|
+
|
|
169
|
+
def fake_gemini_response():
    """Return a plain-object stand-in for a Gemini response carrying only usage_metadata."""
    # Use plain classes so hasattr() gives exact answers (no MagicMock auto-attributes)
    usage_cls = type(
        "UsageMetadata",
        (),
        {"prompt_token_count": 110, "candidates_token_count": 55},
    )
    response_cls = type("GeminiResponse", (), {"usage_metadata": usage_cls()})
    return response_cls()
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def make_gemini_model(response):
    """Stub exposing ``model_name`` and a ``generate_content`` mock that returns *response*."""
    members = {
        "model_name": "models/gemini-1.5-flash",
        "generate_content": MagicMock(return_value=response),
    }
    return type("FakeGenerativeModel", (), members)()
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class TestWrapGemini:
    """``wrap()`` on a model object that exposes ``generate_content`` directly."""

    def test_intercepts_generate_content(self):
        """The wrapped call returns the original response and reports usage and model."""
        captured = []
        response = fake_gemini_response()
        model = make_gemini_model(response)

        with patch(
            "opsveritas.wrap.send_execution",
            side_effect=lambda payload: captured.append(payload),
        ):
            instrumented = wrap(model, agent_name="gemini-agent")
            returned = instrumented.generate_content("say hi")

        assert returned is response
        assert len(captured) == 1
        event = captured[0]
        expected = {
            "agent_name": "gemini-agent",
            "status": "success",
            "input_tokens": 110,
            "output_tokens": 55,
            "model": "gemini-1.5-flash",  # "models/" prefix stripped
        }
        for field, want in expected.items():
            assert event[field] == want
        assert event["cost_usd"] > 0

    def test_failed_generate_content(self):
        """An exception from generate_content propagates and is reported as failed."""
        captured = []
        failing_cls = type(
            "FakeGenerativeModel",
            (),
            {
                "model_name": "gemini-1.5-flash",
                "generate_content": MagicMock(side_effect=Exception("quota exceeded")),
            },
        )
        model = failing_cls()

        with patch(
            "opsveritas.wrap.send_execution",
            side_effect=lambda payload: captured.append(payload),
        ):
            instrumented = wrap(model, agent_name="gemini-fail")
            with pytest.raises(Exception, match="quota exceeded"):
                instrumented.generate_content("hi")

        first = captured[0]
        assert first["status"] == "failed"
        assert "quota exceeded" in first["error_message"]
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# ── Unknown client ────────────────────────────────────────────────────────────
|
|
235
|
+
|
|
236
|
+
class TestWrapUnknown:
    """``wrap()`` on an object that matches none of the supported client shapes."""

    def test_passthrough_unknown_client(self):
        """An unrecognised client is handed back as the very same object."""
        stranger = type("Unknown", (), {})()
        assert wrap(stranger, agent_name="unknown") is stranger
|