opsveritas 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: opsveritas
3
+ Version: 0.1.1
4
+ Summary: Monitor your AI agents with 2 lines of code
5
+ License: MIT
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=7.0; extra == "dev"
10
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
11
+ Requires-Dist: pytest-mock>=3.12; extra == "dev"
@@ -0,0 +1,26 @@
1
[build-system]
# The [project] table below (PEP 621 metadata) is only understood by
# setuptools 61.0 and newer; older releases ignore it and would build a
# package without name/version metadata.
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "opsveritas"
7
+ version = "0.1.1"
8
+ description = "Monitor your AI agents with 2 lines of code"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.8"
12
+ dependencies = []
13
+
14
+ [project.optional-dependencies]
15
+ dev = [
16
+ "pytest>=7.0",
17
+ "pytest-asyncio>=0.23",
18
+ "pytest-mock>=3.12",
19
+ ]
20
+
21
+ [tool.setuptools.packages.find]
22
+ where = ["src"]
23
+
24
+ [tool.pytest.ini_options]
25
+ asyncio_mode = "auto"
26
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,7 @@
1
+ from ._config import init
2
+ from ._context import run
3
+ from .trace import monitor
4
+ from .wrap import wrap
5
+
6
# Must match `version` in pyproject.toml / PKG-INFO for this release.
__version__ = "0.1.1"
7
+ __all__ = ["init", "monitor", "run", "wrap"]
@@ -0,0 +1,23 @@
1
+ from __future__ import annotations
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
@dataclass
class _Config:
    """Settings captured by :func:`init` and read back via :func:`get_config`."""

    api_key: str   # key passed to init()
    endpoint: str  # base URL with any trailing "/" removed by init()


# Module-wide settings holder; remains None until init() has succeeded.
_config: Optional[_Config] = None


def init(api_key: str, *, endpoint: str = "https://agents.opsveritas.com") -> None:
    """Store the SDK settings for later use.

    Args:
        api_key: Non-empty string key.
        endpoint: Base URL of the ingestion service; trailing slashes are
            stripped before it is stored.

    Raises:
        ValueError: If ``api_key`` is empty or not a string.
    """
    global _config
    if not (api_key and isinstance(api_key, str)):
        raise ValueError("[OpsVeritas] api_key is required")
    base_url = endpoint.rstrip("/")
    _config = _Config(api_key=api_key, endpoint=base_url)


def get_config() -> _Config:
    """Return the stored settings.

    Raises:
        RuntimeError: If :func:`init` has not been called yet.
    """
    settings = _config
    if settings is not None:
        return settings
    raise RuntimeError("[OpsVeritas] Call opsveritas.init(api_key) before using the SDK")
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+ import threading
3
+ import time
4
+ from datetime import datetime, timezone
5
+ from typing import Any, Optional
6
+
7
# Per-thread slot; its ``current`` attribute holds the innermost active run.
# NOTE(review): this is thread-local, not task-local — asyncio tasks running
# on the same thread share the slot.
_local = threading.local()


class run:
    """
    Group multiple LLM calls into one execution record.

    Usage:
        with opsveritas.run('My Agent') as r:
            resp1 = wrapped_openai.chat.completions.create(...)
            resp2 = wrapped_anthropic.messages.create(...)
        # → ONE execution sent with combined cost, tokens, duration

    Runs may be nested: leaving an inner run makes the enclosing run the
    active one again, so later calls are still recorded on the outer run.
    """

    def __init__(self, agent_name: str, *, user_id: Optional[str] = None) -> None:
        self.agent_name = agent_name
        self.user_id = user_id
        self._start: float = 0.0
        self._executed_at: str = ''
        self._calls: list[dict[str, Any]] = []
        # Run that was active on this thread when we were entered (if any).
        self._previous: Optional['run'] = None

    def __enter__(self) -> 'run':
        """Start timing and make this run the active one for the current thread."""
        self._start = time.monotonic()
        self._executed_at = datetime.now(timezone.utc).isoformat()
        self._previous = getattr(_local, 'current', None)
        _local.current = self
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> bool:
        """Aggregate the recorded calls, send one execution, and never suppress errors."""
        # Restore whichever run was active before us instead of clearing the
        # slot, so nesting does not detach the outer run.
        _local.current = self._previous
        self._previous = None
        try:
            duration_ms = int((time.monotonic() - self._start) * 1000)
            status = 'failed' if exc_type is not None else 'success'
            error_message = str(exc_val) if exc_val is not None else None

            total_input = sum(c.get('input_tokens') or 0 for c in self._calls)
            total_output = sum(c.get('output_tokens') or 0 for c in self._calls)
            total_cost = sum(c.get('cost_usd') or 0.0 for c in self._calls)

            # Preserve call order, deduplicate
            seen: set[str] = set()
            models: list[str] = []
            for c in self._calls:
                m = c.get('model')
                if m and m not in seen:
                    seen.add(m)
                    models.append(m)

            # The platform of the first recorded call labels the whole run.
            platform = (self._calls[0].get('platform') or 'custom_webhook') if self._calls else 'custom_webhook'

            payload: dict[str, Any] = {
                'platform': platform,
                'agent_name': self.agent_name,
                'status': status,
                'executed_at': self._executed_at,
                'duration_ms': duration_ms,
                'error_message': error_message,
            }
            # Zero/empty aggregates are omitted rather than sent as 0.
            if total_input:
                payload['input_tokens'] = total_input
            if total_output:
                payload['output_tokens'] = total_output
            if total_cost:
                payload['cost_usd'] = round(total_cost, 6)
            if models:
                payload['models'] = models
            if self.user_id:
                payload['user_id'] = self.user_id

            # Imported lazily, at the point of use.
            from opsveritas._http import send_execution_sync
            send_execution_sync(payload)
        except Exception:
            # Reporting is best-effort; it must not disturb the with-block.
            pass
        return False  # never suppress exceptions

    def _record(self, sub_call: dict[str, Any]) -> None:
        """Append one instrumented call's payload to this run."""
        self._calls.append(sub_call)


def get_active_run() -> Optional[run]:
    """Return the run currently active on this thread, or None."""
    return getattr(_local, 'current', None)
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+ import atexit
3
+ import json
4
+ import threading
5
+ import urllib.request
6
+ from typing import Any, Optional
7
+ from ._config import get_config
8
+
9
# Sender threads registered by send_execution(); access is guarded by _lock.
_pending: list[threading.Thread] = []
_lock = threading.Lock()


def _flush() -> None:
    """Join every registered sender thread, waiting at most 6 seconds for each.

    The list is copied while holding the lock and the joins happen outside
    it, so new sends are not blocked while waiting.
    """
    _lock.acquire()
    try:
        snapshot = _pending[:]
    finally:
        _lock.release()
    for worker in snapshot:
        worker.join(timeout=6)


# Sender threads are daemons, so wait for them at interpreter exit.
atexit.register(_flush)
21
+
22
+
23
def _post_sync(url: str, api_key: str, payload: dict[str, Any]) -> None:
    """POST ``payload`` as JSON to ``url`` on the calling thread.

    The key travels in the ``x-opsveritas-key`` header and the request uses a
    5 second timeout. Every failure (serialisation, bad URL, network, HTTP
    error) is swallowed; the function always returns None.
    """
    try:
        body = json.dumps(payload).encode("utf-8")
        request_headers = {"Content-Type": "application/json", "x-opsveritas-key": api_key}
        request = urllib.request.Request(url, data=body, headers=request_headers, method="POST")
        # The response body is not read; the context manager just closes it.
        with urllib.request.urlopen(request, timeout=5):
            pass
    except Exception:
        pass  # Never crash user code
36
+
37
+
38
def send_execution(payload: dict[str, Any]) -> None:
    """Send ``payload`` to the agent-execution webhook on a background daemon thread.

    The thread is registered in ``_pending`` so the atexit flush can wait for it.

    Raises:
        RuntimeError: Propagated from ``get_config()`` when the SDK has not
            been initialised.
    """
    cfg = get_config()
    url = f"{cfg.endpoint}/webhooks/agent-execution"
    t = threading.Thread(target=_post_sync, args=(url, cfg.api_key, payload), daemon=True)
    with _lock:
        # Forget senders that have already finished; otherwise the list grows
        # by one Thread object per call for the life of the process.
        _pending[:] = [p for p in _pending if p.is_alive()]
        # Start before registering (and inside the lock) so the list only ever
        # holds started threads: a concurrent prune cannot drop this one, and
        # _flush never tries to join a thread that failed to start.
        t.start()
        _pending.append(t)
45
+
46
+
47
def send_execution_sync(payload: dict[str, Any]) -> None:
    """Deliver ``payload`` on the calling thread and return once the POST is done.

    Intended for callers that are already blocking (e.g. ``run.__exit__``).
    Raises whatever ``get_config()`` raises when the SDK is not initialised.
    """
    settings = get_config()
    target = f"{settings.endpoint}/webhooks/agent-execution"
    _post_sync(target, settings.api_key, payload)
51
+
52
+
53
async def send_execution_async(payload: dict[str, Any]) -> None:
    """Deliver ``payload`` from async code.

    Uses ``httpx.AsyncClient`` (5 second timeout) when httpx can be imported.
    On ImportError the payload is handed to the thread-based
    ``send_execution`` instead. Any other error is silently dropped.
    """
    try:
        import httpx  # optional dependency; ImportError triggers the fallback

        settings = get_config()
        target = f"{settings.endpoint}/webhooks/agent-execution"
        auth_headers = {"x-opsveritas-key": settings.api_key}
        async with httpx.AsyncClient(timeout=5) as http:
            await http.post(target, json=payload, headers=auth_headers)
    except ImportError:
        send_execution(payload)  # fallback to sync thread
    except Exception:
        pass
@@ -0,0 +1,73 @@
1
+ from __future__ import annotations
2
+ from typing import Optional
3
+
4
# Cost per 1M tokens (input, output) in USD.
# Keys are lower-case model-id prefixes. Lookup (see calc_cost) prefers an
# exact key, then the longest key contained in the model id, so versioned ids
# such as "gpt-4.1-mini-2025-04-14" resolve to the most specific entry.
# Keeping more-specific keys ahead of the generic prefixes they share is still
# recommended for readability and for the legacy partial-name fallback.
_PRICING: dict[str, tuple[float, float]] = {
    # ── OpenAI ────────────────────────────────────────────────────────────────
    # GPT-4.1 family (April 2025)
    "gpt-4.1-nano": (0.10, 0.40),
    "gpt-4.1-mini": (0.40, 1.60),
    "gpt-4.1": (2.00, 8.00),
    # GPT-4o
    "gpt-4o-mini": (0.15, 0.60),
    "gpt-4o": (2.50, 10.00),
    # Legacy GPT-4
    "gpt-4-turbo": (10.00, 30.00),
    "gpt-4": (30.00, 60.00),
    "gpt-3.5-turbo": (0.50, 1.50),
    # Reasoning models
    "o4-mini": (1.10, 4.40),
    "o3-mini": (1.10, 4.40),
    "o3": (10.00, 40.00),
    "o1-mini": (3.00, 12.00),
    "o1": (15.00, 60.00),

    # ── Anthropic ─────────────────────────────────────────────────────────────
    # Claude 4 family — matched by substring for versioned IDs
    # e.g. "claude-opus-4-8" matches "claude-opus-4"
    "claude-opus-4": (15.00, 75.00),
    "claude-sonnet-4": (3.00, 15.00),
    "claude-haiku-4": (0.80, 4.00),
    # Claude 3.7
    "claude-3-7-sonnet": (3.00, 15.00),
    # Claude 3.5
    "claude-3-5-sonnet": (3.00, 15.00),
    "claude-3-5-haiku": (0.80, 4.00),
    # Claude 3
    "claude-3-opus": (15.00, 75.00),
    "claude-3-sonnet": (3.00, 15.00),
    "claude-3-haiku": (0.25, 1.25),

    # ── Groq ──────────────────────────────────────────────────────────────────
    # Llama 3.x (new naming: llama-3.x-…)
    "llama-3.3-70b": (0.59, 0.79),
    "llama-3.1-70b": (0.59, 0.79),
    "llama-3.1-8b": (0.05, 0.08),
    # Legacy Groq naming (llama3-…)
    "llama3-70b": (0.59, 0.79),
    "llama3-8b": (0.05, 0.08),
    "mixtral-8x7b": (0.24, 0.24),
    "gemma2-9b": (0.20, 0.20),

    # ── Google Gemini ─────────────────────────────────────────────────────────
    "gemini-2.5-pro": (1.25, 10.00),
    "gemini-2.5-flash": (0.15, 0.60),
    "gemini-2.0-flash": (0.10, 0.40),
    "gemini-1.5-pro": (1.25, 5.00),
    "gemini-1.5-flash": (0.075, 0.30),
}


def _match_pricing_key(name: str) -> Optional[str]:
    """Return the _PRICING key that best matches the lower-cased model id ``name``."""
    # 1. Exact id, e.g. "gpt-4o" must never be priced as "gpt-4o-mini".
    if name in _PRICING:
        return name
    # 2. Versioned id: choose the longest key contained in it, so
    #    "o3-mini-2025-01-31" resolves to "o3-mini" rather than "o3".
    contained = [k for k in _PRICING if k in name]
    if contained:
        return max(contained, key=len)
    # 3. Legacy behaviour: a partial name such as "claude-3-5" takes the first
    #    key (in table order) that contains it.
    return next((k for k in _PRICING if name in k), None)


def calc_cost(model: str, input_tokens: int, output_tokens: int) -> Optional[float]:
    """Estimate the USD cost of one call from the built-in price table.

    Args:
        model: Model id as reported by the provider; matching is case-insensitive.
        input_tokens: Prompt tokens consumed.
        output_tokens: Completion tokens produced.

    Returns:
        The cost rounded to 6 decimals, or None when ``model`` is empty or no
        table entry matches.
    """
    name = model.lower() if model else ""
    if not name:
        # An empty string is a substring of every key; treat it as unknown.
        return None
    key = _match_pricing_key(name)
    if key is None:
        return None
    input_rate, output_rate = _PRICING[key]
    raw = (input_tokens * input_rate + output_tokens * output_rate) / 1_000_000
    return round(raw, 6)
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+ from typing import Any, Optional
3
+
4
+
5
def extract_usage(result: Any) -> dict[str, Any]:
    """Pull token usage out of an OpenAI-, Anthropic- or Gemini-shaped response.

    Detection is by attribute, checked in this order:

    * ``result.usage.prompt_tokens``   → OpenAI shape
    * ``result.usage.input_tokens``    → Anthropic shape
    * ``result.usage_metadata.prompt_token_count`` → Gemini shape

    Returns a dict with ``input_tokens`` / ``output_tokens``; the first two
    shapes also carry ``model`` (possibly None) and, when non-zero,
    ``tool_calls``. Returns ``{}`` when nothing is recognised.
    """
    if result is None:
        return {}

    usage = getattr(result, "usage", None)
    model = getattr(result, "model", None)

    if usage is not None:
        # OpenAI: usage.prompt_tokens / usage.completion_tokens
        openai_in = getattr(usage, "prompt_tokens", None)
        if openai_in is not None:
            openai_out = getattr(usage, "completion_tokens", 0) or 0
            n_tools: Optional[int] = None
            choices = getattr(result, "choices", None)
            if choices:
                # Only the first choice's message is inspected for tool calls.
                first_msg = getattr(choices[0], "message", None)
                calls = getattr(first_msg, "tool_calls", None) if first_msg else None
                if calls:
                    n_tools = len(calls)
            record: dict[str, Any] = {
                "model": model,
                "input_tokens": openai_in,
                "output_tokens": openai_out,
            }
            if n_tools:
                record["tool_calls"] = n_tools
            return record

        # Anthropic: usage.input_tokens / usage.output_tokens
        anthropic_in = getattr(usage, "input_tokens", None)
        if anthropic_in is not None:
            anthropic_out = getattr(usage, "output_tokens", 0) or 0
            blocks = getattr(result, "content", None)
            n_tools = None
            if blocks:
                tool_blocks = sum(1 for blk in blocks if getattr(blk, "type", None) == "tool_use")
                if tool_blocks:
                    n_tools = tool_blocks
            record = {
                "model": model,
                "input_tokens": anthropic_in,
                "output_tokens": anthropic_out,
            }
            if n_tools:
                record["tool_calls"] = n_tools
            return record

    # Gemini: result.usage_metadata.prompt_token_count / candidates_token_count
    # (no "model" key is produced here; the caller has to add it if needed)
    meta = getattr(result, "usage_metadata", None)
    if meta is None:
        return {}
    gemini_in = getattr(meta, "prompt_token_count", None)
    if gemini_in is None:
        return {}
    return {
        "input_tokens": gemini_in,
        "output_tokens": getattr(meta, "candidates_token_count", 0) or 0,
    }
@@ -0,0 +1,138 @@
1
+ from __future__ import annotations
2
+ import functools
3
+ import inspect
4
+ import json
5
+ import time
6
+ from datetime import datetime, timezone
7
+ from typing import Any, Callable, Optional, TypeVar
8
+
9
+ from ._http import send_execution, send_execution_async
10
+ from ._pricing import calc_cost
11
+ from ._usage import extract_usage
12
+
13
+
14
def _summarize(result: Any) -> Optional[str]:
    """Produce a short (≤ 500 chars) ``output_summary`` for a non-LLM return value.

    The server's silent-failure detector treats "no output_tokens and no
    output_summary" as silent, so agents that return structured data instead
    of a raw LLM response need a summary to avoid a false positive.

    Strings are stripped (blank → None); dicts and lists are JSON-encoded with
    ``default=str``; anything else goes through ``str()``. Returns None for a
    None input or if the conversion raises.
    """
    limit = 500
    if result is None:
        return None
    try:
        if isinstance(result, str):
            text = result.strip()
        elif isinstance(result, (dict, list)):
            return json.dumps(result, default=str)[:limit]
        else:
            return str(result)[:limit]
        return text[:limit] or None
    except Exception:
        return None
32
+
33
+ F = TypeVar("F", bound=Callable[..., Any])
34
+
35
+
36
def monitor(
    agent_name: str,
    *,
    platform: str = "custom_webhook",
    user_id: Optional[str] = None,
) -> Callable[[F], F]:
    """Decorator that auto-instruments any sync or async function.

    Each call of the decorated function produces one execution payload with
    status, start time, duration and error message, plus whatever
    ``extract_usage`` recognises in the return value (model, tokens) and a
    cost estimate when the model is priced.

    Args:
        agent_name: Name reported as ``agent_name``.
        platform: Value reported as ``platform``.
        user_id: Optional value reported as ``user_id`` when truthy.

    The wrapped function's return value and exceptions pass through
    unchanged. Failures while building or sending the payload are swallowed.
    """

    def _build_payload(
        status: str,
        error_message: Optional[str],
        executed_at: str,
        started: float,
        result: Any,
    ) -> dict[str, Any]:
        """Assemble the execution payload shared by the sync and async wrappers."""
        duration_ms = int((time.monotonic() - started) * 1000)
        usage = extract_usage(result)
        cost_usd = (
            calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
            if usage.get("model") and usage.get("input_tokens") is not None
            else None
        )
        payload: dict[str, Any] = {
            "platform": platform,
            "agent_name": agent_name,
            "status": status,
            "executed_at": executed_at,
            "duration_ms": duration_ms,
            "error_message": error_message,
            **{k: v for k, v in usage.items() if v is not None},
        }
        if cost_usd is not None:
            payload["cost_usd"] = cost_usd
        if user_id:
            payload["user_id"] = user_id
        # If no token usage was captured (non-LLM return value), derive a
        # summary so the server doesn't false-positive a silent failure.
        if "output_tokens" not in payload and "output_summary" not in payload:
            s = _summarize(result)
            if s:
                payload["output_summary"] = s
        return payload

    def decorator(fn: F) -> F:
        if inspect.iscoroutinefunction(fn):
            @functools.wraps(fn)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                executed_at = datetime.now(timezone.utc).isoformat()
                t0 = time.monotonic()
                status = "success"
                error_message = None
                result = None
                try:
                    result = await fn(*args, **kwargs)
                    return result
                except BaseException as exc:
                    # BaseException, not Exception: asyncio.CancelledError and
                    # KeyboardInterrupt would otherwise be reported as "success".
                    status = "failed"
                    error_message = str(exc) or type(exc).__name__
                    raise
                finally:
                    try:
                        await send_execution_async(
                            _build_payload(status, error_message, executed_at, t0, result)
                        )
                    except Exception:
                        pass  # Never crash user code
            return async_wrapper  # type: ignore[return-value]
        else:
            @functools.wraps(fn)
            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
                executed_at = datetime.now(timezone.utc).isoformat()
                t0 = time.monotonic()
                status = "success"
                error_message = None
                result = None
                try:
                    result = fn(*args, **kwargs)
                    return result
                except BaseException as exc:
                    # Also covers KeyboardInterrupt/SystemExit so an aborted
                    # run is not recorded as a success.
                    status = "failed"
                    error_message = str(exc) or type(exc).__name__
                    raise
                finally:
                    try:
                        send_execution(
                            _build_payload(status, error_message, executed_at, t0, result)
                        )
                    except Exception:
                        pass  # Never crash user code
            return sync_wrapper  # type: ignore[return-value]
    return decorator
@@ -0,0 +1,359 @@
1
+ from __future__ import annotations
2
+ import time
3
+ from datetime import datetime, timezone
4
+ from typing import Any, Optional, TypeVar
5
+
6
+ from ._context import get_active_run
7
+ from ._http import send_execution, send_execution_async
8
+ from ._pricing import calc_cost
9
+ from ._usage import extract_usage
10
+ from .trace import _summarize
11
+
12
+ T = TypeVar("T")
13
+
14
+ _PATCHED = "_opsveritas_patched"
15
+
16
+
17
def _extract_llm_output(result: Any) -> Optional[str]:
    """Return up to 300 characters of the text produced by an LLM response.

    Shapes are tried in order and the first non-blank string wins:

    1. ``result.choices[0].message.content`` (OpenAI / Groq style)
    2. the first ``type == "text"`` block in ``result.content`` (Anthropic style)
    3. ``result.response.text`` (Gemini old SDK)

    Returns None when nothing usable is found or any lookup raises.
    """

    def _usable(candidate: Any) -> bool:
        # A value counts only if it is a string with non-whitespace content.
        return isinstance(candidate, str) and bool(candidate.strip())

    if result is None:
        return None
    try:
        # OpenAI / Groq style
        choices = getattr(result, "choices", None)
        if choices:
            first_message = getattr(choices[0], "message", None)
            reply = getattr(first_message, "content", None)
            if _usable(reply):
                return reply[:300]
        # Anthropic style
        for part in getattr(result, "content", None) or ():
            if getattr(part, "type", None) != "text":
                continue
            part_text = getattr(part, "text", None)
            if _usable(part_text):
                return part_text[:300]
        # Gemini old SDK
        legacy = getattr(result, "response", None)
        if legacy:
            try:
                legacy_text = legacy.text
                if _usable(legacy_text):
                    return legacy_text[:300]
            except Exception:
                pass
    except Exception:
        pass
    return None
47
+
48
+
49
def _patch_openai_sync(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Replace ``client.chat.completions.create`` with an instrumented wrapper.

    No-op when the client has no ``chat.completions`` or it is already
    patched. The wrapper returns or raises exactly what the original call
    does. Afterwards it records one payload on the active ``run`` if there is
    one, otherwise sends it in the background. Telemetry errors are swallowed.
    """
    chat = getattr(client, "chat", None)
    completions = getattr(chat, "completions", None)
    if completions is None:
        return
    if getattr(completions, _PATCHED, None) is True:
        return

    original_create = completions.create

    def patched_create(*args: Any, **kwargs: Any) -> Any:
        started_iso = datetime.now(timezone.utc).isoformat()
        clock_start = time.monotonic()
        outcome = "success"
        failure = None
        response = None
        try:
            response = original_create(*args, **kwargs)
            return response
        except Exception as err:
            outcome = "failed"
            failure = str(err)
            raise
        finally:
            try:
                elapsed_ms = int((time.monotonic() - clock_start) * 1000)
                usage = extract_usage(response)
                if usage.get("model") and usage.get("input_tokens") is not None:
                    cost_usd = calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
                else:
                    cost_usd = None
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": outcome,
                    "executed_at": started_iso,
                    "duration_ms": elapsed_ms,
                    "error_message": failure,
                }
                # Usage fields follow the fixed fields; None values are dropped.
                payload.update((k, v) for k, v in usage.items() if v is not None)
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                summary = _extract_llm_output(response)
                if summary:
                    payload["output_summary"] = summary
                current_run = get_active_run()
                if current_run is None:
                    send_execution(payload)
                else:
                    current_run._record(payload)
            except Exception:
                pass  # Never crash user code

    completions.create = patched_create
    setattr(completions, _PATCHED, True)
104
+
105
+
106
def _patch_openai_async(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Replace ``client.chat.completions.create`` on an async client with an instrumented coroutine.

    No-op when the client has no ``chat.completions`` or it is already
    patched. The wrapper awaits the original ``create`` and returns or raises
    exactly what it does. Afterwards it records one payload on the active
    ``run`` if there is one, otherwise awaits ``send_execution_async``.
    """
    completions = getattr(getattr(client, "chat", None), "completions", None)
    if completions is None or getattr(completions, _PATCHED, None) is True:
        return

    orig_create = completions.create

    async def patched_create(*args: Any, **kwargs: Any) -> Any:
        executed_at = datetime.now(timezone.utc).isoformat()
        t0 = time.monotonic()
        status = "success"
        error_message = None
        result = None
        try:
            result = await orig_create(*args, **kwargs)
            return result
        except Exception as exc:
            status = "failed"
            error_message = str(exc)
            raise
        finally:
            # Telemetry is best-effort. An error raised here (for example the
            # SDK not being initialised) would otherwise escape the finally
            # block and replace the LLM call's own result or exception.
            try:
                duration_ms = int((time.monotonic() - t0) * 1000)
                usage = extract_usage(result)
                cost_usd = (
                    calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
                    if usage.get("model") and usage.get("input_tokens") is not None
                    else None
                )
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": status,
                    "executed_at": executed_at,
                    "duration_ms": duration_ms,
                    "error_message": error_message,
                    **{k: v for k, v in usage.items() if v is not None},
                }
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                s = _extract_llm_output(result)
                if s:
                    payload["output_summary"] = s
                active = get_active_run()
                if active is not None:
                    active._record(payload)
                else:
                    await send_execution_async(payload)
            except Exception:
                pass  # Never crash user code

    completions.create = patched_create
    setattr(completions, _PATCHED, True)
158
+
159
+
160
def _patch_anthropic_sync(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Replace ``client.messages.create`` with an instrumented synchronous wrapper.

    No-op when the client has no ``messages`` attribute or it is already
    patched. The wrapper returns or raises exactly what the original call
    does. Afterwards it records one payload on the active ``run`` if there is
    one, otherwise sends it in the background.

    NOTE(review): the wrapper is synchronous and does not await. If the
    original ``create`` is a coroutine function, the un-awaited coroutine
    would be timed and inspected instead of the response — confirm async
    clients are never routed here.
    """
    messages = getattr(client, "messages", None)
    if messages is None or getattr(messages, _PATCHED, None) is True:
        return

    orig_create = messages.create

    def patched_create(*args: Any, **kwargs: Any) -> Any:
        executed_at = datetime.now(timezone.utc).isoformat()
        t0 = time.monotonic()
        status = "success"
        error_message = None
        result = None
        try:
            result = orig_create(*args, **kwargs)
            return result
        except Exception as exc:
            status = "failed"
            error_message = str(exc)
            raise
        finally:
            # Telemetry is best-effort. An error raised here (for example the
            # SDK not being initialised) would otherwise escape the finally
            # block and replace the LLM call's own result or exception.
            try:
                duration_ms = int((time.monotonic() - t0) * 1000)
                usage = extract_usage(result)
                cost_usd = (
                    calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
                    if usage.get("model") and usage.get("input_tokens") is not None
                    else None
                )
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": status,
                    "executed_at": executed_at,
                    "duration_ms": duration_ms,
                    "error_message": error_message,
                    **{k: v for k, v in usage.items() if v is not None},
                }
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                s = _extract_llm_output(result)
                if s:
                    payload["output_summary"] = s
                active = get_active_run()
                if active is not None:
                    active._record(payload)
                else:
                    send_execution(payload)
            except Exception:
                pass  # Never crash user code

    messages.create = patched_create
    setattr(messages, _PATCHED, True)
212
+
213
+
214
def _patch_gemini_sync(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Replace ``client.generate_content`` with an instrumented wrapper.

    No-op when the object is already patched. The model name is read once at
    patch time from ``client.model_name`` (or ``client.model``) and added to
    the usage data when the response itself carries none. Telemetry errors
    are swallowed.
    """
    already_patched = getattr(client, _PATCHED, None) is True
    if already_patched:
        return

    # Strip "models/" prefix (Gemini SDK uses "models/gemini-1.5-flash" format)
    declared = getattr(client, "model_name", None) or getattr(client, "model", None)
    model_name: Optional[str] = None
    if declared:
        model_name = str(declared).replace("models/", "")

    original_generate = client.generate_content

    def patched_generate(*args: Any, **kwargs: Any) -> Any:
        started_iso = datetime.now(timezone.utc).isoformat()
        clock_start = time.monotonic()
        outcome = "success"
        failure = None
        response = None
        try:
            response = original_generate(*args, **kwargs)
            return response
        except Exception as err:
            outcome = "failed"
            failure = str(err)
            raise
        finally:
            try:
                elapsed_ms = int((time.monotonic() - clock_start) * 1000)
                usage = extract_usage(response)
                # Gemini doesn't embed model in response — inject from instance
                if model_name and "model" not in usage:
                    usage = dict(usage, model=model_name)
                if usage.get("model") and usage.get("input_tokens") is not None:
                    cost_usd = calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
                else:
                    cost_usd = None
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": outcome,
                    "executed_at": started_iso,
                    "duration_ms": elapsed_ms,
                    "error_message": failure,
                }
                payload.update((k, v) for k, v in usage.items() if v is not None)
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                # Only fall back to a generic summary when no token counts exist.
                if not ("output_tokens" in payload or "output_summary" in payload):
                    summary = _summarize(response)
                    if summary:
                        payload["output_summary"] = summary
                current_run = get_active_run()
                if current_run is None:
                    send_execution(payload)
                else:
                    current_run._record(payload)
            except Exception:
                pass

    client.generate_content = patched_generate
    setattr(client, _PATCHED, True)
276
+
277
+
278
def _patch_gemini_new_sync(client: Any, agent_name: str, platform: str, user_id: Optional[str]) -> None:
    """Patch the new google-genai SDK: client.models.generate_content(model=..., contents=...).

    No-op when the client has no ``models`` attribute or it is already
    patched. The model name is taken per call from the ``model`` keyword or,
    failing that, the first positional argument. Telemetry errors are swallowed.
    """
    models = getattr(client, "models", None)
    if models is None:
        return
    if getattr(models, _PATCHED, None) is True:
        return

    original_generate = models.generate_content

    def patched_generate(*args: Any, **kwargs: Any) -> Any:
        started_iso = datetime.now(timezone.utc).isoformat()
        clock_start = time.monotonic()
        outcome = "success"
        failure = None
        response = None
        # model name is passed as kwarg or first positional arg
        requested = kwargs.get("model")
        if not requested:
            requested = args[0] if args else None
        model_name: Optional[str] = None
        if requested:
            model_name = str(requested).replace("models/", "")
        try:
            response = original_generate(*args, **kwargs)
            return response
        except Exception as err:
            outcome = "failed"
            failure = str(err)
            raise
        finally:
            try:
                elapsed_ms = int((time.monotonic() - clock_start) * 1000)
                usage = extract_usage(response)
                if model_name and "model" not in usage:
                    usage = dict(usage, model=model_name)
                if usage.get("model") and usage.get("input_tokens") is not None:
                    cost_usd = calc_cost(usage["model"], usage["input_tokens"], usage["output_tokens"])
                else:
                    cost_usd = None
                payload: dict[str, Any] = {
                    "platform": platform,
                    "agent_name": agent_name,
                    "status": outcome,
                    "executed_at": started_iso,
                    "duration_ms": elapsed_ms,
                    "error_message": failure,
                }
                payload.update((k, v) for k, v in usage.items() if v is not None)
                if cost_usd is not None:
                    payload["cost_usd"] = cost_usd
                if user_id:
                    payload["user_id"] = user_id
                # Only fall back to a generic summary when no token counts exist.
                if not ("output_tokens" in payload or "output_summary" in payload):
                    summary = _summarize(response)
                    if summary:
                        payload["output_summary"] = summary
                current_run = get_active_run()
                if current_run is None:
                    send_execution(payload)
                else:
                    current_run._record(payload)
            except Exception:
                pass

    models.generate_content = patched_generate
    setattr(models, _PATCHED, True)
340
+
341
+
342
def wrap(client: T, *, agent_name: str, platform: str = "sdk", user_id: Optional[str] = None) -> T:
    """Auto-instrument an OpenAI, Anthropic, or Gemini client.

    The client is patched in place and returned. The patch is chosen first by
    class name and then by duck typing, in this order:

    1. class name contains ``AsyncOpenAI`` / ``AsyncAzureOpenAI`` → async OpenAI patch
    2. class name contains ``Anthropic`` (and not ``OpenAI``)     → Anthropic patch
    3. has ``chat.completions``                                   → sync OpenAI patch
    4. has a callable ``messages.create``                         → Anthropic patch
    5. has a callable ``generate_content``                        → Gemini patch
    6. has a callable ``models.generate_content``                 → google-genai patch

    Unrecognised clients are returned untouched.

    NOTE(review): rule 2 also matches a class named ``AsyncAnthropic`` and
    gives it the synchronous patch — confirm that is intended.
    """
    cls_name = type(client).__name__

    if "AsyncOpenAI" in cls_name or "AsyncAzureOpenAI" in cls_name:
        patcher = _patch_openai_async
    elif "Anthropic" in cls_name and "OpenAI" not in cls_name:
        patcher = _patch_anthropic_sync
    elif hasattr(client, "chat") and hasattr(getattr(client, "chat", None), "completions"):
        patcher = _patch_openai_sync
    elif hasattr(client, "messages") and callable(getattr(getattr(client, "messages", None), "create", None)):
        patcher = _patch_anthropic_sync
    elif callable(getattr(client, "generate_content", None)):
        patcher = _patch_gemini_sync
    elif callable(getattr(getattr(client, "models", None), "generate_content", None)):
        patcher = _patch_gemini_new_sync
    else:
        patcher = None

    if patcher is not None:
        patcher(client, agent_name, platform, user_id)
    return client
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: opsveritas
3
+ Version: 0.1.1
4
+ Summary: Monitor your AI agents with 2 lines of code
5
+ License: MIT
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=7.0; extra == "dev"
10
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
11
+ Requires-Dist: pytest-mock>=3.12; extra == "dev"
@@ -0,0 +1,16 @@
1
+ pyproject.toml
2
+ src/opsveritas/__init__.py
3
+ src/opsveritas/_config.py
4
+ src/opsveritas/_context.py
5
+ src/opsveritas/_http.py
6
+ src/opsveritas/_pricing.py
7
+ src/opsveritas/_usage.py
8
+ src/opsveritas/trace.py
9
+ src/opsveritas/wrap.py
10
+ src/opsveritas.egg-info/PKG-INFO
11
+ src/opsveritas.egg-info/SOURCES.txt
12
+ src/opsveritas.egg-info/dependency_links.txt
13
+ src/opsveritas.egg-info/requires.txt
14
+ src/opsveritas.egg-info/top_level.txt
15
+ tests/test_monitor.py
16
+ tests/test_wrap.py
@@ -0,0 +1,5 @@
1
+
2
+ [dev]
3
+ pytest>=7.0
4
+ pytest-asyncio>=0.23
5
+ pytest-mock>=3.12
@@ -0,0 +1 @@
1
+ opsveritas
@@ -0,0 +1,131 @@
1
+ import pytest
2
+ from unittest.mock import MagicMock, patch
3
+ import opsveritas
4
+ from opsveritas.trace import monitor
5
+
6
+
7
@pytest.fixture(autouse=True)
def setup_sdk():
    """Initialise the SDK before every test with a dummy key and a localhost endpoint."""
    opsveritas.init("test-secret", endpoint="http://localhost:3001")
10
+
11
+
12
# Fake OpenAI response
def fake_openai_response():
    """Build a mock shaped like an OpenAI chat completion (100 prompt / 50 completion tokens)."""
    token_usage = MagicMock(prompt_tokens=100, completion_tokens=50, total_tokens=150)
    return MagicMock(model="gpt-4o-mini", usage=token_usage, choices=[])
23
+
24
+
25
# Fake Anthropic response
def fake_anthropic_response():
    """Build a mock shaped like an Anthropic message (80 input / 40 output tokens)."""
    token_usage = MagicMock(input_tokens=80, output_tokens=40)
    # No prompt_tokens — distinguishes Anthropic from OpenAI
    del token_usage.prompt_tokens
    return MagicMock(model="claude-3-5-sonnet-20241022", usage=token_usage, content=[])
37
+
38
+
39
class TestMonitorSync:
    """Tests for @monitor applied to synchronous functions."""

    def test_success_with_openai_shape(self):
        """An OpenAI-shaped return value yields tokens, model and a positive cost."""
        posted = []

        def fake_send(payload):
            posted.append(payload)

        # Patch the name as imported into opsveritas.trace so the wrapper
        # captures payloads instead of starting a sender thread.
        with patch("opsveritas.trace.send_execution", side_effect=fake_send):
            @monitor("sync-openai-agent")
            def run():
                return fake_openai_response()

            result = run()

        assert result is not None
        assert len(posted) == 1
        p = posted[0]
        assert p["agent_name"] == "sync-openai-agent"
        assert p["status"] == "success"
        assert p["input_tokens"] == 100
        assert p["output_tokens"] == 50
        assert p["model"] == "gpt-4o-mini"
        assert p["cost_usd"] > 0
        assert p["duration_ms"] >= 0

    def test_failure_sets_status(self):
        """An exception is re-raised and reported with status 'failed' and its message."""
        posted = []

        def fake_send(payload):
            posted.append(payload)

        with patch("opsveritas.trace.send_execution", side_effect=fake_send):
            @monitor("failing-agent")
            def run():
                raise ValueError("API down")

            with pytest.raises(ValueError, match="API down"):
                run()

        assert posted[0]["status"] == "failed"
        assert posted[0]["error_message"] == "API down"

    def test_does_not_crash_if_send_fails(self):
        """A failing send must not affect the wrapped function's return value."""
        with patch("opsveritas.trace.send_execution", side_effect=Exception("network")):
            @monitor("robust-agent")
            def run():
                return "ok"

            # Should not raise
            assert run() == "ok"
89
+
90
+
91
class TestMonitorAsync:
    """Tests for @monitor applied to coroutine functions."""

    @pytest.mark.asyncio
    async def test_async_success_with_anthropic_shape(self):
        """An Anthropic-shaped return value yields tokens, model and a positive cost."""
        posted = []

        async def fake_send_async(payload):
            posted.append(payload)

        # Patch the name as imported into opsveritas.trace; the async wrapper
        # awaits this coroutine instead of making an HTTP request.
        with patch("opsveritas.trace.send_execution_async", side_effect=fake_send_async):
            @monitor("async-anthropic-agent")
            async def run():
                return fake_anthropic_response()

            await run()

        assert len(posted) == 1
        p = posted[0]
        assert p["agent_name"] == "async-anthropic-agent"
        assert p["status"] == "success"
        assert p["input_tokens"] == 80
        assert p["output_tokens"] == 40
        assert p["model"] == "claude-3-5-sonnet-20241022"
        assert p["cost_usd"] > 0

    @pytest.mark.asyncio
    async def test_async_failure(self):
        """An exception from the coroutine is re-raised and reported as 'failed'."""
        posted = []

        async def fake_send_async(payload):
            posted.append(payload)

        with patch("opsveritas.trace.send_execution_async", side_effect=fake_send_async):
            @monitor("async-fail")
            async def run():
                raise RuntimeError("timeout")

            with pytest.raises(RuntimeError):
                await run()

        assert posted[0]["status"] == "failed"
        assert "timeout" in posted[0]["error_message"]
@@ -0,0 +1,243 @@
1
+ import pytest
2
+ from unittest.mock import MagicMock, patch
3
+ import opsveritas
4
+ from opsveritas.wrap import wrap, _PATCHED
5
+
6
+
7
@pytest.fixture(autouse=True)
def setup_sdk():
    """Initialise the SDK before every test with a dummy key and local endpoint."""
    opsveritas.init(api_key="test-secret", endpoint="http://localhost:3001")
10
+
11
+
12
+ # ── Client stubs — use plain classes so hasattr() gives exact answers ─────────
13
+
14
def make_openai_client(response):
    """Build an OpenAI-shaped stub exposing only ``chat.completions.create``.

    Plain classes are used instead of ``MagicMock`` containers so the client
    has no ``messages`` attribute; only ``create`` itself is a mock, and it
    returns *response*.
    """
    class Completions:
        pass

    class Chat:
        pass

    class FakeOpenAI:
        pass

    # Wire the attribute chain bottom-up: create -> completions -> chat.
    Completions.create = MagicMock(return_value=response)
    Chat.completions = Completions()
    FakeOpenAI.chat = Chat()

    return FakeOpenAI()
28
+
29
+
30
def make_anthropic_client(response):
    """Build an Anthropic-shaped stub exposing only ``messages.create``.

    Plain classes keep the client free of a ``chat`` attribute; ``create`` is
    a mock that returns *response*.
    """
    class Messages:
        pass

    class FakeAnthropic:
        pass

    # Wire the attribute chain bottom-up: create -> messages.
    Messages.create = MagicMock(return_value=response)
    FakeAnthropic.messages = Messages()

    return FakeAnthropic()
41
+
42
+
43
def make_failing_openai_client():
    """Build an OpenAI-shaped stub whose ``chat.completions.create`` raises.

    Every call to ``create`` raises ``Exception("rate limited")``.
    """
    class Completions:
        pass

    class Chat:
        pass

    class FakeOpenAI:
        pass

    # Wire the attribute chain bottom-up: create -> completions -> chat.
    Completions.create = MagicMock(side_effect=Exception("rate limited"))
    Chat.completions = Completions()
    FakeOpenAI.chat = Chat()

    return FakeOpenAI()
57
+
58
+
59
def fake_openai_response():
    """Return a mock shaped like an OpenAI chat completion response.

    The usage object carries ``prompt_tokens`` / ``completion_tokens`` only.
    ``MagicMock`` would otherwise auto-create ``input_tokens`` and
    ``output_tokens`` on first access, making the stub look Anthropic-shaped
    to any ``hasattr()`` check; those names are deleted so the stub does not
    depend on the order in which response shapes are probed.
    """
    usage = MagicMock()
    usage.prompt_tokens = 120
    usage.completion_tokens = 60
    # Deleting an attribute on a mock makes later access raise AttributeError.
    del usage.input_tokens
    del usage.output_tokens

    resp = MagicMock()
    resp.model = "gpt-4o-mini"
    resp.usage = usage
    resp.choices = []
    return resp
68
+
69
+
70
def fake_anthropic_response():
    """Return a mock shaped like an Anthropic messages response.

    The usage object carries ``input_tokens`` / ``output_tokens`` only. Both
    OpenAI-style names (``prompt_tokens`` and ``completion_tokens``) are
    deleted, because ``MagicMock`` would otherwise auto-create them and a
    ``hasattr()`` probe would see an OpenAI-shaped usage object.
    """
    usage = MagicMock()
    usage.input_tokens = 90
    usage.output_tokens = 45
    # Deleting an attribute on a mock makes later access raise AttributeError.
    del usage.prompt_tokens
    del usage.completion_tokens

    resp = MagicMock()
    resp.model = "claude-3-5-sonnet-20241022"
    resp.usage = usage
    resp.content = []
    return resp
80
+
81
+
82
+ # ── OpenAI tests ──────────────────────────────────────────────────────────────
83
+
84
class TestWrapOpenAI:
    """``wrap`` applied to an OpenAI-shaped client (``chat.completions``)."""

    def test_intercepts_create_and_sends_telemetry(self):
        """A successful ``create`` returns the response and posts one event."""
        posted = []

        def fake_send(payload):
            posted.append(payload)

        resp = fake_openai_response()
        client = make_openai_client(resp)

        with patch("opsveritas.wrap.send_execution", side_effect=fake_send):
            wrapped = wrap(client, agent_name="wrapped-openai")
            result = wrapped.chat.completions.create(model="gpt-4o-mini", messages=[])

        # The wrapper must hand back the very same response object.
        assert result is resp
        assert len(posted) == 1
        p = posted[0]
        assert p["agent_name"] == "wrapped-openai"
        assert p["status"] == "success"
        assert p["input_tokens"] == 120
        assert p["output_tokens"] == 60
        assert p["model"] == "gpt-4o-mini"
        assert p["cost_usd"] > 0

    def test_failed_call(self):
        """A raising ``create`` re-raises and posts exactly one failed event."""
        posted = []

        def fake_send(payload):
            posted.append(payload)

        client = make_failing_openai_client()

        with patch("opsveritas.wrap.send_execution", side_effect=fake_send):
            wrapped = wrap(client, agent_name="fail-openai")
            with pytest.raises(Exception, match="rate limited"):
                wrapped.chat.completions.create(model="gpt-4o-mini", messages=[])

        # Check the count before indexing so a missing event is reported as an
        # assertion failure and a duplicate event does not slip through.
        assert len(posted) == 1
        assert posted[0]["status"] == "failed"
        assert "rate limited" in posted[0]["error_message"]

    def test_no_double_patch(self):
        """Wrapping the same client twice still yields one event per call."""
        posted = []

        def fake_send(payload):
            posted.append(payload)

        resp = fake_openai_response()
        client = make_openai_client(resp)

        with patch("opsveritas.wrap.send_execution", side_effect=fake_send):
            wrap(client, agent_name="agent-a")
            wrap(client, agent_name="agent-b")  # second call — no-op

            client.chat.completions.create(model="gpt-4o-mini", messages=[])

        assert len(posted) == 1  # only one telemetry event
140
+
141
+
142
+ # ── Anthropic tests ───────────────────────────────────────────────────────────
143
+
144
class TestWrapAnthropic:
    """``wrap`` applied to an Anthropic-shaped client (``messages``)."""

    def test_intercepts_messages_create(self):
        """A successful ``messages.create`` returns the response and posts one event."""
        posted = []

        def fake_send(payload):
            posted.append(payload)

        resp = fake_anthropic_response()
        client = make_anthropic_client(resp)

        with patch("opsveritas.wrap.send_execution", side_effect=fake_send):
            wrapped = wrap(client, agent_name="wrapped-anthropic")
            result = wrapped.messages.create(model="claude-3-5-sonnet-20241022", messages=[])

        # The wrapper must hand back the very same response object.
        assert result is resp
        # Same count and status checks as the OpenAI and Gemini success tests;
        # the count check also guards the index below.
        assert len(posted) == 1
        p = posted[0]
        assert p["agent_name"] == "wrapped-anthropic"
        assert p["status"] == "success"
        assert p["input_tokens"] == 90
        assert p["output_tokens"] == 45
        assert p["model"] == "claude-3-5-sonnet-20241022"
        assert p["cost_usd"] > 0
165
+
166
+
167
+ # ── Gemini tests ─────────────────────────────────────────────────────────────
168
+
169
def fake_gemini_response():
    """Return a Gemini-shaped response exposing only ``usage_metadata``.

    Built from plain classes rather than ``MagicMock`` so that ``hasattr()``
    is true only for the attributes actually defined here.
    """
    class UsageMetadata:
        prompt_token_count, candidates_token_count = 110, 55

    class GeminiResponse:
        usage_metadata = UsageMetadata()

    reply = GeminiResponse()
    return reply
179
+
180
+
181
def make_gemini_model(response):
    """Build a Gemini-shaped model stub whose ``generate_content`` returns *response*.

    The stub is a plain class with a ``model_name`` carrying the ``models/``
    prefix; only ``generate_content`` is a mock.
    """
    generate_mock = MagicMock(return_value=response)

    class FakeGenerativeModel:
        model_name = "models/gemini-1.5-flash"
        generate_content = generate_mock

    stub = FakeGenerativeModel()
    return stub
187
+
188
+
189
class TestWrapGemini:
    """``wrap`` applied to a Gemini-shaped model (``generate_content``)."""

    def test_intercepts_generate_content(self):
        """A successful ``generate_content`` returns the response and posts one event."""
        posted = []

        def fake_send(payload):
            posted.append(payload)

        resp = fake_gemini_response()
        model = make_gemini_model(resp)

        with patch("opsveritas.wrap.send_execution", side_effect=fake_send):
            wrapped = wrap(model, agent_name="gemini-agent")
            result = wrapped.generate_content("say hi")

        # The wrapper must hand back the very same response object.
        assert result is resp
        assert len(posted) == 1
        p = posted[0]
        assert p["agent_name"] == "gemini-agent"
        assert p["status"] == "success"
        assert p["input_tokens"] == 110
        assert p["output_tokens"] == 55
        assert p["model"] == "gemini-1.5-flash"  # "models/" prefix stripped
        assert p["cost_usd"] > 0

    def test_failed_generate_content(self):
        """A raising ``generate_content`` re-raises and posts exactly one failed event."""
        posted = []

        def fake_send(payload):
            posted.append(payload)

        class FakeGenerativeModel:
            model_name = "gemini-1.5-flash"
            generate_content = MagicMock(side_effect=Exception("quota exceeded"))

        model = FakeGenerativeModel()

        with patch("opsveritas.wrap.send_execution", side_effect=fake_send):
            wrapped = wrap(model, agent_name="gemini-fail")
            with pytest.raises(Exception, match="quota exceeded"):
                wrapped.generate_content("hi")

        # Check the count before indexing so a missing event is reported as an
        # assertion failure and a duplicate event does not slip through.
        assert len(posted) == 1
        assert posted[0]["status"] == "failed"
        assert "quota exceeded" in posted[0]["error_message"]
232
+
233
+
234
+ # ── Unknown client ────────────────────────────────────────────────────────────
235
+
236
class TestWrapUnknown:
    """``wrap`` applied to an object with none of the known client attributes."""

    def test_passthrough_unknown_client(self):
        """An unrecognised client comes back as the same object."""
        class Unknown:
            pass

        original = Unknown()
        assert wrap(original, agent_name="unknown") is original