caum 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- caum-1.0.0/PKG-INFO +99 -0
- caum-1.0.0/README.md +81 -0
- caum-1.0.0/caum/__init__.py +28 -0
- caum-1.0.0/caum/client.py +131 -0
- caum-1.0.0/caum/proxy.py +48 -0
- caum-1.0.0/caum/wrappers.py +211 -0
- caum-1.0.0/caum.egg-info/PKG-INFO +99 -0
- caum-1.0.0/caum.egg-info/SOURCES.txt +22 -0
- caum-1.0.0/caum.egg-info/dependency_links.txt +1 -0
- caum-1.0.0/caum.egg-info/requires.txt +1 -0
- caum-1.0.0/caum.egg-info/top_level.txt +1 -0
- caum-1.0.0/pyproject.toml +26 -0
- caum-1.0.0/setup.cfg +4 -0
- caum-1.0.0/setup.py +24 -0
- caum-1.0.0/tests/test_adaptive.py +116 -0
- caum-1.0.0/tests/test_adaptive_baseline.py +86 -0
- caum-1.0.0/tests/test_adversarial.py +199 -0
- caum-1.0.0/tests/test_anchor.py +177 -0
- caum-1.0.0/tests/test_cp2_coverage.py +142 -0
- caum-1.0.0/tests/test_edge.py +130 -0
- caum-1.0.0/tests/test_integrations.py +254 -0
- caum-1.0.0/tests/test_properties.py +276 -0
- caum-1.0.0/tests/test_sbert.py +97 -0
- caum-1.0.0/tests/test_vault_signing.py +128 -0
caum-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: caum
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: AI Agent Observability - Detect when your agent is wasting money
|
|
5
|
+
Home-page: https://caum.systems
|
|
6
|
+
Author: Andres Silva
|
|
7
|
+
Author-email: Andres Silva <contact@caum.systems>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Project-URL: Homepage, https://caum.systems
|
|
10
|
+
Project-URL: Documentation, https://caum.systems/docs
|
|
11
|
+
Project-URL: Dashboard, https://caum.systems/dashboard
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: requests>=2.28.0
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: home-page
|
|
17
|
+
Dynamic: requires-python
|
|
18
|
+
|
|
19
|
+
# CAUM — AI Agent Observability
|
|
20
|
+
|
|
21
|
+
Detect when your AI agent is wasting money. Zero semantic access.
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install caum
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Quick Start (5 lines)
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import caum
|
|
33
|
+
|
|
34
|
+
caum.init("caum_live_your_api_key")
|
|
35
|
+
caum.observe("session-1", tool="bash", content="pytest tests/")
|
|
36
|
+
caum.observe("session-1", tool="edit", content="Fixed the bug")
|
|
37
|
+
result = caum.end("session-1")
|
|
38
|
+
print(f"Tier: {result['tier']}, Cost wasted: ${result['cost']}")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Zero-Code Integration (Anthropic)
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import anthropic
|
|
45
|
+
import caum
|
|
46
|
+
|
|
47
|
+
caum.init("caum_live_your_api_key")
|
|
48
|
+
client = caum.wrap_anthropic(anthropic.Anthropic())
|
|
49
|
+
|
|
50
|
+
# Use client exactly as before — CAUM observes automatically
|
|
51
|
+
response = client.messages.create(
|
|
52
|
+
model="claude-sonnet-4-20250514",
|
|
53
|
+
messages=[{"role": "user", "content": "Fix the auth bug"}],
|
|
54
|
+
tools=[...],
|
|
55
|
+
)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Zero-Code Integration (OpenAI)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import openai
|
|
62
|
+
import caum
|
|
63
|
+
|
|
64
|
+
caum.init("caum_live_your_api_key")
|
|
65
|
+
client = caum.wrap_openai(openai.OpenAI())
|
|
66
|
+
|
|
67
|
+
# Use client exactly as before
|
|
68
|
+
response = client.chat.completions.create(
|
|
69
|
+
model="gpt-5",
|
|
70
|
+
messages=[...],
|
|
71
|
+
tools=[...],
|
|
72
|
+
)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## What CAUM Detects
|
|
76
|
+
|
|
77
|
+
| Regime | Meaning | Your Agent Is... |
|
|
78
|
+
|--------|---------|------------------|
|
|
79
|
+
| EXPLORER | Healthy | Using diverse tools, making progress |
|
|
80
|
+
| GRIND | Degraded | Repeating similar actions |
|
|
81
|
+
| LOOP | Critical | Stuck in a repetitive cycle |
|
|
82
|
+
| REASONING_LOOP | Critical | Thinking in circles, not acting |
|
|
83
|
+
| STAGNATION | Critical | Same tool, same result, over and over |
|
|
84
|
+
|
|
85
|
+
## How It Works
|
|
86
|
+
|
|
87
|
+
CAUM observes the **structure** of your agent's behavior — which tools it uses, in what order, how often it repeats. It never reads your prompts, code, or data.
|
|
88
|
+
|
|
89
|
+
When your agent enters a wasteful pattern (looping, stagnating, circular reasoning), CAUM detects it and alerts you. You decide what to do. CAUM never stops your agent.
|
|
90
|
+
|
|
91
|
+
## Get Your API Key
|
|
92
|
+
|
|
93
|
+
Sign up at [caum.systems/pilot](https://caum.systems/pilot/) — 14-day free trial, 1,000 steps included.
|
|
94
|
+
|
|
95
|
+
## Links
|
|
96
|
+
|
|
97
|
+
- Website: [caum.systems](https://caum.systems)
|
|
98
|
+
- Dashboard: [caum.systems/dashboard](https://caum.systems/dashboard)
|
|
99
|
+
- Docs: [caum.systems/docs](https://caum.systems/docs)
|
caum-1.0.0/README.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# CAUM — AI Agent Observability
|
|
2
|
+
|
|
3
|
+
Detect when your AI agent is wasting money. Zero semantic access.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install caum
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start (5 lines)
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import caum
|
|
15
|
+
|
|
16
|
+
caum.init("caum_live_your_api_key")
|
|
17
|
+
caum.observe("session-1", tool="bash", content="pytest tests/")
|
|
18
|
+
caum.observe("session-1", tool="edit", content="Fixed the bug")
|
|
19
|
+
result = caum.end("session-1")
|
|
20
|
+
print(f"Tier: {result['tier']}, Cost wasted: ${result['cost']}")
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Zero-Code Integration (Anthropic)
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import anthropic
|
|
27
|
+
import caum
|
|
28
|
+
|
|
29
|
+
caum.init("caum_live_your_api_key")
|
|
30
|
+
client = caum.wrap_anthropic(anthropic.Anthropic())
|
|
31
|
+
|
|
32
|
+
# Use client exactly as before — CAUM observes automatically
|
|
33
|
+
response = client.messages.create(
|
|
34
|
+
model="claude-sonnet-4-20250514",
|
|
35
|
+
messages=[{"role": "user", "content": "Fix the auth bug"}],
|
|
36
|
+
tools=[...],
|
|
37
|
+
)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Zero-Code Integration (OpenAI)
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
import openai
|
|
44
|
+
import caum
|
|
45
|
+
|
|
46
|
+
caum.init("caum_live_your_api_key")
|
|
47
|
+
client = caum.wrap_openai(openai.OpenAI())
|
|
48
|
+
|
|
49
|
+
# Use client exactly as before
|
|
50
|
+
response = client.chat.completions.create(
|
|
51
|
+
model="gpt-5",
|
|
52
|
+
messages=[...],
|
|
53
|
+
tools=[...],
|
|
54
|
+
)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## What CAUM Detects
|
|
58
|
+
|
|
59
|
+
| Regime | Meaning | Your Agent Is... |
|
|
60
|
+
|--------|---------|------------------|
|
|
61
|
+
| EXPLORER | Healthy | Using diverse tools, making progress |
|
|
62
|
+
| GRIND | Degraded | Repeating similar actions |
|
|
63
|
+
| LOOP | Critical | Stuck in a repetitive cycle |
|
|
64
|
+
| REASONING_LOOP | Critical | Thinking in circles, not acting |
|
|
65
|
+
| STAGNATION | Critical | Same tool, same result, over and over |
|
|
66
|
+
|
|
67
|
+
## How It Works
|
|
68
|
+
|
|
69
|
+
CAUM observes the **structure** of your agent's behavior — which tools it uses, in what order, how often it repeats. It never reads your prompts, code, or data.
|
|
70
|
+
|
|
71
|
+
When your agent enters a wasteful pattern (looping, stagnating, circular reasoning), CAUM detects it and alerts you. You decide what to do. CAUM never stops your agent.
|
|
72
|
+
|
|
73
|
+
## Get Your API Key
|
|
74
|
+
|
|
75
|
+
Sign up at [caum.systems/pilot](https://caum.systems/pilot/) — 14-day free trial, 1,000 steps included.
|
|
76
|
+
|
|
77
|
+
## Links
|
|
78
|
+
|
|
79
|
+
- Website: [caum.systems](https://caum.systems)
|
|
80
|
+
- Dashboard: [caum.systems/dashboard](https://caum.systems/dashboard)
|
|
81
|
+
- Docs: [caum.systems/docs](https://caum.systems/docs)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CAUM SDK — AI Agent Observability
|
|
3
|
+
==================================
|
|
4
|
+
Detect when your AI agent is wasting money. 5 lines to integrate.
|
|
5
|
+
|
|
6
|
+
Quick start:
|
|
7
|
+
import caum
|
|
8
|
+
caum.init("caum_live_your_api_key")
|
|
9
|
+
caum.observe("my-session", tool="bash", content="pytest tests/")
|
|
10
|
+
caum.observe("my-session", tool="edit", content="Fixed bug on line 42")
|
|
11
|
+
result = caum.end("my-session")
|
|
12
|
+
print(result["tier"]) # T1-T5
|
|
13
|
+
|
|
14
|
+
As a proxy (zero code changes):
|
|
15
|
+
caum watch --api-key caum_live_xxx --provider anthropic
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
__version__ = "1.0.0"
|
|
19
|
+
|
|
20
|
+
from .client import init, observe, end, get_status
|
|
21
|
+
from .proxy import CaumProxy
|
|
22
|
+
from .wrappers import wrap_anthropic, wrap_openai
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"init", "observe", "end", "get_status",
|
|
26
|
+
"CaumProxy",
|
|
27
|
+
"wrap_anthropic", "wrap_openai",
|
|
28
|
+
]
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CAUM Client — Core API client for observing AI agents.
|
|
3
|
+
"""
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import threading
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger("caum")
|
|
12
|
+
|
|
13
|
+
_API_URL = "https://caum-observation-production.up.railway.app"
|
|
14
|
+
_API_KEY = None
|
|
15
|
+
_TIMEOUT = 10
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def init(api_key: str, api_url: Optional[str] = None):
    """Store the credentials used by every later CAUM API call.

    Args:
        api_key: Your CAUM API key (starts with caum_live_).
        api_url: Base URL of a self-hosted CAUM API. Trailing slashes are
            stripped. When omitted or empty, the current endpoint is kept.
    """
    global _API_KEY, _API_URL
    _API_KEY = api_key
    # Only override the endpoint when a non-empty URL was supplied.
    _API_URL = api_url.rstrip("/") if api_url else _API_URL
    logger.info(f"CAUM initialized (endpoint: {_API_URL})")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _post(path: str, data: dict) -> dict:
    """Send ``data`` as JSON to ``path`` on the CAUM API with bearer auth.

    Args:
        path: Endpoint path appended verbatim to the configured base URL.
        data: JSON-serializable request body.

    Returns:
        The decoded JSON response, or ``{"error": "<message>"}`` when the
        request fails with any ``requests`` exception (including a non-2xx
        status raised by ``raise_for_status``).

    Raises:
        RuntimeError: If no API key has been configured.
    """
    if not _API_KEY:
        raise RuntimeError("CAUM not initialized. Call caum.init('your_api_key') first.")

    url = f"{_API_URL}{path}"
    auth_headers = {
        "Authorization": f"Bearer {_API_KEY}",
        "Content-Type": "application/json",
    }
    try:
        reply = requests.post(url, json=data, headers=auth_headers, timeout=_TIMEOUT)
        reply.raise_for_status()
        payload = reply.json()
    except requests.exceptions.RequestException as err:
        # Best effort: report the failure to the caller instead of raising.
        logger.warning(f"CAUM API error: {err}")
        return {"error": str(err)}
    return payload
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _post_async(path: str, data: dict):
    """Run ``_post(path, data)`` on a daemon thread and return immediately.

    The return value of ``_post`` is discarded, so the caller never waits on
    (or sees the outcome of) the request.
    """
    threading.Thread(target=_post, args=(path, data), daemon=True).start()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def observe(session_id: str, tool: str, content: str = "",
            step: Optional[int] = None, agent_name: Optional[str] = None,
            task: Optional[str] = None, cost_usd: float = 0.0,
            blocking: bool = False) -> Optional[dict]:
    """Report one tool call made by the agent to the ``/v1/step`` endpoint.

    Call this every time the agent uses a tool. The default mode is
    fire-and-forget so the agent is not slowed down by the request.

    Args:
        session_id: Unique ID for this agent session.
        tool: Tool name (bash, edit, read, reasoning, etc.).
        content: What the tool did; only the first 500 characters are sent.
        step: Step number; left out of the payload when ``None``.
        agent_name: Name of the agent; left out of the payload when empty.
        task: Task description; left out of the payload when empty.
        cost_usd: Cost of this step in USD.
        blocking: When True, wait for the API response and return it.

    Returns:
        The API response dict when ``blocking`` is True, otherwise ``None``.
    """
    payload = {
        "session_id": session_id,
        "tool": tool,
        "content": content[:500] if content else "",
        "cost_usd": cost_usd,
    }
    if step is not None:
        payload["step"] = step
    # Optional descriptive fields are only sent when they carry a value.
    for key, value in (("agent_name", agent_name), ("task", task)):
        if value:
            payload[key] = value

    if not blocking:
        _post_async("/v1/step", payload)
        return None
    return _post("/v1/step", payload)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def end(session_id: str, resolved: Optional[bool] = None) -> dict:
    """End a session and get the final assessment.

    Args:
        session_id: The session to finalize. It is percent-encoded before
            being placed in the URL path, so IDs containing ``/``, ``?``,
            ``#`` or spaces still address the intended session.
        resolved: Did the agent succeed? True/False, or None to leave the
            field out of the request.

    Returns:
        The response dict from ``_post`` (the API's assessment, or an
        ``{"error": ...}`` dict when the request failed).

    Raises:
        RuntimeError: If CAUM has not been initialized (raised by ``_post``).
    """
    from urllib.parse import quote

    data = {}
    if resolved is not None:
        data["resolved"] = resolved
    # safe="" also escapes "/" so the ID can never add extra path segments.
    encoded_id = quote(str(session_id), safe="")
    return _post(f"/v1/session/{encoded_id}/end", data)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def get_status() -> dict:
    """Get your CAUM usage and calibration status from ``/v1/usage``.

    Returns:
        The decoded JSON response, or ``{"error": "<message>"}`` when the
        request fails with any ``requests`` exception.

    Raises:
        RuntimeError: If no API key has been configured.
    """
    if not _API_KEY:
        raise RuntimeError("CAUM not initialized. Call caum.init('your_api_key') first.")
    try:
        resp = requests.get(
            f"{_API_URL}/v1/usage",
            headers={"Authorization": f"Bearer {_API_KEY}"},
            timeout=_TIMEOUT,
        )
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.RequestException as e:
        # Log like _post does so failed status checks are not silent.
        logger.warning(f"CAUM API error: {e}")
        return {"error": str(e)}
|
caum-1.0.0/caum/proxy.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CAUM Proxy — Sits between your agent and the LLM API.
|
|
3
|
+
Zero code changes required.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
from caum import CaumProxy
|
|
7
|
+
proxy = CaumProxy(api_key="caum_live_xxx", provider="anthropic")
|
|
8
|
+
proxy.start(port=8080)
|
|
9
|
+
|
|
10
|
+
# Point your agent's API base URL to http://localhost:8080
|
|
11
|
+
# CAUM intercepts, observes, and forwards to the real API
|
|
12
|
+
"""
|
|
13
|
+
import logging
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger("caum.proxy")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CaumProxy:
    """HTTP proxy that intercepts LLM API calls and observes tool usage.

    This is a placeholder for the full proxy implementation.
    The proxy will:
    1. Listen on a local port
    2. Intercept POST requests to /v1/messages (Anthropic) or /v1/chat/completions (OpenAI)
    3. Forward to the real API
    4. Parse the response for tool_use blocks
    5. Send observations to CAUM API (non-blocking)
    6. Return the original response unchanged

    The agent sees no difference. CAUM observes everything.
    """

    def __init__(self, api_key: str, provider: str = "anthropic",
                 caum_api_url: str = None):
        """Remember the proxy settings and initialize the CAUM client.

        Args:
            api_key: CAUM API key, forwarded to ``client.init``.
            provider: Name of the upstream LLM provider.
            caum_api_url: Optional custom CAUM API URL, forwarded to
                ``client.init``.
        """
        self.api_key = api_key
        self.provider = provider
        self.caum_api_url = caum_api_url
        from . import client
        client.init(api_key, caum_api_url)

    def start(self, port: int = 8080):
        """Start the proxy server.

        Args:
            port: Local port the proxy is meant to listen on.

        Raises:
            NotImplementedError: Always; the proxy is not implemented yet.
        """
        logger.info(f"CAUM proxy starting on port {port} (provider: {self.provider})")
        # The f prefix is required here, otherwise "{port}" is logged literally.
        logger.info(f"Point your agent's API base URL to http://localhost:{port}")
        # TODO: implement with httpx or aiohttp reverse proxy
        raise NotImplementedError(
            "Full proxy coming soon. Use caum.wrap_anthropic() or caum.wrap_openai() instead."
        )
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CAUM Wrappers — Zero-code integration for Anthropic and OpenAI.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
import anthropic
|
|
6
|
+
import caum
|
|
7
|
+
|
|
8
|
+
caum.init("caum_live_xxx")
|
|
9
|
+
client = caum.wrap_anthropic(anthropic.Anthropic())
|
|
10
|
+
|
|
11
|
+
# Use client exactly as before — CAUM observes automatically
|
|
12
|
+
response = client.messages.create(
|
|
13
|
+
model="claude-sonnet-4-20250514",
|
|
14
|
+
messages=[{"role": "user", "content": "Fix the bug in auth.py"}],
|
|
15
|
+
tools=[...],
|
|
16
|
+
)
|
|
17
|
+
"""
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
import uuid
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
from . import client as caum_client
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger("caum.wrapper")
|
|
26
|
+
|
|
27
|
+
_step_counters = {} # session_id -> step count
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _next_step(session_id: str) -> int:
    """Advance the counter for ``session_id`` and return it (first call gives 1)."""
    step = _step_counters.get(session_id, 0) + 1
    _step_counters[session_id] = step
    return step
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _extract_tool_calls_anthropic(response) -> list:
    """Turn the content blocks of an Anthropic response into observations.

    ``tool_use`` blocks become ``{"tool", "content", "id"}`` entries whose
    content is the JSON-encoded input, cut to 500 characters. Non-empty
    ``text`` blocks become ``"reasoning"`` entries with the text cut to 500
    characters. Any exception stops extraction, is logged at debug level,
    and whatever was collected so far is returned.
    """
    observed = []
    try:
        for part in response.content:
            if part.type == "tool_use":
                entry = {
                    "tool": part.name,
                    "content": json.dumps(part.input)[:500] if part.input else "",
                    "id": part.id,
                }
                observed.append(entry)
            elif part.type == "text" and part.text:
                entry = {"tool": "reasoning", "content": part.text[:500]}
                observed.append(entry)
    except Exception as err:
        logger.debug(f"Error extracting Anthropic tools: {err}")
    return observed
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _extract_tool_calls_openai(response) -> list:
    """Turn the first choice of an OpenAI chat response into observations.

    Each tool call becomes a ``{"tool", "content", "id"}`` entry whose
    content is the argument string cut to 500 characters. Non-empty message
    content is appended afterwards as a ``"reasoning"`` entry. Any exception
    stops extraction, is logged at debug level, and whatever was collected
    so far is returned.
    """
    observed = []
    try:
        message = response.choices[0].message
        if message.tool_calls:
            for call in message.tool_calls:
                entry = {
                    "tool": call.function.name,
                    "content": call.function.arguments[:500] if call.function.arguments else "",
                    "id": call.id,
                }
                observed.append(entry)
        if message.content:
            observed.append({"tool": "reasoning", "content": message.content[:500]})
    except Exception as err:
        logger.debug(f"Error extracting OpenAI tools: {err}")
    return observed
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class _AnthropicMessagesWrapper:
    """Wraps anthropic.Anthropic().messages to auto-observe tool calls.

    ``create`` is forwarded to the wrapped object and its response is
    returned unchanged; every extracted tool call / text block is reported
    through ``caum_client.observe``. Any other attribute is delegated to the
    wrapped object.
    """

    def __init__(self, original_messages, session_id: Optional[str] = None):
        """Store the wrapped ``messages`` object and pick a session ID.

        Args:
            original_messages: The object whose ``create`` is intercepted.
            session_id: Session ID to report under; a random ``caum-…`` ID
                is generated when omitted.
        """
        self._original = original_messages
        self._session_id = session_id or f"caum-{uuid.uuid4().hex[:12]}"

    def create(self, *args, **kwargs):
        """Call the wrapped ``create`` and observe the response.

        Observation is best effort: a failure while observing is logged at
        debug level and never prevents the response from being returned.
        """
        response = self._original.create(*args, **kwargs)
        try:
            self._observe(response)
        except Exception as e:
            logger.debug(f"CAUM observation failed: {e}")
        return response

    def _observe(self, response):
        """Report each extracted tool call, attaching the cost to the last one."""
        tool_calls = _extract_tool_calls_anthropic(response)
        if not tool_calls:
            return
        # The cost must be known before sending: previously it was computed
        # after the observations were dispatched and therefore never reported.
        cost = self._estimate_cost(response)
        last = len(tool_calls) - 1
        for idx, tc in enumerate(tool_calls):
            caum_client.observe(
                session_id=self._session_id,
                tool=tc["tool"],
                content=tc["content"],
                step=_next_step(self._session_id),
                cost_usd=cost if idx == last else 0.0,
            )

    @staticmethod
    def _estimate_cost(response) -> float:
        """Rough USD cost from ``response.usage`` (Claude Sonnet pricing)."""
        usage = getattr(response, "usage", None)
        # "or 0" covers a missing usage object as well as None token counts.
        input_tokens = getattr(usage, "input_tokens", 0) or 0
        output_tokens = getattr(usage, "output_tokens", 0) or 0
        return (input_tokens * 3 + output_tokens * 15) / 1_000_000

    def __getattr__(self, name):
        # Only reached when normal lookup fails. Read _original through
        # __dict__ so a half-built instance (copy/unpickle) raises
        # AttributeError instead of recursing forever.
        try:
            target = self.__dict__["_original"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(target, name)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class _AnthropicWrapper:
    """Wraps anthropic.Anthropic() client.

    ``messages`` is replaced by an observing wrapper; every other attribute
    is delegated to the wrapped client.
    """

    def __init__(self, original_client, session_id: Optional[str] = None):
        """Wrap ``original_client`` and install the observing ``messages``.

        Args:
            original_client: The client whose ``messages`` is wrapped.
            session_id: Session ID to report under (auto-generated by the
                messages wrapper when omitted).
        """
        self._original = original_client
        self.messages = _AnthropicMessagesWrapper(
            original_client.messages, session_id)
        # Mirror the ID actually in use (it may have been auto-generated) so
        # it can be read back from the wrapper, e.g. to end the session.
        self._session_id = self.messages._session_id

    def __getattr__(self, name):
        # Only reached when normal lookup fails, so "messages" never needs a
        # special case here. Read _original through __dict__ so a half-built
        # instance (copy/unpickle) raises AttributeError instead of
        # recursing forever.
        try:
            target = self.__dict__["_original"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(target, name)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class _OpenAIChatCompletionsWrapper:
    """Wraps openai.OpenAI().chat.completions to auto-observe.

    ``create`` is forwarded to the wrapped object and its response is
    returned unchanged; every extracted tool call / message content is
    reported through ``caum_client.observe``. Any other attribute is
    delegated to the wrapped object.
    """

    def __init__(self, original_completions, session_id: Optional[str] = None):
        """Store the wrapped ``completions`` object and pick a session ID.

        Args:
            original_completions: The object whose ``create`` is intercepted.
            session_id: Session ID to report under; a random ``caum-…`` ID
                is generated when omitted.
        """
        self._original = original_completions
        self._session_id = session_id or f"caum-{uuid.uuid4().hex[:12]}"

    def create(self, *args, **kwargs):
        """Call the wrapped ``create`` and observe the response.

        Observation is best effort: a failure while observing is logged at
        debug level and never prevents the response from being returned.
        """
        response = self._original.create(*args, **kwargs)
        try:
            for tc in _extract_tool_calls_openai(response):
                caum_client.observe(
                    session_id=self._session_id,
                    tool=tc["tool"],
                    content=tc["content"],
                    step=_next_step(self._session_id),
                )
        except Exception as e:
            logger.debug(f"CAUM observation failed: {e}")
        return response

    def __getattr__(self, name):
        # Only reached when normal lookup fails. Read _original through
        # __dict__ so a half-built instance (copy/unpickle) raises
        # AttributeError instead of recursing forever.
        try:
            target = self.__dict__["_original"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(target, name)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class _OpenAIChatWrapper:
    """Wraps openai.OpenAI().chat.

    ``completions`` is replaced by an observing wrapper; every other
    attribute is delegated to the wrapped ``chat`` object.
    """

    def __init__(self, original_chat, session_id):
        """Wrap ``original_chat`` and install the observing ``completions``.

        Args:
            original_chat: The object whose ``completions`` is wrapped.
            session_id: Session ID passed on to the completions wrapper.
        """
        self._original = original_chat
        self.completions = _OpenAIChatCompletionsWrapper(
            original_chat.completions, session_id)

    def __getattr__(self, name):
        # Only reached when normal lookup fails, so "completions" never needs
        # a special case here. Read _original through __dict__ so a
        # half-built instance (copy/unpickle) raises AttributeError instead
        # of recursing forever.
        try:
            target = self.__dict__["_original"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(target, name)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class _OpenAIWrapper:
    """Wraps openai.OpenAI() client.

    ``chat`` is replaced by an observing wrapper; every other attribute is
    delegated to the wrapped client.
    """

    def __init__(self, original_client, session_id):
        """Wrap ``original_client`` and install the observing ``chat``.

        Args:
            original_client: The client whose ``chat`` is wrapped.
            session_id: Session ID passed on to the chat wrapper.
        """
        self._original = original_client
        self.chat = _OpenAIChatWrapper(original_client.chat, session_id)

    def __getattr__(self, name):
        # Only reached when normal lookup fails, so "chat" never needs a
        # special case here. Read _original through __dict__ so a half-built
        # instance (copy/unpickle) raises AttributeError instead of
        # recursing forever.
        try:
            target = self.__dict__["_original"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(target, name)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def wrap_anthropic(client, session_id: Optional[str] = None):
    """Return an observing wrapper around an Anthropic client.

    Args:
        client: An anthropic.Anthropic() instance.
        session_id: Session ID to report under (auto-generated if omitted).

    Returns:
        A wrapped client, to be used exactly like the original.

    Example:
        import anthropic, caum
        caum.init("caum_live_xxx")
        client = caum.wrap_anthropic(anthropic.Anthropic())
        response = client.messages.create(...)  # CAUM observes automatically
    """
    wrapped = _AnthropicWrapper(client, session_id)
    return wrapped
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def wrap_openai(client, session_id: Optional[str] = None):
    """Return an observing wrapper around an OpenAI client.

    Args:
        client: An openai.OpenAI() instance.
        session_id: Session ID to report under (auto-generated if omitted).

    Returns:
        A wrapped client, to be used exactly like the original.

    Example:
        import openai, caum
        caum.init("caum_live_xxx")
        client = caum.wrap_openai(openai.OpenAI())
        response = client.chat.completions.create(...)  # CAUM observes automatically
    """
    wrapped = _OpenAIWrapper(client, session_id)
    return wrapped
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: caum
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: AI Agent Observability - Detect when your agent is wasting money
|
|
5
|
+
Home-page: https://caum.systems
|
|
6
|
+
Author: Andres Silva
|
|
7
|
+
Author-email: Andres Silva <contact@caum.systems>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Project-URL: Homepage, https://caum.systems
|
|
10
|
+
Project-URL: Documentation, https://caum.systems/docs
|
|
11
|
+
Project-URL: Dashboard, https://caum.systems/dashboard
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: requests>=2.28.0
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: home-page
|
|
17
|
+
Dynamic: requires-python
|
|
18
|
+
|
|
19
|
+
# CAUM — AI Agent Observability
|
|
20
|
+
|
|
21
|
+
Detect when your AI agent is wasting money. Zero semantic access.
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install caum
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Quick Start (5 lines)
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import caum
|
|
33
|
+
|
|
34
|
+
caum.init("caum_live_your_api_key")
|
|
35
|
+
caum.observe("session-1", tool="bash", content="pytest tests/")
|
|
36
|
+
caum.observe("session-1", tool="edit", content="Fixed the bug")
|
|
37
|
+
result = caum.end("session-1")
|
|
38
|
+
print(f"Tier: {result['tier']}, Cost wasted: ${result['cost']}")
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Zero-Code Integration (Anthropic)
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import anthropic
|
|
45
|
+
import caum
|
|
46
|
+
|
|
47
|
+
caum.init("caum_live_your_api_key")
|
|
48
|
+
client = caum.wrap_anthropic(anthropic.Anthropic())
|
|
49
|
+
|
|
50
|
+
# Use client exactly as before — CAUM observes automatically
|
|
51
|
+
response = client.messages.create(
|
|
52
|
+
model="claude-sonnet-4-20250514",
|
|
53
|
+
messages=[{"role": "user", "content": "Fix the auth bug"}],
|
|
54
|
+
tools=[...],
|
|
55
|
+
)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Zero-Code Integration (OpenAI)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import openai
|
|
62
|
+
import caum
|
|
63
|
+
|
|
64
|
+
caum.init("caum_live_your_api_key")
|
|
65
|
+
client = caum.wrap_openai(openai.OpenAI())
|
|
66
|
+
|
|
67
|
+
# Use client exactly as before
|
|
68
|
+
response = client.chat.completions.create(
|
|
69
|
+
model="gpt-5",
|
|
70
|
+
messages=[...],
|
|
71
|
+
tools=[...],
|
|
72
|
+
)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## What CAUM Detects
|
|
76
|
+
|
|
77
|
+
| Regime | Meaning | Your Agent Is... |
|
|
78
|
+
|--------|---------|------------------|
|
|
79
|
+
| EXPLORER | Healthy | Using diverse tools, making progress |
|
|
80
|
+
| GRIND | Degraded | Repeating similar actions |
|
|
81
|
+
| LOOP | Critical | Stuck in a repetitive cycle |
|
|
82
|
+
| REASONING_LOOP | Critical | Thinking in circles, not acting |
|
|
83
|
+
| STAGNATION | Critical | Same tool, same result, over and over |
|
|
84
|
+
|
|
85
|
+
## How It Works
|
|
86
|
+
|
|
87
|
+
CAUM observes the **structure** of your agent's behavior — which tools it uses, in what order, how often it repeats. It never reads your prompts, code, or data.
|
|
88
|
+
|
|
89
|
+
When your agent enters a wasteful pattern (looping, stagnating, circular reasoning), CAUM detects it and alerts you. You decide what to do. CAUM never stops your agent.
|
|
90
|
+
|
|
91
|
+
## Get Your API Key
|
|
92
|
+
|
|
93
|
+
Sign up at [caum.systems/pilot](https://caum.systems/pilot/) — 14-day free trial, 1,000 steps included.
|
|
94
|
+
|
|
95
|
+
## Links
|
|
96
|
+
|
|
97
|
+
- Website: [caum.systems](https://caum.systems)
|
|
98
|
+
- Dashboard: [caum.systems/dashboard](https://caum.systems/dashboard)
|
|
99
|
+
- Docs: [caum.systems/docs](https://caum.systems/docs)
|