agentpulse-py 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentpulse/__init__.py ADDED
@@ -0,0 +1,264 @@
1
+ """
2
+ AgentPulse Python SDK — /agent-metrics ingestion
3
+
4
+ Zero-config setup via environment variables:
5
+ AGENTPULSE_API_KEY="at_xxx"
6
+ AGENTPULSE_URL="https://agentpulse-backend.up.railway.app" # optional
7
+ AGENTPULSE_SERVICE="my-agent" # optional
8
+
9
+ ── Sync usage ────────────────────────────────────────────────────────────────
10
+
11
+ import agentpulse
12
+ import time
13
+
14
+ with agentpulse.track_run("my-agent", model="llama-3.3-70b") as run:
15
+ t0 = time.time_ns()
16
+ result = llm.call(prompt)
17
+ run.add_step(
18
+ "llm_response",
19
+ input=prompt,
20
+ output=result.text,
21
+ tokens=result.usage.total_tokens,
22
+ latency=(time.time_ns() - t0) // 1_000_000,
23
+ )
24
+
25
+ agentpulse.flush() # call before process exit
26
+
27
+ ── Async usage ───────────────────────────────────────────────────────────────
28
+
29
+ async with agentpulse.async_track_run("my-agent") as run:
30
+ run.add_step(...)
31
+
32
+ ── Decorator usage ───────────────────────────────────────────────────────────
33
+
34
+ @agentpulse.traced_run("my-agent")
35
+ def handle_query(query: str) -> str:
36
+ ...
37
+
38
+ ── Manual init (optional, overrides env vars) ────────────────────────────────
39
+
40
+ agentpulse.init(
41
+ api_key="at_xxx",
42
+ base_url="https://api.agentpulse.io",
43
+ batch_size=30,
44
+ flush_interval=3.0,
45
+ )
46
+ """
47
+
48
+ import logging
49
+ import os
50
+ from contextlib import asynccontextmanager, contextmanager
51
+ from functools import wraps
52
+ from typing import Optional, List
53
+
54
+ from ._config import Config, set_config
55
+ from ._run import AgentRun
56
+ from ._worker import IngestionWorker
57
+ from ._client import send_batch
58
+ from ._wrap_anthropic import wrap_anthropic
59
+ from ._wrap_openai import wrap_openai
60
+ from ._wrap_gemini import wrap_gemini
61
+ from ._wrap_grok import wrap_grok
62
+ from ._wrap_bedrock import wrap_bedrock
63
+
64
# Shared SDK logger; handlers/levels are left to the host application.
logger = logging.getLogger("agentpulse")

# ── Module-level singleton (set by init / _auto_init) ─────────────────────────
# These are the ONLY module-level variables. They are written once at startup
# and then read-only. All per-run state lives in AgentRun instances on the stack.

# Background ingestion worker; stays None until init() succeeds, and every
# enqueue/flush path treats None as "SDK disabled".
_worker: Optional[IngestionWorker] = None
# Active configuration; mirrored into _config module via set_config().
_config: Optional[Config] = None
72
+
73
+
74
+ # ── Initialisation ────────────────────────────────────────────────────────────
75
+
76
def init(
    api_key: str,
    base_url: str = "https://agentpulse-backend.up.railway.app",
    service_name: str = "agent",
    batch_size: int = 20,
    flush_interval: float = 2.0,
    max_queue_size: int = 1000,
    max_retries: int = 4,
    retry_base_delay: float = 0.5,
) -> None:
    """
    Explicitly initialise the SDK.

    Optional — env vars (AGENTPULSE_API_KEY etc.) trigger auto-init on import.
    Calling init() again replaces both the worker and the config.
    """
    global _worker, _config

    _config = Config(
        api_key=api_key,
        base_url=base_url.rstrip("/"),
        service_name=service_name,
        batch_size=batch_size,
        flush_interval=flush_interval,
        max_queue_size=max_queue_size,
        max_retries=max_retries,
        retry_base_delay=retry_base_delay,
    )
    set_config(_config)

    def _send(batch):
        # Reads the module-level _config at call time (not the local value),
        # preserving the original closure semantics across re-init().
        return send_batch(
            batch,
            api_key=_config.api_key,
            base_url=_config.base_url,
            max_retries=_config.max_retries,
            base_delay=_config.retry_base_delay,
        )

    _worker = IngestionWorker(
        send_fn=_send,
        batch_size=_config.batch_size,
        flush_interval=_config.flush_interval,
        max_queue_size=_config.max_queue_size,
    )
117
+
118
+
119
def configure(
    api_key: str,
    base_url: str = "https://agentpulse-backend.up.railway.app",
    service_name: str = "agent",
    **kwargs,
) -> None:
    """Alias for init() — mirrors the agentpulse JS SDK's configure() API."""
    kwargs.update(api_key=api_key, base_url=base_url, service_name=service_name)
    init(**kwargs)
127
+
128
+
129
def _auto_init() -> None:
    """Run once on import; does nothing unless AGENTPULSE_API_KEY is set."""
    key = os.getenv("AGENTPULSE_API_KEY")
    if key:
        init(
            api_key=key,
            base_url=os.getenv("AGENTPULSE_URL", "https://agentpulse-backend.up.railway.app"),
            service_name=os.getenv("AGENTPULSE_SERVICE", "agent"),
        )
139
+
140
+
141
+ # ── Core: enqueue a finished run ──────────────────────────────────────────────
142
+
143
def _enqueue(run: AgentRun) -> None:
    """Serialize *run* and hand it to the background worker (non-blocking)."""
    worker = _worker
    if worker is not None:
        worker.enqueue(run.to_payload())
    # else: SDK never initialised — drop silently rather than crash the agent.
149
+
150
+
151
+ # ── Context managers ──────────────────────────────────────────────────────────
152
+
153
@contextmanager
def track_run(
    agent_name: str,
    *,
    model: Optional[str] = None,
    user_id: Optional[str] = None,
    tags: Optional[List[str]] = None,
):
    """
    Sync context manager for tracking one agent run.

    A fresh AgentRun is created per invocation, so concurrent runs share no
    state. The run is enqueued on exit in every case: on success as-is; on an
    unhandled exception it is first marked failed, enqueued, and then the
    exception propagates to the caller.

    Example:
        with agentpulse.track_run("hotel-search", model="llama-3.3-70b") as run:
            run.add_step("llm_response", input="...", output="...", tokens=200, latency=350)
    """
    run = AgentRun(agent_name, model=model, user_id=user_id, tags=tags)
    try:
        try:
            yield run
        except Exception as err:
            run.fail(str(err))
            raise
    finally:
        # Unconditional enqueue so the backend records failures too.
        _enqueue(run)
181
+
182
+
183
@asynccontextmanager
async def async_track_run(
    agent_name: str,
    *,
    model: Optional[str] = None,
    user_id: Optional[str] = None,
    tags: Optional[List[str]] = None,
):
    """
    Async counterpart of track_run for use inside coroutines.

    Enqueueing is a synchronous, non-blocking queue push, so no await is
    needed and the event loop is never blocked.

    Example:
        async with agentpulse.async_track_run("my-agent") as run:
            run.add_step(...)
    """
    run = AgentRun(agent_name, model=model, user_id=user_id, tags=tags)
    try:
        try:
            yield run
        except Exception as err:
            run.fail(str(err))
            raise
    finally:
        # Enqueue in every case — success or failure — before leaving scope.
        _enqueue(run)
210
+
211
+
212
+ # ── Decorator ─────────────────────────────────────────────────────────────────
213
+
214
def traced_run(fn_or_name=None):
    """
    Decorator version of track_run. Wraps a function in a tracked run.

    @agentpulse.traced_run                 # uses function name as agent name
    def my_agent(query): ...

    @agentpulse.traced_run("hotel-search")  # explicit agent name
    def handle_hotels(query): ...

    Only run-level status and duration are captured automatically; to record
    per-step inputs/outputs, use track_run() directly and call run.add_step().
    (The previous docstring claimed the first string argument was recorded as
    user input — it never was.)
    """
    def _decorate(fn, name: str):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            # The run object is created/enqueued by track_run; the wrapped
            # function's return value passes through untouched.
            with track_run(name):
                return fn(*args, **kwargs)
        return wrapper

    if callable(fn_or_name):
        # Bare @traced_run — the function itself was passed in.
        return _decorate(fn_or_name, fn_or_name.__name__)

    # @traced_run("name") or @traced_run() — return a decorator factory.
    explicit_name = fn_or_name
    def factory(fn):
        return _decorate(fn, explicit_name or fn.__name__)
    return factory
241
+
242
+
243
+ # ── Shutdown helpers ──────────────────────────────────────────────────────────
244
+
245
def flush(timeout: float = 10.0) -> None:
    """
    Block until every queued run has been sent, or *timeout* seconds elapse.

    Call at the end of short-lived scripts so nothing is lost:

        if __name__ == "__main__":
            run_agent(query)
            agentpulse.flush()

    Long-running services don't need this — the background thread drains the
    queue continuously, and the atexit handler gives a best-effort flush on
    normal process exit.
    """
    worker = _worker
    if not worker:
        return  # SDK never initialised — nothing to flush
    worker.flush(timeout)
261
+
262
+
263
# ── Bootstrap (runs once on import) ───────────────────────────────────────────
# Importing the package auto-initialises the SDK iff AGENTPULSE_API_KEY is set.
_auto_init()
agentpulse/_client.py ADDED
@@ -0,0 +1,102 @@
1
+ """
2
+ HTTP client for /agent-metrics ingestion.
3
+
4
+ Design decisions:
5
+ - Stdlib only (urllib.request) — no extra dependencies.
6
+ - Each payload is sent as a separate POST. A future batch endpoint on the
7
+ backend could collapse this into one request per flush cycle.
8
+ - Retries use exponential backoff with jitter to avoid thundering herd when
9
+ the backend recovers from a blip.
10
+ - Never raises — failure is logged and the payload is discarded. The agent
11
+ process must never crash due to observability errors.
12
+ """
13
+
14
+ import json
15
+ import logging
16
+ import random
17
+ import time
18
+ import urllib.error
19
+ import urllib.request
20
+ from typing import List
21
+
22
+ logger = logging.getLogger("agentpulse")
23
+
24
+
25
def send_batch(
    payloads: List[dict],
    api_key: str,
    base_url: str,
    max_retries: int = 4,
    base_delay: float = 0.5,
) -> None:
    """
    Send each run payload in *payloads* to the /agent-metrics endpoint.

    Payloads are POSTed one at a time — the current backend accepts a single
    run per request — and sequentially, so one worker thread never issues
    parallel requests against the backend.
    """
    endpoint = f"{base_url.rstrip('/')}/agent-metrics"
    common_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    for body in payloads:
        _send_one_with_retry(body, endpoint, common_headers, max_retries, base_delay)
47
+
48
+
49
def _send_one_with_retry(
    payload: dict,
    url: str,
    headers: dict,
    max_retries: int,
    base_delay: float,
) -> bool:
    """
    POST one payload, retrying transient failures with exponential backoff.

    Retryable: network-level errors, HTTP 429, HTTP 5xx.
    Non-retryable: other 4xx responses (bad payload, wrong key, …).

    Returns True on a 2xx response, False once the payload is given up on.
    """
    body = json.dumps(payload).encode()
    total_attempts = max_retries + 1

    for attempt in range(total_attempts):
        err_msg = None
        try:
            request = urllib.request.Request(url, data=body, method="POST", headers=headers)
            with urllib.request.urlopen(request, timeout=10):
                return True  # any 2xx — done

        except urllib.error.HTTPError as http_err:
            if http_err.code >= 500 or http_err.code == 429:
                # Transient server trouble or rate limit — retry below.
                err_msg = f"HTTP {http_err.code}"
            else:
                # Other 4xx: retrying cannot help, drop the payload.
                logger.error(
                    "[agentpulse] Permanent error %d for run '%s' — dropping.",
                    http_err.code,
                    payload.get("agentName", "?"),
                )
                return False

        except Exception as exc:
            # Network failures, timeouts, etc. — treated as transient.
            err_msg = str(exc)

        if attempt == max_retries:
            logger.error(
                "[agentpulse] Gave up sending run '%s' after %d attempts: %s",
                payload.get("agentName", "?"), total_attempts, err_msg,
            )
        else:
            # Exponential backoff plus jitter to spread out retry storms.
            delay = base_delay * (2 ** attempt) + random.uniform(0, 0.1)
            logger.warning(
                "[agentpulse] Send failed (attempt %d/%d): %s. Retrying in %.2fs.",
                attempt + 1, total_attempts, err_msg, delay,
            )
            time.sleep(delay)

    return False
agentpulse/_config.py ADDED
@@ -0,0 +1,34 @@
1
+ """
2
+ Configuration dataclass for the AgentPulse SDK.
3
+
4
+ Populated either via agentpulse.init() or auto-read from environment variables.
5
+ Kept as a plain dataclass — no global mutation after init.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Optional
10
+
11
# Process-wide active configuration; written by set_config(), read by get_config().
_active: Optional["Config"] = None
12
+
13
+
14
@dataclass
class Config:
    """Runtime settings for the SDK. Built by agentpulse.init() / auto-init."""

    api_key: str                    # backend auth token ("at_..." in docs)
    base_url: str = "https://agentpulse-backend.up.railway.app"
    service_name: str = "agent"

    # Worker tuning
    batch_size: int = 20            # max runs per flush cycle
    flush_interval: float = 2.0     # seconds between forced flushes
    max_queue_size: int = 1000      # drop runs (with warning) if queue exceeds this
    max_retries: int = 4            # per-run retry attempts (exponential backoff)
    retry_base_delay: float = 0.5   # seconds — doubles each retry
26
+
27
+
28
def set_config(cfg: Config) -> None:
    """Install *cfg* as the process-wide active configuration."""
    global _active
    _active = cfg
31
+
32
+
33
def get_config() -> Optional[Config]:
    """Return the active configuration, or None if init() has not run."""
    return _active
agentpulse/_run.py ADDED
@@ -0,0 +1,152 @@
1
+ """
2
+ AgentRun — per-run context object.
3
+
4
+ One instance is created per agent invocation. It lives entirely on the call
5
+ stack (via context manager / decorator), so there is NO shared mutable state
6
+ between concurrent runs.
7
+
8
+ Design decisions:
9
+ - Plain object, no threading primitives needed — each run is owned by exactly
10
+ one coroutine/thread until it's enqueued as an immutable dict payload.
11
+ - Duration is computed at finalization time (on __exit__), not at creation,
12
+ so the clock starts the moment the context is entered.
13
+ - Steps are appended in order. The caller is responsible for latency timing
14
+ around individual steps (see example in agent.py).
15
+ """
16
+
17
+ import time
18
+ from typing import Optional, List
19
+
20
+
21
class Step:
    """One recorded step (LLM call, tool call, …) inside an AgentRun."""

    # __slots__ keeps per-step memory small when runs record many steps.
    __slots__ = ("step_type", "input", "output", "tokens", "latency", "cost", "status")

    def __init__(
        self,
        step_type: str,
        input: str = "",
        output: str = "",
        tokens: int = 0,
        latency: int = 0,
        cost: float = 0.0,
        status: str = "success",
    ) -> None:
        self.step_type = step_type  # e.g. "llm_response", "user_prompt"
        self.input = input
        self.output = output
        self.tokens = tokens
        self.latency = latency  # milliseconds
        self.cost = cost
        self.status = status  # "success" or "failed" (see AgentRun.add_step)
41
+
42
+
43
class AgentRun:
    """
    Per-invocation tracking context. Create via track_run / async_track_run:

        with track_run("my-agent") as run:
            run.add_step("llm_response", input="...", output="...", tokens=120, latency=400)

    or:

        async with async_track_run("my-agent") as run:
            run.add_step(...)

    Accumulates ordered steps plus token/cost totals and a success/failed
    status, then serialises itself exactly once via to_payload() at run end.
    run.fail("message") marks the run failed explicitly; the context managers
    also do this automatically for unhandled exceptions.
    """

    def __init__(
        self,
        agent_name: str,
        *,
        model: Optional[str] = None,
        user_id: Optional[str] = None,
        tags: Optional[List[str]] = None,
    ) -> None:
        self.agent_name = agent_name
        self.model = model
        self.user_id = user_id
        self.tags = tags or []

        self._steps: List[Step] = []
        self._total_tokens: int = 0
        self._total_cost: float = 0.0
        self._status: str = "success"
        self._error: Optional[str] = None
        # The clock starts at construction, which is the moment the tracking
        # context is entered.
        self._start_ns: int = time.time_ns()

    # ── Public API ─────────────────────────────────────────────────────────────

    def add_step(
        self,
        step_type: str,
        *,
        input: str = "",
        output: str = "",
        tokens: int = 0,
        latency: int = 0,
        cost: float = 0.0,
        status: str = "success",
    ) -> None:
        """
        Record one step (LLM call, tool call, etc.).

        The caller measures latency itself, in milliseconds:
            t0 = time.time_ns()
            result = llm.call(...)
            run.add_step("llm_response", latency=(time.time_ns() - t0) // 1_000_000, ...)
        """
        step = Step(step_type, input, output, tokens, latency, cost, status)
        self._steps.append(step)
        self._total_tokens += tokens
        self._total_cost += cost
        # Any failed step flips the whole run to failed.
        if status == "failed":
            self._status = "failed"

    def fail(self, error: str) -> None:
        """Mark the run failed with an explicit error message."""
        self._status = "failed"
        self._error = error

    # ── Serialization ──────────────────────────────────────────────────────────

    def to_payload(self) -> dict:
        """
        Build the /agent-metrics request body.
        Called once at run end — duration is computed here, not at __init__.
        """
        elapsed_ms = (time.time_ns() - self._start_ns) // 1_000_000

        body: dict = {
            "agentName": self.agent_name,
            "status": self._status,
            "duration": elapsed_ms,
            "tokens": self._total_tokens,
            "cost": self._total_cost,
        }

        # Optional fields are emitted only when truthy, in a fixed order.
        for key, value in (
            ("model", self.model),
            ("error", self._error),
            ("userId", self.user_id),
            ("tags", self.tags),
        ):
            if value:
                body[key] = value

        if self._steps:
            body["steps"] = [
                {
                    "stepType": s.step_type,
                    "input": s.input,
                    "output": s.output,
                    "tokens": s.tokens,
                    "latency": s.latency,
                    "cost": s.cost,
                    "status": s.status,
                }
                for s in self._steps
            ]

        return body
agentpulse/_send.py ADDED
@@ -0,0 +1,87 @@
1
+ """
2
+ _send.py — Fire-and-forget POST to AgentPulse backend /agent-metrics.
3
+ Uses stdlib only (urllib) — no extra dependencies.
4
+ """
5
+
6
+ import json
7
+ import threading
8
+ import urllib.request
9
+ from typing import Any, Dict, Optional
10
+
11
+
12
+ def _post(url: str, api_key: str, payload: Dict[str, Any]) -> None:
13
+ try:
14
+ data = json.dumps(payload).encode()
15
+ req = urllib.request.Request(
16
+ url,
17
+ data=data,
18
+ headers={
19
+ "Content-Type": "application/json",
20
+ "Authorization": f"Bearer {api_key}",
21
+ },
22
+ method="POST",
23
+ )
24
+ urllib.request.urlopen(req, timeout=10)
25
+ except Exception:
26
+ pass # Never crash the agent
27
+
28
+
29
def send_to_backend(payload: Dict[str, Any]) -> None:
    """
    Fire-and-forget: POST *payload* from a background thread.

    NOTE(review): the thread is non-daemon, so interpreter shutdown waits for
    the POST (bounded by its 10s timeout) — presumably deliberate so data
    isn't lost on exit; confirm before changing.
    """
    from agentpulse._config import get_config

    cfg = get_config()
    if cfg is None:
        return  # SDK not configured — silently skip
    sender = threading.Thread(
        target=_post,
        args=(f"{cfg.base_url}/agent-metrics", cfg.api_key, payload),
        daemon=False,
    )
    sender.start()
38
+
39
+
40
async def async_send_to_backend(payload: Dict[str, Any]) -> None:
    """
    Async fire-and-forget: schedule the POST on the default thread executor.

    The returned future is intentionally not awaited — the executor runs the
    request to completion even though this coroutine returns immediately.
    """
    import asyncio
    from agentpulse._config import get_config

    cfg = get_config()
    if not cfg:
        return  # SDK not configured — silently skip
    url = f"{cfg.base_url}/agent-metrics"
    # Fix: get_event_loop() is deprecated inside coroutines since Python
    # 3.10; get_running_loop() is the supported call (we are always inside a
    # running loop here, since this is an async def).
    loop = asyncio.get_running_loop()
    loop.run_in_executor(None, _post, url, cfg.api_key, payload)
50
+
51
+
52
def build_payload(
    agent_name: str,
    model: str,
    prompt: str,
    output: str,
    input_tokens: int,
    output_tokens: int,
    latency: int,
    status: str,
    error: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Build a two-step /agent-metrics body: a "user_prompt" step carrying the
    prompt, followed by an "llm_response" step carrying the output, tokens
    and latency. *error*, when given, is attached to the response step.
    """
    total_tokens = input_tokens + output_tokens

    prompt_step: Dict[str, Any] = {
        "stepType": "user_prompt",
        "input": prompt,
        "output": "",
        "tokens": 0,
        "latency": 0,
        "status": "success",
    }
    response_step: Dict[str, Any] = {
        "stepType": "llm_response",
        "input": "",
        "output": output,
        "tokens": total_tokens,
        "latency": latency,
        "status": status,
    }
    if error:
        response_step["error"] = error

    return {
        "agentName": agent_name,
        "model": model,
        "status": status,
        # One run == one LLM round-trip here, so run duration is the call latency.
        "duration": latency,
        "tokens": total_tokens,
        "inputTokens": input_tokens,
        "outputTokens": output_tokens,
        "tags": [],
        "steps": [prompt_step, response_step],
    }