agentos-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentos/tracing.py ADDED
@@ -0,0 +1,168 @@
1
+ """Trace context management — ID generation, context propagation, spans.
2
+
3
+ Uses ``contextvars`` for async-safe, thread-safe context propagation.
4
+ Supports nested traces and spans via context managers.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import contextvars
10
+ import os
11
+ import uuid
12
+ from collections.abc import Generator
13
+ from contextlib import contextmanager
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime, timezone
16
+ from typing import Any
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # ID generators
20
+ # ---------------------------------------------------------------------------
21
+
22
+
23
def generate_trace_id() -> str:
    """Return a fresh trace ID: the 32-character lowercase hex form of a UUID4.

    The length and alphabet match the W3C trace-id format.
    """
    fresh_uuid = uuid.uuid4()
    return fresh_uuid.hex
26
+
27
+
28
def generate_span_id() -> str:
    """Return a fresh span ID: 8 random bytes rendered as 16 lowercase hex chars.

    The length and alphabet match the W3C span-id (parent-id) format.
    """
    random_bytes = os.urandom(8)
    return random_bytes.hex()
31
+
32
+
33
def generate_event_id() -> str:
    """Return a new event ID as a canonical UUID string.

    Uses ``uuid.uuid7`` when the running interpreter provides it and falls
    back to ``uuid.uuid4`` otherwise.
    """
    try:
        event_uuid = uuid.uuid7()
    except AttributeError:
        # This interpreter's ``uuid`` module has no uuid7; use a random UUID.
        event_uuid = uuid.uuid4()
    return str(event_uuid)
39
+
40
+
41
def utcnow_iso() -> str:
    """Return the current time in UTC as a timezone-aware ISO 8601 string."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.isoformat()
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # Trace context
48
+ # ---------------------------------------------------------------------------
49
+
50
+
51
@dataclass
class TraceContext:
    """Holds the current trace/span identity and metadata.

    Passed through ``contextvars`` so nested ``@observe`` / ``with span()``
    calls automatically inherit the parent context.

    ``trace_id``, ``span_id`` and ``agent_id`` are required; every other
    field is optional and defaults to ``None`` (or an empty container).
    """

    # Required identity of the trace, the span, and the agent.
    trace_id: str
    span_id: str
    agent_id: str
    # span_id of the enclosing span; None for a root span.
    parent_span_id: str | None = None
    # Optional correlation identifiers carried alongside the span.
    task_run_id: str | None = None
    user_id: str | None = None
    session_id: str | None = None
    # Free-form metadata; each instance gets its own empty dict by default.
    metadata: dict[str, Any] = field(default_factory=dict)

    # Mutable observation state (updated via trace_context helper)
    # Excluded from repr; each instance gets its own empty container.
    _observation_updates: dict[str, Any] = field(default_factory=dict, repr=False)
    _trace_updates: dict[str, Any] = field(default_factory=dict, repr=False)
    _scores: list[dict[str, Any]] = field(default_factory=list, repr=False)
72
+
73
+
74
# The global context variable holding the current trace context.
# Its default is None, so reading it outside any trace/span yields None.
_current_context: contextvars.ContextVar[TraceContext | None] = contextvars.ContextVar(
    "agentos_trace_context", default=None
)
78
+
79
+
80
def get_current_context() -> TraceContext | None:
    """Look up the active trace context.

    Returns:
        The ``TraceContext`` currently stored in the context variable, or
        ``None`` when the caller is not inside a trace.
    """
    active = _current_context.get()
    return active
83
+
84
+
85
def _set_context(ctx: TraceContext | None) -> contextvars.Token[TraceContext | None]:
    """Install ``ctx`` as the current trace context.

    Returns:
        The token produced by the context variable; pass it to
        ``_current_context.reset`` to restore the previous value.
    """
    restore_token = _current_context.set(ctx)
    return restore_token
88
+
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # Context managers
92
+ # ---------------------------------------------------------------------------
93
+
94
+
95
@contextmanager
def trace(
    agent_id: str,
    *,
    trace_id: str | None = None,
    task_run_id: str | None = None,
    user_id: str | None = None,
    session_id: str | None = None,
) -> Generator[TraceContext, None, None]:
    """Open a new trace and make its root span the current context.

    A fresh span ID is always generated. ``trace_id`` is used as given when
    it is truthy; otherwise a new trace ID is generated. On exit the
    previously active context is restored, even if the body raised.

    Usage::

        with trace(agent_id="my-agent") as ctx:
            # ctx.trace_id, ctx.span_id available
            do_work()
    """
    root = TraceContext(
        trace_id=trace_id if trace_id else generate_trace_id(),
        span_id=generate_span_id(),
        agent_id=agent_id,
        task_run_id=task_run_id,
        user_id=user_id,
        session_id=session_id,
    )
    restore_token = _current_context.set(root)
    try:
        yield root
    finally:
        # Put back whatever context was active before this trace started.
        _current_context.reset(restore_token)
125
+
126
+
127
@contextmanager
def span(
    *,
    agent_id: str | None = None,
) -> Generator[TraceContext, None, None]:
    """Open a child span under the currently active context.

    With an active parent, the child copies the parent's trace_id,
    task_run_id, user_id and session_id, records the parent's span_id as
    ``parent_span_id``, and uses ``agent_id`` if given (else the parent's).
    Without a parent, a standalone span in a brand-new trace is created and
    ``agent_id`` falls back to ``"unknown"``. The previous context is
    restored on exit.

    Usage::

        with trace(agent_id="bot") as t:
            with span() as s:
                # s.parent_span_id == t.span_id
                do_step()
    """
    enclosing = _current_context.get()
    if enclosing is not None:
        child = TraceContext(
            trace_id=enclosing.trace_id,
            span_id=generate_span_id(),
            agent_id=agent_id if agent_id else enclosing.agent_id,
            parent_span_id=enclosing.span_id,
            task_run_id=enclosing.task_run_id,
            user_id=enclosing.user_id,
            session_id=enclosing.session_id,
        )
    else:
        # No enclosing trace: start a new trace holding just this span.
        child = TraceContext(
            trace_id=generate_trace_id(),
            span_id=generate_span_id(),
            agent_id=agent_id if agent_id else "unknown",
        )

    restore_token = _current_context.set(child)
    try:
        yield child
    finally:
        _current_context.reset(restore_token)
agentos/transport.py ADDED
@@ -0,0 +1,285 @@
1
+ """HTTP transport with batch queue and retry logic.
2
+
3
+ All events flow through the transport layer:
4
+ 1. ``enqueue()`` adds to an in-memory queue
5
+ 2. Background thread flushes periodically (sync) or asyncio task (async)
6
+ 3. ``_send_batch()`` posts to ``/v1/events/batch`` with retry + backoff
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import threading
13
+ import time
14
+ from typing import Any
15
+
16
+ import httpx
17
+
18
+ from agentos.config import AgentOSConfig
19
+
20
# Module-level logger shared by both transport classes.
logger = logging.getLogger("agentos.transport")

# SDK identifier; combined with the version to form the User-Agent header.
SDK_NAME = "agentos-python"
23
+
24
+
25
class Transport:
    """Synchronous transport — uses httpx.Client and a background flush thread.

    Events are buffered in memory by ``enqueue()`` and delivered by
    ``flush()``, which runs when ``batch_size`` is reached, on every tick of
    the periodic flush thread (only started when ``flush_interval > 0``), and
    once more from ``shutdown()``. Delivery is best-effort: a batch that
    cannot be sent within the configured retries is logged and dropped.
    """

    # Events per request; larger flushes are split into chunks (server limit).
    _MAX_EVENTS_PER_REQUEST = 500
    # Ceiling (seconds) for any single pause between attempts, including a
    # server-supplied Retry-After, so a bad header cannot stall the caller.
    _MAX_WAIT_SECONDS = 30.0
    # How long shutdown() waits for an in-progress periodic flush to finish.
    _SHUTDOWN_JOIN_TIMEOUT = 5.0

    def __init__(self, config: AgentOSConfig, version: str = "0.1.0") -> None:
        """Create the HTTP client and, if configured, start the flush thread.

        Args:
            config: SDK configuration (base URL, API key, batching and retry
                settings).
            version: SDK version reported in the ``User-Agent`` header.
        """
        self._config = config
        self._version = version
        self._queue: list[dict[str, Any]] = []
        self._lock = threading.Lock()
        self._closed = False
        # Set by shutdown() so the flush thread wakes immediately instead of
        # sleeping out the remainder of its interval.
        self._stop_event = threading.Event()

        self._client = httpx.Client(
            base_url=config.base_url,
            headers={
                "X-API-Key": config.api_key,
                "Content-Type": "application/json",
                "User-Agent": f"{SDK_NAME}/{version}",
            },
            timeout=httpx.Timeout(30.0, connect=10.0),
        )

        self._flush_thread: threading.Thread | None = None
        if config.flush_interval > 0:
            self._flush_thread = threading.Thread(
                target=self._periodic_flush, daemon=True, name="agentos-flush"
            )
            self._flush_thread.start()

    def enqueue(self, event: dict[str, Any]) -> None:
        """Add an event to the queue. Auto-flushes when batch_size is reached.

        Events enqueued after ``shutdown()`` are silently ignored. When the
        queue is already at ``max_queue_size`` the oldest event is dropped to
        make room.
        """
        if self._closed:
            return

        with self._lock:
            # The emptiness check keeps pop(0) safe when max_queue_size <= 0.
            if self._queue and len(self._queue) >= self._config.max_queue_size:
                if self._config.debug:
                    logger.warning("Queue full (%d), dropping oldest event", len(self._queue))
                self._queue.pop(0)
            self._queue.append(event)
            should_flush = len(self._queue) >= self._config.batch_size

        # Flush outside the lock so network I/O never blocks other producers.
        if should_flush:
            self.flush()

    def flush(self) -> None:
        """Send all queued events immediately.

        The queue is drained under the lock and sent afterwards. A failure in
        one chunk is logged and does not prevent later chunks from being sent.
        """
        with self._lock:
            if not self._queue:
                return
            batch = self._queue[:]
            self._queue.clear()

        step = self._MAX_EVENTS_PER_REQUEST
        for start in range(0, len(batch), step):
            chunk = batch[start : start + step]
            try:
                self._send_batch(chunk)
            except Exception:
                logger.exception("Failed to send batch of %d events", len(chunk))

    @classmethod
    def _parse_retry_after(cls, header_value: str | None, fallback: float) -> float:
        """Turn a ``Retry-After`` header into a bounded wait in seconds.

        Only the delay-seconds form is interpreted. A missing header, an
        HTTP-date, or a negative/NaN number yields ``fallback``. The result
        never exceeds ``_MAX_WAIT_SECONDS``.
        """
        if not header_value:
            return fallback
        try:
            wait = float(header_value)
        except (TypeError, ValueError):
            # e.g. the HTTP-date form of Retry-After.
            return fallback
        if wait != wait or wait < 0:  # NaN or negative: time.sleep would raise
            return fallback
        return min(wait, cls._MAX_WAIT_SECONDS)

    def _send_batch(self, events: list[dict[str, Any]]) -> None:
        """POST events to /v1/events/batch with retry and exponential backoff.

        Any 2xx response is a success. 429, 5xx and transport errors are
        retried up to ``max_retries`` times; every other status is final.
        On giving up the failure is logged and the method returns normally.
        """
        payload = {"events": events}
        backoff = 1.0
        max_retries = self._config.max_retries

        for attempt in range(max_retries + 1):
            is_last_attempt = attempt >= max_retries

            try:
                response = self._client.post("/v1/events/batch", json=payload)
            except httpx.TransportError as exc:
                if is_last_attempt:
                    logger.error("Transport error after %d retries: %s", max_retries, exc)
                    return
                if self._config.debug:
                    logger.debug("Transport error: %s, retrying in %.1fs", exc, backoff)
                time.sleep(backoff)
                backoff = min(backoff * 2, self._MAX_WAIT_SECONDS)
                continue

            status = response.status_code

            if 200 <= status < 300:
                if self._config.debug:
                    logger.debug("Sent batch of %d events", len(events))
                return

            if status == 429:
                if is_last_attempt:
                    # No attempt follows, so waiting would only delay the caller.
                    logger.error(
                        "Rate limited after %d retries, dropping batch of %d events",
                        max_retries,
                        len(events),
                    )
                    return
                wait = self._parse_retry_after(response.headers.get("Retry-After"), backoff)
                if self._config.debug:
                    logger.debug("Rate limited, waiting %.1fs", wait)
                time.sleep(wait)
                backoff = min(backoff * 2, self._MAX_WAIT_SECONDS)
                continue

            if status >= 500:
                if is_last_attempt:
                    logger.error("Server error %d after %d retries", status, max_retries)
                    return
                time.sleep(backoff)
                backoff = min(backoff * 2, self._MAX_WAIT_SECONDS)
                continue

            # 4xx (not 429) — don't retry
            if self._config.debug:
                logger.warning("Client error %d: %s", status, response.text[:200])
            return

    def _periodic_flush(self) -> None:
        """Background thread that flushes the queue on an interval."""
        # Event.wait returns True as soon as shutdown() sets the event, which
        # ends the loop without waiting out the rest of the interval.
        while not self._stop_event.wait(self._config.flush_interval):
            if self._closed:
                break
            try:
                self.flush()
            except Exception:
                logger.exception("Error in periodic flush")

    def shutdown(self) -> None:
        """Flush remaining events and close the HTTP client.

        Stops the periodic flush thread first and waits up to
        ``_SHUTDOWN_JOIN_TIMEOUT`` seconds for it, so the client is not closed
        while that thread is still sending.
        """
        self._closed = True
        self._stop_event.set()

        worker = self._flush_thread
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout=self._SHUTDOWN_JOIN_TIMEOUT)

        try:
            self.flush()
        except Exception:
            logger.exception("Error flushing during shutdown")
        finally:
            self._client.close()

    @property
    def queue_size(self) -> int:
        """Number of events currently queued."""
        with self._lock:
            return len(self._queue)
159
+
160
+
161
class AsyncTransport:
    """Asynchronous transport — uses httpx.AsyncClient and asyncio for flush.

    Events are buffered in memory by ``enqueue()`` and delivered by
    ``flush()``, which runs when ``batch_size`` is reached, on every tick of
    the periodic flush task (created by ``start()`` when
    ``flush_interval > 0``), and once more from ``shutdown()``. Delivery is
    best-effort: a batch that cannot be sent within the configured retries is
    logged and dropped.
    """

    # Events per request; larger flushes are split into chunks.
    _MAX_EVENTS_PER_REQUEST = 500
    # Ceiling (seconds) for any single pause between attempts, including a
    # server-supplied Retry-After, so a bad header cannot stall the caller.
    _MAX_WAIT_SECONDS = 30.0

    def __init__(self, config: AgentOSConfig, version: str = "0.1.0") -> None:
        """Create the async HTTP client; call ``start()`` to begin periodic flushing.

        Args:
            config: SDK configuration (base URL, API key, batching and retry
                settings).
            version: SDK version reported in the ``User-Agent`` header.
        """
        self._config = config
        self._version = version
        self._queue: list[dict[str, Any]] = []
        self._closed = False
        self._flush_task: Any = None  # asyncio.Task, typed as Any for import-free

        self._client = httpx.AsyncClient(
            base_url=config.base_url,
            headers={
                "X-API-Key": config.api_key,
                "Content-Type": "application/json",
                "User-Agent": f"{SDK_NAME}/{version}",
            },
            timeout=httpx.Timeout(30.0, connect=10.0),
        )

    async def start(self) -> None:
        """Start the periodic flush task (no-op if disabled or already started)."""
        import asyncio

        if self._config.flush_interval > 0 and self._flush_task is None:
            self._flush_task = asyncio.create_task(self._periodic_flush())

    async def enqueue(self, event: dict[str, Any]) -> None:
        """Add an event to the queue, flushing when batch_size is reached.

        Events enqueued after ``shutdown()`` are silently ignored. When the
        queue is already at ``max_queue_size`` the oldest event is dropped to
        make room.
        """
        if self._closed:
            return

        # The emptiness check keeps pop(0) safe when max_queue_size <= 0.
        if self._queue and len(self._queue) >= self._config.max_queue_size:
            if self._config.debug:
                logger.warning("Queue full (%d), dropping oldest event", len(self._queue))
            self._queue.pop(0)
        self._queue.append(event)

        if len(self._queue) >= self._config.batch_size:
            await self.flush()

    async def flush(self) -> None:
        """Send all queued events immediately.

        The queue is drained before the first ``await``. A failure in one
        chunk is logged and does not prevent later chunks from being sent.
        """
        if not self._queue:
            return
        batch = self._queue[:]
        self._queue.clear()

        step = self._MAX_EVENTS_PER_REQUEST
        for start in range(0, len(batch), step):
            chunk = batch[start : start + step]
            try:
                await self._send_batch(chunk)
            except Exception:
                logger.exception("Failed to send batch of %d events", len(chunk))

    @classmethod
    def _parse_retry_after(cls, header_value: str | None, fallback: float) -> float:
        """Turn a ``Retry-After`` header into a bounded wait in seconds.

        Only the delay-seconds form is interpreted. A missing header, an
        HTTP-date, or a negative/NaN number yields ``fallback``. The result
        never exceeds ``_MAX_WAIT_SECONDS``.
        """
        if not header_value:
            return fallback
        try:
            wait = float(header_value)
        except (TypeError, ValueError):
            # e.g. the HTTP-date form of Retry-After.
            return fallback
        if wait != wait or wait < 0:  # NaN or negative
            return fallback
        return min(wait, cls._MAX_WAIT_SECONDS)

    async def _send_batch(self, events: list[dict[str, Any]]) -> None:
        """POST events with retry and exponential backoff.

        Any 2xx response is a success. 429, 5xx and transport errors are
        retried up to ``max_retries`` times; every other status is final.
        On giving up the failure is logged and the coroutine returns normally.
        """
        import asyncio

        payload = {"events": events}
        backoff = 1.0
        max_retries = self._config.max_retries

        for attempt in range(max_retries + 1):
            is_last_attempt = attempt >= max_retries

            try:
                response = await self._client.post("/v1/events/batch", json=payload)
            except httpx.TransportError as exc:
                if is_last_attempt:
                    logger.error("Transport error after retries: %s", exc)
                    return
                if self._config.debug:
                    logger.debug("Transport error: %s, retrying in %.1fs", exc, backoff)
                await asyncio.sleep(backoff)
                backoff = min(backoff * 2, self._MAX_WAIT_SECONDS)
                continue

            status = response.status_code

            if 200 <= status < 300:
                if self._config.debug:
                    logger.debug("Sent batch of %d events", len(events))
                return

            if status == 429:
                if is_last_attempt:
                    # No attempt follows, so waiting would only delay shutdown.
                    logger.error(
                        "Rate limited after retries, dropping batch of %d events",
                        len(events),
                    )
                    return
                wait = self._parse_retry_after(response.headers.get("Retry-After"), backoff)
                if self._config.debug:
                    logger.debug("Rate limited, waiting %.1fs", wait)
                await asyncio.sleep(wait)
                backoff = min(backoff * 2, self._MAX_WAIT_SECONDS)
                continue

            if status >= 500:
                if is_last_attempt:
                    logger.error("Server error %d after retries", status)
                    return
                await asyncio.sleep(backoff)
                backoff = min(backoff * 2, self._MAX_WAIT_SECONDS)
                continue

            # Any other status (e.g. 4xx other than 429) is not retried.
            if self._config.debug:
                logger.warning("Client error %d: %s", status, response.text[:200])
            return

    async def _periodic_flush(self) -> None:
        """Periodic flush task."""
        import asyncio

        while not self._closed:
            await asyncio.sleep(self._config.flush_interval)
            if self._closed:
                break
            try:
                await self.flush()
            except Exception:
                logger.exception("Error in periodic flush")

    async def shutdown(self) -> None:
        """Flush remaining events and close the client.

        The periodic flush task is cancelled and awaited before the client is
        closed, so it is not left pending and cannot touch a closed client.
        """
        import asyncio

        self._closed = True

        task, self._flush_task = self._flush_task, None
        if task is not None and task is not asyncio.current_task():
            task.cancel()
            # return_exceptions=True collects the task's CancelledError (or any
            # other error) as a result instead of raising it here.
            await asyncio.gather(task, return_exceptions=True)

        try:
            await self.flush()
        except Exception:
            logger.exception("Error flushing during shutdown")
        finally:
            await self._client.aclose()

    @property
    def queue_size(self) -> int:
        """Number of events currently queued."""
        return len(self._queue)
agentos/types.py ADDED
@@ -0,0 +1,101 @@
1
+ """Type definitions for the Agent OS SDK.
2
+
3
+ Mirrors the server's event schema at shared/schema/events.py.
4
+ Uses string enums and TypedDicts for lightweight, dependency-free types.
5
+ """
6
+
7
from __future__ import annotations

import enum
from typing import Any, Union
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # Enums — match server values exactly
14
+ # ---------------------------------------------------------------------------
15
+
16
+
17
class EventType(str, enum.Enum):
    """All supported event types in the Agent OS event stream.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    LLM_CALL = "agent.llm_call"
    TOOL_CALL = "agent.tool_call"
    AGENT_HANDOFF = "agent.handoff"
    RETRIEVAL_QUERY = "agent.retrieval_query"
    EVAL = "agent.eval"
    SECURITY_ALERT = "agent.security_alert"
    FLAG_CHECK = "agent.flag_check"
    BUSINESS_EVENT = "agent.business_event"
    ROUTING_DECISION = "agent.routing_decision"
    ROUTING_OUTCOME = "agent.routing_outcome"
30
+
31
+
32
class FinishReason(str, enum.Enum):
    """String-valued enum of finish reasons.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    # NOTE(review): presumably why an LLM generation ended — confirm against
    # the server schema.
    STOP = "stop"
    LENGTH = "length"
    TOOL_CALLS = "tool_calls"
    CONTENT_FILTER = "content_filter"
37
+
38
+
39
class ToolStatus(str, enum.Enum):
    """String-valued enum of tool statuses.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    SUCCESS = "success"
    ERROR = "error"
    TIMEOUT = "timeout"
43
+
44
+
45
class SecurityAlertType(str, enum.Enum):
    """String-valued enum of security alert categories.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    PROMPT_INJECTION = "prompt_injection"
    PII_DETECTED = "pii_detected"
    JAILBREAK = "jailbreak"
    TOXIC_CONTENT = "toxic_content"
    DATA_EXFILTRATION = "data_exfiltration"
    POLICY_VIOLATION = "policy_violation"
52
+
53
+
54
class Severity(str, enum.Enum):
    """String-valued enum of severity levels.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"
60
+
61
+
62
class SecurityAction(str, enum.Enum):
    """String-valued enum of security actions.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    # NOTE(review): presumably the action taken in response to a security
    # alert — confirm against the server schema.
    BLOCKED = "blocked"
    FLAGGED = "flagged"
    ALLOWED = "allowed"
    REDACTED = "redacted"
67
+
68
+
69
class EvalEvaluator(str, enum.Enum):
    """String-valued enum of evaluator kinds.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    # NOTE(review): presumably who or what produced an eval result — confirm
    # against the server schema.
    HUMAN = "human"
    LLM = "llm"
    HEURISTIC = "heuristic"
    CODE = "code"
74
+
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # TypedDict-style types (using plain dicts for Python 3.9 compat)
78
+ # ---------------------------------------------------------------------------
79
+
80
# These are documentation types — the SDK uses plain dicts internally
# to avoid requiring TypedDict backport on 3.9/3.10.
#
# NOTE: these aliases are ordinary assignments evaluated at import time, so
# ``from __future__ import annotations`` does not defer them. ``X | Y``
# between types only works at runtime on Python 3.10+, so unions are spelled
# with ``Union`` to keep the module importable on 3.9.

LLMMessage = dict[str, Any]
"""{"role": str, "content": str | None, "tool_calls": list | None}"""

ErrorInfo = dict[str, str]
"""{"type": str, "message": str}"""

CostInfo = dict[str, Union[float, str]]
"""{"input_cost": float, "output_cost": float, "total_cost": float, "currency": str}"""

RetrievalDocument = dict[str, Any]
"""{"id": str, "score": float | None, "content": str | None, "metadata": dict | None}"""

EventPayload = dict[str, Any]
"""The full event dict sent to the ingestion API (EventInput wire format).

Required keys: event_type, trace_id, span_id, agent_id
Optional keys: event_id, timestamp, parent_span_id, task_run_id, user_id,
session_id, environment, sdk_name, sdk_version, properties
"""