guardloop 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
guardloop/__init__.py ADDED
@@ -0,0 +1,48 @@
1
+ """GuardLoop public API."""
2
+
3
+ from guardloop.circuit_breaker import (
4
+ CircuitBreakerConfig,
5
+ CircuitBreakerPolicy,
6
+ CircuitBreakerSnapshot,
7
+ CircuitBreakerState,
8
+ )
9
+ from guardloop.context import RunContext
10
+ from guardloop.exceptions import (
11
+ BudgetExceeded,
12
+ CircuitBreakerOpen,
13
+ GuardLoopError,
14
+ ModelPricingMissing,
15
+ TimeLimitExceeded,
16
+ TokenLimitExceeded,
17
+ TokenLimitMissing,
18
+ ToolCallLimitExceeded,
19
+ )
20
+ from guardloop.models import BudgetConfig, RunResult, TelemetryConfig
21
+ from guardloop.pricing import ModelPricing
22
+ from guardloop.runtime import GuardLoop
23
+
24
# Alternate names bound to the very same runtime class and base exception.
AgentRuntime = GuardLoop
AgentRuntimeError = GuardLoopError

# Names exported by ``from guardloop import *``.
__all__ = [
    "AgentRuntime",
    "AgentRuntimeError",
    "BudgetConfig",
    "BudgetExceeded",
    "CircuitBreakerConfig",
    "CircuitBreakerOpen",
    "CircuitBreakerPolicy",
    "CircuitBreakerSnapshot",
    "CircuitBreakerState",
    "GuardLoop",
    "GuardLoopError",
    "ModelPricing",
    "ModelPricingMissing",
    "RunContext",
    "RunResult",
    "TelemetryConfig",
    "TimeLimitExceeded",
    "TokenLimitExceeded",
    "TokenLimitMissing",
    "ToolCallLimitExceeded",
]
guardloop/budget.py ADDED
@@ -0,0 +1,180 @@
1
+ """Resource accounting and hard pre-flight budget checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from dataclasses import dataclass
7
+ from decimal import Decimal
8
+
9
+ from guardloop.exceptions import (
10
+ BudgetExceeded,
11
+ TimeLimitExceeded,
12
+ TokenLimitExceeded,
13
+ TokenLimitMissing,
14
+ ToolCallLimitExceeded,
15
+ )
16
+ from guardloop.models import BudgetConfig
17
+ from guardloop.pricing import ModelPricing, PricingCatalog
18
+
19
+
20
+ @dataclass(frozen=True, slots=True)
21
+ class LLMPreflight:
22
+ provider: str
23
+ model: str
24
+ estimated_input_tokens: int
25
+ reserved_output_tokens: int
26
+ estimated_cost_usd: Decimal
27
+ pricing: ModelPricing
28
+
29
+
30
class BudgetController:
    """Single source of truth for one runtime execution's resource usage.

    Tracks elapsed wall-clock time, input/output tokens, tool calls and cost
    for one run, and raises the matching GuardLoop exception when a configured
    limit is crossed or would be crossed by the next call.
    """

    def __init__(self, config: BudgetConfig, pricing_catalog: PricingCatalog) -> None:
        """Start the run clock and zero every counter.

        Args:
            config: Limits to enforce; a limit set to ``None`` is not enforced.
            pricing_catalog: Lookup used to price each provider/model pair.
        """
        self.config = config
        self.pricing_catalog = pricing_catalog
        self._started_at = time.monotonic()
        self._cost_usd = Decimal("0")
        self._estimated_cost_usd = Decimal("0")
        self._input_tokens = 0
        self._output_tokens = 0
        self._tool_calls = 0

    @property
    def cost_usd(self) -> Decimal:
        """Cost accumulated from recorded (actual) LLM usage."""
        return self._cost_usd

    @property
    def estimated_cost_usd(self) -> Decimal:
        """Sum of the projected costs of every call that passed pre-flight."""
        return self._estimated_cost_usd

    @property
    def input_tokens(self) -> int:
        """Input tokens recorded so far."""
        return self._input_tokens

    @property
    def output_tokens(self) -> int:
        """Output tokens recorded so far."""
        return self._output_tokens

    @property
    def tokens_used(self) -> int:
        """Recorded input plus output tokens."""
        return self._input_tokens + self._output_tokens

    @property
    def tool_calls(self) -> int:
        """Number of tool calls that were allowed to start."""
        return self._tool_calls

    @property
    def duration_seconds(self) -> float:
        """Monotonic seconds elapsed since this controller was created."""
        return time.monotonic() - self._started_at

    def check_time(self) -> None:
        """Raise ``TimeLimitExceeded`` if the run has outlived its time limit."""
        limit = self.config.time_limit_seconds
        if limit is None:
            return
        # Sample the clock once so the value compared and the value reported agree.
        elapsed = self.duration_seconds
        if elapsed > limit:
            raise TimeLimitExceeded(
                f"Run exceeded time limit of {limit:.3f}s.",
                details={
                    "limit_seconds": limit,
                    "duration_seconds": elapsed,
                },
            )

    def check_llm_call(
        self,
        *,
        provider: str,
        model: str,
        estimated_input_tokens: int,
        reserved_output_tokens: int | None,
    ) -> LLMPreflight:
        """Validate an LLM call against every limit before it is sent.

        Args:
            provider: Provider name used for the pricing lookup.
            model: Model name used for the pricing lookup.
            estimated_input_tokens: Estimated size of the request.
            reserved_output_tokens: Maximum output tokens the call may produce.

        Returns:
            The projection that was checked, including its estimated cost.

        Raises:
            TimeLimitExceeded: The run is already past its time limit.
            TokenLimitMissing: ``reserved_output_tokens`` is ``None`` or not positive.
            TokenLimitExceeded: Recorded plus projected tokens exceed ``token_limit``.
            BudgetExceeded: Recorded plus projected cost exceeds ``cost_limit``.
        """
        self.check_time()
        if reserved_output_tokens is None or reserved_output_tokens <= 0:
            raise TokenLimitMissing(
                "LLM calls must include a positive max output token limit so the runtime can "
                "reserve worst-case spend before the request."
            )

        pricing = self.pricing_catalog.get(provider, model)
        projected_tokens = self.tokens_used + estimated_input_tokens + reserved_output_tokens
        if self.config.token_limit is not None and projected_tokens > self.config.token_limit:
            raise TokenLimitExceeded(
                "LLM call would exceed token_limit before the request is sent.",
                details={
                    "limit": self.config.token_limit,
                    "current_tokens": self.tokens_used,
                    "estimated_input_tokens": estimated_input_tokens,
                    "reserved_output_tokens": reserved_output_tokens,
                    "projected_tokens": projected_tokens,
                },
            )

        projected_call_cost = pricing.estimate_cost(
            input_tokens=estimated_input_tokens,
            output_tokens=reserved_output_tokens,
        )
        projected_cost = self.cost_usd + projected_call_cost
        cost_limit = self.config.cost_limit
        if cost_limit is not None and projected_cost > cost_limit:
            raise BudgetExceeded(
                "LLM call would exceed cost_limit_usd before the request is sent.",
                limit=cost_limit,
                current=self.cost_usd,
                projected=projected_cost,
            )

        # Only calls that pass every check contribute to the running estimate.
        self._estimated_cost_usd += projected_call_cost
        return LLMPreflight(
            provider=provider,
            model=model,
            estimated_input_tokens=estimated_input_tokens,
            reserved_output_tokens=reserved_output_tokens,
            estimated_cost_usd=projected_call_cost,
            pricing=pricing,
        )

    def record_llm_call(
        self,
        *,
        provider: str,
        model: str,
        input_tokens: int,
        output_tokens: int,
    ) -> Decimal:
        """Add a completed call's actual usage to the totals, then enforce limits.

        Usage is recorded before any limit is checked: the provider request has
        already happened, so its tokens and cost must be counted even when the
        run is stopped immediately afterwards.

        Args:
            provider: Provider name used for the pricing lookup.
            model: Model name used for the pricing lookup.
            input_tokens: Input tokens reported for the completed call.
            output_tokens: Output tokens reported for the completed call.

        Returns:
            The actual cost of this call.

        Raises:
            TimeLimitExceeded: The run is past its time limit.
            TokenLimitExceeded: Recorded tokens now exceed ``token_limit``.
            BudgetExceeded: Recorded cost now exceeds ``cost_limit``.
        """
        pricing = self.pricing_catalog.get(provider, model)
        actual_cost = pricing.estimate_cost(input_tokens=input_tokens, output_tokens=output_tokens)
        self._input_tokens += input_tokens
        self._output_tokens += output_tokens
        self._cost_usd += actual_cost

        # Limits are enforced in the same order as before: time, tokens, cost.
        self.check_time()
        if self.config.token_limit is not None and self.tokens_used > self.config.token_limit:
            raise TokenLimitExceeded(
                "Actual provider usage exceeded token_limit after the request completed.",
                details={"limit": self.config.token_limit, "tokens_used": self.tokens_used},
            )
        cost_limit = self.config.cost_limit
        if cost_limit is not None and self.cost_usd > cost_limit:
            raise BudgetExceeded(
                "Actual provider usage exceeded cost_limit_usd after the request completed.",
                limit=cost_limit,
                current=self.cost_usd,
                projected=self.cost_usd,
            )
        return actual_cost

    def record_tool_call_started(self, tool_name: str) -> None:
        """Count a tool call that is about to start, refusing it if over the limit.

        Args:
            tool_name: Name of the tool, reported in the error details.

        Raises:
            TimeLimitExceeded: The run is already past its time limit.
            ToolCallLimitExceeded: This call would exceed ``tool_call_limit``.
        """
        self.check_time()
        projected = self._tool_calls + 1
        if self.config.tool_call_limit is not None and projected > self.config.tool_call_limit:
            raise ToolCallLimitExceeded(
                "Tool call would exceed tool_call_limit before the tool is invoked.",
                details={
                    "tool": tool_name,
                    "limit": self.config.tool_call_limit,
                    "current_tool_calls": self._tool_calls,
                    "projected_tool_calls": projected,
                },
            )
        # The counter only advances for calls that were allowed to proceed.
        self._tool_calls = projected
guardloop/circuit_breaker.py ADDED
@@ -0,0 +1,243 @@
1
+ """Per-tool circuit breaker state machines."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ import time
7
+ from collections.abc import Callable
8
+ from dataclasses import dataclass
9
+ from enum import StrEnum
10
+
11
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
12
+
13
+ from guardloop.exceptions import CircuitBreakerOpen
14
+
15
+
16
+ class CircuitBreakerState(StrEnum):
17
+ """Public circuit breaker states."""
18
+
19
+ CLOSED = "closed"
20
+ OPEN = "open"
21
+ HALF_OPEN = "half_open"
22
+
23
+
24
class CircuitBreakerPolicy(BaseModel):
    """Immutable failure-handling settings for a single tool's breaker."""

    model_config = ConfigDict(frozen=True)

    # When False the registry skips breaker handling for the tool entirely.
    enabled: bool = True
    # Failure count at which a closed breaker opens.
    failure_threshold: int = 3
    # How long an open breaker rejects calls before allowing a trial call.
    recovery_timeout_seconds: float = 30.0
    # Successes required in half-open state before the breaker closes again.
    half_open_success_threshold: int = 1

    @field_validator("failure_threshold", "half_open_success_threshold")
    @classmethod
    def _validate_positive_int(cls, value: int) -> int:
        """Accept only thresholds of one or more."""
        if value >= 1:
            return value
        raise ValueError("circuit breaker thresholds must be at least 1")

    @field_validator("recovery_timeout_seconds")
    @classmethod
    def _validate_recovery_timeout(cls, value: float) -> float:
        """Accept only a strictly positive recovery timeout."""
        if value > 0:
            return value
        raise ValueError("recovery_timeout_seconds must be greater than zero")
47
+
48
+
49
class CircuitBreakerConfig(BaseModel):
    """Immutable circuit breaker settings covering every runtime tool."""

    model_config = ConfigDict(frozen=True)

    # Master switch; when False no tool gets a breaker.
    enabled: bool = True
    # Policy applied to any tool without an entry in ``tool_overrides``.
    default: CircuitBreakerPolicy = Field(default_factory=CircuitBreakerPolicy)
    # Per-tool policies keyed by tool name.
    tool_overrides: dict[str, CircuitBreakerPolicy] = Field(default_factory=dict)
57
+
58
+
59
class CircuitBreakerSnapshot(BaseModel):
    """Immutable copy of one breaker's state at a single moment."""

    model_config = ConfigDict(frozen=True)

    tool_name: str
    state: CircuitBreakerState
    failure_count: int = 0
    consecutive_successes: int = 0
    # Clock reading taken when the breaker last opened, if it ever has.
    opened_at: float | None = None
    # Time left before an open breaker allows a trial call; zero otherwise.
    remaining_open_seconds: float = 0.0
70
+
71
+
72
@dataclass
class CircuitBreakerDecision:
    """Outcome of a breaker check or update, for internal use."""

    # Breaker state as it stood once the check or update had been applied.
    snapshot: CircuitBreakerSnapshot
    # Names of the state-transition events that the operation produced.
    events: tuple[str, ...] = ()
78
+
79
+
80
@dataclass
class _CircuitBreakerRecord:
    """Mutable bookkeeping for one tool's breaker."""

    # Policy captured when the record was created.
    policy: CircuitBreakerPolicy
    state: CircuitBreakerState = CircuitBreakerState.CLOSED
    failure_count: int = 0
    consecutive_successes: int = 0
    # Clock reading from the most recent transition to OPEN, if any.
    opened_at: float | None = None
87
+
88
+
89
# Zero-argument callable returning the current time as a float.
Clock = Callable[[], float]

# Event names attached to decisions when a breaker changes state.
EVENT_OPENED = "guardloop.circuit_breaker.opened"
EVENT_REOPENED = "guardloop.circuit_breaker.reopened"
EVENT_HALF_OPENED = "guardloop.circuit_breaker.half_opened"
EVENT_CLOSED = "guardloop.circuit_breaker.closed"
95
+
96
+
97
class CircuitBreakerRegistry:
    """Thread-safe in-memory registry of per-tool circuit breakers.

    Breaker records are created on first use and every read or update of them
    happens while holding a single lock.
    """

    def __init__(
        self,
        config: CircuitBreakerConfig | None = None,
        *,
        clock: Clock | None = None,
    ) -> None:
        """Create an empty registry.

        Args:
            config: Breaker settings; a default config is built when omitted.
            clock: Time source; ``time.monotonic`` is used when omitted.
        """
        self._config = config or CircuitBreakerConfig()
        self._clock = clock or time.monotonic
        self._lock = threading.Lock()
        self._breakers: dict[str, _CircuitBreakerRecord] = {}

    def before_call(self, tool_name: str) -> CircuitBreakerDecision | None:
        """Decide whether a call to ``tool_name`` may go ahead.

        Returns:
            ``None`` when breakers are disabled for the tool, otherwise the
            decision, carrying a half-opened event if the breaker just left OPEN.

        Raises:
            CircuitBreakerOpen: The breaker is open and its timeout is still running.
        """
        policy = self._policy_for(tool_name)
        if policy is None:
            return None

        with self._lock:
            now = self._clock()
            record = self._breaker_for(tool_name, policy)
            emitted: tuple[str, ...] = ()

            if record.state == CircuitBreakerState.OPEN:
                if self._remaining_open_seconds(record, now) > 0:
                    rejected = self._snapshot(tool_name, record, now)
                    raise CircuitBreakerOpen(
                        tool_name=tool_name,
                        state=rejected.state.value,
                        failure_count=rejected.failure_count,
                        remaining_open_seconds=rejected.remaining_open_seconds,
                    )
                # Recovery timeout has run out: let trial calls through.
                record.state = CircuitBreakerState.HALF_OPEN
                record.consecutive_successes = 0
                emitted = (EVENT_HALF_OPENED,)

            return CircuitBreakerDecision(
                snapshot=self._snapshot(tool_name, record, now),
                events=emitted,
            )

    def record_success(self, tool_name: str) -> CircuitBreakerDecision | None:
        """Register a successful call and close a half-open breaker when earned.

        Returns:
            ``None`` when breakers are disabled for the tool, otherwise the
            decision, carrying a closed event if the breaker just closed.
        """
        policy = self._policy_for(tool_name)
        if policy is None:
            return None

        with self._lock:
            now = self._clock()
            record = self._breaker_for(tool_name, policy)
            emitted: tuple[str, ...] = ()

            if record.state == CircuitBreakerState.CLOSED:
                record.failure_count = 0
                record.consecutive_successes = 0
            elif record.state == CircuitBreakerState.HALF_OPEN:
                record.consecutive_successes += 1
                needed = record.policy.half_open_success_threshold
                if record.consecutive_successes >= needed:
                    record.state = CircuitBreakerState.CLOSED
                    record.failure_count = 0
                    record.consecutive_successes = 0
                    record.opened_at = None
                    emitted = (EVENT_CLOSED,)
            # A success reported while OPEN leaves the record untouched.

            return CircuitBreakerDecision(
                snapshot=self._snapshot(tool_name, record, now), events=emitted
            )

    def record_failure(self, tool_name: str) -> CircuitBreakerDecision | None:
        """Register a failed call, opening or reopening the breaker as needed.

        Returns:
            ``None`` when breakers are disabled for the tool, otherwise the
            decision, carrying an opened or reopened event on a transition.
        """
        policy = self._policy_for(tool_name)
        if policy is None:
            return None

        with self._lock:
            now = self._clock()
            record = self._breaker_for(tool_name, policy)
            emitted: tuple[str, ...] = ()

            if record.state != CircuitBreakerState.HALF_OPEN:
                record.failure_count += 1
                record.consecutive_successes = 0
                threshold_hit = record.failure_count >= record.policy.failure_threshold
                if record.state == CircuitBreakerState.CLOSED and threshold_hit:
                    record.state = CircuitBreakerState.OPEN
                    record.opened_at = now
                    emitted = (EVENT_OPENED,)
            else:
                # Any failure during the trial period reopens immediately.
                record.state = CircuitBreakerState.OPEN
                record.failure_count = max(1, record.failure_count)
                record.consecutive_successes = 0
                record.opened_at = now
                emitted = (EVENT_REOPENED,)

            return CircuitBreakerDecision(
                snapshot=self._snapshot(tool_name, record, now), events=emitted
            )

    def snapshots(self) -> dict[str, CircuitBreakerSnapshot]:
        """Return a snapshot of every known breaker, ordered by tool name."""
        with self._lock:
            now = self._clock()
            result: dict[str, CircuitBreakerSnapshot] = {}
            for name in sorted(self._breakers):
                result[name] = self._snapshot(name, self._breakers[name], now)
            return result

    def reset(self, tool_name: str | None = None) -> None:
        """Forget one tool's breaker, or every breaker when no name is given."""
        with self._lock:
            if tool_name is not None:
                self._breakers.pop(tool_name, None)
            else:
                self._breakers.clear()

    def _policy_for(self, tool_name: str) -> CircuitBreakerPolicy | None:
        """Return the active policy for a tool, or ``None`` when disabled."""
        config = self._config
        if not config.enabled:
            return None

        chosen = config.tool_overrides.get(tool_name, config.default)
        return chosen if chosen.enabled else None

    def _breaker_for(self, tool_name: str, policy: CircuitBreakerPolicy) -> _CircuitBreakerRecord:
        """Return the tool's record, creating it with ``policy`` on first use."""
        try:
            return self._breakers[tool_name]
        except KeyError:
            created = _CircuitBreakerRecord(policy=policy)
            self._breakers[tool_name] = created
            return created

    def _remaining_open_seconds(self, breaker: _CircuitBreakerRecord, now: float) -> float:
        """Return how long an open breaker keeps rejecting calls; zero otherwise."""
        if breaker.state == CircuitBreakerState.OPEN and breaker.opened_at is not None:
            deadline = breaker.opened_at + breaker.policy.recovery_timeout_seconds
            return max(0.0, deadline - now)
        return 0.0

    def _snapshot(
        self, tool_name: str, breaker: _CircuitBreakerRecord, now: float
    ) -> CircuitBreakerSnapshot:
        """Copy the record's current values into an immutable snapshot."""
        return CircuitBreakerSnapshot(
            tool_name=tool_name,
            state=breaker.state,
            failure_count=breaker.failure_count,
            consecutive_successes=breaker.consecutive_successes,
            opened_at=breaker.opened_at,
            remaining_open_seconds=self._remaining_open_seconds(breaker, now),
        )
guardloop/context.py ADDED
@@ -0,0 +1,68 @@
1
+ """RunContext passed to user agents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Callable
6
+ from typing import Any
7
+
8
+ from guardloop.budget import BudgetController
9
+ from guardloop.circuit_breaker import CircuitBreakerRegistry
10
+ from guardloop.providers.anthropic import WrappedAnthropicClient
11
+ from guardloop.providers.openai import WrappedOpenAIClient
12
+ from guardloop.telemetry.tracer import Telemetry
13
+ from guardloop.tools import ToolRunner
14
+
15
+
16
class RunContext:
    """Runtime services handed to an agent for the length of one execution.

    Exposes the budget controller, telemetry, lazily wrapped provider clients
    and budget-aware tool invocation.
    """

    def __init__(
        self,
        *,
        budget: BudgetController,
        telemetry: Telemetry,
        circuit_breakers: CircuitBreakerRegistry,
        openai_client: Any | None = None,
        anthropic_client: Any | None = None,
    ) -> None:
        """Store the shared services and build the tool runner.

        Args:
            budget: Budget controller shared by every wrapped client and tool.
            telemetry: Telemetry sink shared by every wrapped client and tool.
            circuit_breakers: Registry handed to the tool runner.
            openai_client: Optional client to wrap instead of a default one.
            anthropic_client: Optional client to wrap instead of a default one.
        """
        self.budget = budget
        self.telemetry = telemetry
        self._tools = ToolRunner(budget, telemetry, circuit_breakers)

        # Raw clients stay unwrapped until the matching property is first read.
        self._raw_openai_client = openai_client
        self._raw_anthropic_client = anthropic_client
        self._openai: WrappedOpenAIClient | None = None
        self._anthropic: WrappedAnthropicClient | None = None

    @property
    def openai(self) -> WrappedOpenAIClient:
        """Wrapped OpenAI client, created on first access and then reused."""
        if self._openai is not None:
            return self._openai

        raw = self._raw_openai_client
        if raw is None:
            # Imported here so the SDK is only needed when actually used.
            from openai import AsyncOpenAI

            raw = AsyncOpenAI()
        self._openai = WrappedOpenAIClient(raw, self.budget, self.telemetry)
        return self._openai

    @property
    def anthropic(self) -> WrappedAnthropicClient:
        """Wrapped Anthropic client, created on first access and then reused."""
        if self._anthropic is not None:
            return self._anthropic

        raw = self._raw_anthropic_client
        if raw is None:
            # Imported here so the SDK is only needed when actually used.
            from anthropic import AsyncAnthropic

            raw = AsyncAnthropic()
        self._anthropic = WrappedAnthropicClient(raw, self.budget, self.telemetry)
        return self._anthropic

    def wrap_tool(self, name: str, func: Callable[..., Any]) -> Callable[..., Any]:
        """Return ``func`` wrapped by the tool runner under ``name``."""
        wrapped = self._tools.wrap(name, func)
        return wrapped

    async def call_tool(
        self,
        name: str,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Invoke ``func`` through the tool runner and return its result."""
        result = await self._tools.call(name, func, *args, **kwargs)
        return result
guardloop/exceptions.py ADDED
@@ -0,0 +1,92 @@
1
+ """Public exception hierarchy for controlled runtime stops."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from decimal import Decimal
6
+ from typing import Any
7
+
8
+
9
class GuardLoopError(Exception):
    """Root of the controlled GuardLoop exception hierarchy.

    Each subclass overrides ``terminated_reason`` with its own identifier, and
    every instance carries a ``details`` dict.
    """

    terminated_reason = "runtime_error"

    def __init__(self, message: str, *, details: dict[str, Any] | None = None) -> None:
        """Store the message and the details, using an empty dict when none are given."""
        super().__init__(message)
        self.details = details if details else {}
17
+
18
+
19
class BudgetExceeded(GuardLoopError):
    """Signals that the configured cost cap was, or would be, exceeded."""

    terminated_reason = "budget_exceeded"

    def __init__(
        self,
        message: str,
        *,
        limit: Decimal | None = None,
        current: Decimal | None = None,
        projected: Decimal | None = None,
    ) -> None:
        """Pack the three cost figures into ``details`` under fixed keys."""
        super().__init__(
            message,
            details={"limit": limit, "current": current, "projected": projected},
        )
34
+
35
+
36
class TokenLimitExceeded(GuardLoopError):
    """Signals that the configured token cap was, or would be, exceeded."""

    terminated_reason = "token_limit_exceeded"
40
+
41
+
42
class ToolCallLimitExceeded(GuardLoopError):
    """Signals that one more tool call would break the tool-call cap."""

    terminated_reason = "tool_call_limit_exceeded"
46
+
47
+
48
class CircuitBreakerOpen(GuardLoopError):
    """Signals that a tool's circuit breaker refused a call."""

    terminated_reason = "circuit_breaker_open"

    def __init__(
        self,
        *,
        tool_name: str,
        state: str,
        failure_count: int,
        remaining_open_seconds: float,
    ) -> None:
        """Build the message from the tool name and wait, and keep all four values in ``details``."""
        message = (
            f"Circuit breaker for tool '{tool_name}' is open for another "
            f"{remaining_open_seconds:.3f}s."
        )
        super().__init__(
            message,
            details={
                "tool_name": tool_name,
                "state": state,
                "failure_count": failure_count,
                "remaining_open_seconds": remaining_open_seconds,
            },
        )
72
+
73
+
74
class TimeLimitExceeded(GuardLoopError):
    """Signals that the run outlived its configured wall-clock cap."""

    terminated_reason = "timeout"
78
+
79
+
80
class ModelPricingMissing(GuardLoopError):
    """Signals that a provider/model pair has no pricing entry."""

    terminated_reason = "model_pricing_missing"
84
+
85
+
86
class TokenLimitMissing(GuardLoopError):
    """Signals that output tokens could not be reserved ahead of an LLM call."""

    terminated_reason = "token_limit_missing"
90
+
91
+
92
# Alternate name bound to the very same base exception class.
AgentRuntimeError = GuardLoopError