guardloop 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guardloop/__init__.py +48 -0
- guardloop/budget.py +180 -0
- guardloop/circuit_breaker.py +243 -0
- guardloop/context.py +68 -0
- guardloop/exceptions.py +92 -0
- guardloop/models.py +94 -0
- guardloop/pricing.py +116 -0
- guardloop/providers/__init__.py +6 -0
- guardloop/providers/anthropic.py +150 -0
- guardloop/providers/openai.py +138 -0
- guardloop/py.typed +0 -0
- guardloop/runtime.py +190 -0
- guardloop/telemetry/__init__.py +5 -0
- guardloop/telemetry/conventions.py +98 -0
- guardloop/telemetry/tracer.py +86 -0
- guardloop/tokenization.py +49 -0
- guardloop/tools.py +171 -0
- guardloop-0.2.0.dist-info/METADATA +188 -0
- guardloop-0.2.0.dist-info/RECORD +21 -0
- guardloop-0.2.0.dist-info/WHEEL +4 -0
- guardloop-0.2.0.dist-info/licenses/LICENSE +21 -0
guardloop/__init__.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""GuardLoop public API."""
|
|
2
|
+
|
|
3
|
+
from guardloop.circuit_breaker import (
|
|
4
|
+
CircuitBreakerConfig,
|
|
5
|
+
CircuitBreakerPolicy,
|
|
6
|
+
CircuitBreakerSnapshot,
|
|
7
|
+
CircuitBreakerState,
|
|
8
|
+
)
|
|
9
|
+
from guardloop.context import RunContext
|
|
10
|
+
from guardloop.exceptions import (
|
|
11
|
+
BudgetExceeded,
|
|
12
|
+
CircuitBreakerOpen,
|
|
13
|
+
GuardLoopError,
|
|
14
|
+
ModelPricingMissing,
|
|
15
|
+
TimeLimitExceeded,
|
|
16
|
+
TokenLimitExceeded,
|
|
17
|
+
TokenLimitMissing,
|
|
18
|
+
ToolCallLimitExceeded,
|
|
19
|
+
)
|
|
20
|
+
from guardloop.models import BudgetConfig, RunResult, TelemetryConfig
|
|
21
|
+
from guardloop.pricing import ModelPricing
|
|
22
|
+
from guardloop.runtime import GuardLoop
|
|
23
|
+
|
|
24
|
+
# Alternate names bound to the same objects as GuardLoop and GuardLoopError.
# NOTE(review): presumably kept for callers using an older "AgentRuntime"
# naming -- confirm before removing.
AgentRuntime = GuardLoop
AgentRuntimeError = GuardLoopError

# Names exported by ``from guardloop import *``.
__all__ = [
    "AgentRuntime",
    "AgentRuntimeError",
    "BudgetConfig",
    "BudgetExceeded",
    "CircuitBreakerConfig",
    "CircuitBreakerOpen",
    "CircuitBreakerPolicy",
    "CircuitBreakerSnapshot",
    "CircuitBreakerState",
    "GuardLoop",
    "GuardLoopError",
    "ModelPricing",
    "ModelPricingMissing",
    "RunContext",
    "RunResult",
    "TelemetryConfig",
    "TimeLimitExceeded",
    "TokenLimitExceeded",
    "TokenLimitMissing",
    "ToolCallLimitExceeded",
]
|
guardloop/budget.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Resource accounting and hard pre-flight budget checks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from decimal import Decimal
|
|
8
|
+
|
|
9
|
+
from guardloop.exceptions import (
|
|
10
|
+
BudgetExceeded,
|
|
11
|
+
TimeLimitExceeded,
|
|
12
|
+
TokenLimitExceeded,
|
|
13
|
+
TokenLimitMissing,
|
|
14
|
+
ToolCallLimitExceeded,
|
|
15
|
+
)
|
|
16
|
+
from guardloop.models import BudgetConfig
|
|
17
|
+
from guardloop.pricing import ModelPricing, PricingCatalog
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True, slots=True)
class LLMPreflight:
    """Immutable record of one pre-flight budget check that passed.

    Built by ``BudgetController.check_llm_call`` after the projected token
    and cost totals were found to fit within the configured limits.
    """

    # Provider and model identifiers used for the pricing lookup.
    provider: str
    model: str
    # Input-token estimate supplied by the caller of the pre-flight check.
    estimated_input_tokens: int
    # Output tokens reserved for the call (positive once the check passed).
    reserved_output_tokens: int
    # Cost the pricing entry computed for the estimated input plus the
    # reserved output tokens.
    estimated_cost_usd: Decimal
    # Pricing entry the catalog returned for (provider, model).
    pricing: ModelPricing
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BudgetController:
    """Single source of truth for one runtime execution's resource usage.

    Tracks actual cost, the running total of pre-flight cost estimates,
    input/output token counts, the tool-call count and the time elapsed since
    construction. Each limit read from ``config`` (``time_limit_seconds``,
    ``token_limit``, ``cost_limit``, ``tool_call_limit``) is enforced only
    when it is not ``None``.
    """

    def __init__(self, config: BudgetConfig, pricing_catalog: PricingCatalog) -> None:
        """Store the limits and pricing source and start the run clock."""
        self.config = config
        self.pricing_catalog = pricing_catalog
        self._started_at = time.monotonic()
        self._cost_usd = Decimal("0")
        self._estimated_cost_usd = Decimal("0")
        self._input_tokens = 0
        self._output_tokens = 0
        self._tool_calls = 0

    @property
    def cost_usd(self) -> Decimal:
        """Cost accumulated from recorded (completed) LLM calls."""
        return self._cost_usd

    @property
    def estimated_cost_usd(self) -> Decimal:
        """Sum of the worst-case estimates of every approved pre-flight check."""
        return self._estimated_cost_usd

    @property
    def input_tokens(self) -> int:
        """Input tokens accumulated from recorded LLM calls."""
        return self._input_tokens

    @property
    def output_tokens(self) -> int:
        """Output tokens accumulated from recorded LLM calls."""
        return self._output_tokens

    @property
    def tokens_used(self) -> int:
        """Recorded input plus output tokens."""
        return self._input_tokens + self._output_tokens

    @property
    def tool_calls(self) -> int:
        """Number of tool calls that were allowed to start."""
        return self._tool_calls

    @property
    def duration_seconds(self) -> float:
        """Monotonic seconds elapsed since this controller was created."""
        return time.monotonic() - self._started_at

    def check_time(self) -> None:
        """Raise ``TimeLimitExceeded`` when the run is past its time limit.

        Does nothing when ``config.time_limit_seconds`` is ``None``.
        """
        limit = self.config.time_limit_seconds
        if limit is None:
            return
        # Read the clock once so the value compared against the limit is the
        # same value reported in the exception details.
        elapsed = self.duration_seconds
        if elapsed > limit:
            raise TimeLimitExceeded(
                f"Run exceeded time limit of {limit:.3f}s.",
                details={
                    "limit_seconds": limit,
                    "duration_seconds": elapsed,
                },
            )

    def check_llm_call(
        self,
        *,
        provider: str,
        model: str,
        estimated_input_tokens: int,
        reserved_output_tokens: int | None,
    ) -> LLMPreflight:
        """Approve or reject an LLM call before it is sent.

        Projects the token total (recorded usage + estimated input + reserved
        output) and the cost total (recorded cost + cost of this call at the
        reserved output size) and compares both against the configured
        limits. On approval the call's estimated cost is added to
        ``estimated_cost_usd``; recorded usage is not modified.

        Returns:
            An ``LLMPreflight`` describing the approved call.

        Raises:
            TimeLimitExceeded: the run is already past its time limit.
            TokenLimitMissing: ``reserved_output_tokens`` is ``None`` or not
                positive.
            TokenLimitExceeded: projected tokens exceed ``token_limit``.
            BudgetExceeded: projected cost exceeds ``cost_limit``.

        Anything raised by ``pricing_catalog.get`` propagates unchanged.
        """
        self.check_time()
        if reserved_output_tokens is None or reserved_output_tokens <= 0:
            raise TokenLimitMissing(
                "LLM calls must include a positive max output token limit so the runtime can "
                "reserve worst-case spend before the request."
            )

        pricing = self.pricing_catalog.get(provider, model)
        projected_tokens = self.tokens_used + estimated_input_tokens + reserved_output_tokens
        if self.config.token_limit is not None and projected_tokens > self.config.token_limit:
            raise TokenLimitExceeded(
                "LLM call would exceed token_limit before the request is sent.",
                details={
                    "limit": self.config.token_limit,
                    "current_tokens": self.tokens_used,
                    "estimated_input_tokens": estimated_input_tokens,
                    "reserved_output_tokens": reserved_output_tokens,
                    "projected_tokens": projected_tokens,
                },
            )

        projected_call_cost = pricing.estimate_cost(
            input_tokens=estimated_input_tokens,
            output_tokens=reserved_output_tokens,
        )
        projected_cost = self.cost_usd + projected_call_cost
        cost_limit = self.config.cost_limit
        if cost_limit is not None and projected_cost > cost_limit:
            raise BudgetExceeded(
                "LLM call would exceed cost_limit_usd before the request is sent.",
                limit=cost_limit,
                current=self.cost_usd,
                projected=projected_cost,
            )

        self._estimated_cost_usd += projected_call_cost
        return LLMPreflight(
            provider=provider,
            model=model,
            estimated_input_tokens=estimated_input_tokens,
            reserved_output_tokens=reserved_output_tokens,
            estimated_cost_usd=projected_call_cost,
            pricing=pricing,
        )

    def record_llm_call(
        self,
        *,
        provider: str,
        model: str,
        input_tokens: int,
        output_tokens: int,
    ) -> Decimal:
        """Book the actual usage of a completed LLM call and re-check limits.

        Usage is added to the totals before any limit check runs, so the
        totals include this call even when one of the checks raises.

        Returns:
            The cost computed for this call.

        Raises:
            TimeLimitExceeded: the run is past its time limit.
            TokenLimitExceeded: recorded tokens now exceed ``token_limit``.
            BudgetExceeded: recorded cost now exceeds ``cost_limit``.
        """
        pricing = self.pricing_catalog.get(provider, model)
        actual_cost = pricing.estimate_cost(input_tokens=input_tokens, output_tokens=output_tokens)

        # The request has already completed, so its usage must be booked
        # before a check can raise; otherwise a timeout would drop real spend
        # from the totals.
        self._input_tokens += input_tokens
        self._output_tokens += output_tokens
        self._cost_usd += actual_cost

        self.check_time()
        if self.config.token_limit is not None and self.tokens_used > self.config.token_limit:
            raise TokenLimitExceeded(
                "Actual provider usage exceeded token_limit after the request completed.",
                details={"limit": self.config.token_limit, "tokens_used": self.tokens_used},
            )
        cost_limit = self.config.cost_limit
        if cost_limit is not None and self.cost_usd > cost_limit:
            raise BudgetExceeded(
                "Actual provider usage exceeded cost_limit_usd after the request completed.",
                limit=cost_limit,
                current=self.cost_usd,
                projected=self.cost_usd,
            )
        return actual_cost

    def record_tool_call_started(self, tool_name: str) -> None:
        """Count a tool call that is about to start, or reject it.

        Raises:
            TimeLimitExceeded: the run is past its time limit.
            ToolCallLimitExceeded: this call would exceed ``tool_call_limit``;
                the counter is left unchanged in that case.
        """
        self.check_time()
        projected = self._tool_calls + 1
        if self.config.tool_call_limit is not None and projected > self.config.tool_call_limit:
            raise ToolCallLimitExceeded(
                "Tool call would exceed tool_call_limit before the tool is invoked.",
                details={
                    "tool": tool_name,
                    "limit": self.config.tool_call_limit,
                    "current_tool_calls": self._tool_calls,
                    "projected_tool_calls": projected,
                },
            )
        self._tool_calls = projected
|
guardloop/circuit_breaker.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""Per-tool circuit breaker state machines."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from enum import StrEnum
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
12
|
+
|
|
13
|
+
from guardloop.exceptions import CircuitBreakerOpen
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CircuitBreakerState(StrEnum):
    """Public circuit breaker states."""

    # Initial state of a breaker record; before_call lets calls through.
    CLOSED = "closed"
    # before_call raises CircuitBreakerOpen while recovery time remains.
    OPEN = "open"
    # Entered by before_call once the recovery timeout has elapsed; enough
    # successes close the breaker, a failure reopens it.
    HALF_OPEN = "half_open"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CircuitBreakerPolicy(BaseModel):
    """Immutable failure-handling settings for a single tool's breaker.

    Both thresholds must be at least 1 and ``recovery_timeout_seconds`` must
    be greater than zero; a violation raises ``ValueError`` from the
    validators below.
    """

    model_config = ConfigDict(frozen=True)

    # When False the registry skips breaker handling for the tool.
    enabled: bool = True
    # Failure count at which a closed breaker opens.
    failure_threshold: int = 3
    # Seconds an open breaker rejects calls before a probe is allowed.
    recovery_timeout_seconds: float = 30.0
    # Successes needed in the half-open state to close the breaker.
    half_open_success_threshold: int = 1

    @field_validator("failure_threshold", "half_open_success_threshold")
    @classmethod
    def _validate_positive_int(cls, value: int) -> int:
        """Accept threshold values of 1 or more."""
        if value >= 1:
            return value
        raise ValueError("circuit breaker thresholds must be at least 1")

    @field_validator("recovery_timeout_seconds")
    @classmethod
    def _validate_recovery_timeout(cls, value: float) -> float:
        """Reject zero and negative recovery timeouts."""
        if value <= 0:
            raise ValueError("recovery_timeout_seconds must be greater than zero")
        return value
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class CircuitBreakerConfig(BaseModel):
    """Circuit breaker configuration for runtime tools."""

    model_config = ConfigDict(frozen=True)

    # Master switch: when False the registry applies no breaker to any tool.
    enabled: bool = True
    # Policy used for every tool that has no entry in tool_overrides.
    default: CircuitBreakerPolicy = Field(default_factory=CircuitBreakerPolicy)
    # Per-tool policies keyed by tool name; these replace the default policy.
    tool_overrides: dict[str, CircuitBreakerPolicy] = Field(default_factory=dict)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class CircuitBreakerSnapshot(BaseModel):
    """Point-in-time circuit breaker state for inspection and metadata."""

    model_config = ConfigDict(frozen=True)

    tool_name: str
    state: CircuitBreakerState
    failure_count: int = 0
    consecutive_successes: int = 0
    # Clock reading taken when the breaker last opened; None when it has no
    # recorded open time.
    opened_at: float | None = None
    # Seconds left before an open breaker allows a probe call; 0.0 when the
    # breaker is not open.
    remaining_open_seconds: float = 0.0
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class CircuitBreakerDecision:
    """Internal decision returned after a breaker state check or update."""

    # Breaker state captured after the check or update was applied.
    snapshot: CircuitBreakerSnapshot
    # Names of the state-transition events the operation produced; empty when
    # the state did not change.
    events: tuple[str, ...] = ()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class _CircuitBreakerRecord:
    """Mutable per-tool breaker state kept by CircuitBreakerRegistry."""

    # Policy captured when the record was first created for the tool.
    policy: CircuitBreakerPolicy
    state: CircuitBreakerState = CircuitBreakerState.CLOSED
    failure_count: int = 0
    consecutive_successes: int = 0
    # Clock reading at the moment the breaker last opened, if any.
    opened_at: float | None = None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# Signature of the time source used by CircuitBreakerRegistry; the registry
# falls back to time.monotonic when none is supplied.
Clock = Callable[[], float]

# Event names reported in CircuitBreakerDecision.events on state transitions.
EVENT_OPENED = "guardloop.circuit_breaker.opened"
EVENT_REOPENED = "guardloop.circuit_breaker.reopened"
EVENT_HALF_OPENED = "guardloop.circuit_breaker.half_opened"
EVENT_CLOSED = "guardloop.circuit_breaker.closed"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class CircuitBreakerRegistry:
|
|
98
|
+
"""Thread-safe in-memory registry of per-tool circuit breakers."""
|
|
99
|
+
|
|
100
|
+
def __init__(
|
|
101
|
+
self,
|
|
102
|
+
config: CircuitBreakerConfig | None = None,
|
|
103
|
+
*,
|
|
104
|
+
clock: Clock | None = None,
|
|
105
|
+
) -> None:
|
|
106
|
+
self._config = config or CircuitBreakerConfig()
|
|
107
|
+
self._clock = clock or time.monotonic
|
|
108
|
+
self._lock = threading.Lock()
|
|
109
|
+
self._breakers: dict[str, _CircuitBreakerRecord] = {}
|
|
110
|
+
|
|
111
|
+
def before_call(self, tool_name: str) -> CircuitBreakerDecision | None:
|
|
112
|
+
policy = self._policy_for(tool_name)
|
|
113
|
+
if policy is None:
|
|
114
|
+
return None
|
|
115
|
+
|
|
116
|
+
with self._lock:
|
|
117
|
+
now = self._clock()
|
|
118
|
+
breaker = self._breaker_for(tool_name, policy)
|
|
119
|
+
if breaker.state != CircuitBreakerState.OPEN:
|
|
120
|
+
return CircuitBreakerDecision(snapshot=self._snapshot(tool_name, breaker, now))
|
|
121
|
+
|
|
122
|
+
remaining = self._remaining_open_seconds(breaker, now)
|
|
123
|
+
if remaining > 0:
|
|
124
|
+
snapshot = self._snapshot(tool_name, breaker, now)
|
|
125
|
+
raise CircuitBreakerOpen(
|
|
126
|
+
tool_name=tool_name,
|
|
127
|
+
state=snapshot.state.value,
|
|
128
|
+
failure_count=snapshot.failure_count,
|
|
129
|
+
remaining_open_seconds=snapshot.remaining_open_seconds,
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
breaker.state = CircuitBreakerState.HALF_OPEN
|
|
133
|
+
breaker.consecutive_successes = 0
|
|
134
|
+
return CircuitBreakerDecision(
|
|
135
|
+
snapshot=self._snapshot(tool_name, breaker, now),
|
|
136
|
+
events=(EVENT_HALF_OPENED,),
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
def record_success(self, tool_name: str) -> CircuitBreakerDecision | None:
|
|
140
|
+
policy = self._policy_for(tool_name)
|
|
141
|
+
if policy is None:
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
with self._lock:
|
|
145
|
+
now = self._clock()
|
|
146
|
+
breaker = self._breaker_for(tool_name, policy)
|
|
147
|
+
events: tuple[str, ...] = ()
|
|
148
|
+
|
|
149
|
+
if breaker.state == CircuitBreakerState.HALF_OPEN:
|
|
150
|
+
breaker.consecutive_successes += 1
|
|
151
|
+
if breaker.consecutive_successes >= breaker.policy.half_open_success_threshold:
|
|
152
|
+
breaker.state = CircuitBreakerState.CLOSED
|
|
153
|
+
breaker.failure_count = 0
|
|
154
|
+
breaker.consecutive_successes = 0
|
|
155
|
+
breaker.opened_at = None
|
|
156
|
+
events = (EVENT_CLOSED,)
|
|
157
|
+
elif breaker.state == CircuitBreakerState.CLOSED:
|
|
158
|
+
breaker.failure_count = 0
|
|
159
|
+
breaker.consecutive_successes = 0
|
|
160
|
+
|
|
161
|
+
return CircuitBreakerDecision(
|
|
162
|
+
snapshot=self._snapshot(tool_name, breaker, now), events=events
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
def record_failure(self, tool_name: str) -> CircuitBreakerDecision | None:
|
|
166
|
+
policy = self._policy_for(tool_name)
|
|
167
|
+
if policy is None:
|
|
168
|
+
return None
|
|
169
|
+
|
|
170
|
+
with self._lock:
|
|
171
|
+
now = self._clock()
|
|
172
|
+
breaker = self._breaker_for(tool_name, policy)
|
|
173
|
+
events: tuple[str, ...] = ()
|
|
174
|
+
|
|
175
|
+
if breaker.state == CircuitBreakerState.HALF_OPEN:
|
|
176
|
+
breaker.state = CircuitBreakerState.OPEN
|
|
177
|
+
breaker.failure_count = max(1, breaker.failure_count)
|
|
178
|
+
breaker.consecutive_successes = 0
|
|
179
|
+
breaker.opened_at = now
|
|
180
|
+
events = (EVENT_REOPENED,)
|
|
181
|
+
else:
|
|
182
|
+
breaker.failure_count += 1
|
|
183
|
+
breaker.consecutive_successes = 0
|
|
184
|
+
if (
|
|
185
|
+
breaker.state == CircuitBreakerState.CLOSED
|
|
186
|
+
and breaker.failure_count >= breaker.policy.failure_threshold
|
|
187
|
+
):
|
|
188
|
+
breaker.state = CircuitBreakerState.OPEN
|
|
189
|
+
breaker.opened_at = now
|
|
190
|
+
events = (EVENT_OPENED,)
|
|
191
|
+
|
|
192
|
+
return CircuitBreakerDecision(
|
|
193
|
+
snapshot=self._snapshot(tool_name, breaker, now), events=events
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
def snapshots(self) -> dict[str, CircuitBreakerSnapshot]:
|
|
197
|
+
with self._lock:
|
|
198
|
+
now = self._clock()
|
|
199
|
+
return {
|
|
200
|
+
tool_name: self._snapshot(tool_name, breaker, now)
|
|
201
|
+
for tool_name, breaker in sorted(self._breakers.items())
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
def reset(self, tool_name: str | None = None) -> None:
|
|
205
|
+
with self._lock:
|
|
206
|
+
if tool_name is None:
|
|
207
|
+
self._breakers.clear()
|
|
208
|
+
return
|
|
209
|
+
self._breakers.pop(tool_name, None)
|
|
210
|
+
|
|
211
|
+
def _policy_for(self, tool_name: str) -> CircuitBreakerPolicy | None:
|
|
212
|
+
if not self._config.enabled:
|
|
213
|
+
return None
|
|
214
|
+
|
|
215
|
+
policy = self._config.tool_overrides.get(tool_name, self._config.default)
|
|
216
|
+
if not policy.enabled:
|
|
217
|
+
return None
|
|
218
|
+
return policy
|
|
219
|
+
|
|
220
|
+
def _breaker_for(self, tool_name: str, policy: CircuitBreakerPolicy) -> _CircuitBreakerRecord:
|
|
221
|
+
breaker = self._breakers.get(tool_name)
|
|
222
|
+
if breaker is None:
|
|
223
|
+
breaker = _CircuitBreakerRecord(policy=policy)
|
|
224
|
+
self._breakers[tool_name] = breaker
|
|
225
|
+
return breaker
|
|
226
|
+
|
|
227
|
+
def _remaining_open_seconds(self, breaker: _CircuitBreakerRecord, now: float) -> float:
|
|
228
|
+
if breaker.state != CircuitBreakerState.OPEN or breaker.opened_at is None:
|
|
229
|
+
return 0.0
|
|
230
|
+
opened_until = breaker.opened_at + breaker.policy.recovery_timeout_seconds
|
|
231
|
+
return max(0.0, opened_until - now)
|
|
232
|
+
|
|
233
|
+
def _snapshot(
|
|
234
|
+
self, tool_name: str, breaker: _CircuitBreakerRecord, now: float
|
|
235
|
+
) -> CircuitBreakerSnapshot:
|
|
236
|
+
return CircuitBreakerSnapshot(
|
|
237
|
+
tool_name=tool_name,
|
|
238
|
+
state=breaker.state,
|
|
239
|
+
failure_count=breaker.failure_count,
|
|
240
|
+
consecutive_successes=breaker.consecutive_successes,
|
|
241
|
+
opened_at=breaker.opened_at,
|
|
242
|
+
remaining_open_seconds=self._remaining_open_seconds(breaker, now),
|
|
243
|
+
)
|
guardloop/context.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""RunContext passed to user agents."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from guardloop.budget import BudgetController
|
|
9
|
+
from guardloop.circuit_breaker import CircuitBreakerRegistry
|
|
10
|
+
from guardloop.providers.anthropic import WrappedAnthropicClient
|
|
11
|
+
from guardloop.providers.openai import WrappedOpenAIClient
|
|
12
|
+
from guardloop.telemetry.tracer import Telemetry
|
|
13
|
+
from guardloop.tools import ToolRunner
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RunContext:
    """Bundle of runtime services handed to an agent for one execution.

    Exposes the budget controller, telemetry, lazily wrapped OpenAI and
    Anthropic clients, and helpers that route tool calls through the
    ``ToolRunner`` built in ``__init__``.
    """

    def __init__(
        self,
        *,
        budget: BudgetController,
        telemetry: Telemetry,
        circuit_breakers: CircuitBreakerRegistry,
        openai_client: Any | None = None,
        anthropic_client: Any | None = None,
    ) -> None:
        """Store the services and build the tool runner.

        The provider clients are only kept here; they are wrapped the first
        time the ``openai`` / ``anthropic`` properties are read.
        """
        self.budget = budget
        self.telemetry = telemetry
        self._raw_openai_client = openai_client
        self._raw_anthropic_client = anthropic_client
        self._openai: WrappedOpenAIClient | None = None
        self._anthropic: WrappedAnthropicClient | None = None
        self._tools = ToolRunner(budget, telemetry, circuit_breakers)

    @property
    def openai(self) -> WrappedOpenAIClient:
        """Return the wrapped OpenAI client, creating it on first access.

        When no client was passed to ``__init__`` an ``AsyncOpenAI()`` is
        constructed; the import happens here, at first use.
        """
        if self._openai is not None:
            return self._openai

        raw = self._raw_openai_client
        if raw is None:
            from openai import AsyncOpenAI

            raw = AsyncOpenAI()
        self._openai = WrappedOpenAIClient(raw, self.budget, self.telemetry)
        return self._openai

    @property
    def anthropic(self) -> WrappedAnthropicClient:
        """Return the wrapped Anthropic client, creating it on first access.

        When no client was passed to ``__init__`` an ``AsyncAnthropic()`` is
        constructed; the import happens here, at first use.
        """
        if self._anthropic is not None:
            return self._anthropic

        raw = self._raw_anthropic_client
        if raw is None:
            from anthropic import AsyncAnthropic

            raw = AsyncAnthropic()
        self._anthropic = WrappedAnthropicClient(raw, self.budget, self.telemetry)
        return self._anthropic

    def wrap_tool(self, name: str, func: Callable[..., Any]) -> Callable[..., Any]:
        """Return ``func`` wrapped by the tool runner under ``name``."""
        wrapped = self._tools.wrap(name, func)
        return wrapped

    async def call_tool(
        self,
        name: str,
        func: Callable[..., Any],
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Invoke ``func`` through the tool runner and return its result."""
        outcome = await self._tools.call(name, func, *args, **kwargs)
        return outcome
|
guardloop/exceptions.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Public exception hierarchy for controlled runtime stops."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from decimal import Decimal
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GuardLoopError(Exception):
    """Root of the GuardLoop exception hierarchy.

    Carries a ``details`` dict with structured context next to the message.
    """

    # Short label naming why the run stopped; subclasses override it.
    terminated_reason = "runtime_error"

    def __init__(self, message: str, *, details: dict[str, Any] | None = None) -> None:
        """Store the message and details (an empty dict when none are given)."""
        super().__init__(message)
        self.details = details if details else {}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BudgetExceeded(GuardLoopError):
    """Signals that the configured cost cap would be, or has been, exceeded."""

    terminated_reason = "budget_exceeded"

    def __init__(
        self,
        message: str,
        *,
        limit: Decimal | None = None,
        current: Decimal | None = None,
        projected: Decimal | None = None,
    ) -> None:
        """Build the error with the three cost figures stored in ``details``.

        Args:
            message: Human-readable description.
            limit: The configured cost cap.
            current: Cost recorded so far.
            projected: Cost total that triggered the error.
        """
        super().__init__(
            message,
            details=dict(limit=limit, current=current, projected=projected),
        )
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TokenLimitExceeded(GuardLoopError):
    """Raised when a call would exceed the configured token cap.

    BudgetController raises it both before a request (projected tokens) and
    after one completes (recorded tokens).
    """

    terminated_reason = "token_limit_exceeded"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ToolCallLimitExceeded(GuardLoopError):
    """Raised when a tool call would exceed the configured tool-call cap.

    BudgetController.record_tool_call_started raises it before the call is
    counted.
    """

    terminated_reason = "tool_call_limit_exceeded"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class CircuitBreakerOpen(GuardLoopError):
    """Signals that a tool's circuit breaker refused a call."""

    terminated_reason = "circuit_breaker_open"

    def __init__(
        self,
        *,
        tool_name: str,
        state: str,
        failure_count: int,
        remaining_open_seconds: float,
    ) -> None:
        """Build the error from the breaker's current figures.

        All four arguments are copied into ``details`` under their own
        names; the message names the tool and the remaining open time.
        """
        message = (
            f"Circuit breaker for tool '{tool_name}' is open for another "
            f"{remaining_open_seconds:.3f}s."
        )
        context = dict(
            tool_name=tool_name,
            state=state,
            failure_count=failure_count,
            remaining_open_seconds=remaining_open_seconds,
        )
        super().__init__(message, details=context)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class TimeLimitExceeded(GuardLoopError):
    """Raised when the run exceeds the configured wall-clock cap.

    BudgetController.check_time raises it once the elapsed run time is
    greater than the configured time limit.
    """

    terminated_reason = "timeout"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class ModelPricingMissing(GuardLoopError):
    """Raised when no pricing entry exists for a provider/model pair."""

    # NOTE(review): presumably raised by the pricing catalog lookup; the
    # raising code is not in this module -- confirm.
    terminated_reason = "model_pricing_missing"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class TokenLimitMissing(GuardLoopError):
    """Raised when the runtime cannot reserve output tokens before an LLM call.

    BudgetController.check_llm_call raises it when the reserved output token
    count is None or not positive.
    """

    terminated_reason = "token_limit_missing"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Second name bound to GuardLoopError.
# NOTE(review): presumably kept for callers using an older name -- confirm.
AgentRuntimeError = GuardLoopError
|