guardloop-0.2.0-py3-none-any.whl
- guardloop/__init__.py +48 -0
- guardloop/budget.py +180 -0
- guardloop/circuit_breaker.py +243 -0
- guardloop/context.py +68 -0
- guardloop/exceptions.py +92 -0
- guardloop/models.py +94 -0
- guardloop/pricing.py +116 -0
- guardloop/providers/__init__.py +6 -0
- guardloop/providers/anthropic.py +150 -0
- guardloop/providers/openai.py +138 -0
- guardloop/py.typed +0 -0
- guardloop/runtime.py +190 -0
- guardloop/telemetry/__init__.py +5 -0
- guardloop/telemetry/conventions.py +98 -0
- guardloop/telemetry/tracer.py +86 -0
- guardloop/tokenization.py +49 -0
- guardloop/tools.py +171 -0
- guardloop-0.2.0.dist-info/METADATA +188 -0
- guardloop-0.2.0.dist-info/RECORD +21 -0
- guardloop-0.2.0.dist-info/WHEEL +4 -0
- guardloop-0.2.0.dist-info/licenses/LICENSE +21 -0
guardloop/models.py
ADDED
@@ -0,0 +1,94 @@
"""Pydantic models for runtime configuration and results."""

from __future__ import annotations

from decimal import Decimal
from typing import Any

from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator

DecimalInput = Decimal | str | int | float


class BudgetConfig(BaseModel):
    """Hard resource limits for a single agent run."""

    model_config = ConfigDict(frozen=True)

    cost_limit_usd: DecimalInput | None = Field(default=None)
    token_limit: int | None = Field(default=None)
    time_limit_seconds: float | None = Field(default=None)
    tool_call_limit: int | None = Field(default=None)

    @field_validator("cost_limit_usd", mode="before")
    @classmethod
    def _parse_decimal(cls, value: object) -> object:
        if value is None or isinstance(value, Decimal):
            return value
        return Decimal(str(value))

    @field_validator("cost_limit_usd")
    @classmethod
    def _validate_cost_limit(cls, value: DecimalInput | None) -> DecimalInput | None:
        decimal_value = Decimal(str(value)) if value is not None else None
        if decimal_value is not None and decimal_value < 0:
            raise ValueError("cost_limit_usd must be non-negative")
        return value

    @property
    def cost_limit(self) -> Decimal | None:
        if self.cost_limit_usd is None:
            return None
        if isinstance(self.cost_limit_usd, Decimal):
            return self.cost_limit_usd
        return Decimal(str(self.cost_limit_usd))

    @field_validator("token_limit", "tool_call_limit")
    @classmethod
    def _validate_optional_non_negative_int(cls, value: int | None) -> int | None:
        if value is not None and value < 0:
            raise ValueError("limits must be non-negative")
        return value

    @field_validator("time_limit_seconds")
    @classmethod
    def _validate_time_limit(cls, value: float | None) -> float | None:
        if value is not None and value <= 0:
            raise ValueError("time_limit_seconds must be greater than zero")
        return value


class TelemetryConfig(BaseModel):
    """OpenTelemetry behavior for runtime spans."""

    model_config = ConfigDict(frozen=True)

    enabled: bool = True
    service_name: str = "guardloop"
    otlp_endpoint: str | None = None
    console_exporter: bool = False


class RunResult(BaseModel):
    """Structured result returned from every runtime execution."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    output: str | None = None
    success: bool
    cost_usd: Decimal = Decimal("0")
    estimated_cost_usd: Decimal = Decimal("0")
    tokens_used: int = 0
    input_tokens: int = 0
    output_tokens: int = 0
    duration_seconds: float = 0.0
    tool_calls: int = 0
    trace_id: str | None = None
    terminated_reason: str | None = None
    error_type: str | None = None
    error_message: str | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)

    @field_serializer("cost_usd", "estimated_cost_usd")
    def _serialize_decimal(self, value: Decimal) -> str:
        return str(value)
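Note: a minimal sketch of the coercion and serialization behavior defined above (hypothetical usage, assuming pydantic v2 semantics as imported in the module).

from decimal import Decimal

from guardloop.models import BudgetConfig, RunResult

cfg = BudgetConfig(cost_limit_usd="0.50", token_limit=100_000)
assert cfg.cost_limit == Decimal("0.50")  # str input coerced by the before-validator

result = RunResult(success=True, cost_usd=Decimal("0.0123"))
assert result.model_dump()["cost_usd"] == "0.0123"  # Decimal fields dump as strings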
guardloop/pricing.py
ADDED
@@ -0,0 +1,116 @@
"""Provider/model pricing catalog.

Prices are USD per one million tokens and are intentionally overrideable because
provider pricing changes over time.
"""

from __future__ import annotations

from collections.abc import Iterable
from decimal import Decimal

from pydantic import BaseModel, ConfigDict, field_validator

from guardloop.exceptions import ModelPricingMissing

MILLION = Decimal("1000000")


class ModelPricing(BaseModel):
    """Token pricing for one provider/model pair."""

    model_config = ConfigDict(frozen=True)

    provider: str
    model: str
    input_cost_per_million_tokens: Decimal
    output_cost_per_million_tokens: Decimal

    @field_validator(
        "input_cost_per_million_tokens",
        "output_cost_per_million_tokens",
        mode="before",
    )
    @classmethod
    def _parse_decimal(cls, value: object) -> object:
        if isinstance(value, Decimal):
            return value
        return Decimal(str(value))

    @field_validator("provider", "model")
    @classmethod
    def _normalize(cls, value: str) -> str:
        return value.strip().lower()

    def estimate_cost(self, *, input_tokens: int, output_tokens: int) -> Decimal:
        input_cost = Decimal(input_tokens) * self.input_cost_per_million_tokens / MILLION
        output_cost = Decimal(output_tokens) * self.output_cost_per_million_tokens / MILLION
        return input_cost + output_cost


def _price(provider: str, model: str, input_price: str, output_price: str) -> ModelPricing:
    return ModelPricing(
        provider=provider,
        model=model,
        input_cost_per_million_tokens=Decimal(input_price),
        output_cost_per_million_tokens=Decimal(output_price),
    )


DEFAULT_MODEL_PRICES: tuple[ModelPricing, ...] = (
    # OpenAI API pricing checked May 3, 2026.
    _price("openai", "gpt-5.5", "5.00", "30.00"),
    _price("openai", "gpt-5.4", "2.50", "15.00"),
    _price("openai", "gpt-5.4-mini", "0.75", "4.50"),
    _price("openai", "gpt-5.2", "1.75", "14.00"),
    _price("openai", "gpt-5.2-2025-12-11", "1.75", "14.00"),
    _price("openai", "gpt-5.2-chat-latest", "1.75", "14.00"),
    _price("openai", "gpt-5.2-codex", "1.75", "14.00"),
    _price("openai", "gpt-5.2-pro", "21.00", "168.00"),
    _price("openai", "gpt-5.1", "1.25", "10.00"),
    _price("openai", "gpt-5", "1.25", "10.00"),
    _price("openai", "gpt-5-mini", "0.25", "2.00"),
    _price("openai", "gpt-5-nano", "0.05", "0.40"),
    _price("openai", "gpt-4.1", "2.00", "8.00"),
    _price("openai", "gpt-4.1-mini", "0.40", "1.60"),
    _price("openai", "gpt-4.1-nano", "0.10", "0.40"),
    _price("openai", "gpt-4o", "2.50", "10.00"),
    _price("openai", "gpt-4o-mini", "0.15", "0.60"),
    # Anthropic Claude pricing checked May 3, 2026.
    _price("anthropic", "claude-opus-4-1-20250805", "15.00", "75.00"),
    _price("anthropic", "claude-opus-4-1", "15.00", "75.00"),
    _price("anthropic", "claude-opus-4-20250514", "15.00", "75.00"),
    _price("anthropic", "claude-opus-4-0", "15.00", "75.00"),
    _price("anthropic", "claude-sonnet-4-20250514", "3.00", "15.00"),
    _price("anthropic", "claude-sonnet-4-0", "3.00", "15.00"),
    _price("anthropic", "claude-3-7-sonnet-20250219", "3.00", "15.00"),
    _price("anthropic", "claude-3-7-sonnet-latest", "3.00", "15.00"),
    _price("anthropic", "claude-3-5-haiku-20241022", "0.80", "4.00"),
    _price("anthropic", "claude-3-5-haiku-latest", "0.80", "4.00"),
    _price("anthropic", "claude-3-haiku-20240307", "0.25", "1.25"),
)


class PricingCatalog:
    """Lookup table for model pricing with user-provided overrides."""

    def __init__(
        self,
        prices: Iterable[ModelPricing] | None = None,
        *,
        include_defaults: bool = True,
    ) -> None:
        entries = list(DEFAULT_MODEL_PRICES if include_defaults else ())
        if prices is not None:
            entries.extend(prices)
        self._prices = {(entry.provider, entry.model): entry for entry in entries}

    def get(self, provider: str, model: str) -> ModelPricing:
        key = (provider.strip().lower(), model.strip().lower())
        if key not in self._prices:
            raise ModelPricingMissing(
                f"No pricing is configured for provider={provider!r}, model={model!r}. "
                "Pass a custom ModelPricing entry to GuardLoop(pricing=[...]).",
                details={"provider": provider, "model": model},
            )
        return self._prices[key]
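Note: a worked example of the per-million arithmetic and the override hook (hypothetical usage of the classes above; user-supplied entries replace defaults because they are inserted later under the same (provider, model) key).

from decimal import Decimal

from guardloop.pricing import ModelPricing, PricingCatalog

catalog = PricingCatalog()
gpt4o = catalog.get("openai", " GPT-4o ")  # lookups are stripped and lowercased

# 10,000 input tokens at $2.50/M plus 2,000 output tokens at $10.00/M:
# (10000 * 2.50 + 2000 * 10.00) / 1_000_000 = 0.025 + 0.02 = 0.045
assert gpt4o.estimate_cost(input_tokens=10_000, output_tokens=2_000) == Decimal("0.045")

# Overriding a default price for the same provider/model pair.
catalog = PricingCatalog(
    [
        ModelPricing(
            provider="openai",
            model="gpt-4o",
            input_cost_per_million_tokens="3.00",
            output_cost_per_million_tokens="12.00",
        )
    ]
)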
guardloop/providers/anthropic.py
ADDED
@@ -0,0 +1,150 @@
"""Anthropic Messages API wrapper."""

from __future__ import annotations

import inspect
from collections.abc import Awaitable, Mapping
from typing import Any, Protocol, cast

from guardloop.budget import BudgetController
from guardloop.telemetry.conventions import llm_request_attributes, llm_response_attributes
from guardloop.telemetry.tracer import Telemetry
from guardloop.tokenization import estimate_anthropic_tokens


class _MessagesAPI(Protocol):
    def create(self, **kwargs: Any) -> Awaitable[object] | object: ...


class _AnthropicClient(Protocol):
    @property
    def messages(self) -> _MessagesAPI: ...


class WrappedAnthropicClient:
    """Anthropic client facade that currently wraps `messages.create`."""

    def __init__(
        self,
        client: object,
        budget: BudgetController,
        telemetry: Telemetry,
    ) -> None:
        typed_client = cast(_AnthropicClient, client)
        self.messages = WrappedAnthropicMessages(typed_client.messages, budget, telemetry)


class WrappedAnthropicMessages:
    def __init__(
        self,
        messages: _MessagesAPI,
        budget: BudgetController,
        telemetry: Telemetry,
    ) -> None:
        self._messages = messages
        self._budget = budget
        self._telemetry = telemetry

    async def create(self, **kwargs: Any) -> object:
        model = _require_str(kwargs, "model")
        max_tokens = _optional_positive_int(kwargs.get("max_tokens"))
        estimated_input_tokens = estimate_anthropic_tokens(
            {"system": kwargs.get("system"), "messages": kwargs.get("messages")}
        )
        preflight = self._budget.check_llm_call(
            provider="anthropic",
            model=model,
            estimated_input_tokens=estimated_input_tokens,
            reserved_output_tokens=max_tokens,
        )

        with self._telemetry.start_span(
            "llm_call anthropic.messages.create",
            llm_request_attributes(
                provider="anthropic",
                model=model,
                estimated_input_tokens=estimated_input_tokens,
                reserved_output_tokens=preflight.reserved_output_tokens,
                estimated_cost_usd=preflight.estimated_cost_usd,
            ),
        ) as span:
            try:
                maybe_response = self._messages.create(**kwargs)
                response = (
                    await maybe_response if inspect.isawaitable(maybe_response) else maybe_response
                )
                input_tokens, output_tokens = _anthropic_usage_tokens(
                    response,
                    fallback_input_tokens=estimated_input_tokens,
                )
                actual_cost = self._budget.record_llm_call(
                    provider="anthropic",
                    model=model,
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                )
                self._telemetry.set_attributes(
                    span,
                    llm_response_attributes(
                        model=model,
                        input_tokens=input_tokens,
                        output_tokens=output_tokens,
                        cost_usd=actual_cost,
                    ),
                )
                self._telemetry.mark_ok(span)
                return response
            except Exception as exc:
                self._telemetry.record_exception(span, exc)
                raise


def _require_str(kwargs: Mapping[str, Any], key: str) -> str:
    value = kwargs.get(key)
    if not isinstance(value, str) or not value.strip():
        raise ValueError(f"Anthropic messages.create requires a non-empty {key!r}.")
    return value


def _optional_positive_int(value: object) -> int | None:
    if value is None:
        return None
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value if value > 0 else None
    try:
        parsed = int(str(value))
    except ValueError:
        return None
    return parsed if parsed > 0 else None


def _get(obj: object, key: str, default: object = None) -> object:
    if isinstance(obj, Mapping):
        return cast(Mapping[str, object], obj).get(key, default)
    return getattr(obj, key, default)


def _anthropic_usage_tokens(response: object, *, fallback_input_tokens: int) -> tuple[int, int]:
    usage = _get(response, "usage")
    if usage is None:
        return fallback_input_tokens, 0
    input_tokens = _as_int(
        _get(usage, "input_tokens", fallback_input_tokens),
        fallback_input_tokens,
    )
    cache_creation = _as_int(_get(usage, "cache_creation_input_tokens", 0), 0)
    cache_read = _as_int(_get(usage, "cache_read_input_tokens", 0), 0)
    output_tokens = _as_int(_get(usage, "output_tokens", 0), 0)
    return input_tokens + cache_creation + cache_read, output_tokens


def _as_int(value: object, default: int) -> int:
    if isinstance(value, bool) or value is None:
        return default
    if isinstance(value, int):
        return value
    if isinstance(value, float | str):
        return int(value)
    return default
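Note: a minimal wiring sketch, not part of the wheel. It assumes the upstream `anthropic` SDK's `AsyncAnthropic` client, though any object exposing `messages.create` satisfies the `_AnthropicClient` protocol; the `BudgetController` and `Telemetry` constructor calls mirror how `guardloop/runtime.py` builds them.

# Hypothetical wiring sketch (run inside an async function).
from anthropic import AsyncAnthropic  # assumed upstream SDK client

from guardloop.budget import BudgetController
from guardloop.models import BudgetConfig, TelemetryConfig
from guardloop.pricing import PricingCatalog
from guardloop.providers.anthropic import WrappedAnthropicClient
from guardloop.telemetry.tracer import Telemetry

budget = BudgetController(BudgetConfig(cost_limit_usd="1.00"), PricingCatalog())
telemetry = Telemetry(TelemetryConfig(enabled=False), tracer=None)
client = WrappedAnthropicClient(AsyncAnthropic(), budget, telemetry)

# Preflight budget check, span, and cost recording all happen inside create().
response = await client.messages.create(
    model="claude-3-5-haiku-latest",
    max_tokens=256,
    messages=[{"role": "user", "content": "Summarize this diff."}],
)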
guardloop/providers/openai.py
ADDED
@@ -0,0 +1,138 @@
"""OpenAI Responses API wrapper."""

from __future__ import annotations

import inspect
from collections.abc import Awaitable, Mapping
from typing import Any, Protocol, cast

from guardloop.budget import BudgetController
from guardloop.telemetry.conventions import llm_request_attributes, llm_response_attributes
from guardloop.telemetry.tracer import Telemetry
from guardloop.tokenization import estimate_openai_tokens


class _ResponsesAPI(Protocol):
    def create(self, **kwargs: Any) -> Awaitable[object] | object: ...


class _OpenAIClient(Protocol):
    @property
    def responses(self) -> _ResponsesAPI: ...


class WrappedOpenAIClient:
    """OpenAI client facade that currently wraps `responses.create`."""

    def __init__(self, client: object, budget: BudgetController, telemetry: Telemetry) -> None:
        typed_client = cast(_OpenAIClient, client)
        self.responses = WrappedOpenAIResponses(typed_client.responses, budget, telemetry)


class WrappedOpenAIResponses:
    def __init__(
        self,
        responses: _ResponsesAPI,
        budget: BudgetController,
        telemetry: Telemetry,
    ) -> None:
        self._responses = responses
        self._budget = budget
        self._telemetry = telemetry

    async def create(self, **kwargs: Any) -> object:
        model = _require_str(kwargs, "model")
        max_output_tokens = _optional_positive_int(kwargs.get("max_output_tokens"))
        estimated_input_tokens = estimate_openai_tokens(model, kwargs.get("input"))
        preflight = self._budget.check_llm_call(
            provider="openai",
            model=model,
            estimated_input_tokens=estimated_input_tokens,
            reserved_output_tokens=max_output_tokens,
        )

        with self._telemetry.start_span(
            "llm_call openai.responses.create",
            llm_request_attributes(
                provider="openai",
                model=model,
                estimated_input_tokens=estimated_input_tokens,
                reserved_output_tokens=preflight.reserved_output_tokens,
                estimated_cost_usd=preflight.estimated_cost_usd,
            ),
        ) as span:
            try:
                maybe_response = self._responses.create(**kwargs)
                response = (
                    await maybe_response if inspect.isawaitable(maybe_response) else maybe_response
                )
                input_tokens, output_tokens = _openai_usage_tokens(
                    response,
                    fallback_input_tokens=estimated_input_tokens,
                )
                actual_cost = self._budget.record_llm_call(
                    provider="openai",
                    model=model,
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                )
                self._telemetry.set_attributes(
                    span,
                    llm_response_attributes(
                        model=model,
                        input_tokens=input_tokens,
                        output_tokens=output_tokens,
                        cost_usd=actual_cost,
                    ),
                )
                self._telemetry.mark_ok(span)
                return response
            except Exception as exc:
                self._telemetry.record_exception(span, exc)
                raise


def _require_str(kwargs: Mapping[str, Any], key: str) -> str:
    value = kwargs.get(key)
    if not isinstance(value, str) or not value.strip():
        raise ValueError(f"OpenAI responses.create requires a non-empty {key!r}.")
    return value


def _optional_positive_int(value: object) -> int | None:
    if value is None:
        return None
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value if value > 0 else None
    try:
        parsed = int(str(value))
    except ValueError:
        return None
    return parsed if parsed > 0 else None


def _get(obj: object, key: str, default: object = None) -> object:
    if isinstance(obj, Mapping):
        return cast(Mapping[str, object], obj).get(key, default)
    return getattr(obj, key, default)


def _openai_usage_tokens(response: object, *, fallback_input_tokens: int) -> tuple[int, int]:
    usage = _get(response, "usage")
    if usage is None:
        return fallback_input_tokens, 0
    input_tokens = _get(usage, "input_tokens", _get(usage, "prompt_tokens", fallback_input_tokens))
    output_tokens = _get(usage, "output_tokens", _get(usage, "completion_tokens", 0))
    return _as_int(input_tokens, fallback_input_tokens), _as_int(output_tokens, 0)


def _as_int(value: object, default: int) -> int:
    if isinstance(value, bool) or value is None:
        return default
    if isinstance(value, int):
        return value
    if isinstance(value, float | str):
        return int(value)
    return default
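Note: because `_get` falls back from `Mapping` access to `getattr`, the wrapper can be exercised offline with a plain-dict stub. `FakeResponses` below is hypothetical test scaffolding, not part of the package; Chat-Completions-style usage keys (`prompt_tokens`/`completion_tokens`) would also be accepted via the fallbacks in `_openai_usage_tokens`.

# Hypothetical offline test sketch (run inside an async function).
from guardloop.budget import BudgetController
from guardloop.models import BudgetConfig, TelemetryConfig
from guardloop.pricing import PricingCatalog
from guardloop.providers.openai import WrappedOpenAIResponses
from guardloop.telemetry.tracer import Telemetry


class FakeResponses:
    def create(self, **kwargs):
        # A sync return is fine: create() only awaits when the result is awaitable.
        return {"output_text": "ok", "usage": {"input_tokens": 12, "output_tokens": 5}}


budget = BudgetController(BudgetConfig(), PricingCatalog())
telemetry = Telemetry(TelemetryConfig(enabled=False), tracer=None)
responses = WrappedOpenAIResponses(FakeResponses(), budget, telemetry)

result = await responses.create(model="gpt-4o-mini", input="ping")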
guardloop/py.typed
ADDED
File without changes
guardloop/runtime.py
ADDED
@@ -0,0 +1,190 @@
"""Main GuardLoop entry point."""

from __future__ import annotations

import asyncio
import inspect
from collections.abc import Awaitable, Callable, Iterable
from typing import Any

from opentelemetry.trace import Span, Tracer

from guardloop.budget import BudgetController
from guardloop.circuit_breaker import (
    CircuitBreakerConfig,
    CircuitBreakerRegistry,
    CircuitBreakerSnapshot,
)
from guardloop.context import RunContext
from guardloop.exceptions import GuardLoopError
from guardloop.models import BudgetConfig, RunResult, TelemetryConfig
from guardloop.pricing import ModelPricing, PricingCatalog
from guardloop.telemetry.conventions import (
    GUARDLOOP_TERMINATED_REASON,
    run_attributes,
)
from guardloop.telemetry.tracer import Telemetry

AgentCallable = Callable[..., Awaitable[object] | object]


class GuardLoop:
    """Execution wrapper that enforces runtime guardrails."""

    def __init__(
        self,
        *,
        budget: BudgetConfig | None = None,
        telemetry: TelemetryConfig | None = None,
        circuit_breakers: CircuitBreakerConfig | None = None,
        pricing: Iterable[ModelPricing] | None = None,
        include_default_pricing: bool = True,
        openai_client: Any | None = None,
        anthropic_client: Any | None = None,
        tracer: Tracer | None = None,
    ) -> None:
        self.budget_config = budget or BudgetConfig()
        self.telemetry_config = telemetry or TelemetryConfig()
        self.pricing_catalog = PricingCatalog(pricing, include_defaults=include_default_pricing)
        self._circuit_breakers = CircuitBreakerRegistry(circuit_breakers)
        self._openai_client = openai_client
        self._anthropic_client = anthropic_client
        self._telemetry = Telemetry(self.telemetry_config, tracer=tracer)

    def circuit_breaker_snapshots(self) -> dict[str, CircuitBreakerSnapshot]:
        """Return current per-tool circuit breaker state."""

        return self._circuit_breakers.snapshots()

    def reset_circuit_breakers(self, tool_name: str | None = None) -> None:
        """Reset all circuit breakers or one named tool breaker."""

        self._circuit_breakers.reset(tool_name)

    async def run(self, agent: AgentCallable, *args: object, **kwargs: object) -> RunResult:
        budget = BudgetController(self.budget_config, self.pricing_catalog)
        ctx = RunContext(
            budget=budget,
            telemetry=self._telemetry,
            circuit_breakers=self._circuit_breakers,
            openai_client=self._openai_client,
            anthropic_client=self._anthropic_client,
        )

        with self._telemetry.start_span("agent_run", run_attributes()) as span:
            trace_id = self._telemetry.trace_id(span)
            try:
                result = await self._run_with_optional_timeout(agent, ctx, *args, **kwargs)
                self._telemetry.mark_ok(span)
                return self._result(
                    budget=budget,
                    span=span,
                    trace_id=trace_id,
                    success=True,
                    output=None if result is None else str(result),
                )
            except TimeoutError as exc:
                self._telemetry.record_exception(span, exc)
                span.set_attribute(GUARDLOOP_TERMINATED_REASON, "timeout")
                return self._result(
                    budget=budget,
                    span=span,
                    trace_id=trace_id,
                    success=False,
                    terminated_reason="timeout",
                    error_type=type(exc).__name__,
                    error_message=f"Run exceeded time limit of "
                    f"{self.budget_config.time_limit_seconds:.3f}s.",
                )
            except GuardLoopError as exc:
                self._telemetry.record_exception(span, exc)
                span.set_attribute(GUARDLOOP_TERMINATED_REASON, exc.terminated_reason)
                return self._result(
                    budget=budget,
                    span=span,
                    trace_id=trace_id,
                    success=False,
                    terminated_reason=exc.terminated_reason,
                    error_type=type(exc).__name__,
                    error_message=str(exc),
                    metadata={"details": _json_safe_details(exc.details)},
                )
            except Exception as exc:
                self._telemetry.record_exception(span, exc)
                span.set_attribute(GUARDLOOP_TERMINATED_REASON, "error")
                return self._result(
                    budget=budget,
                    span=span,
                    trace_id=trace_id,
                    success=False,
                    terminated_reason="error",
                    error_type=type(exc).__name__,
                    error_message=str(exc),
                )

    async def _run_with_optional_timeout(
        self,
        agent: AgentCallable,
        ctx: RunContext,
        *args: object,
        **kwargs: object,
    ) -> object:
        if self.budget_config.time_limit_seconds is None:
            return await _call_agent(agent, ctx, *args, **kwargs)
        async with asyncio.timeout(self.budget_config.time_limit_seconds):
            return await _call_agent(agent, ctx, *args, **kwargs)

    @staticmethod
    def _result(
        *,
        budget: BudgetController,
        span: Span,
        trace_id: str | None,
        success: bool,
        output: str | None = None,
        terminated_reason: str | None = None,
        error_type: str | None = None,
        error_message: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> RunResult:
        return RunResult(
            output=output,
            success=success,
            cost_usd=budget.cost_usd,
            estimated_cost_usd=budget.estimated_cost_usd,
            tokens_used=budget.tokens_used,
            input_tokens=budget.input_tokens,
            output_tokens=budget.output_tokens,
            duration_seconds=budget.duration_seconds,
            tool_calls=budget.tool_calls,
            trace_id=trace_id or Telemetry.trace_id(span),
            terminated_reason=terminated_reason,
            error_type=error_type,
            error_message=error_message,
            metadata=metadata or {},
        )


async def _call_agent(
    agent: AgentCallable,
    ctx: RunContext,
    *args: object,
    **kwargs: object,
) -> object:
    result = agent(ctx, *args, **kwargs)
    if inspect.isawaitable(result):
        return await result
    return result


JsonSafeDetail = str | int | float | bool | None


def _json_safe_details(details: dict[str, Any]) -> dict[str, JsonSafeDetail]:
    return {key: _json_safe_detail(value) for key, value in details.items() if value is not None}


def _json_safe_detail(value: Any) -> JsonSafeDetail:
    if isinstance(value, str | int | float | bool):
        return value
    return str(value)
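Note: an end-to-end sketch (hypothetical usage). Per `_call_agent`, the agent receives the `RunContext` as its first argument; `asyncio.timeout` requires Python 3.11+.

import asyncio

from guardloop.models import BudgetConfig
from guardloop.runtime import GuardLoop


async def agent(ctx, question: str) -> str:
    # A real agent would call models and tools through the wrapped clients on ctx.
    return f"answered: {question}"


async def main() -> None:
    loop = GuardLoop(budget=BudgetConfig(time_limit_seconds=30.0, tool_call_limit=10))
    result = await loop.run(agent, "what does this wheel do?")
    print(result.success, result.terminated_reason, result.cost_usd)


asyncio.run(main())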