bedrock-kit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bedrock_kit/__init__.py +35 -0
- bedrock_kit/client.py +254 -0
- bedrock_kit/cost.py +236 -0
- bedrock_kit/exceptions.py +27 -0
- bedrock_kit/retry.py +120 -0
- bedrock_kit/schema.py +156 -0
- bedrock_kit-0.1.0.dist-info/METADATA +188 -0
- bedrock_kit-0.1.0.dist-info/RECORD +10 -0
- bedrock_kit-0.1.0.dist-info/WHEEL +4 -0
- bedrock_kit-0.1.0.dist-info/licenses/LICENSE +21 -0
bedrock_kit/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""bedrock-kit - small, opinionated AWS Bedrock client wrapper.
|
|
2
|
+
|
|
3
|
+
Adds the things every Bedrock production team rebuilds: adaptive throttle,
|
|
4
|
+
per-call cost tracking with cache-aware accounting, and structured-output
|
|
5
|
+
parse-and-repair. Single-cloud, single-purpose, < 3000 LOC.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from bedrock_kit.client import BedrockClient, BedrockResponse, Usage
|
|
9
|
+
from bedrock_kit.cost import CostEntry, CostLedger, Pricing
|
|
10
|
+
from bedrock_kit.exceptions import (
|
|
11
|
+
BedrockKitError,
|
|
12
|
+
JsonParseError,
|
|
13
|
+
PricingNotFoundError,
|
|
14
|
+
ThrottleExhausted,
|
|
15
|
+
)
|
|
16
|
+
from bedrock_kit.retry import AdaptiveThrottle
|
|
17
|
+
from bedrock_kit.schema import JsonSchema
|
|
18
|
+
|
|
19
|
+
__version__ = "0.1.0"
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"AdaptiveThrottle",
|
|
23
|
+
"BedrockClient",
|
|
24
|
+
"BedrockKitError",
|
|
25
|
+
"BedrockResponse",
|
|
26
|
+
"CostEntry",
|
|
27
|
+
"CostLedger",
|
|
28
|
+
"JsonParseError",
|
|
29
|
+
"JsonSchema",
|
|
30
|
+
"Pricing",
|
|
31
|
+
"PricingNotFoundError",
|
|
32
|
+
"ThrottleExhausted",
|
|
33
|
+
"Usage",
|
|
34
|
+
"__version__",
|
|
35
|
+
]
|
bedrock_kit/client.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
"""BedrockClient - the user-facing wrapper.
|
|
2
|
+
|
|
3
|
+
Wraps a boto3 bedrock-runtime client (or anything quacking like one) and
|
|
4
|
+
adds adaptive throttle, cost tracking, and JSON-schema parsing-and-repair.
|
|
5
|
+
Uses the Bedrock Converse API (`converse`) which normalizes Anthropic /
|
|
6
|
+
Mistral / Cohere / Llama into one shape.
|
|
7
|
+
|
|
8
|
+
We keep boto3 as a runtime dependency and let users inject their own client
|
|
9
|
+
for testing.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from collections.abc import Sequence
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Any, TypeVar
|
|
17
|
+
|
|
18
|
+
from bedrock_kit.cost import CostEntry, CostLedger
|
|
19
|
+
from bedrock_kit.exceptions import JsonParseError
|
|
20
|
+
from bedrock_kit.retry import AdaptiveThrottle
|
|
21
|
+
from bedrock_kit.schema import JsonSchema
|
|
22
|
+
|
|
23
|
+
T = TypeVar("T")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class Usage:
    """Token counts reported by a single Converse call."""

    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int

    @classmethod
    def from_converse(cls, raw: dict[str, Any]) -> Usage:
        """Build a Usage from a raw Converse response dict; missing counts become 0."""
        counts = raw.get("usage", {}) or {}

        def grab(key: str) -> int:
            return int(counts.get(key, 0))

        # Bedrock Converse reports prompt-cache traffic as
        # cacheReadInputTokens / cacheWriteInputTokens next to the plain counts.
        return cls(
            input_tokens=grab("inputTokens"),
            output_tokens=grab("outputTokens"),
            cache_read_tokens=grab("cacheReadInputTokens"),
            cache_write_tokens=grab("cacheWriteInputTokens"),
        )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class BedrockResponse:
    """One completed Converse call: extracted text, usage, and cost accounting."""

    # Concatenated text of all text content blocks in the reply.
    text: str
    # Token counts as reported by Converse (see Usage.from_converse).
    usage: Usage
    model_id: str
    # Converse stopReason, stringified; empty string when absent from the raw dict.
    stop_reason: str
    # USD cost the CostLedger computed for this single call.
    cost_usd: float
    # True when the ledger saw any cache-read tokens on this call.
    cache_hit: bool
    # Set by BedrockClient.invoke when a response_schema was supplied; else None.
    parsed: Any = None
    # The untouched Converse response dict.
    raw: dict[str, Any] = field(default_factory=dict)
    # Full ledger entry recorded for this call, if any.
    cost_entry: CostEntry | None = None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _extract_text_from_converse(raw: dict[str, Any]) -> str:
|
|
59
|
+
output = raw.get("output", {}) or {}
|
|
60
|
+
message = output.get("message", {}) or {}
|
|
61
|
+
parts = []
|
|
62
|
+
for block in message.get("content", []) or []:
|
|
63
|
+
text = block.get("text")
|
|
64
|
+
if text:
|
|
65
|
+
parts.append(text)
|
|
66
|
+
return "".join(parts)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class _BotoClientFactory:
    """Builds the default boto3 bedrock-runtime client; kept separate so boto3 stays optional."""

    @staticmethod
    def make(region: str, **client_kwargs: Any) -> Any:
        """Return a boto3 bedrock-runtime client for `region`.

        boto3 is imported lazily so users who inject their own `client=` never
        need it installed; `client_kwargs` are forwarded to boto3.client().
        Raises RuntimeError (chained from ImportError) when boto3 is absent.
        """
        try:
            import boto3  # type: ignore
        except ImportError as e:
            raise RuntimeError(
                "boto3 is required for the default BedrockClient. "
                "Install with `pip install bedrock-kit[boto]` or pass `client=` explicitly."
            ) from e
        return boto3.client("bedrock-runtime", region_name=region, **client_kwargs)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class BedrockClient:
    """Wrapper around bedrock-runtime.converse with cost + retry + schema parsing.

    client = BedrockClient(region="us-east-1")
    resp = client.invoke(
        model_id="anthropic.claude-sonnet-4-5",
        messages=[{"role": "user", "content": "hello"}],
        system="You are helpful",
        max_tokens=512,
    )
    resp.text
    resp.cost_usd
    resp.cache_hit

    Inject a fake for testing:

    client = BedrockClient(client=fake_client)
    """

    def __init__(
        self,
        region: str | None = None,
        *,
        client: Any | None = None,
        retry: AdaptiveThrottle | None = None,
        cost_ledger: CostLedger | None = None,
        boto_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Build a client.

        Args:
            region: AWS region for the default boto3 client; required unless
                `client` is injected.
            client: any object exposing a Converse-compatible `.converse(...)`;
                when given, boto3 is never touched.
            retry: backoff policy; a fresh AdaptiveThrottle by default.
            cost_ledger: ledger that records every call; fresh by default.
            boto_kwargs: extra kwargs forwarded to boto3.client(...).

        Raises:
            ValueError: neither `region` nor `client` was provided.
        """
        if client is None:
            if region is None:
                raise ValueError("either `region` or `client` must be provided")
            client = _BotoClientFactory.make(region, **(boto_kwargs or {}))
        self._client = client
        # Fresh defaults per instance so two clients never share a ledger by accident.
        self._retry = retry or AdaptiveThrottle()
        self._ledger = cost_ledger or CostLedger()

    @property
    def cost_ledger(self) -> CostLedger:
        """The ledger every invoke() records into (shared if one was injected)."""
        return self._ledger

    def invoke(
        self,
        model_id: str,
        messages: Sequence[dict[str, Any]],
        *,
        system: str | list[dict[str, Any]] | None = None,
        max_tokens: int = 1024,
        temperature: float | None = None,
        top_p: float | None = None,
        stop_sequences: list[str] | None = None,
        response_schema: JsonSchema[T] | None = None,
        additional_model_fields: dict[str, Any] | None = None,
    ) -> BedrockResponse:
        """One Converse round-trip with retry, cost recording, and optional schema parsing.

        `messages` entries may carry a plain-string `content`; _build_body wraps
        it into Converse content blocks. When `response_schema` is given, the
        reply text is parsed/validated and failed parses trigger LLM repair
        turns — each extra turn goes through the same retry policy and is also
        recorded in the cost ledger.

        Raises:
            ThrottleExhausted: retries exhausted (from AdaptiveThrottle.call).
            JsonParseError: response_schema given and repair attempts exhausted.
        """
        body = self._build_body(
            messages=messages,
            system=system,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stop_sequences=stop_sequences,
            additional_model_fields=additional_model_fields,
        )

        raw = self._retry.call(self._client.converse, modelId=model_id, **body)
        response = self._build_response(model_id, raw)

        if response_schema is not None:
            response = self._apply_schema(
                response=response,
                model_id=model_id,
                schema=response_schema,
                body=body,
            )

        return response

    def _apply_schema(
        self,
        response: BedrockResponse,
        model_id: str,
        schema: JsonSchema[T],
        body: dict[str, Any],
    ) -> BedrockResponse:
        """Parse response.text with `schema`, asking the model to repair on failure.

        Makes up to schema.max_repair_attempts extra Converse calls.
        NOTE(review): each repair turn is rebuilt from the ORIGINAL request
        messages plus only the latest bad output — earlier repair exchanges are
        not carried forward; confirm that context strategy is intended.
        """
        last_err: Exception | None = None
        for attempt in range(schema.max_repair_attempts + 1):
            try:
                response.parsed = schema.parse(response.text)
                return response
            except JsonParseError as e:
                last_err = e
                # Out of repair budget: surface the parse error as-is.
                if attempt == schema.max_repair_attempts:
                    raise
                # Append a follow-up turn asking the model to fix the output
                followup_messages = list(body["messages"]) + [
                    {"role": "assistant", "content": [{"text": response.text}]},
                    {
                        "role": "user",
                        "content": [{"text": schema.repair_prompt(response.text, e)}],
                    },
                ]
                # Shallow copy is enough: only "messages" is replaced.
                retry_body = dict(body)
                retry_body["messages"] = followup_messages
                raw = self._retry.call(self._client.converse, modelId=model_id, **retry_body)
                # _build_response also records this repair turn in the cost ledger.
                response = self._build_response(model_id, raw)
        # unreachable: the loop either returns or raises
        raise JsonParseError(
            f"schema retries exhausted: {last_err}",
            raw_text=response.text,
            attempts=schema.max_repair_attempts + 1,
        )

    def _build_body(
        self,
        messages: Sequence[dict[str, Any]],
        system: str | list[dict[str, Any]] | None,
        max_tokens: int,
        temperature: float | None,
        top_p: float | None,
        stop_sequences: list[str] | None,
        additional_model_fields: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Translate invoke() kwargs into the Converse request shape (without modelId)."""
        # Normalize messages: callers may pass {"role":..., "content": "string"}
        # but Converse expects content as a list of content blocks.
        norm_messages: list[dict[str, Any]] = []
        for m in messages:
            content = m.get("content")
            if isinstance(content, str):
                norm_messages.append({"role": m["role"], "content": [{"text": content}]})
            else:
                norm_messages.append(dict(m))

        # Only include optional knobs the caller actually set.
        inference_config: dict[str, Any] = {"maxTokens": max_tokens}
        if temperature is not None:
            inference_config["temperature"] = temperature
        if top_p is not None:
            inference_config["topP"] = top_p
        if stop_sequences:
            inference_config["stopSequences"] = list(stop_sequences)

        body: dict[str, Any] = {
            "messages": norm_messages,
            "inferenceConfig": inference_config,
        }
        if system is not None:
            # A bare string becomes a single system text block.
            if isinstance(system, str):
                body["system"] = [{"text": system}]
            else:
                body["system"] = list(system)
        if additional_model_fields:
            body["additionalModelRequestFields"] = additional_model_fields
        return body

    def _build_response(self, model_id: str, raw: dict[str, Any]) -> BedrockResponse:
        """Convert a raw Converse dict into a BedrockResponse.

        Side effect: appends exactly one CostEntry to the ledger per call.
        """
        usage = Usage.from_converse(raw)
        cost_entry = self._ledger.record(
            model_id=model_id,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_read_tokens=usage.cache_read_tokens,
            cache_write_tokens=usage.cache_write_tokens,
        )
        text = _extract_text_from_converse(raw)
        return BedrockResponse(
            text=text,
            usage=usage,
            model_id=model_id,
            stop_reason=str(raw.get("stopReason", "")),
            cost_usd=cost_entry.cost_usd,
            cache_hit=cost_entry.cache_hit,
            parsed=None,
            raw=raw,
            cost_entry=cost_entry,
        )
|
bedrock_kit/cost.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""CostLedger - per-call cost accounting with cache-aware token math.
|
|
2
|
+
|
|
3
|
+
Bedrock returns four token counts on Anthropic models:
|
|
4
|
+
* input_tokens
|
|
5
|
+
* output_tokens
|
|
6
|
+
* cache_creation_input_tokens (when cache_control is set on a block)
|
|
7
|
+
* cache_read_input_tokens (when a previous request hit the cache)
|
|
8
|
+
|
|
9
|
+
Each has its own price. cache_read is typically 10% of input; cache_creation
|
|
10
|
+
is 1.25x input. We compute total cost from all four. Pricing is in USD per
|
|
11
|
+
1M tokens, matching how AWS publishes the rates.
|
|
12
|
+
|
|
13
|
+
The default pricing table covers the popular Anthropic models on Bedrock as
|
|
14
|
+
of 2026-Q2. Verify against AWS docs - rates change. You can pass `pricing=`
|
|
15
|
+
to override per-model.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from collections.abc import Iterable
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from bedrock_kit.exceptions import PricingNotFoundError
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class Pricing:
    """USD per 1,000,000 tokens. cache_* default to standard Anthropic ratios."""

    input: float
    output: float
    cache_read: float | None = None
    cache_write: float | None = None

    def cost_for(
        self,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> float:
        """Return USD cost for the four token counts.

        When explicit cache rates are absent, fall back to the standard
        Anthropic ratios: reads at 10% of input, writes at 125% of input.
        """
        read_rate = self.input * 0.1 if self.cache_read is None else self.cache_read
        write_rate = self.input * 1.25 if self.cache_write is None else self.cache_write
        per_million = (
            input_tokens * self.input
            + output_tokens * self.output
            + cache_read_tokens * read_rate
            + cache_write_tokens * write_rate
        )
        return per_million / 1_000_000.0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Default pricing for popular Bedrock models (USD per 1M tokens).
# Source: aws.amazon.com/bedrock/pricing as of 2026-Q2. VERIFY before billing on these.
# Both the bare id and the ":0"-versioned id are listed for the current models,
# because Converse callers use either form; CostLedger._lookup also strips ARN
# and cross-region prefixes before consulting this table.
DEFAULT_PRICING: dict[str, Pricing] = {
    "anthropic.claude-sonnet-4-5": Pricing(
        input=3.0, output=15.0, cache_read=0.3, cache_write=3.75
    ),
    "anthropic.claude-sonnet-4-5-v1:0": Pricing(
        input=3.0, output=15.0, cache_read=0.3, cache_write=3.75
    ),
    "anthropic.claude-opus-4-7": Pricing(
        input=15.0, output=75.0, cache_read=1.5, cache_write=18.75
    ),
    "anthropic.claude-opus-4-7-v1:0": Pricing(
        input=15.0, output=75.0, cache_read=1.5, cache_write=18.75
    ),
    "anthropic.claude-haiku-4-5": Pricing(
        input=1.0, output=5.0, cache_read=0.1, cache_write=1.25
    ),
    "anthropic.claude-3-5-sonnet-20241022-v2:0": Pricing(
        input=3.0, output=15.0, cache_read=0.3, cache_write=3.75
    ),
    "anthropic.claude-3-5-haiku-20241022-v1:0": Pricing(
        input=0.8, output=4.0, cache_read=0.08, cache_write=1.0
    ),
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _normalize_model_id(model_id: str) -> str:
|
|
83
|
+
"""Strip arn:aws:bedrock:region::foundation-model/ prefix and inference-profile prefix."""
|
|
84
|
+
if model_id.startswith("arn:aws:bedrock:"):
|
|
85
|
+
# arn:...:foundation-model/anthropic.claude-... or .../inference-profile/...
|
|
86
|
+
tail = model_id.rsplit("/", 1)[-1]
|
|
87
|
+
return tail
|
|
88
|
+
if model_id.startswith("us.") or model_id.startswith("eu.") or model_id.startswith("apac."):
|
|
89
|
+
# cross-region inference profile prefix
|
|
90
|
+
return model_id.split(".", 1)[1]
|
|
91
|
+
return model_id
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass(frozen=True)
class CostEntry:
    """One invocation's token counts, computed USD cost, and cache status."""

    timestamp: datetime
    model_id: str
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    cost_usd: float
    cache_hit: bool
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Serialize for logging/JSON; the timestamp becomes an ISO-8601 string."""
        payload: dict[str, Any] = {"timestamp": self.timestamp.isoformat()}
        # Remaining fields pass through unchanged, in declaration order.
        for name in (
            "model_id",
            "input_tokens",
            "output_tokens",
            "cache_read_tokens",
            "cache_write_tokens",
            "cost_usd",
            "cache_hit",
            "metadata",
        ):
            payload[name] = getattr(self, name)
        return payload
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class CostLedger:
    """Mutable accumulator of CostEntry records, one per model invocation.

    Merges the bundled DEFAULT_PRICING with any caller overrides and can
    summarize spend:

        ledger = CostLedger()
        client = BedrockClient(..., cost_ledger=ledger)
        # ... after some invokes:
        ledger.total_usd
        ledger.by_model   # {"anthropic.claude-sonnet-4-5": 0.0234, ...}
        ledger.entries    # snapshot list[CostEntry]
        ledger.to_dict()
        ledger.to_pandas()  # requires `pandas` extra

    With strict=True an unknown model raises PricingNotFoundError instead of
    silently costing 0.0.
    """

    def __init__(
        self,
        pricing: dict[str, Pricing] | None = None,
        strict: bool = False,
    ) -> None:
        # Caller-supplied rates win over the bundled defaults.
        self._pricing = {**DEFAULT_PRICING, **(pricing or {})}
        self.strict = strict
        self._entries: list[CostEntry] = []

    def record(
        self,
        model_id: str,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
        metadata: dict[str, Any] | None = None,
    ) -> CostEntry:
        """Price one call, append it to the ledger, and return the new entry."""
        usd = self.compute_cost(
            model_id, input_tokens, output_tokens, cache_read_tokens, cache_write_tokens
        )
        entry = CostEntry(
            timestamp=datetime.now(timezone.utc),
            model_id=model_id,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
            cost_usd=usd,
            cache_hit=cache_read_tokens > 0,  # any cache read counts as a hit
            metadata=metadata or {},
        )
        self._entries.append(entry)
        return entry

    def compute_cost(
        self,
        model_id: str,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> float:
        """Return USD for the given counts; 0.0 (or raise if strict) when pricing is unknown."""
        rates = self._lookup(model_id)
        if rates is None:
            if self.strict:
                raise PricingNotFoundError(f"no pricing for {model_id!r}")
            return 0.0
        return rates.cost_for(
            input_tokens, output_tokens, cache_read_tokens, cache_write_tokens
        )

    def _lookup(self, model_id: str) -> Pricing | None:
        """Exact-id match first, then retry with ARN/region prefixes stripped."""
        try:
            return self._pricing[model_id]
        except KeyError:
            return self._pricing.get(_normalize_model_id(model_id))

    @property
    def entries(self) -> list[CostEntry]:
        # Defensive copy: mutating the returned list does not touch the ledger.
        return self._entries.copy()

    @property
    def total_usd(self) -> float:
        return sum(entry.cost_usd for entry in self._entries)

    @property
    def by_model(self) -> dict[str, float]:
        totals: dict[str, float] = {}
        for entry in self._entries:
            totals[entry.model_id] = totals.get(entry.model_id, 0.0) + entry.cost_usd
        return totals

    def reset(self) -> None:
        """Drop every recorded entry."""
        self._entries.clear()

    def extend(self, entries: Iterable[CostEntry]) -> None:
        """Bulk-append pre-built entries (e.g. merged from another ledger)."""
        self._entries.extend(entries)

    def to_dict(self) -> dict[str, Any]:
        """JSON-friendly summary: totals plus every entry serialized."""
        return {
            "total_usd": self.total_usd,
            "by_model": self.by_model,
            "n_entries": len(self._entries),
            "entries": [entry.to_dict() for entry in self._entries],
        }

    def to_pandas(self):
        """Entries as a DataFrame; an empty ledger yields the column skeleton."""
        import pandas as pd

        if self._entries:
            return pd.DataFrame([entry.to_dict() for entry in self._entries])
        return pd.DataFrame(
            columns=[
                "timestamp",
                "model_id",
                "input_tokens",
                "output_tokens",
                "cache_read_tokens",
                "cache_write_tokens",
                "cost_usd",
                "cache_hit",
            ]
        )
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""bedrock-kit exception hierarchy."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BedrockKitError(Exception):
    """Base for all bedrock-kit errors; catch this to handle anything the kit raises."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ThrottleExhausted(BedrockKitError):
    """Raised when AdaptiveThrottle gives up after max_attempts.

    Attributes:
        attempts: how many calls were made before giving up.
        last_exc: the final exception that triggered the last retry.
    """

    def __init__(self, attempts: int, last_exc: BaseException):
        message = f"throttle exhausted after {attempts} attempts: {last_exc}"
        super().__init__(message)
        self.attempts = attempts
        self.last_exc = last_exc
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PricingNotFoundError(BedrockKitError):
    """Raised when CostLedger has no pricing for a model and strict=True.

    With strict=False the ledger records the call at cost 0.0 instead.
    """
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class JsonParseError(BedrockKitError):
    """Raised when JsonSchema cannot parse/validate a response after retries.

    Attributes:
        raw_text: the model output that failed to parse.
        attempts: how many parse candidates/turns were tried.
    """

    def __init__(self, message: str, raw_text: str, attempts: int):
        super().__init__(f"{message} (after {attempts} attempts)")
        self.raw_text = raw_text
        self.attempts = attempts
|
bedrock_kit/retry.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""AdaptiveThrottle - exponential backoff with full jitter for Bedrock throttling.
|
|
2
|
+
|
|
3
|
+
AWS recommends "full jitter" for throttle retries (delay = uniform(0, base * 2^n)).
|
|
4
|
+
This avoids the thundering-herd that "decorrelated jitter" still allows when
|
|
5
|
+
many workers retry against the same throttled region.
|
|
6
|
+
|
|
7
|
+
We retry on:
|
|
8
|
+
* boto3 ClientError with ErrorCode in THROTTLE_CODES
|
|
9
|
+
* any exception in `also_retry` (caller-supplied)
|
|
10
|
+
|
|
11
|
+
We do NOT retry on validation errors, auth errors, or model-not-found - those
|
|
12
|
+
are bugs to fix, not transient failures.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
import random
|
|
19
|
+
import time
|
|
20
|
+
from collections.abc import Callable
|
|
21
|
+
from typing import Any, TypeVar
|
|
22
|
+
|
|
23
|
+
from bedrock_kit.exceptions import ThrottleExhausted
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
T = TypeVar("T")
|
|
28
|
+
|
|
29
|
+
# Bedrock + STS error codes that mean "back off and retry"
# (matched against ClientError.response["Error"]["Code"] by _error_code).
THROTTLE_CODES: frozenset[str] = frozenset({
    "ThrottlingException",
    "Throttling",
    "TooManyRequestsException",
    "RequestLimitExceeded",
    "ServiceUnavailableException",
    "ServiceUnavailable",
    "ProvisionedThroughputExceededException",
    "ModelTimeoutException",  # model took too long; treated as transient here
})
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _error_code(exc: BaseException) -> str | None:
|
|
43
|
+
"""Extract AWS error code from a botocore ClientError without importing botocore."""
|
|
44
|
+
response = getattr(exc, "response", None)
|
|
45
|
+
if isinstance(response, dict):
|
|
46
|
+
code = response.get("Error", {}).get("Code")
|
|
47
|
+
if isinstance(code, str):
|
|
48
|
+
return code
|
|
49
|
+
# botocore.exceptions.ClientError also carries operation_name, response.Error.Code
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class AdaptiveThrottle:
    """Retry a callable with exponential backoff + full jitter.

    Construct once and reuse for many calls:

        throttle = AdaptiveThrottle(max_attempts=8, base_delay=0.5, max_delay=30.0)
        result = throttle.call(client.invoke_model, body=...)

    Non-retryable exceptions propagate immediately; retryable ones are retried
    up to max_attempts total calls, then wrapped in ThrottleExhausted.
    """

    def __init__(
        self,
        max_attempts: int = 6,
        base_delay: float = 1.0,
        max_delay: float = 30.0,
        jitter: bool = True,
        also_retry: tuple[type[BaseException], ...] = (),
        sleep_fn: Callable[[float], None] | None = None,
        rng: random.Random | None = None,
    ) -> None:
        # Fail fast on misconfiguration instead of mid-retry.
        if max_attempts < 1:
            raise ValueError("max_attempts must be >= 1")
        if base_delay <= 0:
            raise ValueError("base_delay must be > 0")
        if max_delay < base_delay:
            raise ValueError("max_delay must be >= base_delay")
        self.max_attempts = max_attempts
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.jitter = jitter
        self.also_retry = also_retry
        # Injectable sleep/rng make the backoff fully testable.
        self._sleep = sleep_fn or time.sleep
        self._rng = rng or random.Random()

    def _should_retry(self, exc: BaseException) -> bool:
        """Retry caller-listed exception types and AWS throttle-class error codes."""
        return isinstance(exc, self.also_retry) or _error_code(exc) in THROTTLE_CODES

    def _delay_for(self, attempt_index: int) -> float:
        """attempt_index is 0-based: 0 is the first retry sleep, 1 is the second, ..."""
        ceiling = min(self.base_delay * (2**attempt_index), self.max_delay)
        # Full jitter: uniform over [0, ceiling], per AWS's recommendation.
        return self._rng.uniform(0.0, ceiling) if self.jitter else ceiling

    def call(self, fn: Callable[..., T], *args: Any, **kwargs: Any) -> T:
        """Invoke fn; on retryable failure sleep with backoff and try again."""
        last_exc: BaseException | None = None
        for attempt in range(1, self.max_attempts + 1):
            try:
                return fn(*args, **kwargs)
            except Exception as exc:  # noqa: BLE001 - we re-raise selectively
                if not self._should_retry(exc):
                    raise
                last_exc = exc
                if attempt == self.max_attempts:
                    break  # budget spent; fall through to ThrottleExhausted
                delay = self._delay_for(attempt - 1)
                logger.debug(
                    "throttle: attempt %d/%d failed (%s); sleeping %.2fs",
                    attempt,
                    self.max_attempts,
                    _error_code(exc) or type(exc).__name__,
                    delay,
                )
                self._sleep(delay)
        assert last_exc is not None
        raise ThrottleExhausted(self.max_attempts, last_exc) from last_exc
|
bedrock_kit/schema.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""JsonSchema - parse + validate + repair JSON output from Bedrock.
|
|
2
|
+
|
|
3
|
+
Models still emit invalid JSON sometimes: trailing commas, smart quotes,
|
|
4
|
+
markdown fences around the JSON. We do three passes before giving up:
|
|
5
|
+
|
|
6
|
+
1. json.loads on the raw text
|
|
7
|
+
2. lightweight repair: strip ```json fences, strip leading/trailing prose,
|
|
8
|
+
trim trailing comma before } or ]
|
|
9
|
+
3. ask the LLM again with the parse error as context (if `repair=True`
|
|
10
|
+
and a retry callback is provided)
|
|
11
|
+
|
|
12
|
+
Each pass is opt-in. The retry-with-LLM step requires the caller to wire it
|
|
13
|
+
in via BedrockClient.invoke; standalone JsonSchema only does passes 1 and 2.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import re
|
|
20
|
+
from collections.abc import Callable
|
|
21
|
+
from typing import Any, Generic, TypeVar
|
|
22
|
+
|
|
23
|
+
from bedrock_kit.exceptions import JsonParseError
|
|
24
|
+
|
|
25
|
+
T = TypeVar("T")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
_FENCE_RE = re.compile(r"^\s*```(?:json|JSON)?\s*\n?|\n?```\s*$", re.MULTILINE)
|
|
29
|
+
_TRAILING_COMMA_RE = re.compile(r",(\s*[}\]])")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _strip_fences(text: str) -> str:
|
|
33
|
+
return _FENCE_RE.sub("", text).strip()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _extract_first_json_object(text: str) -> str:
|
|
37
|
+
"""Find the first balanced {...} or [...] in the text. Useful when the model
|
|
38
|
+
wraps JSON in prose like 'Here you go: { ... } let me know if...'."""
|
|
39
|
+
if not text:
|
|
40
|
+
return text
|
|
41
|
+
open_chars = {"{": "}", "[": "]"}
|
|
42
|
+
for i, ch in enumerate(text):
|
|
43
|
+
if ch in open_chars:
|
|
44
|
+
close = open_chars[ch]
|
|
45
|
+
depth = 0
|
|
46
|
+
in_str = False
|
|
47
|
+
escape = False
|
|
48
|
+
for j in range(i, len(text)):
|
|
49
|
+
c = text[j]
|
|
50
|
+
if escape:
|
|
51
|
+
escape = False
|
|
52
|
+
continue
|
|
53
|
+
if c == "\\":
|
|
54
|
+
escape = True
|
|
55
|
+
continue
|
|
56
|
+
if c == '"':
|
|
57
|
+
in_str = not in_str
|
|
58
|
+
continue
|
|
59
|
+
if in_str:
|
|
60
|
+
continue
|
|
61
|
+
if c == ch:
|
|
62
|
+
depth += 1
|
|
63
|
+
elif c == close:
|
|
64
|
+
depth -= 1
|
|
65
|
+
if depth == 0:
|
|
66
|
+
return text[i : j + 1]
|
|
67
|
+
break
|
|
68
|
+
return text
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _light_repair(text: str) -> str:
    """Best-effort cleanup: drop fences, isolate the JSON span, remove trailing commas."""
    cleaned = _TRAILING_COMMA_RE.sub(
        r"\1", _extract_first_json_object(_strip_fences(text))
    )
    return cleaned.strip()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def parse_json(text: str, *, repair: bool = True) -> Any:
    """Parse JSON from `text`. With repair=True, try a light repair pass on failure.

    Raises json.JSONDecodeError when the text (and, if enabled, its repaired
    form) is not valid JSON.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        if not repair:
            raise
        # Second chance: strip fences/prose/trailing commas, then parse again.
        cleaned = _light_repair(text)
        return json.loads(cleaned)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class JsonSchema(Generic[T]):
|
|
90
|
+
"""Validate-and-repair wrapper around a pydantic model.
|
|
91
|
+
|
|
92
|
+
from pydantic import BaseModel
|
|
93
|
+
|
|
94
|
+
class Answer(BaseModel):
|
|
95
|
+
text: str
|
|
96
|
+
confidence: float
|
|
97
|
+
|
|
98
|
+
schema = JsonSchema(Answer, repair=True, max_repair_attempts=2)
|
|
99
|
+
parsed: Answer = schema.parse(response_text)
|
|
100
|
+
|
|
101
|
+
`max_repair_attempts` is for the LLM-retry path used by BedrockClient.invoke.
|
|
102
|
+
Standalone .parse() does the JSON repair pass once; if you want the LLM
|
|
103
|
+
retry behavior, pass the schema to client.invoke(..., response_schema=schema).
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
def __init__(
|
|
107
|
+
self,
|
|
108
|
+
model: type,
|
|
109
|
+
*,
|
|
110
|
+
repair: bool = True,
|
|
111
|
+
max_repair_attempts: int = 2,
|
|
112
|
+
validator: Callable[[Any], T] | None = None,
|
|
113
|
+
) -> None:
|
|
114
|
+
self.model = model
|
|
115
|
+
self.repair = repair
|
|
116
|
+
self.max_repair_attempts = max_repair_attempts
|
|
117
|
+
self._validator = validator or self._default_validator(model)
|
|
118
|
+
|
|
119
|
+
@staticmethod
|
|
120
|
+
def _default_validator(model: type) -> Callable[[Any], Any]:
|
|
121
|
+
# pydantic v2: model.model_validate
|
|
122
|
+
if hasattr(model, "model_validate"):
|
|
123
|
+
return model.model_validate # type: ignore[no-any-return]
|
|
124
|
+
# dataclass-style or callable
|
|
125
|
+
return lambda obj: model(**obj) if isinstance(obj, dict) else model(obj)
|
|
126
|
+
|
|
127
|
+
def parse(self, text: str) -> T:
|
|
128
|
+
"""Parse one response. Does NOT call the LLM again - that's BedrockClient's job."""
|
|
129
|
+
attempts = 0
|
|
130
|
+
last_err: Exception | None = None
|
|
131
|
+
candidates = [text]
|
|
132
|
+
if self.repair:
|
|
133
|
+
candidates.append(_light_repair(text))
|
|
134
|
+
for candidate in candidates:
|
|
135
|
+
attempts += 1
|
|
136
|
+
try:
|
|
137
|
+
obj = json.loads(candidate)
|
|
138
|
+
return self._validator(obj)
|
|
139
|
+
except Exception as e: # noqa: BLE001 - we collect and re-raise
|
|
140
|
+
last_err = e
|
|
141
|
+
continue
|
|
142
|
+
raise JsonParseError(
|
|
143
|
+
f"could not parse response into {self.model.__name__}: {last_err}",
|
|
144
|
+
raw_text=text,
|
|
145
|
+
attempts=attempts,
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
def repair_prompt(self, raw: str, error: Exception) -> str:
    """Build a follow-up prompt for the LLM to fix its output."""
    schema_name = self.model.__name__
    # Assemble the instruction in pieces; the joined result is byte-identical
    # to the concatenated-literal form.
    pieces = [
        "Your previous response could not be parsed as valid JSON for the ",
        f"{schema_name} schema. The error was:\n\n{error}\n\n",
        f"Your previous output was:\n\n{raw}\n\n",
        "Return ONLY a valid JSON object that matches the schema. No prose, ",
        "no markdown fences.",
    ]
    return "".join(pieces)
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bedrock-kit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Small, opinionated AWS Bedrock client wrapper: adaptive throttle, cache-aware cost tracking, and structured-output parse-and-repair. Single-cloud, single-purpose.
|
|
5
|
+
Project-URL: Homepage, https://github.com/MukundaKatta/bedrock-kit
|
|
6
|
+
Project-URL: Issues, https://github.com/MukundaKatta/bedrock-kit/issues
|
|
7
|
+
Project-URL: Source, https://github.com/MukundaKatta/bedrock-kit
|
|
8
|
+
Author-email: Mukunda Rao Katta <mukunda.vjcs6@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: anthropic,aws,bedrock,claude,cost-tracking,llm,mlops,rag,retry,structured-output
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Requires-Dist: pydantic>=2.0
|
|
27
|
+
Provides-Extra: boto
|
|
28
|
+
Requires-Dist: boto3>=1.28; extra == 'boto'
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pandas>=2.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
33
|
+
Provides-Extra: pandas
|
|
34
|
+
Requires-Dist: pandas>=2.0; extra == 'pandas'
|
|
35
|
+
Description-Content-Type: text/markdown
|
|
36
|
+
|
|
37
|
+
# bedrock-kit
|
|
38
|
+
|
|
39
|
+
[](https://github.com/MukundaKatta/bedrock-kit/actions/workflows/ci.yml)
|
|
40
|
+
[](https://pypi.org/project/bedrock-kit/)
|
|
41
|
+
[](https://pypi.org/project/bedrock-kit/)
|
|
42
|
+
[](LICENSE)
|
|
43
|
+
|
|
44
|
+
**Small, opinionated AWS Bedrock client wrapper.**
|
|
45
|
+
|
|
46
|
+
Every Bedrock production team rebuilds the same three things: adaptive
|
|
47
|
+
throttle for `ThrottlingException`, per-call cost tracking that handles
|
|
48
|
+
cache-read tokens, and structured-output parsing-with-repair. `bedrock-kit`
|
|
49
|
+
ships those, and nothing else. Single-cloud, single-purpose. No proxy
|
|
50
|
+
server. Wraps the official boto3 client; you can inject a fake for testing.
|
|
51
|
+
|
|
52
|
+
## Install
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install "bedrock-kit[boto]"
|
|
56
|
+
# optional
|
|
57
|
+
pip install "bedrock-kit[boto,pandas]" # adds CostLedger.to_pandas()
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Quickstart
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from bedrock_kit import BedrockClient, AdaptiveThrottle, CostLedger
|
|
64
|
+
|
|
65
|
+
ledger = CostLedger()
|
|
66
|
+
client = BedrockClient(
|
|
67
|
+
region="us-east-1",
|
|
68
|
+
retry=AdaptiveThrottle(max_attempts=8, base_delay=1.0, max_delay=30.0),
|
|
69
|
+
cost_ledger=ledger,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
resp = client.invoke(
|
|
73
|
+
model_id="anthropic.claude-sonnet-4-5",
|
|
74
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
75
|
+
system="You are concise.",
|
|
76
|
+
max_tokens=512,
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
resp.text # the model's reply
|
|
80
|
+
resp.usage.input_tokens # 12
|
|
81
|
+
resp.usage.cache_read_tokens # 0
|
|
82
|
+
resp.cost_usd # 0.000176
|
|
83
|
+
resp.cache_hit # False
|
|
84
|
+
ledger.total_usd # accumulates across all calls
|
|
85
|
+
ledger.by_model # {"anthropic.claude-sonnet-4-5": 0.000176}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Structured output with repair
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from pydantic import BaseModel
|
|
92
|
+
from bedrock_kit import JsonSchema
|
|
93
|
+
|
|
94
|
+
class Sentiment(BaseModel):
|
|
95
|
+
label: str
|
|
96
|
+
confidence: float
|
|
97
|
+
|
|
98
|
+
resp = client.invoke(
|
|
99
|
+
model_id="anthropic.claude-sonnet-4-5",
|
|
100
|
+
messages=[{"role": "user", "content": "Classify: 'this is great!'"}],
|
|
101
|
+
response_schema=JsonSchema(Sentiment, max_repair_attempts=2),
|
|
102
|
+
)
|
|
103
|
+
resp.parsed # Sentiment(label="positive", confidence=0.95)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
If the model returns invalid JSON, `bedrock-kit` first does a light local
|
|
107
|
+
repair pass (strip markdown fences, trailing commas, surrounding prose).
|
|
108
|
+
If that still fails, it asks the model to fix its own output, up to
|
|
109
|
+
`max_repair_attempts` times.
|
|
110
|
+
|
|
111
|
+
## Adaptive throttle
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
throttle = AdaptiveThrottle(
|
|
115
|
+
max_attempts=8, # total attempts (incl. first)
|
|
116
|
+
base_delay=1.0, # seconds
|
|
117
|
+
max_delay=30.0,
|
|
118
|
+
jitter=True, # full-jitter (uniform(0, capped_delay))
|
|
119
|
+
)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Retries on Bedrock throttle codes: `ThrottlingException`,
|
|
123
|
+
`TooManyRequestsException`, `ServiceUnavailableException`,
|
|
124
|
+
`ProvisionedThroughputExceededException`, `ModelTimeoutException`. Does
|
|
125
|
+
**not** retry validation, auth, or model-not-found errors - those are
|
|
126
|
+
your bugs to fix, not transient.
|
|
127
|
+
|
|
128
|
+
## Cost tracking
|
|
129
|
+
|
|
130
|
+
`CostLedger` ships pricing for popular Anthropic Bedrock models. Override
|
|
131
|
+
or extend with `pricing=`:
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from bedrock_kit import CostLedger, Pricing
|
|
135
|
+
|
|
136
|
+
ledger = CostLedger(
|
|
137
|
+
pricing={
|
|
138
|
+
"amazon.nova-pro-v1:0": Pricing(
|
|
139
|
+
input=0.8, output=3.2, cache_read=0.2, cache_write=1.0
|
|
140
|
+
),
|
|
141
|
+
},
|
|
142
|
+
strict=True, # raise PricingNotFoundError on unknown models
|
|
143
|
+
)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Default pricing is best-effort and dated; verify against
|
|
147
|
+
[aws.amazon.com/bedrock/pricing](https://aws.amazon.com/bedrock/pricing)
|
|
148
|
+
before using these numbers for billing.
|
|
149
|
+
|
|
150
|
+
## Why not LiteLLM?
|
|
151
|
+
|
|
152
|
+
LiteLLM is great if you need cross-provider routing. `bedrock-kit` is for
|
|
153
|
+
the case where you've already decided on Bedrock, you don't want a 46k-LOC
|
|
154
|
+
multi-provider abstraction, and you want a small surface a security team
|
|
155
|
+
can audit. We don't proxy, don't include a server, don't ship a UI. We're
|
|
156
|
+
< 1000 LOC of Python.
|
|
157
|
+
|
|
158
|
+
## What it explicitly does NOT do
|
|
159
|
+
|
|
160
|
+
- No multi-provider routing
|
|
161
|
+
- No proxy server, no UI
|
|
162
|
+
- No prompt management
|
|
163
|
+
- No agent loop
|
|
164
|
+
- No image generation
|
|
165
|
+
- No SageMaker, Bedrock Agents, or Knowledge Bases SDK wrapping
|
|
166
|
+
- No streaming or cancellation yet (planned for v0.2)
|
|
167
|
+
- No OpenTelemetry emission yet (planned for v0.2)
|
|
168
|
+
|
|
169
|
+
## Testing without AWS
|
|
170
|
+
|
|
171
|
+
The default `BedrockClient` makes a real boto3 client. For tests, inject a
|
|
172
|
+
fake that quacks like one:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
from bedrock_kit import BedrockClient, AdaptiveThrottle
|
|
176
|
+
|
|
177
|
+
class FakeClient:
|
|
178
|
+
def converse(self, **kwargs):
|
|
179
|
+
return {"output": {"message": {"content": [{"text": "stub"}]}},
|
|
180
|
+
"stopReason": "end_turn",
|
|
181
|
+
"usage": {"inputTokens": 1, "outputTokens": 1}}
|
|
182
|
+
|
|
183
|
+
client = BedrockClient(client=FakeClient(), retry=AdaptiveThrottle(sleep_fn=lambda _: None))
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Status
|
|
187
|
+
|
|
188
|
+
v0.1 - alpha. Public API may change before v1.0. Issues and PRs welcome.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
bedrock_kit/__init__.py,sha256=JmrMV-0j1RxQiJpY4iA2dg7pVhHwTulqiiu9E8Ra2c0,930
|
|
2
|
+
bedrock_kit/client.py,sha256=3_1wjyd6hMiBjR5BzzdNIPL-EttuENMw2O5eL1HsKEc,8718
|
|
3
|
+
bedrock_kit/cost.py,sha256=2dgzajQQFXPwWHXIS7GAJW3B-2U9p4zYd5FnCEQfajY,7772
|
|
4
|
+
bedrock_kit/exceptions.py,sha256=35vZlOvMOvSOyBrxYYeJ8JfQBuUzdNHtRBe0zIGgTU0,895
|
|
5
|
+
bedrock_kit/retry.py,sha256=cyJhqZUhq0Sj7eYequg1KpbFEA3m94S5Nk8Q7rfyvM0,4228
|
|
6
|
+
bedrock_kit/schema.py,sha256=TfDpEyS0oiDeXL5jrUgKSU5uGcNCqji4Ed4O8UAlvPQ,5273
|
|
7
|
+
bedrock_kit-0.1.0.dist-info/METADATA,sha256=jAlkIfLLvMslu5RlCPhvf5zGrz0U67eHYhllebKpfeo,6571
|
|
8
|
+
bedrock_kit-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
9
|
+
bedrock_kit-0.1.0.dist-info/licenses/LICENSE,sha256=p1GujHnprYaKo-fuZc9Tpy9i711QOy8PeYBhNM0VOdw,1074
|
|
10
|
+
bedrock_kit-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mukunda Rao Katta
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|