bedrock-kit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ """bedrock-kit - small, opinionated AWS Bedrock client wrapper.
2
+
3
+ Adds the things every Bedrock production team rebuilds: adaptive throttle,
4
+ per-call cost tracking with cache-aware accounting, and structured-output
5
+ parse-and-repair. Single-cloud, single-purpose, < 3000 LOC.
6
+ """
7
+
8
+ from bedrock_kit.client import BedrockClient, BedrockResponse, Usage
9
+ from bedrock_kit.cost import CostEntry, CostLedger, Pricing
10
+ from bedrock_kit.exceptions import (
11
+ BedrockKitError,
12
+ JsonParseError,
13
+ PricingNotFoundError,
14
+ ThrottleExhausted,
15
+ )
16
+ from bedrock_kit.retry import AdaptiveThrottle
17
+ from bedrock_kit.schema import JsonSchema
18
+
19
# Package version; also exported via __all__ so callers can report it
# without importing importlib.metadata.
__version__ = "0.1.0"

# Explicit public API, sorted alphabetically. Keep in sync with the
# imports above; star-imports and docs tooling rely on this list.
__all__ = [
    "AdaptiveThrottle",
    "BedrockClient",
    "BedrockKitError",
    "BedrockResponse",
    "CostEntry",
    "CostLedger",
    "JsonParseError",
    "JsonSchema",
    "Pricing",
    "PricingNotFoundError",
    "ThrottleExhausted",
    "Usage",
    "__version__",
]
bedrock_kit/client.py ADDED
@@ -0,0 +1,254 @@
1
+ """BedrockClient - the user-facing wrapper.
2
+
3
+ Wraps a boto3 bedrock-runtime client (or anything quacking like one) and
4
+ adds adaptive throttle, cost tracking, and JSON-schema parsing-and-repair.
5
+ Uses the Bedrock Converse API (`converse`) which normalizes Anthropic /
6
+ Mistral / Cohere / Llama into one shape.
7
+
8
+ We keep boto3 as a runtime dependency and let users inject their own client
9
+ for testing.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from collections.abc import Sequence
15
+ from dataclasses import dataclass, field
16
+ from typing import Any, TypeVar
17
+
18
+ from bedrock_kit.cost import CostEntry, CostLedger
19
+ from bedrock_kit.exceptions import JsonParseError
20
+ from bedrock_kit.retry import AdaptiveThrottle
21
+ from bedrock_kit.schema import JsonSchema
22
+
23
# Parsed-payload type produced by a JsonSchema passed as response_schema.
T = TypeVar("T")
24
+
25
+
26
@dataclass(frozen=True)
class Usage:
    """Token counts for one Converse call, including prompt-cache traffic."""

    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int

    @classmethod
    def from_converse(cls, raw: dict[str, Any]) -> Usage:
        """Build a Usage from a raw Converse response dict; missing keys count as 0."""
        counts = raw.get("usage") or {}

        def _count(key: str) -> int:
            return int(counts.get(key, 0))

        # Bedrock Converse reports prompt-cache traffic under
        # cacheReadInputTokens / cacheWriteInputTokens.
        return cls(
            input_tokens=_count("inputTokens"),
            output_tokens=_count("outputTokens"),
            cache_read_tokens=_count("cacheReadInputTokens"),
            cache_write_tokens=_count("cacheWriteInputTokens"),
        )
43
+
44
+
45
@dataclass
class BedrockResponse:
    """One completed Converse call: extracted text, usage, and priced cost."""

    text: str  # concatenated text blocks from the output message
    usage: Usage  # token counts, incl. cache read/write
    model_id: str  # model id the call was made with
    stop_reason: str  # Converse stopReason, "" if absent
    cost_usd: float  # cost computed by the ledger for this call
    cache_hit: bool  # True when any cache-read tokens were billed
    parsed: Any = None  # schema-validated object when response_schema was used
    raw: dict[str, Any] = field(default_factory=dict)  # full Converse response payload
    cost_entry: CostEntry | None = None  # ledger entry recorded for this call
56
+
57
+
58
+ def _extract_text_from_converse(raw: dict[str, Any]) -> str:
59
+ output = raw.get("output", {}) or {}
60
+ message = output.get("message", {}) or {}
61
+ parts = []
62
+ for block in message.get("content", []) or []:
63
+ text = block.get("text")
64
+ if text:
65
+ parts.append(text)
66
+ return "".join(parts)
67
+
68
+
69
class _BotoClientFactory:
    """Lazy boto3 import so the package imports cleanly without the `boto` extra."""

    @staticmethod
    def make(region: str, **client_kwargs: Any) -> Any:
        """Create a real bedrock-runtime client for `region`.

        Raises RuntimeError (chained from ImportError) with install guidance
        when boto3 is not installed.
        """
        try:
            import boto3  # type: ignore
        except ImportError as e:
            raise RuntimeError(
                "boto3 is required for the default BedrockClient. "
                "Install with `pip install bedrock-kit[boto]` or pass `client=` explicitly."
            ) from e
        return boto3.client("bedrock-runtime", region_name=region, **client_kwargs)
80
+
81
+
82
class BedrockClient:
    """Wrapper around bedrock-runtime.converse with cost + retry + schema parsing.

    client = BedrockClient(region="us-east-1")
    resp = client.invoke(
        model_id="anthropic.claude-sonnet-4-5",
        messages=[{"role": "user", "content": "hello"}],
        system="You are helpful",
        max_tokens=512,
    )
    resp.text
    resp.cost_usd
    resp.cache_hit

    Inject a fake for testing:

    client = BedrockClient(client=fake_client)
    """

    def __init__(
        self,
        region: str | None = None,
        *,
        client: Any | None = None,
        retry: AdaptiveThrottle | None = None,
        cost_ledger: CostLedger | None = None,
        boto_kwargs: dict[str, Any] | None = None,
    ) -> None:
        # Either take an injected client (tests) or build a real boto3 one
        # from `region`; at least one of the two must be supplied.
        if client is None:
            if region is None:
                raise ValueError("either `region` or `client` must be provided")
            client = _BotoClientFactory.make(region, **(boto_kwargs or {}))
        self._client = client
        # Defaults: standard backoff policy and a fresh, empty ledger.
        self._retry = retry or AdaptiveThrottle()
        self._ledger = cost_ledger or CostLedger()

    @property
    def cost_ledger(self) -> CostLedger:
        """Ledger accumulating a CostEntry for every call made through this client."""
        return self._ledger

    def invoke(
        self,
        model_id: str,
        messages: Sequence[dict[str, Any]],
        *,
        system: str | list[dict[str, Any]] | None = None,
        max_tokens: int = 1024,
        temperature: float | None = None,
        top_p: float | None = None,
        stop_sequences: list[str] | None = None,
        response_schema: JsonSchema[T] | None = None,
        additional_model_fields: dict[str, Any] | None = None,
    ) -> BedrockResponse:
        """Run one Converse call with retry and cost recording.

        When `response_schema` is given, the response text is parsed/validated
        and, on failure, the model is asked to repair its own output (up to
        `schema.max_repair_attempts` extra round-trips). Raises whatever the
        throttle re-raises (incl. ThrottleExhausted) or JsonParseError when
        schema repair runs out of attempts.
        """
        body = self._build_body(
            messages=messages,
            system=system,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stop_sequences=stop_sequences,
            additional_model_fields=additional_model_fields,
        )

        # The throttle wraps the raw AWS call; each attempt is one HTTP request.
        raw = self._retry.call(self._client.converse, modelId=model_id, **body)
        response = self._build_response(model_id, raw)

        if response_schema is not None:
            response = self._apply_schema(
                response=response,
                model_id=model_id,
                schema=response_schema,
                body=body,
            )

        return response

    def _apply_schema(
        self,
        response: BedrockResponse,
        model_id: str,
        schema: JsonSchema[T],
        body: dict[str, Any],
    ) -> BedrockResponse:
        """Parse `response.text` against `schema`, re-prompting the model on failure.

        Each repair round-trip is a fresh Converse call (its cost IS recorded
        in the ledger) whose messages are the ORIGINAL conversation plus only
        the latest failed output and a repair instruction — intermediate
        failed turns are not accumulated.
        """
        last_err: Exception | None = None
        for attempt in range(schema.max_repair_attempts + 1):
            try:
                response.parsed = schema.parse(response.text)
                return response
            except JsonParseError as e:
                last_err = e
                # Out of repair budget: surface the final parse error as-is.
                if attempt == schema.max_repair_attempts:
                    raise
                # Append a follow-up turn asking the model to fix the output
                followup_messages = list(body["messages"]) + [
                    {"role": "assistant", "content": [{"text": response.text}]},
                    {
                        "role": "user",
                        "content": [{"text": schema.repair_prompt(response.text, e)}],
                    },
                ]
                retry_body = dict(body)
                retry_body["messages"] = followup_messages
                raw = self._retry.call(self._client.converse, modelId=model_id, **retry_body)
                response = self._build_response(model_id, raw)
        # unreachable: the loop either returns or raises
        raise JsonParseError(
            f"schema retries exhausted: {last_err}",
            raw_text=response.text,
            attempts=schema.max_repair_attempts + 1,
        )

    def _build_body(
        self,
        messages: Sequence[dict[str, Any]],
        system: str | list[dict[str, Any]] | None,
        max_tokens: int,
        temperature: float | None,
        top_p: float | None,
        stop_sequences: list[str] | None,
        additional_model_fields: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Translate keyword arguments into a Converse request body (sans modelId)."""
        # Normalize messages: callers may pass {"role":..., "content": "string"}
        # but Converse expects content as a list of content blocks.
        norm_messages: list[dict[str, Any]] = []
        for m in messages:
            content = m.get("content")
            if isinstance(content, str):
                norm_messages.append({"role": m["role"], "content": [{"text": content}]})
            else:
                # Shallow-copy so later body mutation can't alias caller dicts.
                norm_messages.append(dict(m))

        # Only include optional inference settings the caller actually set.
        inference_config: dict[str, Any] = {"maxTokens": max_tokens}
        if temperature is not None:
            inference_config["temperature"] = temperature
        if top_p is not None:
            inference_config["topP"] = top_p
        if stop_sequences:
            inference_config["stopSequences"] = list(stop_sequences)

        body: dict[str, Any] = {
            "messages": norm_messages,
            "inferenceConfig": inference_config,
        }
        if system is not None:
            # A bare string becomes a single system text block.
            if isinstance(system, str):
                body["system"] = [{"text": system}]
            else:
                body["system"] = list(system)
        if additional_model_fields:
            body["additionalModelRequestFields"] = additional_model_fields
        return body

    def _build_response(self, model_id: str, raw: dict[str, Any]) -> BedrockResponse:
        """Record the call's cost in the ledger and wrap the raw reply."""
        usage = Usage.from_converse(raw)
        cost_entry = self._ledger.record(
            model_id=model_id,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_read_tokens=usage.cache_read_tokens,
            cache_write_tokens=usage.cache_write_tokens,
        )
        text = _extract_text_from_converse(raw)
        return BedrockResponse(
            text=text,
            usage=usage,
            model_id=model_id,
            stop_reason=str(raw.get("stopReason", "")),
            cost_usd=cost_entry.cost_usd,
            cache_hit=cost_entry.cache_hit,
            parsed=None,
            raw=raw,
            cost_entry=cost_entry,
        )
bedrock_kit/cost.py ADDED
@@ -0,0 +1,236 @@
1
+ """CostLedger - per-call cost accounting with cache-aware token math.
2
+
3
+ Bedrock returns four token counts on Anthropic models:
4
+ * input_tokens
5
+ * output_tokens
6
+ * cache_creation_input_tokens (when cache_control is set on a block)
7
+ * cache_read_input_tokens (when a previous request hit the cache)
8
+
9
+ Each has its own price. cache_read is typically 10% of input; cache_creation
10
+ is 1.25x input. We compute total cost from all four. Pricing is in USD per
11
+ 1M tokens, matching how AWS publishes the rates.
12
+
13
+ The default pricing table covers the popular Anthropic models on Bedrock as
14
+ of 2026-Q2. Verify against AWS docs - rates change. You can pass `pricing=`
15
+ to override per-model.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from collections.abc import Iterable
21
+ from dataclasses import dataclass, field
22
+ from datetime import datetime, timezone
23
+ from typing import Any
24
+
25
+ from bedrock_kit.exceptions import PricingNotFoundError
26
+
27
+
28
@dataclass(frozen=True)
class Pricing:
    """USD per 1,000,000 tokens. cache_* default to standard Anthropic ratios."""

    input: float
    output: float
    cache_read: float | None = None
    cache_write: float | None = None

    def cost_for(
        self,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> float:
        """Total USD cost for one call's four token counts."""
        # Anthropic convention: cache reads bill at 10% of the input rate and
        # cache writes at 125%, unless explicit per-model rates were given.
        read_rate = self.input * 0.1 if self.cache_read is None else self.cache_read
        write_rate = self.input * 1.25 if self.cache_write is None else self.cache_write
        per_million = (
            self.input * input_tokens
            + self.output * output_tokens
            + read_rate * cache_read_tokens
            + write_rate * cache_write_tokens
        )
        # Rates are quoted per 1M tokens.
        return per_million / 1_000_000.0
53
+
54
+
55
# Default pricing for popular Bedrock models (USD per 1M tokens).
# Source: aws.amazon.com/bedrock/pricing as of 2026-Q2. VERIFY before billing on these.
# NOTE: base ids and their "-v1:0" suffixed variants are listed separately so
# CostLedger._lookup hits either spelling without extra normalization.
DEFAULT_PRICING: dict[str, Pricing] = {
    "anthropic.claude-sonnet-4-5": Pricing(
        input=3.0, output=15.0, cache_read=0.3, cache_write=3.75
    ),
    "anthropic.claude-sonnet-4-5-v1:0": Pricing(
        input=3.0, output=15.0, cache_read=0.3, cache_write=3.75
    ),
    "anthropic.claude-opus-4-7": Pricing(
        input=15.0, output=75.0, cache_read=1.5, cache_write=18.75
    ),
    "anthropic.claude-opus-4-7-v1:0": Pricing(
        input=15.0, output=75.0, cache_read=1.5, cache_write=18.75
    ),
    "anthropic.claude-haiku-4-5": Pricing(
        input=1.0, output=5.0, cache_read=0.1, cache_write=1.25
    ),
    "anthropic.claude-3-5-sonnet-20241022-v2:0": Pricing(
        input=3.0, output=15.0, cache_read=0.3, cache_write=3.75
    ),
    "anthropic.claude-3-5-haiku-20241022-v1:0": Pricing(
        input=0.8, output=4.0, cache_read=0.08, cache_write=1.0
    ),
}
80
+
81
+
82
+ def _normalize_model_id(model_id: str) -> str:
83
+ """Strip arn:aws:bedrock:region::foundation-model/ prefix and inference-profile prefix."""
84
+ if model_id.startswith("arn:aws:bedrock:"):
85
+ # arn:...:foundation-model/anthropic.claude-... or .../inference-profile/...
86
+ tail = model_id.rsplit("/", 1)[-1]
87
+ return tail
88
+ if model_id.startswith("us.") or model_id.startswith("eu.") or model_id.startswith("apac."):
89
+ # cross-region inference profile prefix
90
+ return model_id.split(".", 1)[1]
91
+ return model_id
92
+
93
+
94
@dataclass(frozen=True)
class CostEntry:
    """Immutable record of one priced model call."""

    timestamp: datetime
    model_id: str
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    cost_usd: float
    cache_hit: bool
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """JSON-friendly dict; timestamp is rendered as an ISO-8601 string.

        Key order (timestamp first, then the remaining fields in declaration
        order) is preserved because to_pandas derives column order from it.
        """
        out: dict[str, Any] = {"timestamp": self.timestamp.isoformat()}
        for name in (
            "model_id",
            "input_tokens",
            "output_tokens",
            "cache_read_tokens",
            "cache_write_tokens",
            "cost_usd",
            "cache_hit",
            "metadata",
        ):
            out[name] = getattr(self, name)
        return out
118
+
119
+
120
class CostLedger:
    """Accumulates per-call cost entries.

    ledger = CostLedger()
    client = BedrockClient(..., cost_ledger=ledger)
    # ... after some invokes:
    ledger.total_usd
    ledger.by_model   # {"anthropic.claude-sonnet-4-5": 0.0234, ...}
    ledger.entries    # list[CostEntry]
    ledger.to_dict()
    ledger.to_pandas()  # requires `pandas` extra
    """

    def __init__(
        self,
        pricing: dict[str, Pricing] | None = None,
        strict: bool = False,
    ) -> None:
        # Caller-supplied rates shadow the bundled defaults on key collision.
        merged = dict(DEFAULT_PRICING)
        merged.update(pricing or {})
        self._pricing = merged
        self.strict = strict
        self._entries: list[CostEntry] = []

    def record(
        self,
        model_id: str,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
        metadata: dict[str, Any] | None = None,
    ) -> CostEntry:
        """Price one call, append it to the ledger, and return the new entry."""
        entry = CostEntry(
            timestamp=datetime.now(timezone.utc),
            model_id=model_id,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
            cost_usd=self.compute_cost(
                model_id, input_tokens, output_tokens, cache_read_tokens, cache_write_tokens
            ),
            # Any billed cache-read traffic counts as a cache hit.
            cache_hit=cache_read_tokens > 0,
            metadata=metadata or {},
        )
        self._entries.append(entry)
        return entry

    def compute_cost(
        self,
        model_id: str,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> float:
        """USD cost for the given counts; unknown models cost 0.0 unless strict."""
        rates = self._lookup(model_id)
        if rates is not None:
            return rates.cost_for(
                input_tokens, output_tokens, cache_read_tokens, cache_write_tokens
            )
        if self.strict:
            raise PricingNotFoundError(f"no pricing for {model_id!r}")
        return 0.0

    def _lookup(self, model_id: str) -> Pricing | None:
        """Exact-match lookup first; then retry with the normalized id."""
        direct = self._pricing.get(model_id)
        if direct is not None:
            return direct
        return self._pricing.get(_normalize_model_id(model_id))

    @property
    def entries(self) -> list[CostEntry]:
        # Defensive copy: callers cannot mutate ledger state through it.
        return list(self._entries)

    @property
    def total_usd(self) -> float:
        """Sum of cost_usd across every recorded entry."""
        return sum(entry.cost_usd for entry in self._entries)

    @property
    def by_model(self) -> dict[str, float]:
        """Per-model cost totals, keyed by the model_id as recorded."""
        totals: dict[str, float] = {}
        for entry in self._entries:
            totals[entry.model_id] = totals.get(entry.model_id, 0.0) + entry.cost_usd
        return totals

    def reset(self) -> None:
        """Drop all recorded entries (pricing table is kept)."""
        self._entries.clear()

    def extend(self, entries: Iterable[CostEntry]) -> None:
        """Append pre-built entries, e.g. when merging ledgers from workers."""
        self._entries.extend(entries)

    def to_dict(self) -> dict[str, Any]:
        """JSON-friendly summary: totals plus every entry's dict form."""
        return {
            "total_usd": self.total_usd,
            "by_model": self.by_model,
            "n_entries": len(self._entries),
            "entries": [entry.to_dict() for entry in self._entries],
        }

    def to_pandas(self):
        """Render entries as a DataFrame (requires the `pandas` extra)."""
        import pandas as pd

        if self._entries:
            return pd.DataFrame([entry.to_dict() for entry in self._entries])
        # Preserve the column layout even when there is no data yet.
        # NOTE(review): the empty frame omits the `metadata` column that
        # populated frames include — confirm that asymmetry is intended.
        return pd.DataFrame(
            columns=[
                "timestamp",
                "model_id",
                "input_tokens",
                "output_tokens",
                "cache_read_tokens",
                "cache_write_tokens",
                "cost_usd",
                "cache_hit",
            ]
        )
@@ -0,0 +1,27 @@
1
+ """bedrock-kit exception hierarchy."""
2
+
3
+
4
class BedrockKitError(Exception):
    """Base for all bedrock-kit errors."""


class ThrottleExhausted(BedrockKitError):
    """Raised when AdaptiveThrottle gives up after max_attempts."""

    def __init__(self, attempts: int, last_exc: BaseException):
        super().__init__(f"throttle exhausted after {attempts} attempts: {last_exc}")
        # Exposed for programmatic inspection by callers.
        self.attempts = attempts
        self.last_exc = last_exc


class PricingNotFoundError(BedrockKitError):
    """Raised when CostLedger has no pricing for a model and strict=True."""


class JsonParseError(BedrockKitError):
    """Raised when JsonSchema cannot parse/validate a response after retries."""

    def __init__(self, message: str, raw_text: str, attempts: int):
        super().__init__(f"{message} (after {attempts} attempts)")
        # The unparseable model output and how many parse attempts were made.
        self.raw_text = raw_text
        self.attempts = attempts
bedrock_kit/retry.py ADDED
@@ -0,0 +1,120 @@
1
+ """AdaptiveThrottle - exponential backoff with full jitter for Bedrock throttling.
2
+
3
+ AWS recommends "full jitter" for throttle retries (delay = uniform(0, base * 2^n)).
4
+ This avoids the thundering-herd that "decorrelated jitter" still allows when
5
+ many workers retry against the same throttled region.
6
+
7
+ We retry on:
8
+ * boto3 ClientError with ErrorCode in THROTTLE_CODES
9
+ * any exception in `also_retry` (caller-supplied)
10
+
11
+ We do NOT retry on validation errors, auth errors, or model-not-found - those
12
+ are bugs to fix, not transient failures.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import random
19
+ import time
20
+ from collections.abc import Callable
21
+ from typing import Any, TypeVar
22
+
23
+ from bedrock_kit.exceptions import ThrottleExhausted
24
+
25
# Module logger; AdaptiveThrottle.call emits a DEBUG line before each sleep.
logger = logging.getLogger(__name__)

# Return type of the callable wrapped by AdaptiveThrottle.call.
T = TypeVar("T")

# Bedrock + STS error codes that mean "back off and retry"
THROTTLE_CODES: frozenset[str] = frozenset({
    "ThrottlingException",
    "Throttling",
    "TooManyRequestsException",
    "RequestLimitExceeded",
    "ServiceUnavailableException",
    "ServiceUnavailable",
    "ProvisionedThroughputExceededException",
    "ModelTimeoutException",
})
40
+
41
+
42
+ def _error_code(exc: BaseException) -> str | None:
43
+ """Extract AWS error code from a botocore ClientError without importing botocore."""
44
+ response = getattr(exc, "response", None)
45
+ if isinstance(response, dict):
46
+ code = response.get("Error", {}).get("Code")
47
+ if isinstance(code, str):
48
+ return code
49
+ # botocore.exceptions.ClientError also carries operation_name, response.Error.Code
50
+ return None
51
+
52
+
53
class AdaptiveThrottle:
    """Retry a callable with exponential backoff + full jitter.

    Construct once, reuse across many calls:

    throttle = AdaptiveThrottle(max_attempts=8, base_delay=0.5, max_delay=30.0)
    result = throttle.call(client.invoke_model, body=...)
    """

    def __init__(
        self,
        max_attempts: int = 6,
        base_delay: float = 1.0,
        max_delay: float = 30.0,
        jitter: bool = True,
        also_retry: tuple[type[BaseException], ...] = (),
        sleep_fn: Callable[[float], None] | None = None,
        rng: random.Random | None = None,
    ) -> None:
        # Reject nonsensical configuration up front.
        if max_attempts < 1:
            raise ValueError("max_attempts must be >= 1")
        if base_delay <= 0:
            raise ValueError("base_delay must be > 0")
        if max_delay < base_delay:
            raise ValueError("max_delay must be >= base_delay")
        self.max_attempts = max_attempts
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.jitter = jitter
        self.also_retry = also_retry
        # Injectable for tests: sleep_fn=lambda _: None and a seeded Random.
        self._sleep = sleep_fn or time.sleep
        self._rng = rng or random.Random()

    def _should_retry(self, exc: BaseException) -> bool:
        """Retry caller-listed exception types and known AWS throttle codes."""
        return isinstance(exc, self.also_retry) or _error_code(exc) in THROTTLE_CODES

    def _delay_for(self, attempt_index: int) -> float:
        """attempt_index is 0-based: 0 is the first retry sleep, 1 is the second, ..."""
        ceiling = min(self.max_delay, self.base_delay * (2**attempt_index))
        # Full jitter: uniform(0, ceiling) decorrelates concurrent retriers.
        return self._rng.uniform(0.0, ceiling) if self.jitter else ceiling

    def call(self, fn: Callable[..., T], *args: Any, **kwargs: Any) -> T:
        """Invoke `fn`; back off and retry on throttle-class failures.

        Non-retryable exceptions propagate immediately. After max_attempts
        retryable failures, raises ThrottleExhausted chained to the last one.
        """
        failure: BaseException | None = None
        for attempt in range(1, self.max_attempts + 1):
            try:
                return fn(*args, **kwargs)
            except Exception as exc:  # noqa: BLE001 - we re-raise selectively
                if not self._should_retry(exc):
                    raise
                failure = exc
                if attempt == self.max_attempts:
                    break
                pause = self._delay_for(attempt - 1)
                logger.debug(
                    "throttle: attempt %d/%d failed (%s); sleeping %.2fs",
                    attempt,
                    self.max_attempts,
                    _error_code(exc) or type(exc).__name__,
                    pause,
                )
                self._sleep(pause)
        assert failure is not None
        raise ThrottleExhausted(self.max_attempts, failure) from failure
bedrock_kit/schema.py ADDED
@@ -0,0 +1,156 @@
1
+ """JsonSchema - parse + validate + repair JSON output from Bedrock.
2
+
3
+ Models still emit invalid JSON sometimes: trailing commas, smart quotes,
4
+ markdown fences around the JSON. We do three passes before giving up:
5
+
6
+ 1. json.loads on the raw text
7
+ 2. lightweight repair: strip ```json fences, strip leading/trailing prose,
8
+ trim trailing comma before } or ]
9
+ 3. ask the LLM again with the parse error as context (if `repair=True`
10
+ and a retry callback is provided)
11
+
12
+ Each pass is opt-in. The retry-with-LLM step requires the caller to wire it
13
+ in via BedrockClient.invoke; standalone JsonSchema only does passes 1 and 2.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import re
20
+ from collections.abc import Callable
21
+ from typing import Any, Generic, TypeVar
22
+
23
+ from bedrock_kit.exceptions import JsonParseError
24
+
25
# Payload type produced by a JsonSchema's validator.
T = TypeVar("T")


# Matches an opening ``` / ```json fence at a line start or a closing fence
# at a line end (MULTILINE anchors); sub("") strips both around the payload.
_FENCE_RE = re.compile(r"^\s*```(?:json|JSON)?\s*\n?|\n?```\s*$", re.MULTILINE)
# A comma immediately before a closing } or ] — invalid JSON that models emit.
_TRAILING_COMMA_RE = re.compile(r",(\s*[}\]])")
30
+
31
+
32
def _strip_fences(text: str) -> str:
    """Drop markdown code fences (``` / ```json) and surrounding whitespace."""
    return _FENCE_RE.sub("", text).strip()
34
+
35
+
36
+ def _extract_first_json_object(text: str) -> str:
37
+ """Find the first balanced {...} or [...] in the text. Useful when the model
38
+ wraps JSON in prose like 'Here you go: { ... } let me know if...'."""
39
+ if not text:
40
+ return text
41
+ open_chars = {"{": "}", "[": "]"}
42
+ for i, ch in enumerate(text):
43
+ if ch in open_chars:
44
+ close = open_chars[ch]
45
+ depth = 0
46
+ in_str = False
47
+ escape = False
48
+ for j in range(i, len(text)):
49
+ c = text[j]
50
+ if escape:
51
+ escape = False
52
+ continue
53
+ if c == "\\":
54
+ escape = True
55
+ continue
56
+ if c == '"':
57
+ in_str = not in_str
58
+ continue
59
+ if in_str:
60
+ continue
61
+ if c == ch:
62
+ depth += 1
63
+ elif c == close:
64
+ depth -= 1
65
+ if depth == 0:
66
+ return text[i : j + 1]
67
+ break
68
+ return text
69
+
70
+
71
def _light_repair(text: str) -> str:
    """Local (no-LLM) JSON cleanup: fences -> surrounding prose -> trailing commas.

    The three passes mirror the failure modes listed in the module docstring;
    each is a no-op on already-valid JSON.
    """
    text = _strip_fences(text)
    text = _extract_first_json_object(text)
    # ",}" / ",]" -> "}" / "]"
    text = _TRAILING_COMMA_RE.sub(r"\1", text)
    return text.strip()
76
+
77
+
78
def parse_json(text: str, *, repair: bool = True) -> Any:
    """Parse JSON from `text`.

    With repair=True (default) a failed first parse gets one light local
    repair pass (fences, surrounding prose, trailing commas) and a second
    attempt; a second failure propagates as json.JSONDecodeError with the
    original error chained as context.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        if not repair:
            raise
        # Second chance on the locally-repaired text.
        return json.loads(_light_repair(text))
87
+
88
+
89
class JsonSchema(Generic[T]):
    """Validate-and-repair wrapper around a pydantic model.

    from pydantic import BaseModel

    class Answer(BaseModel):
        text: str
        confidence: float

    schema = JsonSchema(Answer, repair=True, max_repair_attempts=2)
    parsed: Answer = schema.parse(response_text)

    `max_repair_attempts` is for the LLM-retry path used by BedrockClient.invoke.
    Standalone .parse() does the JSON repair pass once; if you want the LLM
    retry behavior, pass the schema to client.invoke(..., response_schema=schema).
    """

    def __init__(
        self,
        model: type,
        *,
        repair: bool = True,
        max_repair_attempts: int = 2,
        validator: Callable[[Any], T] | None = None,
    ) -> None:
        self.model = model
        self.repair = repair
        self.max_repair_attempts = max_repair_attempts
        # A caller-supplied validator wins; otherwise pick one by duck-typing
        # the model class (pydantic v2 vs dataclass/callable).
        self._validator = validator or self._default_validator(model)

    @staticmethod
    def _default_validator(model: type) -> Callable[[Any], Any]:
        """Choose how decoded JSON is turned into a `model` instance."""
        # pydantic v2: model.model_validate
        if hasattr(model, "model_validate"):
            return model.model_validate  # type: ignore[no-any-return]
        # dataclass-style or callable: dicts become keyword args, anything
        # else is passed as a single positional argument.
        return lambda obj: model(**obj) if isinstance(obj, dict) else model(obj)

    def parse(self, text: str) -> T:
        """Parse one response. Does NOT call the LLM again - that's BedrockClient's job."""
        attempts = 0
        last_err: Exception | None = None
        # Candidate texts in order: raw, then (if enabled) locally repaired.
        # NOTE: when repair changes nothing, the same text is parsed twice;
        # harmless, but the attempt count then reads 2 for one input.
        candidates = [text]
        if self.repair:
            candidates.append(_light_repair(text))
        for candidate in candidates:
            attempts += 1
            try:
                obj = json.loads(candidate)
                # Validation errors are treated like decode errors: fall
                # through to the next candidate / the final JsonParseError.
                return self._validator(obj)
            except Exception as e:  # noqa: BLE001 - we collect and re-raise
                last_err = e
                continue
        raise JsonParseError(
            f"could not parse response into {self.model.__name__}: {last_err}",
            raw_text=text,
            attempts=attempts,
        )

    def repair_prompt(self, raw: str, error: Exception) -> str:
        """Build a follow-up prompt for the LLM to fix its output."""
        return (
            "Your previous response could not be parsed as valid JSON for the "
            f"{self.model.__name__} schema. The error was:\n\n{error}\n\n"
            f"Your previous output was:\n\n{raw}\n\n"
            "Return ONLY a valid JSON object that matches the schema. No prose, "
            "no markdown fences."
        )
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.4
2
+ Name: bedrock-kit
3
+ Version: 0.1.0
4
+ Summary: Small, opinionated AWS Bedrock client wrapper: adaptive throttle, cache-aware cost tracking, and structured-output parse-and-repair. Single-cloud, single-purpose.
5
+ Project-URL: Homepage, https://github.com/MukundaKatta/bedrock-kit
6
+ Project-URL: Issues, https://github.com/MukundaKatta/bedrock-kit/issues
7
+ Project-URL: Source, https://github.com/MukundaKatta/bedrock-kit
8
+ Author-email: Mukunda Rao Katta <mukunda.vjcs6@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: anthropic,aws,bedrock,claude,cost-tracking,llm,mlops,rag,retry,structured-output
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: pydantic>=2.0
27
+ Provides-Extra: boto
28
+ Requires-Dist: boto3>=1.28; extra == 'boto'
29
+ Provides-Extra: dev
30
+ Requires-Dist: pandas>=2.0; extra == 'dev'
31
+ Requires-Dist: pytest>=8.0; extra == 'dev'
32
+ Requires-Dist: ruff>=0.4; extra == 'dev'
33
+ Provides-Extra: pandas
34
+ Requires-Dist: pandas>=2.0; extra == 'pandas'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # bedrock-kit
38
+
39
+ [![CI](https://github.com/MukundaKatta/bedrock-kit/actions/workflows/ci.yml/badge.svg)](https://github.com/MukundaKatta/bedrock-kit/actions/workflows/ci.yml)
40
+ [![PyPI](https://img.shields.io/pypi/v/bedrock-kit.svg)](https://pypi.org/project/bedrock-kit/)
41
+ [![Python](https://img.shields.io/pypi/pyversions/bedrock-kit.svg)](https://pypi.org/project/bedrock-kit/)
42
+ [![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
43
+
44
+ **Small, opinionated AWS Bedrock client wrapper.**
45
+
46
+ Every Bedrock production team rebuilds the same three things: adaptive
47
+ throttle for `ThrottlingException`, per-call cost tracking that handles
48
+ cache-read tokens, and structured-output parsing-with-repair. `bedrock-kit`
49
+ ships those, and nothing else. Single-cloud, single-purpose. No proxy
50
+ server. Wraps the official boto3 client; you can inject a fake for testing.
51
+
52
+ ## Install
53
+
54
+ ```bash
55
+ pip install "bedrock-kit[boto]"
56
+ # optional
57
+ pip install "bedrock-kit[boto,pandas]" # adds CostLedger.to_pandas()
58
+ ```
59
+
60
+ ## Quickstart
61
+
62
+ ```python
63
+ from bedrock_kit import BedrockClient, AdaptiveThrottle, CostLedger
64
+
65
+ ledger = CostLedger()
66
+ client = BedrockClient(
67
+ region="us-east-1",
68
+ retry=AdaptiveThrottle(max_attempts=8, base_delay=1.0, max_delay=30.0),
69
+ cost_ledger=ledger,
70
+ )
71
+
72
+ resp = client.invoke(
73
+ model_id="anthropic.claude-sonnet-4-5",
74
+ messages=[{"role": "user", "content": "Hello"}],
75
+ system="You are concise.",
76
+ max_tokens=512,
77
+ )
78
+
79
+ resp.text # the model's reply
80
+ resp.usage.input_tokens # 12
81
+ resp.usage.cache_read_tokens # 0
82
+ resp.cost_usd # 0.000176
83
+ resp.cache_hit # False
84
+ ledger.total_usd # accumulates across all calls
85
+ ledger.by_model # {"anthropic.claude-sonnet-4-5": 0.000176}
86
+ ```
87
+
88
+ ## Structured output with repair
89
+
90
+ ```python
91
+ from pydantic import BaseModel
92
+ from bedrock_kit import JsonSchema
93
+
94
+ class Sentiment(BaseModel):
95
+ label: str
96
+ confidence: float
97
+
98
+ resp = client.invoke(
99
+ model_id="anthropic.claude-sonnet-4-5",
100
+ messages=[{"role": "user", "content": "Classify: 'this is great!'"}],
101
+ response_schema=JsonSchema(Sentiment, max_repair_attempts=2),
102
+ )
103
+ resp.parsed # Sentiment(label="positive", confidence=0.95)
104
+ ```
105
+
106
+ If the model returns invalid JSON, `bedrock-kit` first does a light local
107
+ repair pass (strip markdown fences, trailing commas, surrounding prose).
108
+ If that still fails, it asks the model to fix its own output, up to
109
+ `max_repair_attempts` times.
110
+
111
+ ## Adaptive throttle
112
+
113
+ ```python
114
+ throttle = AdaptiveThrottle(
115
+ max_attempts=8, # total attempts (incl. first)
116
+ base_delay=1.0, # seconds
117
+ max_delay=30.0,
118
+ jitter=True, # full-jitter (uniform(0, capped_delay))
119
+ )
120
+ ```
121
+
122
+ Retries on Bedrock throttle codes: `ThrottlingException`,
123
+ `TooManyRequestsException`, `ServiceUnavailableException`,
124
+ `ProvisionedThroughputExceededException`, `ModelTimeoutException`. Does
125
+ **not** retry validation, auth, or model-not-found errors - those are
126
+ your bugs to fix, not transient.
127
+
128
+ ## Cost tracking
129
+
130
+ `CostLedger` ships pricing for popular Anthropic Bedrock models. Override
131
+ or extend with `pricing=`:
132
+
133
+ ```python
134
+ from bedrock_kit import CostLedger, Pricing
135
+
136
+ ledger = CostLedger(
137
+ pricing={
138
+ "amazon.nova-pro-v1:0": Pricing(
139
+ input=0.8, output=3.2, cache_read=0.2, cache_write=1.0
140
+ ),
141
+ },
142
+ strict=True, # raise PricingNotFoundError on unknown models
143
+ )
144
+ ```
145
+
146
+ Default pricing is best-effort and dated; verify against
147
+ [aws.amazon.com/bedrock/pricing](https://aws.amazon.com/bedrock/pricing)
148
+ before using these numbers for billing.
149
+
150
+ ## Why not LiteLLM?
151
+
152
+ LiteLLM is great if you need cross-provider routing. `bedrock-kit` is for
153
+ the case where you've already decided on Bedrock, you don't want a 46k-LOC
154
+ multi-provider abstraction, and you want a small surface a security team
155
+ can audit. We don't proxy, don't include a server, don't ship a UI. We're
156
+ < 1000 LOC of Python.
157
+
158
+ ## What it explicitly does NOT do
159
+
160
+ - No multi-provider routing
161
+ - No proxy server, no UI
162
+ - No prompt management
163
+ - No agent loop
164
+ - No image generation
165
+ - No SageMaker, Bedrock Agents, or Knowledge Bases SDK wrapping
166
+ - No streaming or cancellation yet (planned for v0.2)
167
+ - No OpenTelemetry emission yet (planned for v0.2)
168
+
169
+ ## Testing without AWS
170
+
171
+ The default `BedrockClient` makes a real boto3 client. For tests, inject a
172
+ fake that quacks like one:
173
+
174
+ ```python
175
+ from bedrock_kit import BedrockClient, AdaptiveThrottle
176
+
177
+ class FakeClient:
178
+ def converse(self, **kwargs):
179
+ return {"output": {"message": {"content": [{"text": "stub"}]}},
180
+ "stopReason": "end_turn",
181
+ "usage": {"inputTokens": 1, "outputTokens": 1}}
182
+
183
+ client = BedrockClient(client=FakeClient(), retry=AdaptiveThrottle(sleep_fn=lambda _: None))
184
+ ```
185
+
186
+ ## Status
187
+
188
+ v0.1 - alpha. Public API may change before v1.0. Issues and PRs welcome.
@@ -0,0 +1,10 @@
1
+ bedrock_kit/__init__.py,sha256=JmrMV-0j1RxQiJpY4iA2dg7pVhHwTulqiiu9E8Ra2c0,930
2
+ bedrock_kit/client.py,sha256=3_1wjyd6hMiBjR5BzzdNIPL-EttuENMw2O5eL1HsKEc,8718
3
+ bedrock_kit/cost.py,sha256=2dgzajQQFXPwWHXIS7GAJW3B-2U9p4zYd5FnCEQfajY,7772
4
+ bedrock_kit/exceptions.py,sha256=35vZlOvMOvSOyBrxYYeJ8JfQBuUzdNHtRBe0zIGgTU0,895
5
+ bedrock_kit/retry.py,sha256=cyJhqZUhq0Sj7eYequg1KpbFEA3m94S5Nk8Q7rfyvM0,4228
6
+ bedrock_kit/schema.py,sha256=TfDpEyS0oiDeXL5jrUgKSU5uGcNCqji4Ed4O8UAlvPQ,5273
7
+ bedrock_kit-0.1.0.dist-info/METADATA,sha256=jAlkIfLLvMslu5RlCPhvf5zGrz0U67eHYhllebKpfeo,6571
8
+ bedrock_kit-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
9
+ bedrock_kit-0.1.0.dist-info/licenses/LICENSE,sha256=p1GujHnprYaKo-fuZc9Tpy9i711QOy8PeYBhNM0VOdw,1074
10
+ bedrock_kit-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mukunda Rao Katta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.