powerailabs-contextkit 0.2.0__tar.gz → 0.4.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: powerailabs-contextkit
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt.
5
5
  Author: Raghav Mishra
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "powerailabs-contextkit"
3
- version = "0.2.0"
3
+ version = "0.4.0"
4
4
  description = "Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt."
5
5
  requires-python = ">=3.11"
6
6
  license = "MIT"
@@ -9,6 +9,7 @@ import each other; ``squeeze`` plugs in by shape via the ``contextkit[squeeze]``
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
+ import inspect
12
13
  from collections.abc import Callable
13
14
  from dataclasses import dataclass, field
14
15
  from typing import Any
@@ -104,6 +105,7 @@ class Context:
104
105
  reserve_output: int = 0,
105
106
  compressor: Any = None,
106
107
  order: str = "default",
108
+ image_tokens: int = 0,
107
109
  ) -> None:
108
110
  if order not in _ORDERS:
109
111
  raise ValueError(f"order must be one of {_ORDERS}, got {order!r}")
@@ -112,6 +114,7 @@ class Context:
112
114
  self.reserve_output = reserve_output
113
115
  self._compressor = compressor
114
116
  self.order = order
117
+ self.image_tokens = image_tokens # token cost charged per image part in multimodal blocks
115
118
  self._blocks: list[Block] = []
116
119
  self._report: AssemblyReport | None = None
117
120
  self._messages: list[dict] = []
@@ -143,6 +146,17 @@ class Context:
143
146
  _, report = self._pack(budget_tokens, emit=False)
144
147
  return report
145
148
 
149
+ async def aassemble(self) -> list[dict]:
150
+ """Async assemble — like :meth:`assemble` but awaits ``async`` summarize callbacks.
151
+
152
+ Use this when a block's ``summarizer`` is a coroutine (e.g. an LLM summarizer). The sync
153
+ :meth:`assemble` falls back to truncation for async summarizers.
154
+ """
155
+ messages, report = await self._apack(self.budget_tokens, emit=True)
156
+ self._messages = messages
157
+ self._report = report
158
+ return messages
159
+
146
160
  def for_anthropic(self) -> tuple[str, list[dict]]:
147
161
  """Anthropic adapter: split system blocks out (the Messages API takes ``system`` apart).
148
162
 
@@ -154,38 +168,150 @@ class Context:
154
168
  rest = [m for m in self._messages if m["role"] != "system"]
155
169
  return system, rest
156
170
 
171
+ def for_gemini(self) -> tuple[str, list[dict]]:
172
+ """Gemini adapter: returns ``(system_instruction, contents)``.
173
+
174
+ ``contents`` are ``{"role": "user"|"model", "parts": [{"text": ...}]}`` (Gemini uses
175
+ ``model``, not ``assistant``); system blocks become the separate ``system_instruction``.
176
+ """
177
+ if not self._messages:
178
+ self.assemble()
179
+ system = "\n\n".join(m["content"] for m in self._messages if m["role"] == "system")
180
+ contents = [
181
+ {
182
+ "role": "model" if m["role"] == "assistant" else "user",
183
+ "parts": [{"text": m["content"]}],
184
+ }
185
+ for m in self._messages
186
+ if m["role"] != "system"
187
+ ]
188
+ return system, contents
189
+
190
+ def for_bedrock(self) -> tuple[list[dict], list[dict]]:
191
+ """Bedrock Converse adapter: returns ``(system, messages)``.
192
+
193
+ ``system`` is ``[{"text": ...}]`` (or empty); ``messages`` are
194
+ ``{"role": "user"|"assistant", "content": [{"text": ...}]}`` — Bedrock allows only those
195
+ two roles, so non-user blocks map to ``assistant``.
196
+ """
197
+ if not self._messages:
198
+ self.assemble()
199
+ system_text = "\n\n".join(m["content"] for m in self._messages if m["role"] == "system")
200
+ system = [{"text": system_text}] if system_text else []
201
+ messages = [
202
+ {
203
+ "role": "user" if m["role"] == "user" else "assistant",
204
+ "content": [{"text": m["content"]}],
205
+ }
206
+ for m in self._messages
207
+ if m["role"] != "system"
208
+ ]
209
+ return system, messages
210
+
157
211
  # ------------------------------------------------------------------ internals
158
212
 
159
- def _pack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
160
- effective = max(0, budget_tokens - self.reserve_output)
213
+ def _ordered_blocks(self) -> list[tuple[int, Block]]:
161
214
  # (not pin) -> pinned (False) sorts first; then priority desc; then insertion order.
162
- order = sorted(
163
- enumerate(self._blocks),
164
- key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0]),
215
+ return sorted(
216
+ enumerate(self._blocks), key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0])
165
217
  )
166
218
 
219
+ def _block_tokens(self, block: Block) -> int:
220
+ """Token cost of a block, charging ``image_tokens`` per image part in multimodal content."""
221
+ content = block.content
222
+ if isinstance(content, list):
223
+ text = "".join(
224
+ p.get("text", "") for p in content if isinstance(p, dict) and "text" in p
225
+ )
226
+ images = sum(
227
+ 1
228
+ for p in content
229
+ if isinstance(p, dict) and p.get("type") in ("image", "image_url")
230
+ )
231
+ return tokens.count(text, self.model) + images * self.image_tokens
232
+ return tokens.count(str(content), self.model)
233
+
234
+ def _finish(
235
+ self, budget_tokens: int, used: int, decisions: list, kept: list, *, emit: bool
236
+ ) -> tuple[list[dict], AssemblyReport]:
237
+ ordered = _order_blocks(kept, self.order)
238
+ messages = [{"role": block.role, "content": content} for _, block, content in ordered]
239
+ report = AssemblyReport(
240
+ budget=budget_tokens,
241
+ used=used,
242
+ reserved_output=self.reserve_output,
243
+ model=self.model,
244
+ decisions=decisions,
245
+ order=self.order,
246
+ )
247
+ if emit:
248
+ bus.emit(report)
249
+ return messages, report
250
+
251
+ def _pack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
252
+ effective = max(0, budget_tokens - self.reserve_output)
167
253
  used = 0
168
254
  decisions: list[BlockDecision] = []
169
- kept: list[tuple[int, Block, str]] = [] # (insertion_index, block, rendered_content)
255
+ kept: list[tuple[int, Block, Any]] = [] # (insertion_index, block, rendered_content)
170
256
 
171
- for idx, block in order:
172
- text = block.content if isinstance(block.content, str) else str(block.content)
173
- before = tokens.count(text, self.model)
257
+ for idx, block in self._ordered_blocks():
258
+ before = self._block_tokens(block)
174
259
  remaining = effective - used
175
260
 
176
261
  if before <= remaining:
177
262
  used += before
178
- kept.append((idx, block, text))
263
+ kept.append((idx, block, block.content))
179
264
  decisions.append(BlockDecision(block.role, "kept", before, before))
180
265
  continue
266
+ if block.pin:
267
+ raise BudgetError(
268
+ f"pinned block(s) exceed budget: need >{before} tokens, "
269
+ f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
270
+ )
271
+ if not isinstance(block.content, str): # can't shrink a multimodal/list block
272
+ decisions.append(
273
+ BlockDecision(block.role, "dropped", before, 0, "multimodal: too large")
274
+ )
275
+ continue
276
+
277
+ new_text, action, note = self._evict(block, block.content, remaining)
278
+ if new_text is None:
279
+ decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
280
+ continue
281
+ after = tokens.count(new_text, self.model)
282
+ used += after
283
+ kept.append((idx, block, new_text))
284
+ decisions.append(BlockDecision(block.role, action, before, after, note))
285
+
286
+ return self._finish(budget_tokens, used, decisions, kept, emit=emit)
287
+
288
+ async def _apack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
289
+ effective = max(0, budget_tokens - self.reserve_output)
290
+ used = 0
291
+ decisions: list[BlockDecision] = []
292
+ kept: list[tuple[int, Block, Any]] = []
293
+
294
+ for idx, block in self._ordered_blocks():
295
+ before = self._block_tokens(block)
296
+ remaining = effective - used
181
297
 
298
+ if before <= remaining:
299
+ used += before
300
+ kept.append((idx, block, block.content))
301
+ decisions.append(BlockDecision(block.role, "kept", before, before))
302
+ continue
182
303
  if block.pin:
183
304
  raise BudgetError(
184
305
  f"pinned block(s) exceed budget: need >{before} tokens, "
185
306
  f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
186
307
  )
308
+ if not isinstance(block.content, str):
309
+ decisions.append(
310
+ BlockDecision(block.role, "dropped", before, 0, "multimodal: too large")
311
+ )
312
+ continue
187
313
 
188
- new_text, action, note = self._evict(block, text, remaining)
314
+ new_text, action, note = await self._aevict(block, block.content, remaining)
189
315
  if new_text is None:
190
316
  decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
191
317
  continue
@@ -194,19 +320,20 @@ class Context:
194
320
  kept.append((idx, block, new_text))
195
321
  decisions.append(BlockDecision(block.role, action, before, after, note))
196
322
 
197
- ordered = _order_blocks(kept, self.order)
198
- messages = [{"role": block.role, "content": content} for _, block, content in ordered]
199
- report = AssemblyReport(
200
- budget=budget_tokens,
201
- used=used,
202
- reserved_output=self.reserve_output,
203
- model=self.model,
204
- decisions=decisions,
205
- order=self.order,
206
- )
207
- if emit:
208
- bus.emit(report)
209
- return messages, report
323
+ return self._finish(budget_tokens, used, decisions, kept, emit=emit)
324
+
325
+ async def _aevict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
326
+ """Async eviction: await an async summarizer; delegate everything else to ``_evict``."""
327
+ if (
328
+ block.evict == "summarize"
329
+ and block.summarizer is not None
330
+ and inspect.iscoroutinefunction(block.summarizer)
331
+ ):
332
+ summary = await block.summarizer(text, remaining)
333
+ if tokens.count(summary, self.model) > remaining:
334
+ summary = _truncate_to_tokens(summary, remaining, self.model)
335
+ return summary, "summarized", ""
336
+ return self._evict(block, text, remaining)
210
337
 
211
338
  def _evict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
212
339
  """Apply a block's eviction strategy. Returns ``(content_or_None, action, note)``."""
@@ -220,15 +347,20 @@ class Context:
220
347
  return _truncate_to_tokens(text, remaining, self.model), "truncated", ""
221
348
 
222
349
  if strategy == "summarize":
223
- if block.summarizer is not None:
350
+ if block.summarizer is not None and not inspect.iscoroutinefunction(block.summarizer):
224
351
  summary = block.summarizer(text, remaining)
225
352
  if tokens.count(summary, self.model) > remaining:
226
353
  summary = _truncate_to_tokens(summary, remaining, self.model)
227
354
  return summary, "summarized", ""
355
+ note = (
356
+ "async summarizer needs aassemble()"
357
+ if block.summarizer is not None
358
+ else "no summarizer"
359
+ )
228
360
  return (
229
361
  _truncate_to_tokens(text, remaining, self.model),
230
362
  "truncated",
231
- ("no summarizer; fell back to truncate"),
363
+ f"{note}; truncated",
232
364
  )
233
365
 
234
366
  if strategy == "compress":
@@ -153,3 +153,78 @@ def test_for_anthropic_splits_system():
153
153
  system, messages = ctx.for_anthropic()
154
154
  assert system == "you are helpful"
155
155
  assert all(m["role"] != "system" for m in messages)
156
+
157
+
158
+ def test_multimodal_image_token_cost():
159
+ ctx = Context(budget_tokens=1000, model="gpt-4o", image_tokens=85)
160
+ block = Block(
161
+ [{"type": "text", "text": "look"}, {"type": "image", "image_url": "..."}],
162
+ priority=9,
163
+ pin=True,
164
+ role="user",
165
+ )
166
+ ctx.add(block)
167
+ ctx.assemble()
168
+ d = ctx.report().decisions[0]
169
+ # text("look") ~1 tok + 1 image * 85 = ~86
170
+ assert d.tokens_before >= 85
171
+ # multimodal content is preserved as a list in the rendered message
172
+ assert isinstance(ctx.assemble()[0]["content"], list)
173
+
174
+
175
+ def test_multimodal_block_dropped_when_too_large():
176
+ ctx = Context(budget_tokens=20, model="gpt-4o", image_tokens=1000)
177
+ ctx.add(Block("keep", priority=10, role="system"))
178
+ ctx.add(Block([{"type": "image"}], priority=1, role="user", evict="drop_oldest"))
179
+ ctx.assemble()
180
+ dropped = [d for d in ctx.report().decisions if d.action == "dropped"]
181
+ assert len(dropped) == 1
182
+
183
+
184
+ async def test_async_summarizer_via_aassemble():
185
+ calls = {"n": 0}
186
+
187
+ async def summarizer(text, target):
188
+ calls["n"] += 1
189
+ return "async summary"
190
+
191
+ ctx = Context(budget_tokens=12, model="gpt-4o")
192
+ ctx.add(Block("s", priority=10, role="system"))
193
+ ctx.add(Block("z" * 400, priority=1, role="user", evict="summarize", summarizer=summarizer))
194
+ msgs = await ctx.aassemble()
195
+ assert calls["n"] == 1 # the async summarizer ran
196
+ assert "async summary" in [m["content"] for m in msgs]
197
+ assert any(d.action == "summarized" for d in ctx.report().decisions)
198
+
199
+
200
+ def test_sync_assemble_falls_back_for_async_summarizer():
201
+ async def summarizer(text, target):
202
+ return "nope"
203
+
204
+ ctx = Context(budget_tokens=12, model="gpt-4o")
205
+ ctx.add(Block("z" * 400, priority=1, role="user", evict="summarize", summarizer=summarizer))
206
+ ctx.assemble() # sync path can't await -> truncates with a note
207
+ d = ctx.report().decisions[0]
208
+ assert d.action == "truncated" and "aassemble" in d.note
209
+
210
+
211
+ def test_for_gemini_adapter():
212
+ ctx = Context(budget_tokens=1000, model="gpt-4o")
213
+ ctx.add(Block("be helpful", priority=10, pin=True, role="system"))
214
+ ctx.add(Block("prior reply", priority=5, role="assistant"))
215
+ ctx.add(Block("question", priority=9, pin=True, role="user"))
216
+ system, contents = ctx.for_gemini()
217
+ assert system == "be helpful"
218
+ roles = [c["role"] for c in contents]
219
+ assert "model" in roles and "user" in roles and "system" not in roles # assistant -> model
220
+ assert contents[0]["parts"] == [{"text": "be helpful"}] or contents[0]["parts"][0]["text"]
221
+
222
+
223
+ def test_for_bedrock_adapter():
224
+ ctx = Context(budget_tokens=1000, model="gpt-4o")
225
+ ctx.add(Block("be helpful", priority=10, pin=True, role="system"))
226
+ ctx.add(Block("question", priority=9, pin=True, role="user"))
227
+ system, messages = ctx.for_bedrock()
228
+ assert system == [{"text": "be helpful"}]
229
+ assert messages == [{"role": "user", "content": [{"text": "question"}]}]
230
+ assert all(m["role"] in ("user", "assistant") for m in messages)