powerailabs-contextkit 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: powerailabs-contextkit
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt.
5
5
  Author: Raghav Mishra
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "powerailabs-contextkit"
3
- version = "0.3.0"
3
+ version = "0.4.0"
4
4
  description = "Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt."
5
5
  requires-python = ">=3.11"
6
6
  license = "MIT"
@@ -9,6 +9,7 @@ import each other; ``squeeze`` plugs in by shape via the ``contextkit[squeeze]``
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
+ import inspect
12
13
  from collections.abc import Callable
13
14
  from dataclasses import dataclass, field
14
15
  from typing import Any
@@ -104,6 +105,7 @@ class Context:
104
105
  reserve_output: int = 0,
105
106
  compressor: Any = None,
106
107
  order: str = "default",
108
+ image_tokens: int = 0,
107
109
  ) -> None:
108
110
  if order not in _ORDERS:
109
111
  raise ValueError(f"order must be one of {_ORDERS}, got {order!r}")
@@ -112,6 +114,7 @@ class Context:
112
114
  self.reserve_output = reserve_output
113
115
  self._compressor = compressor
114
116
  self.order = order
117
+ self.image_tokens = image_tokens # token cost charged per image part in multimodal blocks
115
118
  self._blocks: list[Block] = []
116
119
  self._report: AssemblyReport | None = None
117
120
  self._messages: list[dict] = []
@@ -143,6 +146,17 @@ class Context:
143
146
  _, report = self._pack(budget_tokens, emit=False)
144
147
  return report
145
148
 
149
+ async def aassemble(self) -> list[dict]:
150
+ """Async assemble — like :meth:`assemble` but awaits ``async`` summarize callbacks.
151
+
152
+ Use this when a block's ``summarizer`` is a coroutine (e.g. an LLM summarizer). The sync
153
+ :meth:`assemble` falls back to truncation for async summarizers.
154
+ """
155
+ messages, report = await self._apack(self.budget_tokens, emit=True)
156
+ self._messages = messages
157
+ self._report = report
158
+ return messages
159
+
146
160
  def for_anthropic(self) -> tuple[str, list[dict]]:
147
161
  """Anthropic adapter: split system blocks out (the Messages API takes ``system`` apart).
148
162
 
@@ -196,36 +210,108 @@ class Context:
196
210
 
197
211
  # ------------------------------------------------------------------ internals
198
212
 
199
- def _pack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
200
- effective = max(0, budget_tokens - self.reserve_output)
213
+ def _ordered_blocks(self) -> list[tuple[int, Block]]:
201
214
  # (not pin) -> pinned (False) sorts first; then priority desc; then insertion order.
202
- order = sorted(
203
- enumerate(self._blocks),
204
- key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0]),
215
+ return sorted(
216
+ enumerate(self._blocks), key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0])
205
217
  )
206
218
 
219
+ def _block_tokens(self, block: Block) -> int:
220
+ """Token cost of a block, charging ``image_tokens`` per image part in multimodal content."""
221
+ content = block.content
222
+ if isinstance(content, list):
223
+ text = "".join(
224
+ p.get("text", "") for p in content if isinstance(p, dict) and "text" in p
225
+ )
226
+ images = sum(
227
+ 1
228
+ for p in content
229
+ if isinstance(p, dict) and p.get("type") in ("image", "image_url")
230
+ )
231
+ return tokens.count(text, self.model) + images * self.image_tokens
232
+ return tokens.count(str(content), self.model)
233
+
234
+ def _finish(
235
+ self, budget_tokens: int, used: int, decisions: list, kept: list, *, emit: bool
236
+ ) -> tuple[list[dict], AssemblyReport]:
237
+ ordered = _order_blocks(kept, self.order)
238
+ messages = [{"role": block.role, "content": content} for _, block, content in ordered]
239
+ report = AssemblyReport(
240
+ budget=budget_tokens,
241
+ used=used,
242
+ reserved_output=self.reserve_output,
243
+ model=self.model,
244
+ decisions=decisions,
245
+ order=self.order,
246
+ )
247
+ if emit:
248
+ bus.emit(report)
249
+ return messages, report
250
+
251
+ def _pack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
252
+ effective = max(0, budget_tokens - self.reserve_output)
207
253
  used = 0
208
254
  decisions: list[BlockDecision] = []
209
- kept: list[tuple[int, Block, str]] = [] # (insertion_index, block, rendered_content)
255
+ kept: list[tuple[int, Block, Any]] = [] # (insertion_index, block, rendered_content)
210
256
 
211
- for idx, block in order:
212
- text = block.content if isinstance(block.content, str) else str(block.content)
213
- before = tokens.count(text, self.model)
257
+ for idx, block in self._ordered_blocks():
258
+ before = self._block_tokens(block)
214
259
  remaining = effective - used
215
260
 
216
261
  if before <= remaining:
217
262
  used += before
218
- kept.append((idx, block, text))
263
+ kept.append((idx, block, block.content))
219
264
  decisions.append(BlockDecision(block.role, "kept", before, before))
220
265
  continue
266
+ if block.pin:
267
+ raise BudgetError(
268
+ f"pinned block(s) exceed budget: need >{before} tokens, "
269
+ f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
270
+ )
271
+ if not isinstance(block.content, str): # can't shrink a multimodal/list block
272
+ decisions.append(
273
+ BlockDecision(block.role, "dropped", before, 0, "multimodal: too large")
274
+ )
275
+ continue
221
276
 
277
+ new_text, action, note = self._evict(block, block.content, remaining)
278
+ if new_text is None:
279
+ decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
280
+ continue
281
+ after = tokens.count(new_text, self.model)
282
+ used += after
283
+ kept.append((idx, block, new_text))
284
+ decisions.append(BlockDecision(block.role, action, before, after, note))
285
+
286
+ return self._finish(budget_tokens, used, decisions, kept, emit=emit)
287
+
288
+ async def _apack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
289
+ effective = max(0, budget_tokens - self.reserve_output)
290
+ used = 0
291
+ decisions: list[BlockDecision] = []
292
+ kept: list[tuple[int, Block, Any]] = []
293
+
294
+ for idx, block in self._ordered_blocks():
295
+ before = self._block_tokens(block)
296
+ remaining = effective - used
297
+
298
+ if before <= remaining:
299
+ used += before
300
+ kept.append((idx, block, block.content))
301
+ decisions.append(BlockDecision(block.role, "kept", before, before))
302
+ continue
222
303
  if block.pin:
223
304
  raise BudgetError(
224
305
  f"pinned block(s) exceed budget: need >{before} tokens, "
225
306
  f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
226
307
  )
308
+ if not isinstance(block.content, str):
309
+ decisions.append(
310
+ BlockDecision(block.role, "dropped", before, 0, "multimodal: too large")
311
+ )
312
+ continue
227
313
 
228
- new_text, action, note = self._evict(block, text, remaining)
314
+ new_text, action, note = await self._aevict(block, block.content, remaining)
229
315
  if new_text is None:
230
316
  decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
231
317
  continue
@@ -234,19 +320,20 @@ class Context:
234
320
  kept.append((idx, block, new_text))
235
321
  decisions.append(BlockDecision(block.role, action, before, after, note))
236
322
 
237
- ordered = _order_blocks(kept, self.order)
238
- messages = [{"role": block.role, "content": content} for _, block, content in ordered]
239
- report = AssemblyReport(
240
- budget=budget_tokens,
241
- used=used,
242
- reserved_output=self.reserve_output,
243
- model=self.model,
244
- decisions=decisions,
245
- order=self.order,
246
- )
247
- if emit:
248
- bus.emit(report)
249
- return messages, report
323
+ return self._finish(budget_tokens, used, decisions, kept, emit=emit)
324
+
325
+ async def _aevict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
326
+ """Async eviction: await an async summarizer; delegate everything else to ``_evict``."""
327
+ if (
328
+ block.evict == "summarize"
329
+ and block.summarizer is not None
330
+ and inspect.iscoroutinefunction(block.summarizer)
331
+ ):
332
+ summary = await block.summarizer(text, remaining)
333
+ if tokens.count(summary, self.model) > remaining:
334
+ summary = _truncate_to_tokens(summary, remaining, self.model)
335
+ return summary, "summarized", ""
336
+ return self._evict(block, text, remaining)
250
337
 
251
338
  def _evict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
252
339
  """Apply a block's eviction strategy. Returns ``(content_or_None, action, note)``."""
@@ -260,15 +347,20 @@ class Context:
260
347
  return _truncate_to_tokens(text, remaining, self.model), "truncated", ""
261
348
 
262
349
  if strategy == "summarize":
263
- if block.summarizer is not None:
350
+ if block.summarizer is not None and not inspect.iscoroutinefunction(block.summarizer):
264
351
  summary = block.summarizer(text, remaining)
265
352
  if tokens.count(summary, self.model) > remaining:
266
353
  summary = _truncate_to_tokens(summary, remaining, self.model)
267
354
  return summary, "summarized", ""
355
+ note = (
356
+ "async summarizer needs aassemble()"
357
+ if block.summarizer is not None
358
+ else "no summarizer"
359
+ )
268
360
  return (
269
361
  _truncate_to_tokens(text, remaining, self.model),
270
362
  "truncated",
271
- ("no summarizer; fell back to truncate"),
363
+ f"{note}; truncated",
272
364
  )
273
365
 
274
366
  if strategy == "compress":
@@ -155,6 +155,59 @@ def test_for_anthropic_splits_system():
155
155
  assert all(m["role"] != "system" for m in messages)
156
156
 
157
157
 
158
+ def test_multimodal_image_token_cost():
159
+ ctx = Context(budget_tokens=1000, model="gpt-4o", image_tokens=85)
160
+ block = Block(
161
+ [{"type": "text", "text": "look"}, {"type": "image", "image_url": "..."}],
162
+ priority=9,
163
+ pin=True,
164
+ role="user",
165
+ )
166
+ ctx.add(block)
167
+ ctx.assemble()
168
+ d = ctx.report().decisions[0]
169
+ # text("look") ~1 tok + 1 image * 85 = ~86
170
+ assert d.tokens_before >= 85
171
+ # multimodal content is preserved as a list in the rendered message
172
+ assert isinstance(ctx.assemble()[0]["content"], list)
173
+
174
+
175
+ def test_multimodal_block_dropped_when_too_large():
176
+ ctx = Context(budget_tokens=20, model="gpt-4o", image_tokens=1000)
177
+ ctx.add(Block("keep", priority=10, role="system"))
178
+ ctx.add(Block([{"type": "image"}], priority=1, role="user", evict="drop_oldest"))
179
+ ctx.assemble()
180
+ dropped = [d for d in ctx.report().decisions if d.action == "dropped"]
181
+ assert len(dropped) == 1
182
+
183
+
184
+ async def test_async_summarizer_via_aassemble():
185
+ calls = {"n": 0}
186
+
187
+ async def summarizer(text, target):
188
+ calls["n"] += 1
189
+ return "async summary"
190
+
191
+ ctx = Context(budget_tokens=12, model="gpt-4o")
192
+ ctx.add(Block("s", priority=10, role="system"))
193
+ ctx.add(Block("z" * 400, priority=1, role="user", evict="summarize", summarizer=summarizer))
194
+ msgs = await ctx.aassemble()
195
+ assert calls["n"] == 1 # the async summarizer ran
196
+ assert "async summary" in [m["content"] for m in msgs]
197
+ assert any(d.action == "summarized" for d in ctx.report().decisions)
198
+
199
+
200
+ def test_sync_assemble_falls_back_for_async_summarizer():
201
+ async def summarizer(text, target):
202
+ return "nope"
203
+
204
+ ctx = Context(budget_tokens=12, model="gpt-4o")
205
+ ctx.add(Block("z" * 400, priority=1, role="user", evict="summarize", summarizer=summarizer))
206
+ ctx.assemble() # sync path can't await -> truncates with a note
207
+ d = ctx.report().decisions[0]
208
+ assert d.action == "truncated" and "aassemble" in d.note
209
+
210
+
158
211
  def test_for_gemini_adapter():
159
212
  ctx = Context(budget_tokens=1000, model="gpt-4o")
160
213
  ctx.add(Block("be helpful", priority=10, pin=True, role="system"))