powerailabs-contextkit 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/PKG-INFO +1 -1
- {powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/pyproject.toml +1 -1
- {powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/src/powerailabs/contextkit/__init__.py +118 -26
- {powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/tests/test_contextkit.py +53 -0
- {powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/.gitignore +0 -0
- {powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/README.md +0 -0
- {powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/src/powerailabs/contextkit/py.typed +0 -0
{powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/src/powerailabs/contextkit/__init__.py
RENAMED
|
@@ -9,6 +9,7 @@ import each other; ``squeeze`` plugs in by shape via the ``contextkit[squeeze]``
|
|
|
9
9
|
|
|
10
10
|
from __future__ import annotations
|
|
11
11
|
|
|
12
|
+
import inspect
|
|
12
13
|
from collections.abc import Callable
|
|
13
14
|
from dataclasses import dataclass, field
|
|
14
15
|
from typing import Any
|
|
@@ -104,6 +105,7 @@ class Context:
|
|
|
104
105
|
reserve_output: int = 0,
|
|
105
106
|
compressor: Any = None,
|
|
106
107
|
order: str = "default",
|
|
108
|
+
image_tokens: int = 0,
|
|
107
109
|
) -> None:
|
|
108
110
|
if order not in _ORDERS:
|
|
109
111
|
raise ValueError(f"order must be one of {_ORDERS}, got {order!r}")
|
|
@@ -112,6 +114,7 @@ class Context:
|
|
|
112
114
|
self.reserve_output = reserve_output
|
|
113
115
|
self._compressor = compressor
|
|
114
116
|
self.order = order
|
|
117
|
+
self.image_tokens = image_tokens # token cost charged per image part in multimodal blocks
|
|
115
118
|
self._blocks: list[Block] = []
|
|
116
119
|
self._report: AssemblyReport | None = None
|
|
117
120
|
self._messages: list[dict] = []
|
|
@@ -143,6 +146,17 @@ class Context:
|
|
|
143
146
|
_, report = self._pack(budget_tokens, emit=False)
|
|
144
147
|
return report
|
|
145
148
|
|
|
149
|
+
async def aassemble(self) -> list[dict]:
|
|
150
|
+
"""Async assemble — like :meth:`assemble` but awaits ``async`` summarize callbacks.
|
|
151
|
+
|
|
152
|
+
Use this when a block's ``summarizer`` is a coroutine (e.g. an LLM summarizer). The sync
|
|
153
|
+
:meth:`assemble` falls back to truncation for async summarizers.
|
|
154
|
+
"""
|
|
155
|
+
messages, report = await self._apack(self.budget_tokens, emit=True)
|
|
156
|
+
self._messages = messages
|
|
157
|
+
self._report = report
|
|
158
|
+
return messages
|
|
159
|
+
|
|
146
160
|
def for_anthropic(self) -> tuple[str, list[dict]]:
|
|
147
161
|
"""Anthropic adapter: split system blocks out (the Messages API takes ``system`` apart).
|
|
148
162
|
|
|
@@ -196,36 +210,108 @@ class Context:
|
|
|
196
210
|
|
|
197
211
|
# ------------------------------------------------------------------ internals
|
|
198
212
|
|
|
199
|
-
def
|
|
200
|
-
effective = max(0, budget_tokens - self.reserve_output)
|
|
213
|
+
def _ordered_blocks(self) -> list[tuple[int, Block]]:
|
|
201
214
|
# (not pin) -> pinned (False) sorts first; then priority desc; then insertion order.
|
|
202
|
-
|
|
203
|
-
enumerate(self._blocks),
|
|
204
|
-
key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0]),
|
|
215
|
+
return sorted(
|
|
216
|
+
enumerate(self._blocks), key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0])
|
|
205
217
|
)
|
|
206
218
|
|
|
219
|
+
def _block_tokens(self, block: Block) -> int:
|
|
220
|
+
"""Token cost of a block, charging ``image_tokens`` per image part in multimodal content."""
|
|
221
|
+
content = block.content
|
|
222
|
+
if isinstance(content, list):
|
|
223
|
+
text = "".join(
|
|
224
|
+
p.get("text", "") for p in content if isinstance(p, dict) and "text" in p
|
|
225
|
+
)
|
|
226
|
+
images = sum(
|
|
227
|
+
1
|
|
228
|
+
for p in content
|
|
229
|
+
if isinstance(p, dict) and p.get("type") in ("image", "image_url")
|
|
230
|
+
)
|
|
231
|
+
return tokens.count(text, self.model) + images * self.image_tokens
|
|
232
|
+
return tokens.count(str(content), self.model)
|
|
233
|
+
|
|
234
|
+
def _finish(
|
|
235
|
+
self, budget_tokens: int, used: int, decisions: list, kept: list, *, emit: bool
|
|
236
|
+
) -> tuple[list[dict], AssemblyReport]:
|
|
237
|
+
ordered = _order_blocks(kept, self.order)
|
|
238
|
+
messages = [{"role": block.role, "content": content} for _, block, content in ordered]
|
|
239
|
+
report = AssemblyReport(
|
|
240
|
+
budget=budget_tokens,
|
|
241
|
+
used=used,
|
|
242
|
+
reserved_output=self.reserve_output,
|
|
243
|
+
model=self.model,
|
|
244
|
+
decisions=decisions,
|
|
245
|
+
order=self.order,
|
|
246
|
+
)
|
|
247
|
+
if emit:
|
|
248
|
+
bus.emit(report)
|
|
249
|
+
return messages, report
|
|
250
|
+
|
|
251
|
+
def _pack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
|
|
252
|
+
effective = max(0, budget_tokens - self.reserve_output)
|
|
207
253
|
used = 0
|
|
208
254
|
decisions: list[BlockDecision] = []
|
|
209
|
-
kept: list[tuple[int, Block,
|
|
255
|
+
kept: list[tuple[int, Block, Any]] = [] # (insertion_index, block, rendered_content)
|
|
210
256
|
|
|
211
|
-
for idx, block in
|
|
212
|
-
|
|
213
|
-
before = tokens.count(text, self.model)
|
|
257
|
+
for idx, block in self._ordered_blocks():
|
|
258
|
+
before = self._block_tokens(block)
|
|
214
259
|
remaining = effective - used
|
|
215
260
|
|
|
216
261
|
if before <= remaining:
|
|
217
262
|
used += before
|
|
218
|
-
kept.append((idx, block,
|
|
263
|
+
kept.append((idx, block, block.content))
|
|
219
264
|
decisions.append(BlockDecision(block.role, "kept", before, before))
|
|
220
265
|
continue
|
|
266
|
+
if block.pin:
|
|
267
|
+
raise BudgetError(
|
|
268
|
+
f"pinned block(s) exceed budget: need >{before} tokens, "
|
|
269
|
+
f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
|
|
270
|
+
)
|
|
271
|
+
if not isinstance(block.content, str): # can't shrink a multimodal/list block
|
|
272
|
+
decisions.append(
|
|
273
|
+
BlockDecision(block.role, "dropped", before, 0, "multimodal: too large")
|
|
274
|
+
)
|
|
275
|
+
continue
|
|
221
276
|
|
|
277
|
+
new_text, action, note = self._evict(block, block.content, remaining)
|
|
278
|
+
if new_text is None:
|
|
279
|
+
decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
|
|
280
|
+
continue
|
|
281
|
+
after = tokens.count(new_text, self.model)
|
|
282
|
+
used += after
|
|
283
|
+
kept.append((idx, block, new_text))
|
|
284
|
+
decisions.append(BlockDecision(block.role, action, before, after, note))
|
|
285
|
+
|
|
286
|
+
return self._finish(budget_tokens, used, decisions, kept, emit=emit)
|
|
287
|
+
|
|
288
|
+
async def _apack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
|
|
289
|
+
effective = max(0, budget_tokens - self.reserve_output)
|
|
290
|
+
used = 0
|
|
291
|
+
decisions: list[BlockDecision] = []
|
|
292
|
+
kept: list[tuple[int, Block, Any]] = []
|
|
293
|
+
|
|
294
|
+
for idx, block in self._ordered_blocks():
|
|
295
|
+
before = self._block_tokens(block)
|
|
296
|
+
remaining = effective - used
|
|
297
|
+
|
|
298
|
+
if before <= remaining:
|
|
299
|
+
used += before
|
|
300
|
+
kept.append((idx, block, block.content))
|
|
301
|
+
decisions.append(BlockDecision(block.role, "kept", before, before))
|
|
302
|
+
continue
|
|
222
303
|
if block.pin:
|
|
223
304
|
raise BudgetError(
|
|
224
305
|
f"pinned block(s) exceed budget: need >{before} tokens, "
|
|
225
306
|
f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
|
|
226
307
|
)
|
|
308
|
+
if not isinstance(block.content, str):
|
|
309
|
+
decisions.append(
|
|
310
|
+
BlockDecision(block.role, "dropped", before, 0, "multimodal: too large")
|
|
311
|
+
)
|
|
312
|
+
continue
|
|
227
313
|
|
|
228
|
-
new_text, action, note = self.
|
|
314
|
+
new_text, action, note = await self._aevict(block, block.content, remaining)
|
|
229
315
|
if new_text is None:
|
|
230
316
|
decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
|
|
231
317
|
continue
|
|
@@ -234,19 +320,20 @@ class Context:
|
|
|
234
320
|
kept.append((idx, block, new_text))
|
|
235
321
|
decisions.append(BlockDecision(block.role, action, before, after, note))
|
|
236
322
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
323
|
+
return self._finish(budget_tokens, used, decisions, kept, emit=emit)
|
|
324
|
+
|
|
325
|
+
async def _aevict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
|
|
326
|
+
"""Async eviction: await an async summarizer; delegate everything else to ``_evict``."""
|
|
327
|
+
if (
|
|
328
|
+
block.evict == "summarize"
|
|
329
|
+
and block.summarizer is not None
|
|
330
|
+
and inspect.iscoroutinefunction(block.summarizer)
|
|
331
|
+
):
|
|
332
|
+
summary = await block.summarizer(text, remaining)
|
|
333
|
+
if tokens.count(summary, self.model) > remaining:
|
|
334
|
+
summary = _truncate_to_tokens(summary, remaining, self.model)
|
|
335
|
+
return summary, "summarized", ""
|
|
336
|
+
return self._evict(block, text, remaining)
|
|
250
337
|
|
|
251
338
|
def _evict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
|
|
252
339
|
"""Apply a block's eviction strategy. Returns ``(content_or_None, action, note)``."""
|
|
@@ -260,15 +347,20 @@ class Context:
|
|
|
260
347
|
return _truncate_to_tokens(text, remaining, self.model), "truncated", ""
|
|
261
348
|
|
|
262
349
|
if strategy == "summarize":
|
|
263
|
-
if block.summarizer is not None:
|
|
350
|
+
if block.summarizer is not None and not inspect.iscoroutinefunction(block.summarizer):
|
|
264
351
|
summary = block.summarizer(text, remaining)
|
|
265
352
|
if tokens.count(summary, self.model) > remaining:
|
|
266
353
|
summary = _truncate_to_tokens(summary, remaining, self.model)
|
|
267
354
|
return summary, "summarized", ""
|
|
355
|
+
note = (
|
|
356
|
+
"async summarizer needs aassemble()"
|
|
357
|
+
if block.summarizer is not None
|
|
358
|
+
else "no summarizer"
|
|
359
|
+
)
|
|
268
360
|
return (
|
|
269
361
|
_truncate_to_tokens(text, remaining, self.model),
|
|
270
362
|
"truncated",
|
|
271
|
-
|
|
363
|
+
f"{note}; truncated",
|
|
272
364
|
)
|
|
273
365
|
|
|
274
366
|
if strategy == "compress":
|
|
@@ -155,6 +155,59 @@ def test_for_anthropic_splits_system():
|
|
|
155
155
|
assert all(m["role"] != "system" for m in messages)
|
|
156
156
|
|
|
157
157
|
|
|
158
|
+
def test_multimodal_image_token_cost():
|
|
159
|
+
ctx = Context(budget_tokens=1000, model="gpt-4o", image_tokens=85)
|
|
160
|
+
block = Block(
|
|
161
|
+
[{"type": "text", "text": "look"}, {"type": "image", "image_url": "..."}],
|
|
162
|
+
priority=9,
|
|
163
|
+
pin=True,
|
|
164
|
+
role="user",
|
|
165
|
+
)
|
|
166
|
+
ctx.add(block)
|
|
167
|
+
ctx.assemble()
|
|
168
|
+
d = ctx.report().decisions[0]
|
|
169
|
+
# text("look") ~1 tok + 1 image * 85 = ~86
|
|
170
|
+
assert d.tokens_before >= 85
|
|
171
|
+
# multimodal content is preserved as a list in the rendered message
|
|
172
|
+
assert isinstance(ctx.assemble()[0]["content"], list)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def test_multimodal_block_dropped_when_too_large():
|
|
176
|
+
ctx = Context(budget_tokens=20, model="gpt-4o", image_tokens=1000)
|
|
177
|
+
ctx.add(Block("keep", priority=10, role="system"))
|
|
178
|
+
ctx.add(Block([{"type": "image"}], priority=1, role="user", evict="drop_oldest"))
|
|
179
|
+
ctx.assemble()
|
|
180
|
+
dropped = [d for d in ctx.report().decisions if d.action == "dropped"]
|
|
181
|
+
assert len(dropped) == 1
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
async def test_async_summarizer_via_aassemble():
|
|
185
|
+
calls = {"n": 0}
|
|
186
|
+
|
|
187
|
+
async def summarizer(text, target):
|
|
188
|
+
calls["n"] += 1
|
|
189
|
+
return "async summary"
|
|
190
|
+
|
|
191
|
+
ctx = Context(budget_tokens=12, model="gpt-4o")
|
|
192
|
+
ctx.add(Block("s", priority=10, role="system"))
|
|
193
|
+
ctx.add(Block("z" * 400, priority=1, role="user", evict="summarize", summarizer=summarizer))
|
|
194
|
+
msgs = await ctx.aassemble()
|
|
195
|
+
assert calls["n"] == 1 # the async summarizer ran
|
|
196
|
+
assert "async summary" in [m["content"] for m in msgs]
|
|
197
|
+
assert any(d.action == "summarized" for d in ctx.report().decisions)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def test_sync_assemble_falls_back_for_async_summarizer():
|
|
201
|
+
async def summarizer(text, target):
|
|
202
|
+
return "nope"
|
|
203
|
+
|
|
204
|
+
ctx = Context(budget_tokens=12, model="gpt-4o")
|
|
205
|
+
ctx.add(Block("z" * 400, priority=1, role="user", evict="summarize", summarizer=summarizer))
|
|
206
|
+
ctx.assemble() # sync path can't await -> truncates with a note
|
|
207
|
+
d = ctx.report().decisions[0]
|
|
208
|
+
assert d.action == "truncated" and "aassemble" in d.note
|
|
209
|
+
|
|
210
|
+
|
|
158
211
|
def test_for_gemini_adapter():
|
|
159
212
|
ctx = Context(budget_tokens=1000, model="gpt-4o")
|
|
160
213
|
ctx.add(Block("be helpful", priority=10, pin=True, role="system"))
|
|
File without changes
|
|
File without changes
|
{powerailabs_contextkit-0.3.0 → powerailabs_contextkit-0.4.0}/src/powerailabs/contextkit/py.typed
RENAMED
|
File without changes
|