powerailabs-contextkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""powerailabs.contextkit — assemble context within a token budget, with a receipt.
|
|
2
|
+
|
|
3
|
+
Treat the context window like a packed suitcase: declare ``Block``s with priority, pin, and a
|
|
4
|
+
per-block eviction rule; :meth:`Context.assemble` packs them to a token budget (deterministically)
|
|
5
|
+
and :meth:`Context.report` returns the receipt — what was kept, shrunk, or dropped, with the token
|
|
6
|
+
math. Depends only on ``powerailabs-core`` (``tokens``, ``bus``, and the ``Compressor`` protocol). Tools never
|
|
7
|
+
import each other; ``squeeze`` plugs in by shape via the ``contextkit[squeeze]`` extra.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from powerailabs.core import bus, tokens
|
|
17
|
+
|
|
18
|
+
# Public names exported by ``from powerailabs.contextkit import *``.
__all__ = ["Block", "Context", "AssemblyReport", "BlockDecision", "BudgetError"]
|
|
19
|
+
|
|
20
|
+
# Name of the per-block eviction rule applied when a block does not fit the remaining
# budget. ``Context._evict`` handles the four values listed below; any other value
# causes the block to be dropped with an "unknown evict strategy" note.
EvictStrategy = str # "drop_oldest" | "truncate" | "summarize" | "compress"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BudgetError(Exception):
    """Signal that a pinned block does not fit in the remaining token budget.

    Pinned blocks are never evicted, so assembly cannot proceed once one of them
    overflows; the packer raises this instead of shrinking or dropping it.
    """
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class Block:
    """One piece of context plus the packing rules that apply to it.

    See docs/contextkit.md §6.

    Attributes:
        content: Text of the block. Non-string content is rendered with ``str()``
            during packing (multimodal lists are a roadmap item).
        priority: Blocks with a higher value are considered first; equal values keep
            their insertion order.
        pin: When true the block is never evicted; assembly raises if it cannot fit.
        evict: Name of the strategy used when the block overflows the remaining budget.
        role: Message role: ``system`` | ``user`` | ``assistant`` | ``tool``.
        summarizer: ``(content, target_tokens) -> str`` callback consulted when
            ``evict="summarize"``.
    """

    # Field order is part of the public constructor signature; do not reorder.
    content: str | list
    priority: int = 0
    pin: bool = False
    evict: EvictStrategy = "drop_oldest"
    role: str = "user"
    summarizer: Callable[[str, int], str] | None = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class BlockDecision:
    """A single receipt line: the outcome of packing one block."""

    # Role copied from the block this decision describes.
    role: str
    # Outcome label recorded by the packer.
    action: str # "kept" | "truncated" | "summarized" | "compressed" | "dropped"
    # Token count of the block's text before any eviction was applied.
    tokens_before: int
    # Token count of what was actually emitted (0 for a dropped block).
    tokens_after: int
    # Optional free-form explanation, e.g. why a fallback strategy was used.
    note: str = ""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class AssemblyReport:
    """The receipt: budget math + per-block decisions. See docs/contextkit.md §6.

    Attributes:
        budget: Total token budget the assembly was run against.
        used: Tokens consumed by the blocks that were emitted.
        reserved_output: Tokens held back from the budget before packing.
        model: Model name used for token counting.
        decisions: One :class:`BlockDecision` per block, in packing order.
    """

    budget: int
    used: int
    reserved_output: int
    model: str
    decisions: list[BlockDecision] = field(default_factory=list)

    def __str__(self) -> str:
        """Render the receipt: one header line, then one line per decision."""
        # The packer clamps its effective budget at zero; mirror that here so the
        # receipt never shows a negative capacity when the reserve exceeds the budget.
        capacity = max(0, self.budget - self.reserved_output)
        lines = [
            f"AssemblyReport(model={self.model}) "
            f"budget={self.budget} reserved_output={self.reserved_output} "
            f"used={self.used}/{capacity}",
        ]
        for d in self.decisions:
            arrow = f"{d.tokens_before}->{d.tokens_after}tok"
            note = f" # {d.note}" if d.note else ""
            lines.append(f" [{d.action:<10}] {d.role:<9} {arrow}{note}")
        return "\n".join(lines)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Render order: system first, conversational/context middle, the user turn last.
|
|
83
|
+
# Lower rank renders earlier. A role missing from this table is ranked 1 (the same
# as "tool") by the ``_ROLE_RANK.get(role, 1)`` lookup in ``Context._pack``.
_ROLE_RANK = {"system": 0, "tool": 1, "assistant": 2, "user": 3}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class Context:
    """A token-budgeted, declarative context assembler. See docs/contextkit.md §3, §5.

    Register blocks with :meth:`add`, pack them with :meth:`assemble`, and read the
    receipt with :meth:`report`. :meth:`whatif` previews a different budget without
    changing the committed messages or report.
    """

    def __init__(
        self,
        budget_tokens: int,
        model: str,
        reserve_output: int = 0,
        compressor: Any = None,
    ) -> None:
        """Create an empty context.

        Args:
            budget_tokens: Total token budget for the assembled context.
            model: Model name passed to ``tokens.count`` and to the compressor.
            reserve_output: Tokens subtracted from the budget before packing.
            compressor: Optional compressor for ``evict="compress"``: either an object
                with a ``compress`` method or a plain callable. When ``None``,
                ``powerailabs.squeeze.compress`` is looked up at eviction time.
        """
        self.budget_tokens = budget_tokens
        self.model = model
        self.reserve_output = reserve_output
        self._compressor = compressor
        self._blocks: list[Block] = []
        # ``_report`` doubles as the "has assemble() run yet?" marker.
        self._report: AssemblyReport | None = None
        self._messages: list[dict] = []

    def add(self, block: Block) -> Context:
        """Add a block. Returns ``self`` for chaining."""
        self._blocks.append(block)
        return self

    def assemble(self) -> list[dict]:
        """Pack blocks within the budget; return provider-ready messages (OpenAI/Foundry shape).

        Deterministic: stable sort by ``(pinned, priority, insertion order)``. Emits the
        :class:`AssemblyReport` onto core's bus so ``acttrace`` records what the model saw.

        Raises:
            BudgetError: If a pinned block does not fit in the remaining budget.
        """
        messages, report = self._pack(self.budget_tokens, emit=True)
        self._messages = messages
        self._report = report
        return messages

    def report(self) -> AssemblyReport:
        """Return the receipt for the most recent :meth:`assemble`.

        Raises:
            RuntimeError: If :meth:`assemble` has not been called yet.
        """
        if self._report is None:
            raise RuntimeError("call assemble() before report()")
        return self._report

    def whatif(self, budget_tokens: int) -> AssemblyReport:
        """Preview the assembly at a different budget without committing (no bus emit)."""
        _, report = self._pack(budget_tokens, emit=False)
        return report

    def for_anthropic(self) -> tuple[str, list[dict]]:
        """Anthropic adapter: split system blocks out (the Messages API takes ``system`` apart).

        Returns ``(system_text, messages)`` from the most recent :meth:`assemble`,
        assembling first if that has not happened yet. System messages are joined with
        a blank line between them.
        """
        # Key off the report, not the message list: an assembly that legitimately
        # produced no messages must not be re-packed (and re-emitted on the bus)
        # every time this adapter is called.
        if self._report is None:
            self.assemble()
        system = "\n\n".join(m["content"] for m in self._messages if m["role"] == "system")
        rest = [m for m in self._messages if m["role"] != "system"]
        return system, rest

    # ------------------------------------------------------------------ internals

    def _pack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
        """Pack the registered blocks into ``budget_tokens``.

        Args:
            budget_tokens: Total budget; ``reserve_output`` is subtracted from it
                (clamped at zero) to get the space available for blocks.
            emit: When true, the resulting report is sent to ``bus.emit``.

        Returns:
            ``(messages, report)``. Messages are ordered by role rank, then by
            insertion order.

        Raises:
            BudgetError: If a pinned block does not fit in the remaining budget.
        """
        effective = max(0, budget_tokens - self.reserve_output)
        # (not pin) -> pinned (False) sorts first; then priority desc; then insertion order.
        order = sorted(
            enumerate(self._blocks),
            key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0]),
        )

        used = 0
        decisions: list[BlockDecision] = []
        kept: list[tuple[int, str, str]] = []  # (insertion_index, role, rendered_content)

        for idx, block in order:
            text = block.content if isinstance(block.content, str) else str(block.content)
            before = tokens.count(text, self.model)
            remaining = effective - used

            if before <= remaining:
                used += before
                kept.append((idx, block.role, text))
                decisions.append(BlockDecision(block.role, "kept", before, before))
                continue

            if block.pin:
                raise BudgetError(
                    f"pinned block(s) exceed budget: need >{before} tokens, "
                    f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
                )

            new_text, action, note = self._evict(block, text, remaining)
            if not new_text:
                # ``None`` means the strategy dropped the block. An empty string means
                # shrinking left nothing behind; emitting it would produce a message
                # with empty content, so record it as dropped instead.
                if new_text is not None and not note:
                    note = f"{action} to empty content"
                decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
                continue
            after = tokens.count(new_text, self.model)
            used += after
            kept.append((idx, block.role, new_text))
            decisions.append(BlockDecision(block.role, action, before, after, note))

        # Render order: role rank first (unknown roles rank 1), then insertion order.
        kept.sort(key=lambda k: (_ROLE_RANK.get(k[1], 1), k[0]))
        messages = [{"role": role, "content": content} for _, role, content in kept]
        report = AssemblyReport(
            budget=budget_tokens,
            used=used,
            reserved_output=self.reserve_output,
            model=self.model,
            decisions=decisions,
        )
        if emit:
            bus.emit(report)
        return messages, report

    def _evict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
        """Apply a block's eviction strategy. Returns ``(content_or_None, action, note)``.

        ``content_or_None`` is ``None`` when the block is dropped. Summaries and
        compressed text that still exceed ``remaining`` are truncated to fit.
        """
        strategy = block.evict

        # With no room left, every strategy degenerates to a drop.
        if strategy == "drop_oldest" or remaining <= 0:
            note = "no room" if remaining <= 0 and strategy != "drop_oldest" else ""
            return None, "dropped", note

        if strategy == "truncate":
            return _truncate_to_tokens(text, remaining, self.model), "truncated", ""

        if strategy == "summarize":
            if block.summarizer is not None:
                summary = block.summarizer(text, remaining)
                # The callback is not trusted to honour the target.
                if tokens.count(summary, self.model) > remaining:
                    summary = _truncate_to_tokens(summary, remaining, self.model)
                return summary, "summarized", ""
            return (
                _truncate_to_tokens(text, remaining, self.model),
                "truncated",
                ("no summarizer; fell back to truncate"),
            )

        if strategy == "compress":
            compressor = self._get_compressor()
            if compressor is not None:
                small = _call_compressor(compressor, text, remaining, self.model)
                # The compressor is not trusted to honour the target either.
                if tokens.count(small, self.model) > remaining:
                    small = _truncate_to_tokens(small, remaining, self.model)
                return small, "compressed", ""
            return (
                _truncate_to_tokens(text, remaining, self.model),
                "truncated",
                ("squeeze not installed; fell back to truncate"),
            )

        # Unknown strategy: be safe, drop.
        return None, "dropped", f"unknown evict strategy {strategy!r}"

    def _get_compressor(self) -> Any:
        """Return the configured compressor, else ``powerailabs.squeeze.compress``, else ``None``."""
        if self._compressor is not None:
            return self._compressor
        # Optional plugin, discovered at runtime via the contextkit[squeeze] extra.
        import importlib

        try:
            return importlib.import_module("powerailabs.squeeze").compress
        except ModuleNotFoundError:
            return None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _call_compressor(compressor: Any, text: str, target: int, model: str) -> str:
|
|
247
|
+
"""Call either a Compressor-protocol object or a ``squeeze.compress``-style callable."""
|
|
248
|
+
if hasattr(compressor, "compress"):
|
|
249
|
+
small, _handle = compressor.compress(text, target_tokens=target, model=model)
|
|
250
|
+
else:
|
|
251
|
+
small, _handle = compressor(text, target_tokens=target)
|
|
252
|
+
return small
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _truncate_to_tokens(text: str, target: int, model: str) -> str:
    """Return a prefix of ``text`` that counts at most ``target`` tokens for ``model``.

    The first cut is sized from the text's average characters-per-token; while the
    prefix still counts over target it is shortened to 90% of its length. A
    non-positive ``target`` yields the empty string.
    """
    if target <= 0:
        return ""
    total = tokens.count(text, model)
    if total <= target:
        return text
    chars_per_token = max(1, len(text)) / max(1, total)
    head = text[: int(target * chars_per_token)]
    while head:
        if tokens.count(head, model) <= target:
            break
        # Still too long: keep the leading 90% and re-measure.
        head = head[: int(len(head) * 0.9)]
    return head
|
|
File without changes
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: powerailabs-contextkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt.
|
|
5
|
+
Author: Raghav Mishra
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Requires-Dist: powerailabs-core<0.2,>=0.1
|
|
9
|
+
Provides-Extra: squeeze
|
|
10
|
+
Requires-Dist: powerailabs-squeeze<0.2,>=0.1; extra == 'squeeze'
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# powerailabs-contextkit
|
|
14
|
+
|
|
15
|
+
Treat the context window like a packed suitcase, not a string you concatenate. Declare blocks
|
|
16
|
+
with priorities and eviction rules; contextkit fits them to a token budget and tells you exactly
|
|
17
|
+
what it kept, shrank, and dropped.
|
|
18
|
+
|
|
19
|
+
**Every assembled prompt comes with a receipt.**
|
|
20
|
+
|
|
21
|
+
 
|
|
22
|
+
|
|
23
|
+
🚧 building (v0) · `pip install powerailabs-contextkit` · `from powerailabs.contextkit import Context, Block`
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from powerailabs.contextkit import Context, Block
|
|
27
|
+
|
|
28
|
+
ctx = Context(budget_tokens=8000, model="claude-opus-4-8", reserve_output=1000)
|
|
29
|
+
ctx.add(Block(system_prompt, priority=10, pin=True, role="system"))
|
|
30
|
+
ctx.add(Block(retrieved_docs, priority=5, evict="compress")) # uses squeeze if installed
|
|
31
|
+
ctx.add(Block(chat_history, priority=3, evict="drop_oldest"))
|
|
32
|
+
ctx.add(Block(user_msg, priority=9, pin=True, role="user"))
|
|
33
|
+
|
|
34
|
+
messages = ctx.assemble() # provider-ready messages, guaranteed within budget
|
|
35
|
+
print(ctx.report()) # the receipt: kept / truncated / dropped + token math
|
|
36
|
+
# [kept ] system 42->42tok
|
|
37
|
+
# [compressed] user 3120->980tok
|
|
38
|
+
# [dropped ] user 610->0tok # chat_history didn't fit
|
|
39
|
+
preview = ctx.whatif(budget_tokens=4000) # same inputs, tighter budget, no commit
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
**Inbound:** call contextkit *before* the model call to build the messages you send — it applies
|
|
43
|
+
whenever you assemble the prompt yourself. Assembly is deterministic (stable sort by pinned →
|
|
44
|
+
priority → insertion). `evict="compress"` wires in `powerailabs-contextkit[squeeze]` by shape (no
|
|
45
|
+
static import; the compressor is looked up at runtime); without it, `compress` falls back to `truncate`. The `report()` decisions flow onto
|
|
46
|
+
core's event stream, so `acttrace` records what context the model actually saw.
|
|
47
|
+
|
|
48
|
+
See [`docs/contextkit.md`](../../docs/contextkit.md). *Part of the PowerAI Labs stack — github.com/PowerAI-Labs/powerailabs.*
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
powerailabs/contextkit/__init__.py,sha256=FRW5v3LmYZVmqOyu5BmHqKespWd7bVvUaxSp2FTHM18,10469
|
|
2
|
+
powerailabs/contextkit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
powerailabs_contextkit-0.1.0.dist-info/METADATA,sha256=Y_EmgF6vplnxrEj68OcDG3CNkq-SbERn1cUQhqB30J4,2390
|
|
4
|
+
powerailabs_contextkit-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
5
|
+
powerailabs_contextkit-0.1.0.dist-info/RECORD,,
|