powerailabs-contextkit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ build/
5
+ dist/
6
+ .venv/
7
+ .uv/
8
+ .ruff_cache/
9
+ .pytest_cache/
10
+ .mypy_cache/
11
+ .coverage
12
+ htmlcov/
13
+ .idea/
14
+ .vscode/
15
+ .DS_Store
16
+ *.log
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.4
2
+ Name: powerailabs-contextkit
3
+ Version: 0.1.0
4
+ Summary: Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt.
5
+ Author: Raghav Mishra
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: powerailabs-core<0.2,>=0.1
9
+ Provides-Extra: squeeze
10
+ Requires-Dist: powerailabs-squeeze<0.2,>=0.1; extra == 'squeeze'
11
+ Description-Content-Type: text/markdown
12
+
13
+ # powerailabs-contextkit
14
+
15
+ Treat the context window like a packed suitcase, not a string you concatenate. Declare blocks
16
+ with priorities and eviction rules; contextkit fits them to a token budget and tells you exactly
17
+ what it kept, shrank, and dropped.
18
+
19
+ **Every assembled prompt comes with a receipt.**
20
+
21
+ ![status](https://img.shields.io/badge/status-building-yellow) ![license](https://img.shields.io/badge/license-MIT-blue)
22
+
23
+ 🚧 building (v0) · `pip install powerailabs-contextkit` · `from powerailabs.contextkit import Context, Block`
24
+
25
+ ```python
26
+ from powerailabs.contextkit import Context, Block
27
+
28
+ ctx = Context(budget_tokens=8000, model="claude-opus-4-8", reserve_output=1000)
29
+ ctx.add(Block(system_prompt, priority=10, pin=True, role="system"))
30
+ ctx.add(Block(retrieved_docs, priority=5, evict="compress")) # uses squeeze if installed
31
+ ctx.add(Block(chat_history, priority=3, evict="drop_oldest"))
32
+ ctx.add(Block(user_msg, priority=9, pin=True, role="user"))
33
+
34
+ messages = ctx.assemble() # provider-ready messages, guaranteed within budget
35
+ print(ctx.report()) # the receipt: kept / truncated / dropped + token math
36
+ # [kept ] system 42->42tok
37
+ # [compressed] user 3120->980tok
38
+ # [dropped ] user 610->0tok # chat_history didn't fit
39
+ preview = ctx.whatif(budget_tokens=4000) # same inputs, tighter budget, no commit
40
+ ```
41
+
42
+ **Inbound:** call contextkit *before* the model call to build the messages you send — it applies
43
+ whenever you assemble the prompt yourself. Assembly is deterministic (stable sort by pinned →
44
+ priority → insertion). `evict="compress"` wires in `powerailabs-contextkit[squeeze]` by shape (no
45
+ import); without it, `compress` falls back to `truncate`. The `report()` decisions flow onto
46
+ core's event stream, so `acttrace` records what context the model actually saw.
47
+
48
+ See [`docs/contextkit.md`](../../docs/contextkit.md). *Part of the PowerAI Labs stack — github.com/PowerAI-Labs/powerailabs.*
@@ -0,0 +1,36 @@
1
+ # powerailabs-contextkit
2
+
3
+ Treat the context window like a packed suitcase, not a string you concatenate. Declare blocks
4
+ with priorities and eviction rules; contextkit fits them to a token budget and tells you exactly
5
+ what it kept, shrank, and dropped.
6
+
7
+ **Every assembled prompt comes with a receipt.**
8
+
9
+ ![status](https://img.shields.io/badge/status-building-yellow) ![license](https://img.shields.io/badge/license-MIT-blue)
10
+
11
+ 🚧 building (v0) · `pip install powerailabs-contextkit` · `from powerailabs.contextkit import Context, Block`
12
+
13
+ ```python
14
+ from powerailabs.contextkit import Context, Block
15
+
16
+ ctx = Context(budget_tokens=8000, model="claude-opus-4-8", reserve_output=1000)
17
+ ctx.add(Block(system_prompt, priority=10, pin=True, role="system"))
18
+ ctx.add(Block(retrieved_docs, priority=5, evict="compress")) # uses squeeze if installed
19
+ ctx.add(Block(chat_history, priority=3, evict="drop_oldest"))
20
+ ctx.add(Block(user_msg, priority=9, pin=True, role="user"))
21
+
22
+ messages = ctx.assemble() # provider-ready messages, guaranteed within budget
23
+ print(ctx.report()) # the receipt: kept / truncated / dropped + token math
24
+ # [kept ] system 42->42tok
25
+ # [compressed] user 3120->980tok
26
+ # [dropped ] user 610->0tok # chat_history didn't fit
27
+ preview = ctx.whatif(budget_tokens=4000) # same inputs, tighter budget, no commit
28
+ ```
29
+
30
+ **Inbound:** call contextkit *before* the model call to build the messages you send — it applies
31
+ whenever you assemble the prompt yourself. Assembly is deterministic (stable sort by pinned →
32
+ priority → insertion). `evict="compress"` wires in `powerailabs-contextkit[squeeze]` by shape (no
33
+ import); without it, `compress` falls back to `truncate`. The `report()` decisions flow onto
34
+ core's event stream, so `acttrace` records what context the model actually saw.
35
+
36
+ See [`docs/contextkit.md`](../../docs/contextkit.md). *Part of the PowerAI Labs stack — github.com/PowerAI-Labs/powerailabs.*
@@ -0,0 +1,20 @@
1
+ [project]
2
+ name = "powerailabs-contextkit"
3
+ version = "0.1.0"
4
+ description = "Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt."
5
+ requires-python = ">=3.11"
6
+ license = "MIT"
7
+ authors = [{ name = "Raghav Mishra" }]
8
+ readme = "README.md"
9
+ dependencies = ["powerailabs-core>=0.1,<0.2"]
10
+
11
+ [project.optional-dependencies]
12
+ # Wires squeeze in as the evict="compress" strategy (via core's Compressor protocol).
13
+ squeeze = ["powerailabs-squeeze>=0.1,<0.2"]
14
+
15
+ [build-system]
16
+ requires = ["hatchling"]
17
+ build-backend = "hatchling.build"
18
+
19
+ [tool.hatch.build.targets.wheel]
20
+ packages = ["src/powerailabs"] # contributes powerailabs/contextkit only — NEVER add src/powerailabs/__init__.py
@@ -0,0 +1,266 @@
1
+ """powerailabs.contextkit — assemble context within a token budget, with a receipt.
2
+
3
+ Treat the context window like a packed suitcase: declare ``Block``s with priority, pin, and a
4
+ per-block eviction rule; :meth:`Context.assemble` packs them to a token budget (deterministically)
5
+ and :meth:`Context.report` returns the receipt — what was kept, shrunk, or dropped, with the token
6
+ math. Depends only on ``powerailabs-core`` (``tokens`` + the ``Compressor`` protocol). Tools never
7
+ import each other; ``squeeze`` plugs in by shape via the ``contextkit[squeeze]`` extra.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections.abc import Callable
13
+ from dataclasses import dataclass, field
14
+ from typing import Any
15
+
16
+ from powerailabs.core import bus, tokens
17
+
18
+ __all__ = ["Block", "Context", "AssemblyReport", "BlockDecision", "BudgetError"]
19
+
20
+ EvictStrategy = str # "drop_oldest" | "truncate" | "summarize" | "compress"
21
+
22
+
23
class BudgetError(Exception):
    """Signal that the pinned blocks by themselves do not fit inside the budget.

    Pinned blocks are never evicted, so assembly cannot continue once one of them overflows.
    """
25
+
26
+
27
@dataclass
class Block:
    """One piece of context plus the packing intent attached to it. See docs/contextkit.md §6.

    Attributes:
        content: Text of the block. Multimodal lists are a roadmap item; start text-only.
        priority: Larger values are admitted earlier; equal priorities keep insertion order.
        pin: When true the block is never evicted; assembly raises if pinned blocks overflow.
        evict: What to do when the block does not fit in the remaining budget
            (``drop_oldest`` | ``truncate`` | ``summarize`` | ``compress``).
        role: Provider message role: ``system`` | ``user`` | ``assistant`` | ``tool``.
        summarizer: ``(content, target_tokens) -> str`` callback consulted for
            ``evict="summarize"``.
    """

    content: str | list
    priority: int = 0
    pin: bool = False
    evict: EvictStrategy = "drop_oldest"
    role: str = "user"
    summarizer: Callable[[str, int], str] | None = None
46
+
47
+
48
@dataclass
class BlockDecision:
    """A single receipt line: the outcome of assembly for one block."""

    # Provider role of the block this decision describes.
    role: str
    # One of "kept" | "truncated" | "summarized" | "compressed" | "dropped".
    action: str
    # Token count of the block as supplied.
    tokens_before: int
    # Token count of what was actually emitted (0 for a dropped block).
    tokens_after: int
    # Optional free-text explanation shown after the token math.
    note: str = ""
57
+
58
+
59
@dataclass
class AssemblyReport:
    """The receipt: budget math + per-block decisions. See docs/contextkit.md §6.

    Attributes:
        budget: Total token budget the assembly was run against.
        used: Tokens consumed by the blocks that were emitted.
        reserved_output: Tokens held back from the budget for the model's output.
        model: Model name the token counts were computed for.
        decisions: One :class:`BlockDecision` per block, in packing order.
    """

    budget: int
    used: int
    reserved_output: int
    model: str
    decisions: list[BlockDecision] = field(default_factory=list)

    def __str__(self) -> str:
        """Render the receipt: a header line, then one line per block decision."""
        # Clamp at zero so the printed denominator agrees with the usable budget the packer
        # works with (it never packs against a negative budget).
        usable = max(0, self.budget - self.reserved_output)
        lines = [
            f"AssemblyReport(model={self.model}) "
            f"budget={self.budget} reserved_output={self.reserved_output} "
            f"used={self.used}/{usable}",
        ]
        for d in self.decisions:
            arrow = f"{d.tokens_before}->{d.tokens_after}tok"
            note = f" # {d.note}" if d.note else ""
            lines.append(f" [{d.action:<10}] {d.role:<9} {arrow}{note}")
        return "\n".join(lines)
80
+
81
+
82
+ # Render order: system first, conversational/context middle, the user turn last.
83
+ _ROLE_RANK = {"system": 0, "tool": 1, "assistant": 2, "user": 3}
84
+
85
+
86
class Context:
    """A token-budgeted, declarative context assembler. See docs/contextkit.md §3, §5.

    Blocks are collected with :meth:`add`, packed by :meth:`assemble`, and the outcome of the
    most recent assembly is available from :meth:`report`.
    """

    def __init__(
        self,
        budget_tokens: int,
        model: str,
        reserve_output: int = 0,
        compressor: Any = None,
    ) -> None:
        """Create an empty context.

        Args:
            budget_tokens: Total token budget for the assembled prompt.
            model: Model name passed to ``tokens.count`` for every token count.
            reserve_output: Tokens subtracted from the budget before packing.
            compressor: Optional compressor for ``evict="compress"``. When ``None``,
                ``powerailabs.squeeze`` is looked up lazily the first time it is needed.
        """
        self.budget_tokens = budget_tokens
        self.model = model
        self.reserve_output = reserve_output
        self._compressor = compressor
        self._blocks: list[Block] = []
        self._report: AssemblyReport | None = None
        self._messages: list[dict] = []

    def add(self, block: Block) -> Context:
        """Add a block. Returns ``self`` for chaining."""
        self._blocks.append(block)
        return self

    def assemble(self) -> list[dict]:
        """Pack blocks within the budget; return provider-ready messages (OpenAI/Foundry shape).

        Deterministic: stable sort by ``(pinned, priority, insertion order)``. Emits the
        :class:`AssemblyReport` onto core's bus so ``acttrace`` records what the model saw.

        Raises:
            BudgetError: If a pinned block does not fit in the remaining budget.
        """
        messages, report = self._pack(self.budget_tokens, emit=True)
        self._messages = messages
        self._report = report
        return messages

    def report(self) -> AssemblyReport:
        """Return the receipt for the most recent :meth:`assemble`.

        Raises:
            RuntimeError: If :meth:`assemble` has not been called yet.
        """
        if self._report is None:
            raise RuntimeError("call assemble() before report()")
        return self._report

    def whatif(self, budget_tokens: int) -> AssemblyReport:
        """Preview the assembly at a different budget without committing (no bus emit)."""
        _, report = self._pack(budget_tokens, emit=False)
        return report

    def for_anthropic(self) -> tuple[str, list[dict]]:
        """Anthropic adapter: split system blocks out (the Messages API takes ``system`` apart).

        Returns ``(system_text, messages)`` from the most recent :meth:`assemble`, assembling
        first if that has never happened.
        """
        # Key off the report, not the message list: an assembly that legitimately produced no
        # messages must not be re-run (and re-emitted on the bus) on every call.
        if self._report is None:
            self.assemble()
        system = "\n\n".join(m["content"] for m in self._messages if m["role"] == "system")
        rest = [m for m in self._messages if m["role"] != "system"]
        return system, rest

    # ------------------------------------------------------------------ internals

    def _pack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
        """Pack the blocks against ``budget_tokens``; return ``(messages, report)``.

        Args:
            budget_tokens: Total budget; ``reserve_output`` is subtracted before packing.
            emit: When true, the report is emitted on core's bus.

        Raises:
            BudgetError: If a pinned block does not fit in the remaining budget.
        """
        effective = max(0, budget_tokens - self.reserve_output)
        # (not pin) -> pinned (False) sorts first; then priority desc; then insertion order.
        order = sorted(
            enumerate(self._blocks),
            key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0]),
        )

        used = 0
        decisions: list[BlockDecision] = []
        kept: list[tuple[int, str, str]] = []  # (insertion_index, role, rendered_content)

        for idx, block in order:
            text = block.content if isinstance(block.content, str) else str(block.content)
            before = tokens.count(text, self.model)
            remaining = effective - used

            if before <= remaining:
                used += before
                kept.append((idx, block.role, text))
                decisions.append(BlockDecision(block.role, "kept", before, before))
                continue

            if block.pin:
                raise BudgetError(
                    f"pinned block(s) exceed budget: need >{before} tokens, "
                    f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
                )

            new_text, action, note = self._evict(block, text, remaining)
            if new_text == "":
                # Shrinking left nothing behind. An empty-content message carries no context,
                # so record the block as dropped instead of emitting it.
                reason = "shrunk to empty"
                new_text = None
                note = f"{note}; {reason}" if note else reason
            if new_text is None:
                decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
                continue
            after = tokens.count(new_text, self.model)
            used += after
            kept.append((idx, block.role, new_text))
            decisions.append(BlockDecision(block.role, action, before, after, note))

        # Render order is by role rank, then insertion order; unknown roles rank with "tool".
        kept.sort(key=lambda k: (_ROLE_RANK.get(k[1], 1), k[0]))
        messages = [{"role": role, "content": content} for _, role, content in kept]
        report = AssemblyReport(
            budget=budget_tokens,
            used=used,
            reserved_output=self.reserve_output,
            model=self.model,
            decisions=decisions,
        )
        if emit:
            bus.emit(report)
        return messages, report

    def _evict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
        """Apply a block's eviction strategy. Returns ``(content_or_None, action, note)``.

        ``None`` content means the block is dropped. Any strategy drops when no budget remains,
        and an unknown strategy drops with an explanatory note.
        """
        strategy = block.evict

        if strategy == "drop_oldest" or remaining <= 0:
            note = "no room" if remaining <= 0 and strategy != "drop_oldest" else ""
            return None, "dropped", note

        if strategy == "truncate":
            return _truncate_to_tokens(text, remaining, self.model), "truncated", ""

        if strategy == "summarize":
            if block.summarizer is not None:
                summary = block.summarizer(text, remaining)
                # The callback is not trusted to honour the target; enforce it.
                if tokens.count(summary, self.model) > remaining:
                    summary = _truncate_to_tokens(summary, remaining, self.model)
                return summary, "summarized", ""
            return (
                _truncate_to_tokens(text, remaining, self.model),
                "truncated",
                ("no summarizer; fell back to truncate"),
            )

        if strategy == "compress":
            compressor = self._get_compressor()
            if compressor is not None:
                small = _call_compressor(compressor, text, remaining, self.model)
                # Same guard as for summaries: never exceed the remaining budget.
                if tokens.count(small, self.model) > remaining:
                    small = _truncate_to_tokens(small, remaining, self.model)
                return small, "compressed", ""
            return (
                _truncate_to_tokens(text, remaining, self.model),
                "truncated",
                ("squeeze not installed; fell back to truncate"),
            )

        # Unknown strategy: be safe, drop.
        return None, "dropped", f"unknown evict strategy {strategy!r}"

    def _get_compressor(self) -> Any:
        """Return the configured compressor, else ``powerailabs.squeeze.compress``, else None."""
        if self._compressor is not None:
            return self._compressor
        # Optional plugin, discovered at runtime via the contextkit[squeeze] extra.
        import importlib

        try:
            return importlib.import_module("powerailabs.squeeze").compress
        except ModuleNotFoundError:
            return None
244
+
245
+
246
def _call_compressor(compressor: Any, text: str, target: int, model: str) -> str:
    """Run ``compressor`` on ``text`` and return only the compressed text.

    Two shapes are accepted: an object exposing ``compress(text, target_tokens=, model=)``, or
    a bare callable invoked as ``compressor(text, target_tokens=)``. Both must return a
    2-tuple; its second element is discarded.
    """
    method = getattr(compressor, "compress", None) if hasattr(compressor, "compress") else None
    if hasattr(compressor, "compress"):
        result = method(text, target_tokens=target, model=model)
    else:
        result = compressor(text, target_tokens=target)
    compressed, _unused = result
    return compressed
253
+
254
+
255
def _truncate_to_tokens(text: str, target: int, model: str) -> str:
    """Cut ``text`` down so it counts as no more than ``target`` tokens.

    The first cut is sized from the text's own characters-per-token ratio; after that the
    candidate loses a tenth of its length per step until it fits (or nothing is left).
    Returns ``""`` for a non-positive ``target`` and ``text`` unchanged when it already fits.
    """
    if target <= 0:
        return ""

    total = tokens.count(text, model)
    if total <= target:
        return text

    chars_per_token = max(1, len(text)) / max(1, total)
    shortened = text[: int(target * chars_per_token)]
    while shortened and tokens.count(shortened, model) > target:
        keep = int(len(shortened) * 0.9)
        shortened = shortened[:keep]
    return shortened
@@ -0,0 +1,113 @@
1
+ """Budgeted assembly + the receipt. Deterministic, offline (heuristic token counts)."""
2
+
3
+ import pytest
4
+ from powerailabs.contextkit import Block, BudgetError, Context
5
+ from powerailabs.core import bus, tokens
6
+
7
+
8
@pytest.fixture(autouse=True)
def _heuristic_tokens(monkeypatch):
    """Pin token counting to the offline heuristic for every test in this module."""

    # Reporting "no encoding" keeps the token math deterministic regardless of tiktoken.
    def _no_encoding(model):
        return None

    monkeypatch.setattr(tokens, "_tiktoken_encoding", _no_encoding)
    yield
13
+
14
+
15
def test_public_api_present():
    """Each public name is reachable as an attribute of the package."""
    import powerailabs.contextkit as ck

    expected = ("Block", "Context", "AssemblyReport", "BlockDecision", "BudgetError")
    for public_name in expected:
        assert hasattr(ck, public_name)
20
+
21
+
22
def test_block_defaults():
    """Unspecified fields take their defaults; explicit ones are stored as given."""
    block = Block("hi", priority=5, pin=True, role="system")
    assert block.evict == "drop_oldest"
    assert block.role == "system"
    assert block.pin
25
+
26
+
27
def test_assemble_keeps_everything_under_budget():
    """With ample budget every block is kept, rendered system first and user last."""
    ctx = Context(budget_tokens=1000, model="gpt-4o")
    for block in (
        Block("system prompt", priority=10, pin=True, role="system"),
        Block("the question", priority=9, pin=True, role="user"),
    ):
        ctx.add(block)

    roles = [message["role"] for message in ctx.assemble()]
    assert roles == ["system", "user"]  # system first, user last

    actions = {decision.action for decision in ctx.report().decisions}
    assert actions <= {"kept"}
34
+
35
+
36
def test_drop_oldest_evicts_low_priority_when_tight():
    """A low-priority ``drop_oldest`` block that cannot fit is dropped and reported."""
    ctx = Context(budget_tokens=8, model="gpt-4o")  # ~8 tokens of room
    small_system = Block("x" * 4, priority=10, role="system")  # ~1 tok, kept
    big_user = Block("y" * 200, priority=1, role="user", evict="drop_oldest")  # too big
    ctx.add(small_system)
    ctx.add(big_user)

    messages = ctx.assemble()
    assert all(message["role"] != "user" for message in messages)  # low-priority block gone

    dropped = [d for d in ctx.report().decisions if d.action == "dropped"]
    assert len(dropped) == 1
    assert dropped[0].role == "user"
45
+
46
+
47
def test_truncate_shrinks_to_fit():
    """An oversized ``truncate`` block is shrunk and the total stays within the usable budget."""
    ctx = Context(budget_tokens=12, model="gpt-4o")
    ctx.add(Block("s", priority=10, role="system"))
    ctx.add(Block("z" * 400, priority=1, role="user", evict="truncate"))
    ctx.assemble()

    receipt = ctx.report()
    user_decision = next(d for d in receipt.decisions if d.role == "user")
    assert user_decision.action == "truncated"
    assert user_decision.tokens_after < user_decision.tokens_before
    assert receipt.used <= receipt.budget - receipt.reserved_output
56
+
57
+
58
def test_pinned_overflow_raises():
    """A pinned block larger than the whole budget makes assembly raise ``BudgetError``."""
    oversized = Block("w" * 400, priority=10, pin=True, role="system")
    ctx = Context(budget_tokens=5, model="gpt-4o")
    ctx.add(oversized)
    with pytest.raises(BudgetError):
        ctx.assemble()
63
+
64
+
65
def test_reserve_output_reduces_usable_budget():
    """``reserve_output`` is taken off the budget before any block is packed."""
    ctx = Context(budget_tokens=100, model="gpt-4o", reserve_output=96)
    ctx.add(Block("a" * 200, priority=1, role="user", evict="truncate"))
    ctx.assemble()

    # only ~4 tokens usable -> truncated small
    receipt = ctx.report()
    assert receipt.used <= 4
71
+
72
+
73
def test_whatif_does_not_commit():
    """``whatif`` reports against the trial budget and leaves the committed receipt alone."""
    ctx = Context(budget_tokens=1000, model="gpt-4o")
    ctx.add(Block("hello there", priority=5, role="user"))
    ctx.assemble()
    used_before_preview = ctx.report().used

    preview = ctx.whatif(budget_tokens=3)

    assert preview.budget == 3
    assert ctx.report().used == used_before_preview  # committed report unchanged
81
+
82
+
83
def test_assembly_is_deterministic():
    """Two contexts built from identical blocks assemble to identical messages."""

    def build():
        context = Context(budget_tokens=50, model="gpt-4o")
        context.add(Block("alpha", priority=5, role="user"))
        context.add(Block("beta", priority=5, role="assistant"))
        context.add(Block("gamma", priority=9, role="system"))
        return context.assemble()

    first = build()
    second = build()
    assert first == second
92
+
93
+
94
def test_report_emitted_on_bus():
    """``assemble`` emits exactly one report on core's bus."""
    bus._reset()
    received = []
    bus.subscribe(received.append)
    try:
        ctx = Context(budget_tokens=100, model="gpt-4o")
        ctx.add(Block("hi", role="user"))
        ctx.assemble()
    finally:
        # Always clear the subscription so later tests start from a clean bus.
        bus._reset()

    assert len(received) == 1
    assert received[0].model == "gpt-4o"
105
+
106
+
107
def test_for_anthropic_splits_system():
    """The Anthropic adapter returns system text separately from the other messages."""
    ctx = Context(budget_tokens=1000, model="claude-opus-4-8")
    ctx.add(Block("you are helpful", priority=10, pin=True, role="system"))
    ctx.add(Block("hello", priority=9, pin=True, role="user"))

    system_text, remaining_messages = ctx.for_anthropic()

    assert system_text == "you are helpful"
    assert not any(message["role"] == "system" for message in remaining_messages)