powerailabs-contextkit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- powerailabs_contextkit-0.1.0/.gitignore +16 -0
- powerailabs_contextkit-0.1.0/PKG-INFO +48 -0
- powerailabs_contextkit-0.1.0/README.md +36 -0
- powerailabs_contextkit-0.1.0/pyproject.toml +20 -0
- powerailabs_contextkit-0.1.0/src/powerailabs/contextkit/__init__.py +266 -0
- powerailabs_contextkit-0.1.0/src/powerailabs/contextkit/py.typed +0 -0
- powerailabs_contextkit-0.1.0/tests/test_contextkit.py +113 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: powerailabs-contextkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt.
|
|
5
|
+
Author: Raghav Mishra
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Requires-Dist: powerailabs-core<0.2,>=0.1
|
|
9
|
+
Provides-Extra: squeeze
|
|
10
|
+
Requires-Dist: powerailabs-squeeze<0.2,>=0.1; extra == 'squeeze'
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# powerailabs-contextkit
|
|
14
|
+
|
|
15
|
+
Treat the context window like a packed suitcase, not a string you concatenate. Declare blocks
|
|
16
|
+
with priorities and eviction rules; contextkit fits them to a token budget and tells you exactly
|
|
17
|
+
what it kept, shrank, and dropped.
|
|
18
|
+
|
|
19
|
+
**Every assembled prompt comes with a receipt.**
|
|
20
|
+
|
|
21
|
+
 
|
|
22
|
+
|
|
23
|
+
🚧 building (v0) · `pip install powerailabs-contextkit` · `from powerailabs.contextkit import Context, Block`
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from powerailabs.contextkit import Context, Block
|
|
27
|
+
|
|
28
|
+
ctx = Context(budget_tokens=8000, model="claude-opus-4-8", reserve_output=1000)
|
|
29
|
+
ctx.add(Block(system_prompt, priority=10, pin=True, role="system"))
|
|
30
|
+
ctx.add(Block(retrieved_docs, priority=5, evict="compress")) # uses squeeze if installed
|
|
31
|
+
ctx.add(Block(chat_history, priority=3, evict="drop_oldest"))
|
|
32
|
+
ctx.add(Block(user_msg, priority=9, pin=True, role="user"))
|
|
33
|
+
|
|
34
|
+
messages = ctx.assemble() # provider-ready messages, guaranteed within budget
|
|
35
|
+
print(ctx.report()) # the receipt: kept / truncated / dropped + token math
|
|
36
|
+
# [kept ] system 42->42tok
|
|
37
|
+
# [compressed] user 3120->980tok
|
|
38
|
+
# [dropped ] user 610->0tok # chat_history didn't fit
|
|
39
|
+
preview = ctx.whatif(budget_tokens=4000) # same inputs, tighter budget, no commit
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
**Inbound:** call contextkit *before* the model call to build the messages you send — it applies
|
|
43
|
+
whenever you assemble the prompt yourself. Assembly is deterministic (stable sort by pinned →
|
|
44
|
+
priority → insertion). `evict="compress"` wires in `powerailabs-contextkit[squeeze]` by shape (no
|
|
45
|
+
import); without it, `compress` falls back to `truncate`. The `report()` decisions flow onto
|
|
46
|
+
core's event stream, so `acttrace` records what context the model actually saw.
|
|
47
|
+
|
|
48
|
+
See [`docs/contextkit.md`](../../docs/contextkit.md). *Part of the PowerAI Labs stack — github.com/PowerAI-Labs/powerailabs.*
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# powerailabs-contextkit
|
|
2
|
+
|
|
3
|
+
Treat the context window like a packed suitcase, not a string you concatenate. Declare blocks
|
|
4
|
+
with priorities and eviction rules; contextkit fits them to a token budget and tells you exactly
|
|
5
|
+
what it kept, shrank, and dropped.
|
|
6
|
+
|
|
7
|
+
**Every assembled prompt comes with a receipt.**
|
|
8
|
+
|
|
9
|
+
 
|
|
10
|
+
|
|
11
|
+
🚧 building (v0) · `pip install powerailabs-contextkit` · `from powerailabs.contextkit import Context, Block`
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from powerailabs.contextkit import Context, Block
|
|
15
|
+
|
|
16
|
+
ctx = Context(budget_tokens=8000, model="claude-opus-4-8", reserve_output=1000)
|
|
17
|
+
ctx.add(Block(system_prompt, priority=10, pin=True, role="system"))
|
|
18
|
+
ctx.add(Block(retrieved_docs, priority=5, evict="compress")) # uses squeeze if installed
|
|
19
|
+
ctx.add(Block(chat_history, priority=3, evict="drop_oldest"))
|
|
20
|
+
ctx.add(Block(user_msg, priority=9, pin=True, role="user"))
|
|
21
|
+
|
|
22
|
+
messages = ctx.assemble() # provider-ready messages, guaranteed within budget
|
|
23
|
+
print(ctx.report()) # the receipt: kept / truncated / dropped + token math
|
|
24
|
+
# [kept ] system 42->42tok
|
|
25
|
+
# [compressed] user 3120->980tok
|
|
26
|
+
# [dropped ] user 610->0tok # chat_history didn't fit
|
|
27
|
+
preview = ctx.whatif(budget_tokens=4000) # same inputs, tighter budget, no commit
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
**Inbound:** call contextkit *before* the model call to build the messages you send — it applies
|
|
31
|
+
whenever you assemble the prompt yourself. Assembly is deterministic (stable sort by pinned →
|
|
32
|
+
priority → insertion). `evict="compress"` wires in `powerailabs-contextkit[squeeze]` by shape (no
|
|
33
|
+
import); without it, `compress` falls back to `truncate`. The `report()` decisions flow onto
|
|
34
|
+
core's event stream, so `acttrace` records what context the model actually saw.
|
|
35
|
+
|
|
36
|
+
See [`docs/contextkit.md`](../../docs/contextkit.md). *Part of the PowerAI Labs stack — github.com/PowerAI-Labs/powerailabs.*
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "powerailabs-contextkit"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Assemble: declare prioritized, pinnable context blocks; pack them to a token budget with an inspectable receipt."
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
authors = [{ name = "Raghav Mishra" }]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
dependencies = ["powerailabs-core>=0.1,<0.2"]
|
|
10
|
+
|
|
11
|
+
[project.optional-dependencies]
|
|
12
|
+
# Wires squeeze in as the evict="compress" strategy (via core's Compressor protocol).
|
|
13
|
+
squeeze = ["powerailabs-squeeze>=0.1,<0.2"]
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["hatchling"]
|
|
17
|
+
build-backend = "hatchling.build"
|
|
18
|
+
|
|
19
|
+
[tool.hatch.build.targets.wheel]
|
|
20
|
+
packages = ["src/powerailabs"] # contributes powerailabs/contextkit only — NEVER add src/powerailabs/__init__.py
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""powerailabs.contextkit — assemble context within a token budget, with a receipt.
|
|
2
|
+
|
|
3
|
+
Treat the context window like a packed suitcase: declare ``Block``s with priority, pin, and a
|
|
4
|
+
per-block eviction rule; :meth:`Context.assemble` packs them to a token budget (deterministically)
|
|
5
|
+
and :meth:`Context.report` returns the receipt — what was kept, shrunk, or dropped, with the token
|
|
6
|
+
math. Depends only on ``powerailabs-core`` (``tokens`` + the ``Compressor`` protocol). Tools never
|
|
7
|
+
import each other; ``squeeze`` plugs in by shape via the ``contextkit[squeeze]`` extra.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from powerailabs.core import bus, tokens
|
|
17
|
+
|
|
18
|
+
__all__ = ["Block", "Context", "AssemblyReport", "BlockDecision", "BudgetError"]
|
|
19
|
+
|
|
20
|
+
EvictStrategy = str # "drop_oldest" | "truncate" | "summarize" | "compress"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BudgetError(Exception):
    """Signal that a pinned block does not fit in the remaining token budget.

    Pinned blocks are never evicted, so assembly raises this instead of shrinking them.
    """
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class Block:
    """One piece of context plus the packing intent attached to it. See docs/contextkit.md §6.

    Only ``content`` is required; every other field has a default.
    """

    # The block text. Non-string content is rendered with ``str()`` during packing
    # (multimodal lists are a roadmap item; start text-only).
    content: str | list
    # Higher values are admitted first; ties break by insertion order (deterministic).
    priority: int = 0
    # Pinned blocks are never evicted; assembly raises if a pinned block does not fit.
    pin: bool = False
    # Strategy applied when this block overflows the remaining budget.
    evict: EvictStrategy = "drop_oldest"
    # Provider message role: ``system`` | ``user`` | ``assistant`` | ``tool``.
    role: str = "user"
    # Callback ``(content, target_tokens) -> str`` used when ``evict="summarize"``.
    summarizer: Callable[[str, int], str] | None = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class BlockDecision:
    """A single receipt line: the outcome of assembly for one block."""

    # Message role of the block this line describes.
    role: str
    # One of "kept" | "truncated" | "summarized" | "compressed" | "dropped".
    action: str
    # Token count of the block as submitted.
    tokens_before: int
    # Token count of what was actually admitted (0 when dropped).
    tokens_after: int
    # Optional free-text explanation (for example, why a fallback was used).
    note: str = ""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class AssemblyReport:
    """The receipt: budget math + per-block decisions. See docs/contextkit.md §6.

    ``str(report)`` renders a header line followed by one line per decision.
    """

    # Total token budget the assembly ran against.
    budget: int
    # Tokens consumed by the admitted blocks.
    used: int
    # Tokens held back for the model's output.
    reserved_output: int
    # Model name the token counts were computed for.
    model: str
    # One entry per block, in admission order.
    decisions: list[BlockDecision] = field(default_factory=list)

    def __str__(self) -> str:
        """Render the receipt as text: the budget header, then one row per block."""
        # Clamp at zero so the printed capacity matches the usable budget the packer
        # works with; an over-large reserve must not print a negative capacity.
        usable = max(0, self.budget - self.reserved_output)
        lines = [
            f"AssemblyReport(model={self.model}) "
            f"budget={self.budget} reserved_output={self.reserved_output} "
            f"used={self.used}/{usable}",
        ]
        for d in self.decisions:
            arrow = f"{d.tokens_before}->{d.tokens_after}tok"
            note = f" # {d.note}" if d.note else ""
            lines.append(f" [{d.action:<10}] {d.role:<9} {arrow}{note}")
        return "\n".join(lines)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Render order: system first, conversational/context middle, the user turn last.
|
|
83
|
+
_ROLE_RANK = {"system": 0, "tool": 1, "assistant": 2, "user": 3}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class Context:
    """A token-budgeted, declarative context assembler. See docs/contextkit.md §3, §5.

    Blocks are collected with :meth:`add` and packed by :meth:`assemble`; the outcome of the
    most recent assembly is available from :meth:`report` and :meth:`for_anthropic`.
    """

    def __init__(
        self,
        budget_tokens: int,
        model: str,
        reserve_output: int = 0,
        compressor: Any = None,
    ) -> None:
        """Create an empty context.

        Args:
            budget_tokens: Total token budget used by :meth:`assemble`.
            model: Model name passed to ``tokens.count`` for every measurement.
            reserve_output: Tokens subtracted from the budget before any block is admitted.
            compressor: Optional compressor for ``evict="compress"``: an object with a
                ``compress`` method or a plain callable. When ``None``,
                ``powerailabs.squeeze`` is looked up lazily at eviction time.
        """
        self.budget_tokens = budget_tokens
        self.model = model
        self.reserve_output = reserve_output
        self._compressor = compressor
        self._blocks: list[Block] = []
        self._report: AssemblyReport | None = None
        self._messages: list[dict] = []

    def add(self, block: Block) -> Context:
        """Add a block. Returns ``self`` for chaining."""
        self._blocks.append(block)
        return self

    def assemble(self) -> list[dict]:
        """Pack blocks within the budget; return provider-ready messages (OpenAI/Foundry shape).

        Deterministic: stable sort by ``(pinned, priority, insertion order)``. Emits the
        :class:`AssemblyReport` onto core's bus so ``acttrace`` records what the model saw.

        Raises:
            BudgetError: If a pinned block does not fit in the remaining budget.
        """
        messages, report = self._pack(self.budget_tokens, emit=True)
        self._messages = messages
        self._report = report
        return messages

    def report(self) -> AssemblyReport:
        """Return the receipt for the most recent :meth:`assemble`.

        Raises:
            RuntimeError: If :meth:`assemble` has not been called yet.
        """
        if self._report is None:
            raise RuntimeError("call assemble() before report()")
        return self._report

    def whatif(self, budget_tokens: int) -> AssemblyReport:
        """Preview the assembly at a different budget without committing (no bus emit)."""
        _, report = self._pack(budget_tokens, emit=False)
        return report

    def for_anthropic(self) -> tuple[str, list[dict]]:
        """Anthropic adapter: split system blocks out (the Messages API takes ``system`` apart).

        Returns ``(system_text, messages)`` from the most recent :meth:`assemble`; assembles
        first if that has never happened.
        """
        # Key "already assembled" off the stored report, not the message list: an assembly
        # that produced no messages is still a committed result and must not be re-run
        # (and re-emitted on the bus) every time this adapter is called.
        if self._report is None:
            self.assemble()
        system = "\n\n".join(m["content"] for m in self._messages if m["role"] == "system")
        rest = [m for m in self._messages if m["role"] != "system"]
        return system, rest

    # ------------------------------------------------------------------ internals

    def _pack(self, budget_tokens: int, *, emit: bool) -> tuple[list[dict], AssemblyReport]:
        """Run one packing pass at ``budget_tokens`` and build its report.

        Args:
            budget_tokens: Budget for this pass (``reserve_output`` is subtracted from it).
            emit: When true, the finished report is sent to ``bus.emit``.

        Returns:
            ``(messages, report)``; messages are ordered by role rank, then insertion order.

        Raises:
            BudgetError: If a pinned block does not fit in the remaining budget.
        """
        effective = max(0, budget_tokens - self.reserve_output)
        # (not pin) -> pinned (False) sorts first; then priority desc; then insertion order.
        order = sorted(
            enumerate(self._blocks),
            key=lambda iv: (not iv[1].pin, -iv[1].priority, iv[0]),
        )

        used = 0
        decisions: list[BlockDecision] = []
        kept: list[tuple[int, str, str]] = []  # (insertion_index, role, rendered_content)

        for idx, block in order:
            text = block.content if isinstance(block.content, str) else str(block.content)
            before = tokens.count(text, self.model)
            remaining = effective - used

            if before <= remaining:
                used += before
                kept.append((idx, block.role, text))
                decisions.append(BlockDecision(block.role, "kept", before, before))
                continue

            if block.pin:
                raise BudgetError(
                    f"pinned block(s) exceed budget: need >{before} tokens, "
                    f"{remaining} of {effective} remaining (reserve_output={self.reserve_output})"
                )

            new_text, action, note = self._evict(block, text, remaining)
            if not new_text:
                # None means the strategy dropped the block. An empty string means the
                # strategy shrank it to nothing; record that as a drop as well rather
                # than emitting a message with empty content.
                if new_text is not None and not note:
                    note = f"{action} left nothing"
                decisions.append(BlockDecision(block.role, "dropped", before, 0, note))
                continue
            after = tokens.count(new_text, self.model)
            used += after
            kept.append((idx, block.role, new_text))
            decisions.append(BlockDecision(block.role, action, before, after, note))

        # Render order: role rank first (unknown roles rank 1), then insertion order.
        kept.sort(key=lambda k: (_ROLE_RANK.get(k[1], 1), k[0]))
        messages = [{"role": role, "content": content} for _, role, content in kept]
        report = AssemblyReport(
            budget=budget_tokens,
            used=used,
            reserved_output=self.reserve_output,
            model=self.model,
            decisions=decisions,
        )
        if emit:
            bus.emit(report)
        return messages, report

    def _evict(self, block: Block, text: str, remaining: int) -> tuple[str | None, str, str]:
        """Apply a block's eviction strategy. Returns ``(content_or_None, action, note)``.

        ``None`` content means the block is dropped. ``remaining`` is the number of tokens
        still free; when it is zero or less every strategy drops.
        """
        strategy = block.evict

        if strategy == "drop_oldest" or remaining <= 0:
            # "no room" is only noted when a shrinking strategy had nothing to shrink into.
            note = "no room" if remaining <= 0 and strategy != "drop_oldest" else ""
            return None, "dropped", note

        if strategy == "truncate":
            return _truncate_to_tokens(text, remaining, self.model), "truncated", ""

        if strategy == "summarize":
            if block.summarizer is not None:
                summary = block.summarizer(text, remaining)
                # The callback is not trusted to honour the target; trim if it overshoots.
                if tokens.count(summary, self.model) > remaining:
                    summary = _truncate_to_tokens(summary, remaining, self.model)
                return summary, "summarized", ""
            return (
                _truncate_to_tokens(text, remaining, self.model),
                "truncated",
                ("no summarizer; fell back to truncate"),
            )

        if strategy == "compress":
            compressor = self._get_compressor()
            if compressor is not None:
                small = _call_compressor(compressor, text, remaining, self.model)
                # Same guard as for summaries: trim compressor output that overshoots.
                if tokens.count(small, self.model) > remaining:
                    small = _truncate_to_tokens(small, remaining, self.model)
                return small, "compressed", ""
            return (
                _truncate_to_tokens(text, remaining, self.model),
                "truncated",
                ("squeeze not installed; fell back to truncate"),
            )

        # Unknown strategy: be safe, drop.
        return None, "dropped", f"unknown evict strategy {strategy!r}"

    def _get_compressor(self) -> Any:
        """Return the configured compressor, else ``powerailabs.squeeze.compress``, else ``None``."""
        if self._compressor is not None:
            return self._compressor
        # Optional plugin, discovered at runtime via the contextkit[squeeze] extra.
        import importlib

        try:
            return importlib.import_module("powerailabs.squeeze").compress
        except ModuleNotFoundError:
            return None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _call_compressor(compressor: Any, text: str, target: int, model: str) -> str:
|
|
247
|
+
"""Call either a Compressor-protocol object or a ``squeeze.compress``-style callable."""
|
|
248
|
+
if hasattr(compressor, "compress"):
|
|
249
|
+
small, _handle = compressor.compress(text, target_tokens=target, model=model)
|
|
250
|
+
else:
|
|
251
|
+
small, _handle = compressor(text, target_tokens=target)
|
|
252
|
+
return small
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _truncate_to_tokens(text: str, target: int, model: str) -> str:
|
|
256
|
+
"""Trim ``text`` to at most ``target`` tokens (deterministic char-ratio shrink)."""
|
|
257
|
+
if target <= 0:
|
|
258
|
+
return ""
|
|
259
|
+
current = tokens.count(text, model)
|
|
260
|
+
if current <= target:
|
|
261
|
+
return text
|
|
262
|
+
ratio = max(1, len(text)) / max(1, current)
|
|
263
|
+
cut = text[: int(target * ratio)]
|
|
264
|
+
while cut and tokens.count(cut, model) > target:
|
|
265
|
+
cut = cut[: int(len(cut) * 0.9)]
|
|
266
|
+
return cut
|
|
File without changes
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Budgeted assembly + the receipt. Deterministic, offline (heuristic token counts)."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from powerailabs.contextkit import Block, BudgetError, Context
|
|
5
|
+
from powerailabs.core import bus, tokens
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.fixture(autouse=True)
def _heuristic_tokens(monkeypatch):
    """Autouse fixture: stub ``tokens._tiktoken_encoding`` to return ``None`` for each test."""

    def _no_encoding(model):
        return None

    # Force the offline heuristic so token math is deterministic regardless of tiktoken.
    monkeypatch.setattr(tokens, "_tiktoken_encoding", _no_encoding)
    yield
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_public_api_present():
    """Every advertised public name is importable from the package."""
    import powerailabs.contextkit as ck

    expected = ("Block", "Context", "AssemblyReport", "BlockDecision", "BudgetError")
    missing = [name for name in expected if not hasattr(ck, name)]
    assert not missing
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_block_defaults():
    """Explicit arguments are kept, and every field left unset takes its declared default."""
    b = Block("hi", priority=5, pin=True, role="system")
    assert b.evict == "drop_oldest" and b.role == "system" and b.pin

    # A content-only block exercises all of the defaults, not just ``evict``.
    bare = Block("hi")
    assert (bare.priority, bare.pin, bare.evict, bare.role, bare.summarizer) == (
        0,
        False,
        "drop_oldest",
        "user",
        None,
    )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_assemble_keeps_everything_under_budget():
    """With ample budget both pinned blocks are kept and rendered system-first."""
    ctx = (
        Context(budget_tokens=1000, model="gpt-4o")
        .add(Block("system prompt", priority=10, pin=True, role="system"))
        .add(Block("the question", priority=9, pin=True, role="user"))
    )
    rendered_roles = [message["role"] for message in ctx.assemble()]
    assert rendered_roles == ["system", "user"]  # system first, user last
    not_kept = [decision for decision in ctx.report().decisions if decision.action != "kept"]
    assert not not_kept
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_drop_oldest_evicts_low_priority_when_tight():
    """An oversized low-priority ``drop_oldest`` block is dropped rather than admitted."""
    small_system = Block("x" * 4, priority=10, role="system")  # ~1 tok, kept
    big_user = Block("y" * 200, priority=1, role="user", evict="drop_oldest")  # too big -> dropped
    ctx = Context(budget_tokens=8, model="gpt-4o")  # ~8 tokens of room
    ctx.add(small_system).add(big_user)

    messages = ctx.assemble()
    assert all(message["role"] != "user" for message in messages)  # low-priority block was dropped
    dropped_roles = [d.role for d in ctx.report().decisions if d.action == "dropped"]
    assert dropped_roles == ["user"]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_truncate_shrinks_to_fit():
    """A ``truncate`` block that overflows is shrunk, and total usage stays within budget."""
    ctx = Context(budget_tokens=12, model="gpt-4o")
    ctx.add(Block("s", priority=10, role="system")).add(
        Block("z" * 400, priority=1, role="user", evict="truncate")
    )
    ctx.assemble()

    receipt = ctx.report()
    user_decision = next(d for d in receipt.decisions if d.role == "user")
    assert user_decision.action == "truncated"
    assert user_decision.tokens_before > user_decision.tokens_after
    assert receipt.used <= receipt.budget - receipt.reserved_output
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_pinned_overflow_raises():
    """A pinned block larger than the whole budget makes ``assemble`` raise ``BudgetError``."""
    oversized = Block("w" * 400, priority=10, pin=True, role="system")
    ctx = Context(budget_tokens=5, model="gpt-4o").add(oversized)
    with pytest.raises(BudgetError):
        ctx.assemble()
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_reserve_output_reduces_usable_budget():
    """``reserve_output`` is taken off the budget before any block is admitted."""
    ctx = Context(budget_tokens=100, model="gpt-4o", reserve_output=96)
    ctx.add(Block("a" * 200, priority=1, role="user", evict="truncate"))
    ctx.assemble()
    usable = 100 - 96  # only ~4 tokens usable -> truncated small
    assert ctx.report().used <= usable
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_whatif_does_not_commit():
    """``whatif`` reports at the preview budget and leaves the committed report alone."""
    ctx = Context(budget_tokens=1000, model="gpt-4o").add(
        Block("hello there", priority=5, role="user")
    )
    ctx.assemble()
    used_before_preview = ctx.report().used

    preview = ctx.whatif(budget_tokens=3)

    assert preview.budget == 3
    assert ctx.report().used == used_before_preview  # committed report unchanged
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_assembly_is_deterministic():
    """Two contexts built from the same blocks assemble to equal message lists."""
    specs = (
        ("alpha", 5, "user"),
        ("beta", 5, "assistant"),
        ("gamma", 9, "system"),
    )

    def assemble_once():
        ctx = Context(budget_tokens=50, model="gpt-4o")
        for content, priority, role in specs:
            ctx.add(Block(content, priority=priority, role=role))
        return ctx.assemble()

    first = assemble_once()
    second = assemble_once()
    assert first == second
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_report_emitted_on_bus():
    """``assemble`` delivers exactly one report to a bus subscriber."""
    # NOTE(review): bus._reset() presumably clears subscribers; confirm against core.
    bus._reset()
    received = []
    bus.subscribe(received.append)
    try:
        Context(budget_tokens=100, model="gpt-4o").add(Block("hi", role="user")).assemble()
    finally:
        bus._reset()
    assert len(received) == 1
    assert received[0].model == "gpt-4o"
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_for_anthropic_splits_system():
    """``for_anthropic`` returns the system text separately and no system-role messages."""
    ctx = Context(budget_tokens=1000, model="claude-opus-4-8")
    ctx.add(Block("you are helpful", priority=10, pin=True, role="system")).add(
        Block("hello", priority=9, pin=True, role="user")
    )

    system, messages = ctx.for_anthropic()

    assert system == "you are helpful"
    assert not any(message["role"] == "system" for message in messages)
|