nodus-context 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nodus_context-0.1.0/LICENSE +21 -0
- nodus_context-0.1.0/PKG-INFO +186 -0
- nodus_context-0.1.0/README.md +169 -0
- nodus_context-0.1.0/nodus_context/__init__.py +56 -0
- nodus_context-0.1.0/nodus_context/budget.py +97 -0
- nodus_context-0.1.0/nodus_context/message.py +32 -0
- nodus_context-0.1.0/nodus_context/strategies.py +121 -0
- nodus_context-0.1.0/nodus_context/window.py +139 -0
- nodus_context-0.1.0/nodus_context.egg-info/PKG-INFO +186 -0
- nodus_context-0.1.0/nodus_context.egg-info/SOURCES.txt +16 -0
- nodus_context-0.1.0/nodus_context.egg-info/dependency_links.txt +1 -0
- nodus_context-0.1.0/nodus_context.egg-info/requires.txt +6 -0
- nodus_context-0.1.0/nodus_context.egg-info/top_level.txt +1 -0
- nodus_context-0.1.0/pyproject.toml +28 -0
- nodus_context-0.1.0/setup.cfg +4 -0
- nodus_context-0.1.0/tests/test_budget.py +82 -0
- nodus_context-0.1.0/tests/test_strategies.py +97 -0
- nodus_context-0.1.0/tests/test_window.py +98 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shawn Knight
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nodus-context
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LLM context window management: token budget, compaction strategies, tool result guards
|
|
5
|
+
Author: Shawn Knight
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Masterplanner25/nodus-context
|
|
8
|
+
Project-URL: Repository, https://github.com/Masterplanner25/nodus-context
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Provides-Extra: tiktoken
|
|
13
|
+
Requires-Dist: tiktoken>=0.5.0; extra == "tiktoken"
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
16
|
+
Dynamic: license-file
|
|
17
|
+
|
|
18
|
+
# nodus-context
|
|
19
|
+
|
|
20
|
+
**LLM context window management: token budget, compaction strategies, and
|
|
21
|
+
tool result guards for AI systems.**
|
|
22
|
+
|
|
23
|
+
Keeps agent conversations within finite LLM context windows without manual
|
|
24
|
+
message pruning. No required external dependencies — token counting uses a
|
|
25
|
+
character-count estimate (~4 characters per token) by default; install `tiktoken` for accurate counts.
|
|
26
|
+
|
|
27
|
+
> **Status:** v0.1.0 — prepared, not yet published.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Install
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install nodus-context
|
|
35
|
+
|
|
36
|
+
# With accurate tiktoken-based token counting:
|
|
37
|
+
pip install "nodus-context[tiktoken]"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## What it provides
|
|
43
|
+
|
|
44
|
+
| Component | Purpose |
|
|
45
|
+
|---|---|
|
|
46
|
+
| `ContextBudget` | Token budget with utilization tracking and overflow detection |
|
|
47
|
+
| `ContextMessage` | Normalized message (role, content, token count) |
|
|
48
|
+
| `ContextWindow` | Message list with budget enforcement and compaction |
|
|
49
|
+
| `DropToolInternalsStrategy` | Remove intermediate tool calls, keep final pairs |
|
|
50
|
+
| `SummarizeStrategy` | Keep head + tail, replace middle with a summary message |
|
|
51
|
+
| `estimate_tokens` / `count_tokens` | Character-count estimate or tiktoken-accurate counting |
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Quick start
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from nodus_context import (
    ContextBudget, ContextMessage, ContextWindow, ROLE_ASSISTANT, ROLE_USER,
)

budget = ContextBudget(max_tokens=8192)
window = ContextWindow(budget)

# add() takes a ContextMessage and returns False if it would overflow the budget
window.add(ContextMessage(role=ROLE_USER, content="What is the capital of France?"))
window.add(ContextMessage(role=ROLE_ASSISTANT, content="The capital of France is Paris."))

messages = window.messages() # list of ContextMessage
print(f"Using {budget.current_usage}/{budget.max_tokens} tokens")
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## ContextBudget
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from nodus_context import ContextBudget
|
|
76
|
+
|
|
77
|
+
budget = ContextBudget(max_tokens=4096, warning_threshold=0.80)
budget.charge(150)         # record token usage
budget.utilization         # float, 0.0–1.0 (above 1.0 when over budget)
budget.remaining           # tokens remaining (never negative)
budget.is_warning          # True once utilization >= warning_threshold
budget.is_full             # True if current_usage >= max_tokens
budget.would_overflow(500) # True if charging 500 more would exceed max_tokens
|
|
82
|
+
budget.reset()
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## ContextWindow
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from nodus_context import (
    ContextBudget, ContextMessage, ContextWindow, DropToolInternalsStrategy,
)

window = ContextWindow(ContextBudget(max_tokens=8192))

added = window.add(ContextMessage(role=role, content=content))  # False if it would overflow
window.add_force(context_message)            # add regardless of budget (e.g. system prompt)
window.messages()                            # live message list (do not mutate)
window.snapshot()                            # shallow copy of the message list
window.compact(DropToolInternalsStrategy())  # run a strategy; returns tokens freed
window.guard_tool_results(keep_last=2)       # keep only the last 2 tool results
window.budget().current_usage                # current token total
len(window)                                  # message count
```

Compaction is never automatic: `add()` returns `False` when a message would
overflow the budget; call `compact()` or `guard_tool_results()` and retry.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Compaction strategies
|
|
112
|
+
|
|
113
|
+
### DropToolInternalsStrategy
|
|
114
|
+
|
|
115
|
+
Removes intermediate `tool_use` / `tool_result` pairs, keeping only the
|
|
116
|
+
final `n` pairs. Reduces context without losing the outcome.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from nodus_context import DropToolInternalsStrategy
|
|
120
|
+
strategy = DropToolInternalsStrategy(keep_last_pairs=1)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### SummarizeStrategy
|
|
124
|
+
|
|
125
|
+
Keeps the first `keep_first` and last `keep_last` messages; replaces
everything in between with a single summary message.

```python
from nodus_context import SummarizeStrategy
strategy = SummarizeStrategy(
    keep_first=2,
    keep_last=4,
|
|
133
|
+
summary_fn=lambda msgs: "Summary of intermediate steps.",
|
|
134
|
+
)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
`summary_fn` can call an LLM — any callable that takes a list of
|
|
138
|
+
`ContextMessage` and returns a string.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Token counting
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from nodus_context import estimate_tokens, count_tokens
|
|
146
|
+
|
|
147
|
+
estimate_tokens("Hello, world!") # fast character-count estimate (no dep)
|
|
148
|
+
count_tokens("Hello, world!") # tiktoken if installed, else estimate
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Install `nodus-context[tiktoken]` for accurate counts.
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Role constants
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from nodus_context import (
|
|
159
|
+
ROLE_USER, ROLE_ASSISTANT, ROLE_SYSTEM,
|
|
160
|
+
ROLE_TOOL_USE, ROLE_TOOL_RESULT, ROLE_THINKING,
|
|
161
|
+
)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Design
|
|
167
|
+
|
|
168
|
+
- **No required dependencies.** Token estimation uses `~4 chars per token`
|
|
169
|
+
heuristic. Accurate counting requires `tiktoken` (optional extra).
|
|
170
|
+
- **Protocol-based strategies.** Any object whose `compact(messages)` method matches `CompactionStrategy`
|
|
171
|
+
works — no inheritance needed.
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Development
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
pip install -e ".[dev]"
|
|
179
|
+
pytest tests/ -q
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# nodus-context
|
|
2
|
+
|
|
3
|
+
**LLM context window management: token budget, compaction strategies, and
|
|
4
|
+
tool result guards for AI systems.**
|
|
5
|
+
|
|
6
|
+
Keeps agent conversations within finite LLM context windows without manual
|
|
7
|
+
message pruning. No required external dependencies — token counting uses a
|
|
8
|
+
character-count estimate (~4 characters per token) by default; install `tiktoken` for accurate counts.
|
|
9
|
+
|
|
10
|
+
> **Status:** v0.1.0 — prepared, not yet published.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install nodus-context
|
|
18
|
+
|
|
19
|
+
# With accurate tiktoken-based token counting:
|
|
20
|
+
pip install "nodus-context[tiktoken]"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## What it provides
|
|
26
|
+
|
|
27
|
+
| Component | Purpose |
|
|
28
|
+
|---|---|
|
|
29
|
+
| `ContextBudget` | Token budget with utilization tracking and overflow detection |
|
|
30
|
+
| `ContextMessage` | Normalized message (role, content, token count) |
|
|
31
|
+
| `ContextWindow` | Message list with budget enforcement and compaction |
|
|
32
|
+
| `DropToolInternalsStrategy` | Remove intermediate tool calls, keep final pairs |
|
|
33
|
+
| `SummarizeStrategy` | Keep head + tail, replace middle with a summary message |
|
|
34
|
+
| `estimate_tokens` / `count_tokens` | Character-count estimate or tiktoken-accurate counting |
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Quick start
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from nodus_context import (
    ContextBudget, ContextMessage, ContextWindow, ROLE_ASSISTANT, ROLE_USER,
)

budget = ContextBudget(max_tokens=8192)
window = ContextWindow(budget)

# add() takes a ContextMessage and returns False if it would overflow the budget
window.add(ContextMessage(role=ROLE_USER, content="What is the capital of France?"))
window.add(ContextMessage(role=ROLE_ASSISTANT, content="The capital of France is Paris."))

messages = window.messages() # list of ContextMessage
print(f"Using {budget.current_usage}/{budget.max_tokens} tokens")
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## ContextBudget
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from nodus_context import ContextBudget
|
|
59
|
+
|
|
60
|
+
budget = ContextBudget(max_tokens=4096, warning_threshold=0.80)
budget.charge(150)         # record token usage
budget.utilization         # float, 0.0–1.0 (above 1.0 when over budget)
budget.remaining           # tokens remaining (never negative)
budget.is_warning          # True once utilization >= warning_threshold
budget.is_full             # True if current_usage >= max_tokens
budget.would_overflow(500) # True if charging 500 more would exceed max_tokens
|
|
65
|
+
budget.reset()
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## ContextWindow
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from nodus_context import (
    ContextBudget, ContextMessage, ContextWindow, DropToolInternalsStrategy,
)

window = ContextWindow(ContextBudget(max_tokens=8192))

added = window.add(ContextMessage(role=role, content=content))  # False if it would overflow
window.add_force(context_message)            # add regardless of budget (e.g. system prompt)
window.messages()                            # live message list (do not mutate)
window.snapshot()                            # shallow copy of the message list
window.compact(DropToolInternalsStrategy())  # run a strategy; returns tokens freed
window.guard_tool_results(keep_last=2)       # keep only the last 2 tool results
window.budget().current_usage                # current token total
len(window)                                  # message count
```

Compaction is never automatic: `add()` returns `False` when a message would
overflow the budget; call `compact()` or `guard_tool_results()` and retry.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Compaction strategies
|
|
95
|
+
|
|
96
|
+
### DropToolInternalsStrategy
|
|
97
|
+
|
|
98
|
+
Removes intermediate `tool_use` / `tool_result` pairs, keeping only the
|
|
99
|
+
final `n` pairs. Reduces context without losing the outcome.
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from nodus_context import DropToolInternalsStrategy
|
|
103
|
+
strategy = DropToolInternalsStrategy(keep_last_pairs=1)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### SummarizeStrategy
|
|
107
|
+
|
|
108
|
+
Keeps the first `keep_first` and last `keep_last` messages; replaces
everything in between with a single summary message.

```python
from nodus_context import SummarizeStrategy
strategy = SummarizeStrategy(
    keep_first=2,
    keep_last=4,
|
|
116
|
+
summary_fn=lambda msgs: "Summary of intermediate steps.",
|
|
117
|
+
)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
`summary_fn` can call an LLM — any callable that takes a list of
|
|
121
|
+
`ContextMessage` and returns a string.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Token counting
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
from nodus_context import estimate_tokens, count_tokens
|
|
129
|
+
|
|
130
|
+
estimate_tokens("Hello, world!") # fast character-count estimate (no dep)
|
|
131
|
+
count_tokens("Hello, world!") # tiktoken if installed, else estimate
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Install `nodus-context[tiktoken]` for accurate counts.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Role constants
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from nodus_context import (
|
|
142
|
+
ROLE_USER, ROLE_ASSISTANT, ROLE_SYSTEM,
|
|
143
|
+
ROLE_TOOL_USE, ROLE_TOOL_RESULT, ROLE_THINKING,
|
|
144
|
+
)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Design
|
|
150
|
+
|
|
151
|
+
- **No required dependencies.** Token estimation uses `~4 chars per token`
|
|
152
|
+
heuristic. Accurate counting requires `tiktoken` (optional extra).
|
|
153
|
+
- **Protocol-based strategies.** Any object whose `compact(messages)` method matches `CompactionStrategy`
|
|
154
|
+
works — no inheritance needed.
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Development
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
pip install -e ".[dev]"
|
|
162
|
+
pytest tests/ -q
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## License
|
|
168
|
+
|
|
169
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""nodus-context — LLM context window management.
|
|
2
|
+
|
|
3
|
+
Token budget tracking, message lifecycle, and compaction strategies for AI systems
|
|
4
|
+
that need to stay within finite LLM context windows.
|
|
5
|
+
|
|
6
|
+
Core types:
|
|
7
|
+
ContextMessage — one message (role, content, token_count)
|
|
8
|
+
ContextBudget — token budget (max, usage, utilization, overflow check)
|
|
9
|
+
ContextWindow — message list with budget enforcement + compaction
|
|
10
|
+
|
|
11
|
+
Token counting:
|
|
12
|
+
estimate_tokens — character-count estimate (~4 chars per token, no dep)
|
|
13
|
+
count_tokens — accurate via tiktoken if installed, else estimate
|
|
14
|
+
|
|
15
|
+
Compaction strategies:
|
|
16
|
+
CompactionStrategy — protocol any strategy must satisfy
|
|
17
|
+
DropToolInternalsStrategy — remove tool_use/tool_result except final pairs
|
|
18
|
+
SummarizeStrategy — keep head + tail, replace middle with summary
|
|
19
|
+
|
|
20
|
+
Role constants:
|
|
21
|
+
ROLE_USER, ROLE_ASSISTANT, ROLE_SYSTEM,
|
|
22
|
+
ROLE_TOOL_USE, ROLE_TOOL_RESULT, ROLE_THINKING
|
|
23
|
+
"""
|
|
24
|
+
from .budget import ContextBudget, count_tokens, estimate_tokens
|
|
25
|
+
from .message import (
|
|
26
|
+
ROLE_ASSISTANT,
|
|
27
|
+
ROLE_SYSTEM,
|
|
28
|
+
ROLE_THINKING,
|
|
29
|
+
ROLE_TOOL_RESULT,
|
|
30
|
+
ROLE_TOOL_USE,
|
|
31
|
+
ROLE_USER,
|
|
32
|
+
ContextMessage,
|
|
33
|
+
)
|
|
34
|
+
from .strategies import CompactionStrategy, DropToolInternalsStrategy, SummarizeStrategy
|
|
35
|
+
from .window import ContextWindow
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
# Core types
|
|
39
|
+
"ContextMessage",
|
|
40
|
+
"ContextBudget",
|
|
41
|
+
"ContextWindow",
|
|
42
|
+
# Token counting
|
|
43
|
+
"estimate_tokens",
|
|
44
|
+
"count_tokens",
|
|
45
|
+
# Strategies
|
|
46
|
+
"CompactionStrategy",
|
|
47
|
+
"DropToolInternalsStrategy",
|
|
48
|
+
"SummarizeStrategy",
|
|
49
|
+
# Role constants
|
|
50
|
+
"ROLE_USER",
|
|
51
|
+
"ROLE_ASSISTANT",
|
|
52
|
+
"ROLE_SYSTEM",
|
|
53
|
+
"ROLE_TOOL_USE",
|
|
54
|
+
"ROLE_TOOL_RESULT",
|
|
55
|
+
"ROLE_THINKING",
|
|
56
|
+
]
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""ContextBudget — track token usage against a finite context window."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class ContextBudget:
    """Running token ledger for a single LLM context window.

    Fields
    ------
    max_tokens:         Hard ceiling of the window, in tokens.
    warning_threshold:  Utilization fraction (0–1) from which ``is_warning``
                        reports True. Defaults to 0.80 (80 % full).
    current_usage:      Tokens charged so far.
    """

    max_tokens: int
    warning_threshold: float = 0.80
    current_usage: int = 0

    @property
    def remaining(self) -> int:
        """Tokens left before the ceiling; never negative."""
        headroom = self.max_tokens - self.current_usage
        return headroom if headroom > 0 else 0

    @property
    def utilization(self) -> float:
        """Used share of ``max_tokens``; may exceed 1.0 when over budget.

        A non-positive ``max_tokens`` is reported as fully utilized (1.0).
        """
        return 1.0 if self.max_tokens <= 0 else self.current_usage / self.max_tokens

    @property
    def is_warning(self) -> bool:
        """True once utilization has reached ``warning_threshold``."""
        ratio = self.utilization
        return ratio >= self.warning_threshold

    @property
    def is_full(self) -> bool:
        """True when usage has reached or passed ``max_tokens``."""
        return self.max_tokens <= self.current_usage

    def would_overflow(self, tokens: int) -> bool:
        """Report whether charging *tokens* more would exceed ``max_tokens``."""
        projected = self.current_usage + tokens
        return projected > self.max_tokens

    def charge(self, tokens: int) -> None:
        """Add *tokens* to the running usage (no ceiling check)."""
        self.current_usage = self.current_usage + tokens

    def release(self, tokens: int) -> None:
        """Give back *tokens*; usage never drops below zero."""
        leftover = self.current_usage - tokens
        self.current_usage = leftover if leftover > 0 else 0

    def reset(self) -> None:
        """Forget all recorded usage."""
        self.current_usage = 0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ── Token counting helpers ─────────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
def estimate_tokens(content: Any) -> int:
    """Estimate tokens at roughly 4 characters per token, with no dependencies.

    ``None`` and the empty string count as 0. Lists, tuples and dicts are
    summed recursively (dict keys and values both count). Any other object is
    measured through ``str()``; every non-empty leaf costs at least 1 token.
    """
    if content is None:
        return 0

    if isinstance(content, dict):
        total = 0
        for key, value in content.items():
            total += estimate_tokens(key) + estimate_tokens(value)
        return total

    if isinstance(content, (list, tuple)):
        total = 0
        for item in content:
            total += estimate_tokens(item)
        return total

    if isinstance(content, str):
        length = len(content)
        if length == 0:
            return 0
        return max(1, length // 4)

    # Fallback for scalars and arbitrary objects: measure their str() form.
    rendered = str(content)
    return max(1, len(rendered) // 4)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def count_tokens(content: Any, *, model: str | None = None) -> int:
    """Return a token count via tiktoken when installed; else an estimate.

    Args:
        content: The text or content object to count. Non-string content is
            measured through ``str(content)`` when tiktoken is used.
        model: Optional model name for tiktoken encoding selection. Unknown
            models fall back to the ``cl100k_base`` encoding.

    Returns:
        The number of tokens; 0 for ``None`` (matching ``estimate_tokens``).
    """
    # Without this guard tiktoken would count the literal text "None".
    if content is None:
        return 0

    try:
        import tiktoken  # noqa: PLC0415
    except ImportError:
        return estimate_tokens(content)

    encoding = None
    if model:
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Model not known to tiktoken: use the default encoding below.
            encoding = None
    if encoding is None:
        encoding = tiktoken.get_encoding("cl100k_base")

    # disallowed_special=() makes special-token markers inside the text
    # (e.g. "<|endoftext|>") encode as ordinary text; the default setting
    # raises ValueError on them, which would break counting of arbitrary
    # conversation content.
    return len(encoding.encode(str(content), disallowed_special=()))
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""ContextMessage — a single message in an LLM context window."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
# Role names used in ``ContextMessage.role``. Nothing validates a role against
# this list; the constants exist as documentation and to avoid typos.
ROLE_USER = "user"
ROLE_ASSISTANT = "assistant"
ROLE_SYSTEM = "system"
# Tool traffic: a tool_use block and the tool_result that answers it.
ROLE_TOOL_USE = "tool_use"
ROLE_TOOL_RESULT = "tool_result"
ROLE_THINKING = "thinking"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class ContextMessage:
    """A single entry of an LLM context window.

    Fields
    ------
    role:         Who produced the message; see the ROLE_* constants.
    content:      The payload: a string or a list of content blocks.
    token_count:  Known token cost, or None when it has not been measured yet
                  (the window then estimates it when the message is added).
    message_id:   Optional identifier tying a tool_result to its tool_use.
    """

    role: str
    content: Any
    token_count: int | None = None
    message_id: str | None = None
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Compaction strategies for reducing context window token usage."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Callable, Optional
|
|
5
|
+
from .message import (
|
|
6
|
+
ROLE_ASSISTANT,
|
|
7
|
+
ROLE_SYSTEM,
|
|
8
|
+
ROLE_THINKING,
|
|
9
|
+
ROLE_TOOL_RESULT,
|
|
10
|
+
ROLE_TOOL_USE,
|
|
11
|
+
ROLE_USER,
|
|
12
|
+
ContextMessage,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from typing import Protocol, runtime_checkable
|
|
17
|
+
except ImportError:
|
|
18
|
+
from typing_extensions import Protocol, runtime_checkable # type: ignore[assignment]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@runtime_checkable
class CompactionStrategy(Protocol):
    """Structural interface for anything that can shrink a message list.

    No inheritance is required: any object exposing a ``compact`` method
    passes an ``isinstance`` check against this protocol.
    """

    def compact(self, messages: list[ContextMessage]) -> list[ContextMessage]:
        """Produce a smaller message list that keeps the conversation's meaning."""
        ...
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DropToolInternalsStrategy:
    """Remove ``tool_use`` and ``tool_result`` blocks except the final pairs.

    Messages with any other role are always preserved, in order. Does NOT
    call any LLM.

    A kept ``tool_result`` is paired with its ``tool_use`` by ``message_id``
    when it has one. When it has no ``message_id``, the nearest preceding
    id-less ``tool_use`` is kept instead, so a surviving result is not left
    without the call that produced it.

    Args:
        keep_last_pairs: Number of final tool_use/tool_result pairs to keep.
            Negative values are treated as 0.
    """

    def __init__(self, keep_last_pairs: int = 1) -> None:
        self.keep_last_pairs = max(0, keep_last_pairs)

    def compact(self, messages: list[ContextMessage]) -> list[ContextMessage]:
        """Return *messages* with all but the last tool pairs removed.

        The input list itself is returned, unchanged, when it holds no more
        ``tool_result`` messages than ``keep_last_pairs``.
        """
        result_positions = [
            i for i, m in enumerate(messages) if m.role == ROLE_TOOL_RESULT
        ]
        if len(result_positions) <= self.keep_last_pairs:
            return messages

        # Slice by explicit start index so keep_last_pairs == 0 yields an
        # empty list (a ``[-0:]`` slice would select everything).
        first_kept = len(result_positions) - self.keep_last_pairs
        kept_results = result_positions[first_kept:]
        keep_from = kept_results[0] if kept_results else None

        # IDs of tool_use blocks linked to kept tool_results.
        kept_ids: set[str] = {
            messages[i].message_id for i in kept_results if messages[i].message_id
        }

        # Positional fallback for kept results without an ID: claim the
        # closest earlier id-less tool_use not already claimed. Walking the
        # results newest-first lets each one take the call nearest to it.
        paired_uses: set[int] = set()
        for pos in reversed(kept_results):
            if messages[pos].message_id:
                continue
            for j in range(pos - 1, -1, -1):
                candidate = messages[j]
                if (
                    candidate.role == ROLE_TOOL_USE
                    and not candidate.message_id
                    and j not in paired_uses
                ):
                    paired_uses.add(j)
                    break

        result: list[ContextMessage] = []
        for i, m in enumerate(messages):
            in_kept_region = keep_from is not None and i >= keep_from
            if m.role == ROLE_TOOL_RESULT:
                if in_kept_region:
                    result.append(m)
            elif m.role == ROLE_TOOL_USE:
                linked_by_id = bool(m.message_id) and m.message_id in kept_ids
                if linked_by_id or i in paired_uses or in_kept_region:
                    result.append(m)
            else:
                result.append(m)
        return result
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class SummarizeStrategy:
    """Collapse the middle of a conversation into one summary message.

    The first *keep_first* and last *keep_last* messages survive untouched.
    Everything between them is replaced by a single ``assistant`` message
    whose text comes from ``summary_fn``, or a generated placeholder when no
    ``summary_fn`` was given.

    Args:
        keep_first: Messages to keep at the start (e.g. system prompt).
        keep_last: Messages to keep at the end (recent context).
        summary_fn: Receives the dropped messages and returns the summary
            text. When None a placeholder is used.
    """

    def __init__(
        self,
        keep_first: int = 2,
        keep_last: int = 10,
        summary_fn: Optional[Callable[[list[ContextMessage]], str]] = None,
    ) -> None:
        # Negative counts are clamped to zero.
        self.keep_first = keep_first if keep_first > 0 else 0
        self.keep_last = keep_last if keep_last > 0 else 0
        self.summary_fn = summary_fn

    def compact(self, messages: list[ContextMessage]) -> list[ContextMessage]:
        """Return head + summary + tail, or *messages* itself if nothing to drop."""
        size = len(messages)
        cut_end = size - self.keep_last
        if cut_end <= self.keep_first:
            # Head and tail already cover the whole list.
            return messages

        leading = messages[: self.keep_first]
        trailing = messages[cut_end:]
        dropped = messages[self.keep_first : cut_end]
        if not dropped:
            return messages

        if self.summary_fn is None:
            dropped_count = len(dropped)
            dropped_tokens = sum((m.token_count or 0) for m in dropped)
            text = (
                f"[{dropped_count} earlier messages summarized — "
                f"approximately {dropped_tokens} tokens]"
            )
        else:
            text = self.summary_fn(dropped)

        return leading + [ContextMessage(role=ROLE_ASSISTANT, content=text)] + trailing
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""ContextWindow — manages a list of ContextMessages within a ContextBudget."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from .budget import ContextBudget, estimate_tokens
|
|
7
|
+
from .message import ROLE_TOOL_RESULT, ROLE_TOOL_USE, ContextMessage
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from .strategies import CompactionStrategy
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ContextWindow:
|
|
14
|
+
"""Ordered list of ``ContextMessage`` objects with budget enforcement.
|
|
15
|
+
|
|
16
|
+
Usage::
|
|
17
|
+
|
|
18
|
+
budget = ContextBudget(max_tokens=128_000)
|
|
19
|
+
window = ContextWindow(budget)
|
|
20
|
+
window.add_force(ContextMessage(role="system", content="You are helpful."))
|
|
21
|
+
added = window.add(ContextMessage(role="user", content="Hello"))
|
|
22
|
+
if not added:
|
|
23
|
+
# window is full — compact before adding
|
|
24
|
+
window.compact(DropToolInternalsStrategy())
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def __init__(self, budget: ContextBudget) -> None:
|
|
28
|
+
self._budget = budget
|
|
29
|
+
self._messages: list[ContextMessage] = []
|
|
30
|
+
|
|
31
|
+
# ── Adding messages ────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
def add(self, message: ContextMessage) -> bool:
|
|
34
|
+
"""Add *message* only if it fits within the budget.
|
|
35
|
+
|
|
36
|
+
Returns False and does NOT add the message if adding it would overflow.
|
|
37
|
+
Token count is estimated lazily if ``message.token_count`` is None.
|
|
38
|
+
"""
|
|
39
|
+
tokens = self._resolve_tokens(message)
|
|
40
|
+
if self._budget.would_overflow(tokens):
|
|
41
|
+
return False
|
|
42
|
+
message = _with_token_count(message, tokens)
|
|
43
|
+
self._messages.append(message)
|
|
44
|
+
self._budget.charge(tokens)
|
|
45
|
+
return True
|
|
46
|
+
|
|
47
|
+
def add_force(self, message: ContextMessage) -> None:
|
|
48
|
+
"""Add *message* regardless of budget (for system prompts, required context).
|
|
49
|
+
|
|
50
|
+
Charges the tokens but does NOT block on overflow.
|
|
51
|
+
"""
|
|
52
|
+
tokens = self._resolve_tokens(message)
|
|
53
|
+
message = _with_token_count(message, tokens)
|
|
54
|
+
self._messages.append(message)
|
|
55
|
+
self._budget.charge(tokens)
|
|
56
|
+
|
|
57
|
+
# ── Reading ────────────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
def messages(self) -> list[ContextMessage]:
|
|
60
|
+
"""Return the current message list (live reference — do not mutate)."""
|
|
61
|
+
return self._messages
|
|
62
|
+
|
|
63
|
+
def snapshot(self) -> list[ContextMessage]:
|
|
64
|
+
"""Return a shallow copy of the message list."""
|
|
65
|
+
return list(self._messages)
|
|
66
|
+
|
|
67
|
+
def budget(self) -> ContextBudget:
|
|
68
|
+
return self._budget
|
|
69
|
+
|
|
70
|
+
def __len__(self) -> int:
|
|
71
|
+
return len(self._messages)
|
|
72
|
+
|
|
73
|
+
# ── Compaction ─────────────────────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
def compact(self, strategy: "CompactionStrategy") -> int:
|
|
76
|
+
"""Run *strategy* and return the number of tokens freed.
|
|
77
|
+
|
|
78
|
+
The strategy receives the full message list and returns a reduced list.
|
|
79
|
+
``current_usage`` is recomputed from the surviving messages.
|
|
80
|
+
"""
|
|
81
|
+
before = self._budget.current_usage
|
|
82
|
+
self._messages = strategy.compact(list(self._messages))
|
|
83
|
+
new_usage = sum(
|
|
84
|
+
self._resolve_tokens(m) for m in self._messages
|
|
85
|
+
)
|
|
86
|
+
self._budget.current_usage = new_usage
|
|
87
|
+
freed = before - new_usage
|
|
88
|
+
return max(0, freed)
|
|
89
|
+
|
|
90
|
+
def guard_tool_results(self, keep_last: int = 3) -> int:
|
|
91
|
+
"""Drop ``tool_result`` messages beyond the most recent *keep_last*.
|
|
92
|
+
|
|
93
|
+
Preserves all ``user`` and ``assistant`` turns. Drops the matching
|
|
94
|
+
``tool_use`` block for each dropped ``tool_result`` when possible.
|
|
95
|
+
|
|
96
|
+
Returns the number of tokens freed.
|
|
97
|
+
"""
|
|
98
|
+
# Collect tool_result indices in order
|
|
99
|
+
result_indices = [
|
|
100
|
+
i for i, m in enumerate(self._messages) if m.role == ROLE_TOOL_RESULT
|
|
101
|
+
]
|
|
102
|
+
if len(result_indices) <= keep_last:
|
|
103
|
+
return 0
|
|
104
|
+
|
|
105
|
+
drop_indices = set(result_indices[: -keep_last])
|
|
106
|
+
|
|
107
|
+
# Also drop the matching tool_use for each dropped tool_result
|
|
108
|
+
ids_to_drop = {
|
|
109
|
+
self._messages[i].message_id
|
|
110
|
+
for i in drop_indices
|
|
111
|
+
if self._messages[i].message_id
|
|
112
|
+
}
|
|
113
|
+
for i, m in enumerate(self._messages):
|
|
114
|
+
if m.role == ROLE_TOOL_USE and m.message_id in ids_to_drop:
|
|
115
|
+
drop_indices.add(i)
|
|
116
|
+
|
|
117
|
+
freed = sum(
|
|
118
|
+
self._resolve_tokens(self._messages[i]) for i in drop_indices
|
|
119
|
+
)
|
|
120
|
+
self._messages = [
|
|
121
|
+
m for i, m in enumerate(self._messages) if i not in drop_indices
|
|
122
|
+
]
|
|
123
|
+
self._budget.release(freed)
|
|
124
|
+
return freed
|
|
125
|
+
|
|
126
|
+
# ── Private ────────────────────────────────────────────────────────────────
|
|
127
|
+
|
|
128
|
+
def _resolve_tokens(self, message: ContextMessage) -> int:
    """Return the token cost to account for *message*.

    The message's own ``token_count`` wins whenever it is set (including an
    explicit ``0``); otherwise the cost is ``estimate_tokens`` applied to the
    message content.
    """
    explicit = message.token_count
    return estimate_tokens(message.content) if explicit is None else explicit
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _with_token_count(message: ContextMessage, count: int) -> ContextMessage:
    """Return *message* with ``token_count`` populated.

    A message that already has a token count is returned unchanged (the very
    same object).  Otherwise ``dataclasses.replace`` builds a new message
    whose ``token_count`` is *count*.
    """
    already_counted = message.token_count is not None
    if already_counted:
        return message

    # Imported lazily, only on the path that actually has to copy.
    from dataclasses import replace

    return replace(message, token_count=count)
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nodus-context
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LLM context window management: token budget, compaction strategies, tool result guards
|
|
5
|
+
Author: Shawn Knight
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Masterplanner25/nodus-context
|
|
8
|
+
Project-URL: Repository, https://github.com/Masterplanner25/nodus-context
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Provides-Extra: tiktoken
|
|
13
|
+
Requires-Dist: tiktoken>=0.5.0; extra == "tiktoken"
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
16
|
+
Dynamic: license-file
|
|
17
|
+
|
|
18
|
+
# nodus-context
|
|
19
|
+
|
|
20
|
+
**LLM context window management: token budget, compaction strategies, and
|
|
21
|
+
tool result guards for AI systems.**
|
|
22
|
+
|
|
23
|
+
Keeps agent conversations within finite LLM context windows without manual
|
|
24
|
+
message pruning. No required external dependencies — token counting uses a
|
|
25
|
+
word-count estimate by default; install `tiktoken` for accurate counts.
|
|
26
|
+
|
|
27
|
+
> **Status:** v0.1.0 — initial release.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Install
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install nodus-context
|
|
35
|
+
|
|
36
|
+
# With accurate tiktoken-based token counting:
|
|
37
|
+
pip install "nodus-context[tiktoken]"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## What it provides
|
|
43
|
+
|
|
44
|
+
| Component | Purpose |
|
|
45
|
+
|---|---|
|
|
46
|
+
| `ContextBudget` | Token budget with utilization tracking and overflow detection |
|
|
47
|
+
| `ContextMessage` | Normalized message (role, content, token count) |
|
|
48
|
+
| `ContextWindow` | Message list with budget enforcement and compaction |
|
|
49
|
+
| `DropToolInternalsStrategy` | Remove intermediate tool calls, keep final pairs |
|
|
50
|
+
| `SummarizeStrategy` | Keep head + tail, replace middle with a summary message |
|
|
51
|
+
| `estimate_tokens` / `count_tokens` | Word-estimate or tiktoken-accurate counting |
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Quick start
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from nodus_context import ContextBudget, ContextMessage, ContextWindow, ROLE_USER, ROLE_ASSISTANT
|
|
59
|
+
|
|
60
|
+
budget = ContextBudget(max_tokens=8192)
|
|
61
|
+
window = ContextWindow(budget=budget)
|
|
62
|
+
|
|
63
|
+
window.add(ContextMessage(role=ROLE_USER, content="What is the capital of France?"))
|
|
64
|
+
window.add(ContextMessage(role=ROLE_ASSISTANT, content="The capital of France is Paris."))
|
|
65
|
+
|
|
66
|
+
messages = window.messages() # list of ContextMessage
|
|
67
|
+
print(f"Using {budget.current_usage}/{budget.max_tokens} tokens")
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## ContextBudget
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from nodus_context import ContextBudget
|
|
76
|
+
|
|
77
|
+
budget = ContextBudget(max_tokens=4096, reserve_tokens=512)
|
|
78
|
+
budget.charge(150) # record token usage
|
|
79
|
+
budget.utilization # float 0.0–1.0
|
|
80
|
+
budget.remaining # tokens remaining
|
|
81
|
+
budget.is_full # True once current_usage reaches max_tokens
|
|
82
|
+
budget.reset()
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## ContextWindow
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from nodus_context import ContextWindow, ContextBudget, DropToolInternalsStrategy
|
|
91
|
+
|
|
92
|
+
window = ContextWindow(
|
|
93
|
+
budget=ContextBudget(max_tokens=8192),
|
|
94
|
+
compaction_strategy=DropToolInternalsStrategy(),
|
|
95
|
+
compaction_threshold=0.85, # compact when 85% full
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
window.add(message) # add a ContextMessage; returns False if it would overflow the budget
|
|
99
|
+
window.add_force(message) # add a ContextMessage even if it overflows the budget
|
|
100
|
+
window.messages() # current message list
|
|
101
|
+
window.compact(strategy) # apply a compaction strategy; returns tokens freed
|
|
102
|
+
window.guard_tool_results(keep_last=2) # keep only the last 2 tool results; returns tokens freed
|
|
103
|
+
window.token_count # current total
|
|
104
|
+
window.message_count
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Compaction runs automatically when `utilization >= compaction_threshold`.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Compaction strategies
|
|
112
|
+
|
|
113
|
+
### DropToolInternalsStrategy
|
|
114
|
+
|
|
115
|
+
Removes intermediate `tool_use` / `tool_result` pairs, keeping only the
|
|
116
|
+
final `n` pairs. Reduces context without losing the outcome.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from nodus_context import DropToolInternalsStrategy
|
|
120
|
+
strategy = DropToolInternalsStrategy(keep_last_pairs=1)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### SummarizeStrategy
|
|
124
|
+
|
|
125
|
+
Keeps the first `keep_first` and last `keep_last` messages; replaces
|
|
126
|
+
everything in between with a single summary message.
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
from nodus_context import SummarizeStrategy
|
|
130
|
+
strategy = SummarizeStrategy(
|
|
131
|
+
keep_first=2,
|
|
132
|
+
keep_last=4,
|
|
133
|
+
summary_fn=lambda msgs: "Summary of intermediate steps.",
|
|
134
|
+
)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
`summary_fn` can call an LLM — any callable that takes a list of
|
|
138
|
+
`ContextMessage` and returns a string.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Token counting
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from nodus_context import estimate_tokens, count_tokens
|
|
146
|
+
|
|
147
|
+
estimate_tokens("Hello, world!") # fast word-count estimate (no dep)
|
|
148
|
+
count_tokens("Hello, world!") # tiktoken if installed, else estimate
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Install `nodus-context[tiktoken]` for accurate counts.
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Role constants
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from nodus_context import (
|
|
159
|
+
ROLE_USER, ROLE_ASSISTANT, ROLE_SYSTEM,
|
|
160
|
+
ROLE_TOOL_USE, ROLE_TOOL_RESULT, ROLE_THINKING,
|
|
161
|
+
)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Design
|
|
167
|
+
|
|
168
|
+
- **No required dependencies.** Token estimation uses `~4 chars per token`
|
|
169
|
+
heuristic. Accurate counting requires `tiktoken` (optional extra).
|
|
170
|
+
- **Protocol-based strategies.** Any object whose `compact(messages)` method satisfies `CompactionStrategy`
|
|
171
|
+
works — no inheritance needed.
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Development
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
pip install -e ".[dev]"
|
|
179
|
+
pytest tests/ -q
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
nodus_context/__init__.py
|
|
5
|
+
nodus_context/budget.py
|
|
6
|
+
nodus_context/message.py
|
|
7
|
+
nodus_context/strategies.py
|
|
8
|
+
nodus_context/window.py
|
|
9
|
+
nodus_context.egg-info/PKG-INFO
|
|
10
|
+
nodus_context.egg-info/SOURCES.txt
|
|
11
|
+
nodus_context.egg-info/dependency_links.txt
|
|
12
|
+
nodus_context.egg-info/requires.txt
|
|
13
|
+
nodus_context.egg-info/top_level.txt
|
|
14
|
+
tests/test_budget.py
|
|
15
|
+
tests/test_strategies.py
|
|
16
|
+
tests/test_window.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
nodus_context
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=80", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "nodus-context"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "LLM context window management: token budget, compaction strategies, tool result guards"
|
|
9
|
+
authors = [{ name = "Shawn Knight" }]
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
requires-python = ">=3.11"
|
|
13
|
+
dependencies = []
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
tiktoken = ["tiktoken>=0.5.0"]
|
|
17
|
+
dev = ["pytest>=8.0"]
|
|
18
|
+
|
|
19
|
+
[project.urls]
|
|
20
|
+
Homepage = "https://github.com/Masterplanner25/nodus-context"
|
|
21
|
+
Repository = "https://github.com/Masterplanner25/nodus-context"
|
|
22
|
+
|
|
23
|
+
[tool.setuptools.packages.find]
|
|
24
|
+
where = ["."]
|
|
25
|
+
include = ["nodus_context*"]
|
|
26
|
+
|
|
27
|
+
[tool.pytest.ini_options]
|
|
28
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from nodus_context import ContextBudget, estimate_tokens, count_tokens
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_initial_state():
    """A fresh budget reports zero usage, full headroom and no flags set."""
    budget = ContextBudget(max_tokens=1000)
    assert budget.current_usage == 0
    assert budget.remaining == 1000
    assert budget.utilization == 0.0
    assert not (budget.is_warning or budget.is_full)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_charge_and_release():
    """``charge`` adds usage; ``release`` subtracts it and clamps at zero."""
    budget = ContextBudget(max_tokens=1000)

    budget.charge(400)
    assert budget.current_usage == 400
    assert budget.remaining == 600

    budget.release(200)
    assert budget.current_usage == 200

    # Releasing more than is in use must not push usage below zero.
    budget.release(9999)
    assert budget.current_usage == 0
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_utilization():
    """Charging 800 of 1000 tokens reports a utilization of about 0.8."""
    budget = ContextBudget(max_tokens=1000)
    budget.charge(800)
    deviation = abs(budget.utilization - 0.8)
    assert deviation < 0.001
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_is_warning_at_threshold():
    """``is_warning`` is off at 799/1000 and on at 800/1000 for a 0.8 threshold."""
    budget = ContextBudget(max_tokens=1000, warning_threshold=0.8)

    budget.charge(799)  # one token short of the threshold
    assert not budget.is_warning

    budget.charge(1)  # exactly at the threshold
    assert budget.is_warning
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_is_full():
    """``is_full`` is set at the limit and stays set once usage exceeds it."""
    budget = ContextBudget(max_tokens=100)

    budget.charge(100)
    assert budget.is_full

    budget.charge(1)  # now over the limit
    assert budget.is_full
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_would_overflow():
    """Filling the budget exactly is allowed; one token more overflows."""
    budget = ContextBudget(max_tokens=100)
    budget.charge(90)
    headroom = 10
    assert not budget.would_overflow(headroom)
    assert budget.would_overflow(headroom + 1)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_reset():
    """``reset`` returns a charged budget to zero usage."""
    budget = ContextBudget(max_tokens=100)
    budget.charge(50)
    budget.reset()
    usage_after_reset = budget.current_usage
    assert usage_after_reset == 0
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ── estimate_tokens ────────────────────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
def test_estimate_none():
    """``None`` content is estimated at zero tokens."""
    estimate = estimate_tokens(None)
    assert estimate == 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_estimate_string():
    """A non-empty string is estimated at more than zero tokens."""
    estimate = estimate_tokens("hello world")
    assert estimate > 0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_estimate_empty_string():
    """An empty string is estimated at zero tokens."""
    estimate = estimate_tokens("")
    assert estimate == 0
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_estimate_list():
    """A list of strings is accepted and estimated at more than zero tokens."""
    parts = ["hello", "world"]
    assert estimate_tokens(parts) > 0
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_count_tokens_falls_back_to_estimate():
    """``count_tokens`` yields a positive count for non-empty text.

    The assertion holds with or without tiktoken installed, so it covers the
    estimate fallback whenever tiktoken is absent.
    """
    token_total = count_tokens("hello world")
    assert token_total > 0
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from nodus_context import (
|
|
2
|
+
ContextMessage,
|
|
3
|
+
DropToolInternalsStrategy,
|
|
4
|
+
SummarizeStrategy,
|
|
5
|
+
)
|
|
6
|
+
from nodus_context.message import (
|
|
7
|
+
ROLE_ASSISTANT,
|
|
8
|
+
ROLE_TOOL_RESULT,
|
|
9
|
+
ROLE_TOOL_USE,
|
|
10
|
+
ROLE_USER,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _msg(role, content="x", token_count=10, message_id=None):
    """Build a ``ContextMessage`` with small defaults for the strategy tests."""
    fields = {
        "role": role,
        "content": content,
        "token_count": token_count,
        "message_id": message_id,
    }
    return ContextMessage(**fields)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# ── DropToolInternalsStrategy ─────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
def test_drop_tool_internals_keeps_last_pair():
    """With ``keep_last_pairs=1`` only the newest tool result survives."""
    conversation = [
        _msg(ROLE_USER),
        _msg(ROLE_TOOL_USE, message_id="a"),
        _msg(ROLE_TOOL_RESULT, message_id="a"),
        _msg(ROLE_TOOL_USE, message_id="b"),
        _msg(ROLE_TOOL_RESULT, message_id="b"),
        _msg(ROLE_ASSISTANT),
    ]
    strategy = DropToolInternalsStrategy(keep_last_pairs=1)
    compacted = strategy.compact(conversation)

    # The user and assistant turns are still present.
    surviving_roles = [m.role for m in compacted]
    assert ROLE_USER in surviving_roles
    assert ROLE_ASSISTANT in surviving_roles

    # Exactly one tool result remains, and it is the most recent one.
    surviving_results = [m for m in compacted if m.role == ROLE_TOOL_RESULT]
    assert len(surviving_results) == 1
    assert surviving_results[0].message_id == "b"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_drop_tool_internals_noop_when_within_limit():
    """Fewer tool results than ``keep_last_pairs`` leaves the list unchanged."""
    original = [_msg(ROLE_TOOL_RESULT, message_id="a")]
    strategy = DropToolInternalsStrategy(keep_last_pairs=2)
    assert strategy.compact(original) == original
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_drop_tool_internals_zero_keep():
    """``keep_last_pairs=0`` removes every tool_use and tool_result message."""
    conversation = [
        _msg(ROLE_USER),
        _msg(ROLE_TOOL_USE, message_id="a"),
        _msg(ROLE_TOOL_RESULT, message_id="a"),
    ]
    compacted = DropToolInternalsStrategy(keep_last_pairs=0).compact(conversation)
    tool_roles = (ROLE_TOOL_USE, ROLE_TOOL_RESULT)
    assert not any(m.role in tool_roles for m in compacted)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_preserves_user_and_assistant():
    """User and assistant turns survive even when all tool pairs are dropped."""
    conversation = [
        _msg(ROLE_USER, content="q"),
        _msg(ROLE_TOOL_USE, message_id="x"),
        _msg(ROLE_TOOL_RESULT, message_id="x"),
        _msg(ROLE_ASSISTANT, content="a"),
    ]
    strategy = DropToolInternalsStrategy(keep_last_pairs=0)
    surviving_roles = [m.role for m in strategy.compact(conversation)]
    assert ROLE_USER in surviving_roles
    assert ROLE_ASSISTANT in surviving_roles
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ── SummarizeStrategy ─────────────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
def test_summarize_replaces_middle():
    """The middle of a long conversation collapses into one summary message."""
    conversation = [_msg(ROLE_USER, content=f"msg{i}") for i in range(15)]
    strategy = SummarizeStrategy(keep_first=2, keep_last=3)
    compacted = strategy.compact(conversation)

    # 2 head messages + 1 summary + 3 tail messages.
    assert len(compacted) == 6

    # The summary sits right after the head.
    summary = compacted[2]
    assert summary.role == ROLE_ASSISTANT
    assert "summarized" in summary.content.lower()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_summarize_noop_when_small():
    """A conversation shorter than head + tail is returned unchanged."""
    original = [_msg(ROLE_USER) for _ in range(4)]
    strategy = SummarizeStrategy(keep_first=2, keep_last=3)
    assert strategy.compact(original) == original
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_summarize_custom_fn():
    """A custom ``summary_fn`` is called and its text becomes the summary."""
    conversation = [
        _msg(ROLE_USER, content=f"m{i}", token_count=5) for i in range(10)
    ]
    seen = []

    def record_and_summarize(dropped):
        # Remember what the strategy handed over so the call can be verified.
        seen.extend(dropped)
        return "custom summary"

    strategy = SummarizeStrategy(
        keep_first=1, keep_last=2, summary_fn=record_and_summarize
    )
    compacted = strategy.compact(conversation)

    assert len(seen) > 0
    assert compacted[1].content == "custom summary"
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from nodus_context import ContextBudget, ContextMessage, ContextWindow, DropToolInternalsStrategy
|
|
3
|
+
from nodus_context.message import ROLE_TOOL_RESULT, ROLE_TOOL_USE
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _msg(role="user", content="hello", token_count=10, message_id=None):
    """Build a ``ContextMessage`` with small defaults for the window tests."""
    fields = {
        "role": role,
        "content": content,
        "token_count": token_count,
        "message_id": message_id,
    }
    return ContextMessage(**fields)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _window(max_tokens=1000):
    """Return an empty ``ContextWindow`` backed by a budget of *max_tokens*."""
    budget = ContextBudget(max_tokens=max_tokens)
    return ContextWindow(budget)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ── add / add_force ───────────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
def test_add_returns_true_when_space():
    """``add`` stores a message that fits and returns ``True``."""
    window = _window(1000)
    accepted = window.add(_msg(token_count=10))
    assert accepted is True
    assert len(window) == 1
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_add_returns_false_when_full():
    """``add`` rejects a message larger than the budget and stores nothing."""
    window = _window(max_tokens=5)
    accepted = window.add(_msg(token_count=10))
    assert accepted is False
    assert len(window) == 0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_add_charges_budget():
    """Adding a message charges its token count to the budget."""
    window = _window(1000)
    window.add(_msg(token_count=50))
    charged = window.budget().current_usage
    assert charged == 50
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_add_force_ignores_overflow():
    """``add_force`` stores and charges a message even when it overflows."""
    window = _window(max_tokens=5)
    window.add_force(_msg(token_count=100))
    assert len(window) == 1
    charged = window.budget().current_usage
    assert charged == 100
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_add_estimates_tokens_when_missing():
    """A message without ``token_count`` is still charged a positive amount."""
    window = _window(1000)
    uncounted = ContextMessage(role="user", content="hello world test")
    assert window.add(uncounted) is True
    assert window.budget().current_usage > 0
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ── snapshot ──────────────────────────────────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
def test_snapshot_is_copy():
    """Mutating the list returned by ``snapshot`` does not affect the window."""
    window = _window(1000)
    window.add(_msg())
    copied = window.snapshot()
    copied.clear()
    # Emptying the snapshot must leave the window's own message intact.
    assert len(window) == 1
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ── compact ───────────────────────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
def test_compact_returns_tokens_freed():
    """``compact`` reports a positive number of freed tokens."""
    window = _window(1000)
    pair = (
        (ROLE_TOOL_USE, "call"),
        (ROLE_TOOL_RESULT, "result"),
    )
    for _ in range(5):
        for role, content in pair:
            window.add(
                _msg(role=role, content=content, token_count=20, message_id="id1")
            )
    strategy = DropToolInternalsStrategy(keep_last_pairs=1)
    tokens_freed = window.compact(strategy)
    assert tokens_freed > 0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_compact_reduces_message_count():
    """Compacting away old tool pairs leaves fewer messages in the window."""
    w = _window(10000)
    w.add(_msg(role="user", token_count=10))
    # Each tool_use / tool_result pair shares its own id.
    for i in range(4):
        w.add(_msg(role=ROLE_TOOL_USE, content="c", token_count=10, message_id=f"id{i}"))
        w.add(_msg(role=ROLE_TOOL_RESULT, content="r", token_count=10, message_id=f"id{i}"))
    before = len(w)
    w.compact(DropToolInternalsStrategy(keep_last_pairs=1))
    assert len(w) < before
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ── guard_tool_results ────────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
def test_guard_tool_results_keeps_last():
    """Only the newest ``keep_last`` tool results and their calls survive."""
    w = _window(10000)
    for i in range(5):
        w.add(_msg(role=ROLE_TOOL_USE, token_count=10, message_id=f"id{i}"))
        w.add(_msg(role=ROLE_TOOL_RESULT, token_count=10, message_id=f"id{i}"))

    freed = w.guard_tool_results(keep_last=2)

    # Three results plus their three matching tool_use messages are dropped,
    # at 10 tokens each, and that amount is released from the budget.
    assert freed == 60
    assert w.budget().current_usage == 40

    kept = w.messages()
    result_msgs = [m for m in kept if m.role == ROLE_TOOL_RESULT]
    assert len(result_msgs) == 2
    assert [m.message_id for m in result_msgs] == ["id3", "id4"]
    # The tool_use messages paired with the dropped results are gone too.
    assert [m.message_id for m in kept if m.role == ROLE_TOOL_USE] == ["id3", "id4"]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_guard_noop_when_within_limit():
    """With no more than ``keep_last`` tool results, nothing is freed."""
    window = _window(10000)
    window.add(_msg(role=ROLE_TOOL_RESULT, token_count=10, message_id="id1"))
    tokens_freed = window.guard_tool_results(keep_last=3)
    assert tokens_freed == 0
|