contpress 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contpress-0.1.5/.gitattributes +2 -0
- contpress-0.1.5/.github/workflows/ci.yml +41 -0
- contpress-0.1.5/.github/workflows/publish.yml +30 -0
- contpress-0.1.5/LICENSE +21 -0
- contpress-0.1.5/PKG-INFO +597 -0
- contpress-0.1.5/README.md +563 -0
- contpress-0.1.5/contextpress.png +0 -0
- contpress-0.1.5/pyproject.toml +42 -0
- contpress-0.1.5/src/contextpress/__init__.py +33 -0
- contpress-0.1.5/src/contextpress/budgets.py +63 -0
- contpress-0.1.5/src/contextpress/builder.py +49 -0
- contpress-0.1.5/src/contextpress/cache/__init__.py +5 -0
- contpress-0.1.5/src/contextpress/cache/exact.py +21 -0
- contpress-0.1.5/src/contextpress/cache/semantic.py +12 -0
- contpress-0.1.5/src/contextpress/cache/stores.py +31 -0
- contpress-0.1.5/src/contextpress/cli.py +86 -0
- contpress-0.1.5/src/contextpress/compressors/__init__.py +15 -0
- contpress-0.1.5/src/contextpress/compressors/base.py +8 -0
- contpress-0.1.5/src/contextpress/compressors/extractive.py +63 -0
- contpress-0.1.5/src/contextpress/compressors/llmlingua.py +28 -0
- contpress-0.1.5/src/contextpress/compressors/reports.py +49 -0
- contpress-0.1.5/src/contextpress/compressors/sentence_filter.py +7 -0
- contpress-0.1.5/src/contextpress/contracts.py +27 -0
- contpress-0.1.5/src/contextpress/core.py +109 -0
- contpress-0.1.5/src/contextpress/formatters.py +56 -0
- contpress-0.1.5/src/contextpress/memory/__init__.py +3 -0
- contpress-0.1.5/src/contextpress/memory/conversation.py +50 -0
- contpress-0.1.5/src/contextpress/memory/summarizer.py +7 -0
- contpress-0.1.5/src/contextpress/prompt_cache.py +21 -0
- contpress-0.1.5/src/contextpress/py.typed +1 -0
- contpress-0.1.5/src/contextpress/rag/__init__.py +3 -0
- contpress-0.1.5/src/contextpress/rag/chunk.py +25 -0
- contpress-0.1.5/src/contextpress/rag/filter.py +45 -0
- contpress-0.1.5/src/contextpress/rag/rerank.py +13 -0
- contpress-0.1.5/src/contextpress/reports.py +50 -0
- contpress-0.1.5/src/contextpress/tokenizer.py +53 -0
- contpress-0.1.5/src/contextpress/tools.py +45 -0
- contpress-0.1.5/tests/test_budget.py +9 -0
- contpress-0.1.5/tests/test_builder.py +17 -0
- contpress-0.1.5/tests/test_core.py +18 -0
- contpress-0.1.5/tests/test_extractive.py +15 -0
- contpress-0.1.5/tests/test_formatters.py +11 -0
- contpress-0.1.5/tests/test_tokenizer.py +9 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["main", "master"]
|
|
6
|
+
pull_request:
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
name: Test Python ${{ matrix.python-version }}
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
|
|
17
|
+
strategy:
|
|
18
|
+
fail-fast: false
|
|
19
|
+
matrix:
|
|
20
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
21
|
+
|
|
22
|
+
steps:
|
|
23
|
+
- name: Check out repository
|
|
24
|
+
uses: actions/checkout@v4
|
|
25
|
+
|
|
26
|
+
- name: Set up Python
|
|
27
|
+
uses: actions/setup-python@v5
|
|
28
|
+
with:
|
|
29
|
+
python-version: ${{ matrix.python-version }}
|
|
30
|
+
cache: pip
|
|
31
|
+
|
|
32
|
+
- name: Install package
|
|
33
|
+
run: |
|
|
34
|
+
python -m pip install --upgrade pip
|
|
35
|
+
python -m pip install -e ".[dev]"
|
|
36
|
+
|
|
37
|
+
- name: Run tests
|
|
38
|
+
run: python -m pytest
|
|
39
|
+
|
|
40
|
+
- name: Build package
|
|
41
|
+
run: python -m build
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
id-token: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
publish:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
environment: pypi
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
|
|
23
|
+
- name: Build distribution
|
|
24
|
+
run: |
|
|
25
|
+
python -m pip install --upgrade pip build twine
|
|
26
|
+
python -m build
|
|
27
|
+
twine check dist/*
|
|
28
|
+
|
|
29
|
+
- name: Publish distribution
|
|
30
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
contpress-0.1.5/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 JadeyGraham96
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
contpress-0.1.5/PKG-INFO
ADDED
|
@@ -0,0 +1,597 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: contpress
|
|
3
|
+
Version: 0.1.5
|
|
4
|
+
Summary: Reduce LLM token usage with token counting, prompt compression, caching, and context filtering.
|
|
5
|
+
Author: JadeyGraham96
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Requires-Dist: rich>=13.0
|
|
11
|
+
Requires-Dist: tiktoken>=0.7.0
|
|
12
|
+
Provides-Extra: all
|
|
13
|
+
Requires-Dist: diskcache; extra == 'all'
|
|
14
|
+
Requires-Dist: faiss-cpu; extra == 'all'
|
|
15
|
+
Requires-Dist: langchain; extra == 'all'
|
|
16
|
+
Requires-Dist: langchain-community; extra == 'all'
|
|
17
|
+
Requires-Dist: llama-index; extra == 'all'
|
|
18
|
+
Requires-Dist: llmlingua; extra == 'all'
|
|
19
|
+
Requires-Dist: sentence-transformers; extra == 'all'
|
|
20
|
+
Provides-Extra: compress
|
|
21
|
+
Requires-Dist: llmlingua; extra == 'compress'
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: build>=1.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
25
|
+
Provides-Extra: rag
|
|
26
|
+
Requires-Dist: langchain; extra == 'rag'
|
|
27
|
+
Requires-Dist: langchain-community; extra == 'rag'
|
|
28
|
+
Requires-Dist: llama-index; extra == 'rag'
|
|
29
|
+
Provides-Extra: semantic
|
|
30
|
+
Requires-Dist: diskcache; extra == 'semantic'
|
|
31
|
+
Requires-Dist: faiss-cpu; extra == 'semantic'
|
|
32
|
+
Requires-Dist: sentence-transformers; extra == 'semantic'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# contpress
|
|
36
|
+
|
|
37
|
+
<p align="center">
|
|
38
|
+
A practical Python toolkit for making every LLM token count.
|
|
39
|
+
</p>
|
|
40
|
+
|
|
41
|
+
<p align="center">
|
|
42
|
+
<img width="256" height="256" alt="contpress Logo" src="https://github.com/Arkay92/ContextPress/blob/main/contextpress.png?raw=true" />
|
|
43
|
+
</p>
|
|
44
|
+
|
|
45
|
+
<p align="center">
|
|
46
|
+
<a href="https://github.com/Arkay92/ContextPress/actions/workflows/publish.yml"><img alt="Publish" src="https://github.com/Arkay92/ContextPress/actions/workflows/publish.yml/badge.svg" /></a>
|
|
47
|
+
<a href="https://pypi.org/project/contpress/"><img alt="PyPI" src="https://img.shields.io/pypi/v/contpress.svg" /></a>
|
|
48
|
+
<img alt="Python" src="https://img.shields.io/pypi/pyversions/contpress.svg" />
|
|
49
|
+
<img alt="Downloads" src="https://img.shields.io/pypi/dm/contpress.svg" />
|
|
50
|
+
<img alt="License" src="https://img.shields.io/pypi/l/contpress.svg" />
|
|
51
|
+
</p>
|
|
52
|
+
|
|
53
|
+
**contpress** combines:
|
|
54
|
+
|
|
55
|
+
- **Token counting and trimming** with model-aware encodings.
|
|
56
|
+
- **Token budget enforcement** for input, output reserve, system prompts, tools, RAG context, and history.
|
|
57
|
+
- **Compact prompt building** for consistent, low-waste prompt blocks.
|
|
58
|
+
- **Dependency-free extractive compression** for safe first-pass prompt reduction.
|
|
59
|
+
- **RAG context filtering** with keyword and sentence relevance modes.
|
|
60
|
+
- **Compact JSON, CSV, and table formatting** for reducing structured-data tokens.
|
|
61
|
+
- **Conversation memory pruning** that keeps system prompts, recent messages, decisions, constraints, and relevant context.
|
|
62
|
+
- **Output contract generation** for concise response schemas.
|
|
63
|
+
- **Prompt cache-aware formatting** to keep stable prompt blocks grouped.
|
|
64
|
+
- **Prompt and response caching surfaces** including exact cache and optional semantic cache support.
|
|
65
|
+
- **Usage reports** with original tokens, optimized tokens, saved tokens, ratios, and methods.
|
|
66
|
+
- **Optional dependencies** so the base package stays lightweight.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Why Preflight Optimization for LLM Prompts?
|
|
71
|
+
|
|
72
|
+
LLM prompts often grow through repeated instructions, irrelevant retrieved chunks,
|
|
73
|
+
verbose JSON, oversized chat history, and unbounded output requests:
|
|
74
|
+
|
|
75
|
+
```text
|
|
76
|
+
Task, instructions, context, tools, and history
|
|
77
|
+
-> Count tokens against the target model
|
|
78
|
+
-> Reserve output budget
|
|
79
|
+
-> Format prompt blocks compactly
|
|
80
|
+
-> Filter retrieved context
|
|
81
|
+
-> Compress or trim only when needed
|
|
82
|
+
|
|
83
|
+
-> Return optimized text
|
|
84
|
+
-> Report savings and methods
|
|
85
|
+
-> Feed the result to any LLM client
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
`contpress` is designed to reduce token usage before a request is sent:
|
|
89
|
+
|
|
90
|
+
- **Oversized prompts** from untrimmed documents, code, logs, and retrieved chunks.
|
|
91
|
+
- **Messy repeated instructions** that waste tokens and reduce prompt clarity.
|
|
92
|
+
- **Verbose structured data** where compact JSON or tables are enough.
|
|
93
|
+
- **RAG context bloat** from chunks that are only loosely related to the query.
|
|
94
|
+
- **Long conversation histories** with filler, confirmations, and stale context.
|
|
95
|
+
- **Unclear output budgets** where responses are allowed to grow without a contract.
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Architecture
|
|
100
|
+
|
|
101
|
+
```text
|
|
102
|
+
Prompt inputs
|
|
103
|
+
- task
|
|
104
|
+
- instructions
|
|
105
|
+
- context
|
|
106
|
+
- conversation history
|
|
107
|
+
- output contract
|
|
108
|
+
|
|
|
109
|
+
v
|
|
110
|
+
Preflight optimization
|
|
111
|
+
- token counting
|
|
112
|
+
- budget enforcement
|
|
113
|
+
- compact prompt layout
|
|
114
|
+
- extractive compression
|
|
115
|
+
- RAG context filtering
|
|
116
|
+
- compact JSON / CSV / table formatting
|
|
117
|
+
- memory pruning
|
|
118
|
+
|
|
|
119
|
+
v
|
|
120
|
+
OptimizedPrompt
|
|
121
|
+
- text
|
|
122
|
+
- report dict
|
|
123
|
+
- original token count
|
|
124
|
+
- optimized token count
|
|
125
|
+
- saved tokens
|
|
126
|
+
- methods used
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Install
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
pip install contpress
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
For LLMLingua prompt compression:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
pip install "contpress[compress]"
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
For semantic cache support:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
pip install "contpress[semantic]"
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
For RAG ecosystem integrations:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
pip install "contpress[rag]"
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
For all optional integrations:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
pip install "contpress[all]"
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
For development:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pip install -e ".[dev,all]"
|
|
165
|
+
pytest -q
|
|
166
|
+
python -m build
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Quick Start
|
|
172
|
+
|
|
173
|
+
### Optimize a Prompt
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
from contextpress import ContextPress
|
|
177
|
+
|
|
178
|
+
cp = ContextPress(
|
|
179
|
+
model="gpt-4o-mini",
|
|
180
|
+
max_input_tokens=4000,
|
|
181
|
+
max_output_tokens=500,
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
optimized = cp.optimize(
|
|
185
|
+
task="Answer the user's question using the provided context.",
|
|
186
|
+
context=long_context,
|
|
187
|
+
instructions=[
|
|
188
|
+
"Be concise.",
|
|
189
|
+
"Use only relevant facts.",
|
|
190
|
+
"Return risks if uncertain.",
|
|
191
|
+
],
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
print(optimized.text)
|
|
195
|
+
print(optimized.report)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Token Counting
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from contextpress import TokenCounter
|
|
202
|
+
|
|
203
|
+
counter = TokenCounter(model="gpt-4o-mini")
|
|
204
|
+
|
|
205
|
+
print(counter.count("hello world"))
|
|
206
|
+
print(counter.fits("long text", budget=8000))
|
|
207
|
+
print(counter.trim("long text", max_tokens=1000))
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Usage Report
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from contextpress import UsageReport
|
|
214
|
+
|
|
215
|
+
report = UsageReport(
|
|
216
|
+
model="gpt-4o-mini",
|
|
217
|
+
input_tokens_before=10200,
|
|
218
|
+
input_tokens_after=3400,
|
|
219
|
+
output_tokens_limit=500,
|
|
220
|
+
methods=["sentence_filter", "compact_json", "trim"],
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
print(report.summary())
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## CLI
|
|
229
|
+
|
|
230
|
+
Count tokens in a file:
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
contpress count README.md --model gpt-4o-mini
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
Trim a file to a maximum token count:
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
contpress trim prompt.txt --max-tokens 2000
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Compress a prompt:
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
contpress compress prompt.txt --target-tokens 1000
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
Compact JSON:
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
contpress compact data.json
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Generate a budget report:
|
|
255
|
+
|
|
256
|
+
```bash
|
|
257
|
+
contpress report prompt.txt --budget 8000
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
## Main Features
|
|
263
|
+
|
|
264
|
+
### 1. **Token Counting**
|
|
265
|
+
|
|
266
|
+
Count, fit-check, and trim text using the target model encoding:
|
|
267
|
+
|
|
268
|
+
```python
|
|
269
|
+
from contextpress import TokenCounter
|
|
270
|
+
|
|
271
|
+
counter = TokenCounter(model="gpt-4o-mini")
|
|
272
|
+
tokens = counter.count(prompt)
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
### 2. **Budget Enforcement**
|
|
276
|
+
|
|
277
|
+
Reserve output tokens and account for system prompt or tool schema overhead:
|
|
278
|
+
|
|
279
|
+
```python
|
|
280
|
+
from contextpress import TokenBudget
|
|
281
|
+
|
|
282
|
+
budget = TokenBudget(
|
|
283
|
+
model="gpt-4o-mini",
|
|
284
|
+
max_input_tokens=8000,
|
|
285
|
+
reserve_output_tokens=1000,
|
|
286
|
+
system_prompt="You are concise.",
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
print(budget.input_budget)
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### 3. **Compact Prompt Builder**
|
|
293
|
+
|
|
294
|
+
Build repeatable prompt blocks without verbose formatting:
|
|
295
|
+
|
|
296
|
+
```python
|
|
297
|
+
from contextpress import PromptBuilder
|
|
298
|
+
|
|
299
|
+
prompt = (
|
|
300
|
+
PromptBuilder()
|
|
301
|
+
.role("senior Python engineer")
|
|
302
|
+
.task("Refactor this code")
|
|
303
|
+
.constraints(["Preserve behaviour", "No new dependencies", "Keep diff small"])
|
|
304
|
+
.context(code)
|
|
305
|
+
.output(["patch", "risk notes", "test plan"])
|
|
306
|
+
.build()
|
|
307
|
+
)
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
### 4. **Compact Structured Data**
|
|
311
|
+
|
|
312
|
+
Reduce JSON and tabular context before sending it to an LLM:
|
|
313
|
+
|
|
314
|
+
```python
|
|
315
|
+
from contextpress import compact_json, compact_table, drop_nulls, shorten_keys
|
|
316
|
+
|
|
317
|
+
payload = drop_nulls(data)
|
|
318
|
+
payload = shorten_keys(payload, {"description": "d", "priority": "p"})
|
|
319
|
+
text = compact_json(payload)
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### 5. **Extractive Compression**
|
|
323
|
+
|
|
324
|
+
Dependency-free compression keeps query-relevant sentences and preserves useful
|
|
325
|
+
signals such as numbers, URLs, headings, code identifiers, and requirements:
|
|
326
|
+
|
|
327
|
+
```python
|
|
328
|
+
from contextpress import ExtractiveCompressor
|
|
329
|
+
|
|
330
|
+
short = ExtractiveCompressor().compress(
|
|
331
|
+
text=long_context,
|
|
332
|
+
query="How do I reduce LLM token usage?",
|
|
333
|
+
max_tokens=1200,
|
|
334
|
+
)
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
### 6. **LLMLingua Compression**
|
|
338
|
+
|
|
339
|
+
Use Microsoft LLMLingua when you install the compression extra:
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
from contextpress.compressors import LLMLinguaCompressor
|
|
343
|
+
|
|
344
|
+
compressed = LLMLinguaCompressor().compress(
|
|
345
|
+
prompt=long_prompt,
|
|
346
|
+
instruction="Preserve code, numbers, entities, requirements, and constraints.",
|
|
347
|
+
target_tokens=1000,
|
|
348
|
+
)
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
Prompt compression can harm exact reasoning, code, legal wording, medical text,
|
|
352
|
+
or maths. It is not always a free speedup; preprocessing overhead can outweigh
|
|
353
|
+
gains for shorter prompts or mismatched model and hardware conditions.
|
|
354
|
+
|
|
355
|
+
### 7. **RAG Context Filtering**
|
|
356
|
+
|
|
357
|
+
Filter retrieved chunks before building the final prompt:
|
|
358
|
+
|
|
359
|
+
```python
|
|
360
|
+
from contextpress import ContextFilter
|
|
361
|
+
|
|
362
|
+
filtered = ContextFilter(model="gpt-4o-mini").filter(
|
|
363
|
+
query=user_question,
|
|
364
|
+
chunks=retrieved_chunks,
|
|
365
|
+
max_tokens=2500,
|
|
366
|
+
)
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
### 8. **Conversation Memory Pruning**
|
|
370
|
+
|
|
371
|
+
Keep system prompts, recent messages, relevant history, constraints, decisions,
|
|
372
|
+
preferences, and file names:
|
|
373
|
+
|
|
374
|
+
```python
|
|
375
|
+
from contextpress import ConversationPruner
|
|
376
|
+
|
|
377
|
+
messages = ConversationPruner().prune(
|
|
378
|
+
messages=chat_history,
|
|
379
|
+
current_query="What changed in the latest code?",
|
|
380
|
+
max_tokens=3000,
|
|
381
|
+
)
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
### 9. **Output Contracts**
|
|
385
|
+
|
|
386
|
+
Generate compact response contracts:
|
|
387
|
+
|
|
388
|
+
```python
|
|
389
|
+
from contextpress import OutputContract
|
|
390
|
+
|
|
391
|
+
contract = OutputContract(
|
|
392
|
+
fields={"summary": "one sentence", "risks": "short list"},
|
|
393
|
+
).prompt()
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
### 10. **Prompt Cache Layout**
|
|
397
|
+
|
|
398
|
+
Group stable and volatile blocks to improve prompt-cache friendliness:
|
|
399
|
+
|
|
400
|
+
```python
|
|
401
|
+
from contextpress import PromptCacheLayout
|
|
402
|
+
|
|
403
|
+
prompt = (
|
|
404
|
+
PromptCacheLayout()
|
|
405
|
+
.stable("System", "You are a concise assistant.")
|
|
406
|
+
.stable("Rules", "Use only provided context.")
|
|
407
|
+
.volatile("User", user_question)
|
|
408
|
+
.build()
|
|
409
|
+
)
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
### 11. **Tool and Agent Trace Compaction**
|
|
413
|
+
|
|
414
|
+
Compact tool schemas and agent traces before placing them in context:
|
|
415
|
+
|
|
416
|
+
```python
|
|
417
|
+
from contextpress import AgentTraceCompactor, ToolSchemaCompactor
|
|
418
|
+
|
|
419
|
+
compact_schema = ToolSchemaCompactor(drop_descriptions=True).compact(tool_schema)
|
|
420
|
+
compact_trace = AgentTraceCompactor().compact(events)
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
---
|
|
424
|
+
|
|
425
|
+
## Configuration
|
|
426
|
+
|
|
427
|
+
Tune prompt budgets with `TokenBudget`:
|
|
428
|
+
|
|
429
|
+
```python
|
|
430
|
+
from contextpress import TokenBudget
|
|
431
|
+
|
|
432
|
+
budget = TokenBudget(
|
|
433
|
+
model="gpt-4o-mini",
|
|
434
|
+
max_input_tokens=8000,
|
|
435
|
+
reserve_output_tokens=1000,
|
|
436
|
+
system_prompt="You are concise.",
|
|
437
|
+
tool_schema=compact_schema,
|
|
438
|
+
rag_context_ratio=0.6,
|
|
439
|
+
history_ratio=0.3,
|
|
440
|
+
)
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
Tune optimization with `ContextPress`:
|
|
444
|
+
|
|
445
|
+
```python
|
|
446
|
+
from contextpress import ContextPress
|
|
447
|
+
|
|
448
|
+
cp = ContextPress(
|
|
449
|
+
model="gpt-4o-mini",
|
|
450
|
+
max_input_tokens=6000,
|
|
451
|
+
reserve_output_tokens=800,
|
|
452
|
+
compression="extractive",
|
|
453
|
+
)
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
---
|
|
457
|
+
|
|
458
|
+
## Examples
|
|
459
|
+
|
|
460
|
+
```python
|
|
461
|
+
from contextpress import ContextPress
|
|
462
|
+
|
|
463
|
+
cp = ContextPress(
|
|
464
|
+
model="gpt-4o-mini",
|
|
465
|
+
max_input_tokens=6000,
|
|
466
|
+
reserve_output_tokens=800,
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
optimized = cp.optimize(
|
|
470
|
+
task="Summarise the key issues in this codebase.",
|
|
471
|
+
context=repo_summary,
|
|
472
|
+
instructions=[
|
|
473
|
+
"Focus on bugs, security, maintainability, and performance.",
|
|
474
|
+
"Do not repeat obvious file names.",
|
|
475
|
+
"Return concise bullet points.",
|
|
476
|
+
],
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
print(optimized.report)
|
|
480
|
+
```
|
|
481
|
+
|
|
482
|
+
```bash
|
|
483
|
+
contpress count README.md
|
|
484
|
+
contpress report prompt.txt --budget 8000
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
---
|
|
488
|
+
|
|
489
|
+
## Project Structure
|
|
490
|
+
|
|
491
|
+
```text
|
|
492
|
+
src/contextpress/
|
|
493
|
+
__init__.py # Public API
|
|
494
|
+
core.py # ContextPress and OptimizedPrompt
|
|
495
|
+
tokenizer.py # TokenCounter
|
|
496
|
+
budgets.py # TokenBudget
|
|
497
|
+
builder.py # PromptBuilder
|
|
498
|
+
formatters.py # Compact JSON, CSV, and table helpers
|
|
499
|
+
reports.py # UsageReport
|
|
500
|
+
contracts.py # OutputContract
|
|
501
|
+
prompt_cache.py # PromptCacheLayout
|
|
502
|
+
tools.py # ToolSchemaCompactor and AgentTraceCompactor
|
|
503
|
+
cli.py # Command-line interface
|
|
504
|
+
py.typed # Typing marker
|
|
505
|
+
compressors/ # Extractive, sentence, LLMLingua, reports, diffs
|
|
506
|
+
rag/ # Chunking, reranking, context filtering
|
|
507
|
+
cache/ # Exact cache, semantic cache surface, stores
|
|
508
|
+
memory/ # Conversation pruning and summarization
|
|
509
|
+
tests/
|
|
510
|
+
test_*.py # Unit tests
|
|
511
|
+
.github/
|
|
512
|
+
workflows/
|
|
513
|
+
ci.yml # Tests and package build
|
|
514
|
+
publish.yml # PyPI publishing workflow
|
|
515
|
+
pyproject.toml # Project metadata and dependencies
|
|
516
|
+
contextpress.png # Project logo
|
|
517
|
+
```
|
|
518
|
+
|
|
519
|
+
---
|
|
520
|
+
|
|
521
|
+
## Development
|
|
522
|
+
|
|
523
|
+
```bash
|
|
524
|
+
# Install with dev extras
|
|
525
|
+
pip install -e ".[dev,all]"
|
|
526
|
+
|
|
527
|
+
# Run tests
|
|
528
|
+
pytest -q
|
|
529
|
+
|
|
530
|
+
# Build package
|
|
531
|
+
python -m build
|
|
532
|
+
```
|
|
533
|
+
|
|
534
|
+
---
|
|
535
|
+
|
|
536
|
+
## Publishing
|
|
537
|
+
|
|
538
|
+
GitHub Actions includes:
|
|
539
|
+
|
|
540
|
+
- `CI`: runs tests and builds the package on pushes and pull requests.
|
|
541
|
+
- `Publish to PyPI`: builds, checks, and publishes distributions when a `v*` tag is pushed.
|
|
542
|
+
|
|
543
|
+
The publish workflow uses PyPI trusted publishing. Configure the PyPI project
|
|
544
|
+
with this GitHub repository, the `pypi` environment, and the
|
|
545
|
+
`.github/workflows/publish.yml` workflow before pushing a version tag.
|
|
546
|
+
|
|
547
|
+
Trusted publishing settings on PyPI must match:
|
|
548
|
+
|
|
549
|
+
- PyPI project name: `contpress`
|
|
550
|
+
- GitHub owner: `Arkay92`
|
|
551
|
+
- GitHub repository: `ContextPress`
|
|
552
|
+
- Workflow name: `publish.yml`
|
|
553
|
+
- Environment name: `pypi`
|
|
554
|
+
|
|
555
|
+
If publishing fails with `403 Invalid API Token: OIDC scoped token is not valid
|
|
556
|
+
for project 'contpress'`, the workflow ran correctly but PyPI did not accept
|
|
557
|
+
the trusted publisher for that project. Delete and recreate the trusted publisher
|
|
558
|
+
on PyPI with the exact values above, including the `pypi` environment, then push
|
|
559
|
+
a new version tag such as `v0.1.5`.
|
|
560
|
+
|
|
561
|
+
---
|
|
562
|
+
|
|
563
|
+
## License
|
|
564
|
+
|
|
565
|
+
MIT
|
|
566
|
+
|
|
567
|
+
---
|
|
568
|
+
|
|
569
|
+
## Contributing
|
|
570
|
+
|
|
571
|
+
Contributions are welcome. Open an issue with the model, prompt shape, expected
|
|
572
|
+
budget, and the optimization behavior you expected.
|
|
573
|
+
|
|
574
|
+
---
|
|
575
|
+
|
|
576
|
+
## Citation
|
|
577
|
+
|
|
578
|
+
If you use contpress in research, please cite:
|
|
579
|
+
|
|
580
|
+
```bibtex
|
|
581
|
+
@software{contextpress2026,
|
|
582
|
+
title={contpress: A Practical Python Toolkit for Making Every LLM Token Count},
|
|
583
|
+
author={Arkay92},
|
|
584
|
+
url={https://github.com/Arkay92/ContextPress},
|
|
585
|
+
year={2026},
|
|
586
|
+
version={v0.1.5},
|
|
587
|
+
}
|
|
588
|
+
```
|
|
589
|
+
|
|
590
|
+
---
|
|
591
|
+
|
|
592
|
+
## Acknowledgments
|
|
593
|
+
|
|
594
|
+
- [tiktoken](https://github.com/openai/tiktoken) for fast model-aware tokenization.
|
|
595
|
+
- [LLMLingua](https://github.com/microsoft/LLMLingua) for optional prompt compression.
|
|
596
|
+
- [LangChain](https://www.langchain.com/) and [LlamaIndex](https://www.llamaindex.ai/) for RAG compression patterns.
|
|
597
|
+
- [FAISS](https://github.com/facebookresearch/faiss) and [sentence-transformers](https://www.sbert.net/) for semantic cache building blocks.
|