pixelprompt 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixelprompt-0.1.0/.github/workflows/publish.yml +55 -0
- pixelprompt-0.1.0/.github/workflows/tests.yml +37 -0
- pixelprompt-0.1.0/.gitignore +135 -0
- pixelprompt-0.1.0/LICENSE +21 -0
- pixelprompt-0.1.0/PKG-INFO +261 -0
- pixelprompt-0.1.0/README.md +224 -0
- pixelprompt-0.1.0/pyproject.toml +74 -0
- pixelprompt-0.1.0/src/pixelprompt/__init__.py +19 -0
- pixelprompt-0.1.0/src/pixelprompt/core.py +248 -0
- pixelprompt-0.1.0/src/pixelprompt/utils.py +44 -0
- pixelprompt-0.1.0/tests/__init__.py +1 -0
- pixelprompt-0.1.0/tests/test_core.py +165 -0
- pixelprompt-0.1.0/tests/test_utils.py +53 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build:
|
|
12
|
+
name: Build distribution
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.x"
|
|
22
|
+
|
|
23
|
+
- name: Install build dependencies
|
|
24
|
+
run: |
|
|
25
|
+
python -m pip install --upgrade pip
|
|
26
|
+
pip install build
|
|
27
|
+
|
|
28
|
+
- name: Build package
|
|
29
|
+
run: python -m build
|
|
30
|
+
|
|
31
|
+
- name: Store the distribution packages
|
|
32
|
+
uses: actions/upload-artifact@v4
|
|
33
|
+
with:
|
|
34
|
+
name: python-package-distributions
|
|
35
|
+
path: dist/
|
|
36
|
+
|
|
37
|
+
publish-to-pypi:
|
|
38
|
+
name: Publish to PyPI
|
|
39
|
+
needs: [build]
|
|
40
|
+
runs-on: ubuntu-latest
|
|
41
|
+
environment:
|
|
42
|
+
name: pypi
|
|
43
|
+
url: https://pypi.org/p/pixelprompt
|
|
44
|
+
permissions:
|
|
45
|
+
id-token: write # IMPORTANT: mandatory for trusted publishing
|
|
46
|
+
|
|
47
|
+
steps:
|
|
48
|
+
- name: Download all the dists
|
|
49
|
+
uses: actions/download-artifact@v4
|
|
50
|
+
with:
|
|
51
|
+
name: python-package-distributions
|
|
52
|
+
path: dist/
|
|
53
|
+
|
|
54
|
+
- name: Publish distribution to PyPI
|
|
55
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
15
|
+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: ${{ matrix.python-version }}
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip
|
|
28
|
+
pip install -e ".[dev]"
|
|
29
|
+
|
|
30
|
+
- name: Run tests
|
|
31
|
+
run: pytest
|
|
32
|
+
|
|
33
|
+
- name: Run linter
|
|
34
|
+
run: ruff check src/
|
|
35
|
+
|
|
36
|
+
- name: Check formatting
|
|
37
|
+
run: black --check src/ tests/
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
pip-wheel-metadata/
|
|
24
|
+
share/python-wheels/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
.installed.cfg
|
|
27
|
+
*.egg
|
|
28
|
+
MANIFEST
|
|
29
|
+
|
|
30
|
+
# PyInstaller
|
|
31
|
+
*.manifest
|
|
32
|
+
*.spec
|
|
33
|
+
|
|
34
|
+
# Installer logs
|
|
35
|
+
pip-log.txt
|
|
36
|
+
pip-delete-this-directory.txt
|
|
37
|
+
|
|
38
|
+
# Unit test / coverage reports
|
|
39
|
+
htmlcov/
|
|
40
|
+
.tox/
|
|
41
|
+
.nox/
|
|
42
|
+
.coverage
|
|
43
|
+
.coverage.*
|
|
44
|
+
.cache
|
|
45
|
+
nosetests.xml
|
|
46
|
+
coverage.xml
|
|
47
|
+
*.cover
|
|
48
|
+
*.py,cover
|
|
49
|
+
.hypothesis/
|
|
50
|
+
.pytest_cache/
|
|
51
|
+
|
|
52
|
+
# Translations
|
|
53
|
+
*.mo
|
|
54
|
+
*.pot
|
|
55
|
+
|
|
56
|
+
# Django stuff:
|
|
57
|
+
*.log
|
|
58
|
+
local_settings.py
|
|
59
|
+
db.sqlite3
|
|
60
|
+
db.sqlite3-journal
|
|
61
|
+
|
|
62
|
+
# Flask stuff:
|
|
63
|
+
instance/
|
|
64
|
+
.webassets-cache
|
|
65
|
+
|
|
66
|
+
# Scrapy stuff:
|
|
67
|
+
.scrapy
|
|
68
|
+
|
|
69
|
+
# Sphinx documentation
|
|
70
|
+
docs/_build/
|
|
71
|
+
|
|
72
|
+
# PyBuilder
|
|
73
|
+
target/
|
|
74
|
+
|
|
75
|
+
# Jupyter Notebook
|
|
76
|
+
.ipynb_checkpoints
|
|
77
|
+
|
|
78
|
+
# IPython
|
|
79
|
+
profile_default/
|
|
80
|
+
ipython_config.py
|
|
81
|
+
|
|
82
|
+
# pyenv
|
|
83
|
+
.python-version
|
|
84
|
+
|
|
85
|
+
# pipenv
|
|
86
|
+
Pipfile.lock
|
|
87
|
+
|
|
88
|
+
# PEP 582
|
|
89
|
+
__pypackages__/
|
|
90
|
+
|
|
91
|
+
# Celery stuff
|
|
92
|
+
celerybeat-schedule
|
|
93
|
+
celerybeat.pid
|
|
94
|
+
|
|
95
|
+
# SageMath parsed files
|
|
96
|
+
*.sage.py
|
|
97
|
+
|
|
98
|
+
# Environments
|
|
99
|
+
.env
|
|
100
|
+
.venv
|
|
101
|
+
env/
|
|
102
|
+
venv/
|
|
103
|
+
ENV/
|
|
104
|
+
env.bak/
|
|
105
|
+
venv.bak/
|
|
106
|
+
|
|
107
|
+
# Spyder project settings
|
|
108
|
+
.spyderproject
|
|
109
|
+
.spyproject
|
|
110
|
+
|
|
111
|
+
# Rope project settings
|
|
112
|
+
.ropeproject
|
|
113
|
+
|
|
114
|
+
# mkdocs documentation
|
|
115
|
+
/site
|
|
116
|
+
|
|
117
|
+
# mypy
|
|
118
|
+
.mypy_cache/
|
|
119
|
+
.dmypy.json
|
|
120
|
+
dmypy.json
|
|
121
|
+
|
|
122
|
+
# Pyre type checker
|
|
123
|
+
.pyre/
|
|
124
|
+
|
|
125
|
+
# IDEs
|
|
126
|
+
.vscode/
|
|
127
|
+
.idea/
|
|
128
|
+
*.swp
|
|
129
|
+
*.swo
|
|
130
|
+
*~
|
|
131
|
+
.DS_Store
|
|
132
|
+
|
|
133
|
+
# Project specific
|
|
134
|
+
.uv/
|
|
135
|
+
uv.lock
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Gabriele Venturi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
18
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
19
|
+
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
20
|
+
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
21
|
+
USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pixelprompt
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Compress LLM context by rendering text as optimized images
|
|
5
|
+
Project-URL: Homepage, https://github.com/sinaptik-ai/pixelprompt
|
|
6
|
+
Project-URL: Documentation, https://pixelprompt.readthedocs.io
|
|
7
|
+
Project-URL: Repository, https://github.com/sinaptik-ai/pixelprompt
|
|
8
|
+
Project-URL: Issues, https://github.com/sinaptik-ai/pixelprompt/issues
|
|
9
|
+
Author-email: Gabriele Venturi <gabriele@sinaptik.ai>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: compression,context,image,llm,token-optimization
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Multimedia :: Graphics :: Graphics Conversion
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Requires-Dist: anthropic>=0.7.0
|
|
26
|
+
Requires-Dist: pillow>=10.0.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
33
|
+
Provides-Extra: docs
|
|
34
|
+
Requires-Dist: mkdocs-material>=9.0.0; extra == 'docs'
|
|
35
|
+
Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
# PixelPrompt
|
|
39
|
+
|
|
40
|
+
Compress LLM context by rendering text as optimized images. Based on the research paper *"Pixels Beat Tokens: Multimodal LLMs See Better With Image Sources for Text-Rich VQA"*.
|
|
41
|
+
|
|
42
|
+
## Why PixelPrompt?
|
|
43
|
+
|
|
44
|
+
When working with LLMs, token counts directly impact cost and latency. PixelPrompt converts text content into visually optimized PNG images, achieving **4-8x compression** compared to raw text tokens, while maintaining or improving accuracy.
|
|
45
|
+
|
|
46
|
+
**Key benefits:**
|
|
47
|
+
- 🎯 **Significant token savings** — text rendered as images uses fewer tokens
|
|
48
|
+
- 📊 **Flexible formatting** — control font size, layout, and visual hierarchy
|
|
49
|
+
- 🔄 **Automatic splitting** — large content automatically split across multiple images
|
|
50
|
+
- 🎨 **Configurable rendering** — customize fonts, colors, background
|
|
51
|
+
- 🚀 **Easy integration** — simple API for any LLM workflow
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
uv pip install pixelprompt
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Or with pip:
|
|
60
|
+
```bash
|
|
61
|
+
pip install pixelprompt
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick Start
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from pixelprompt import PixelPrompt
|
|
68
|
+
|
|
69
|
+
# Initialize with default settings
|
|
70
|
+
pxl = PixelPrompt()
|
|
71
|
+
|
|
72
|
+
# Render text as image(s)
|
|
73
|
+
text = "Your long context here..."
|
|
74
|
+
images = pxl.render(text)
|
|
75
|
+
|
|
76
|
+
# Use with Claude API
|
|
77
|
+
from anthropic import Anthropic
|
|
78
|
+
|
|
79
|
+
client = Anthropic()
|
|
80
|
+
message = client.messages.create(
|
|
81
|
+
model="claude-3-5-sonnet-20241022",
|
|
82
|
+
max_tokens=1024,
|
|
83
|
+
messages=[
|
|
84
|
+
{
|
|
85
|
+
"role": "user",
|
|
86
|
+
"content": [
|
|
87
|
+
{
|
|
88
|
+
"type": "text",
|
|
89
|
+
"text": "Analyze this document:"
|
|
90
|
+
},
|
|
91
|
+
*[
|
|
92
|
+
{
|
|
93
|
+
"type": "image",
|
|
94
|
+
"source": {
|
|
95
|
+
"type": "base64",
|
|
96
|
+
"media_type": "image/png",
|
|
97
|
+
"data": img.base64()
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
for img in images
|
|
101
|
+
],
|
|
102
|
+
{
|
|
103
|
+
"type": "text",
|
|
104
|
+
"text": "What are the key points?"
|
|
105
|
+
}
|
|
106
|
+
]
|
|
107
|
+
}
|
|
108
|
+
]
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
print(message.content[0].text)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Configuration
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from pixelprompt import PixelPrompt, RenderConfig
|
|
118
|
+
|
|
119
|
+
config = RenderConfig(
|
|
120
|
+
font_size=9, # Default: 9 (range: 6-20)
|
|
121
|
+
font_family="monospace", # Default: "monospace"
|
|
122
|
+
width=1568, # Image width in pixels (default: 1568)
|
|
123
|
+
height=1568, # Image height in pixels (default: 1568)
|
|
124
|
+
background_color=(255, 255, 255), # RGB tuple (default: white)
|
|
125
|
+
text_color=(0, 0, 0), # RGB tuple (default: black)
|
|
126
|
+
padding=20, # Padding in pixels (default: 20)
|
|
127
|
+
line_spacing=1.2, # Line height multiplier (default: 1.2)
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
pxl = PixelPrompt(config=config)
|
|
131
|
+
images = pxl.render(text)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Advanced Usage
|
|
135
|
+
|
|
136
|
+
### Analyze compression metrics
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
from pixelprompt import estimate_tokens
|
|
140
|
+
|
|
141
|
+
text = "Your long context..."
|
|
142
|
+
original_tokens = estimate_tokens(text)
|
|
143
|
+
compressed_tokens = estimate_tokens(f"[Image with compressed content]")
|
|
144
|
+
|
|
145
|
+
compression_ratio = original_tokens / compressed_tokens
|
|
146
|
+
print(f"Compression: {compression_ratio:.1f}x")
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Handle large documents
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
# Automatically splits into multiple images if content exceeds limits
|
|
153
|
+
images = pxl.render(long_document)
|
|
154
|
+
print(f"Generated {len(images)} images")
|
|
155
|
+
|
|
156
|
+
# Access individual images
|
|
157
|
+
for i, img in enumerate(images):
|
|
158
|
+
img.save(f"page_{i}.png")
|
|
159
|
+
print(f"Image {i}: {img.width}x{img.height}, size: {img.size_bytes} bytes")
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Custom fonts
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
config = RenderConfig(
|
|
166
|
+
font_family="serif", # Options: "monospace", "serif", "sans-serif"
|
|
167
|
+
font_size=10,
|
|
168
|
+
)
|
|
169
|
+
pxl = PixelPrompt(config=config)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## API Reference
|
|
173
|
+
|
|
174
|
+
### `PixelPrompt`
|
|
175
|
+
|
|
176
|
+
Main class for rendering text to images.
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
class PixelPrompt:
|
|
180
|
+
def __init__(self, config: RenderConfig | None = None):
|
|
181
|
+
"""Initialize with optional configuration."""
|
|
182
|
+
|
|
183
|
+
def render(self, text: str) -> list[RenderedImage]:
|
|
184
|
+
"""
|
|
185
|
+
Render text to one or more PNG images.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
text: Text content to render
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
List of RenderedImage objects
|
|
192
|
+
"""
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### `RenderConfig`
|
|
196
|
+
|
|
197
|
+
Configuration dataclass for rendering parameters.
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
@dataclass
|
|
201
|
+
class RenderConfig:
|
|
202
|
+
font_size: int = 9
|
|
203
|
+
font_family: str = "monospace"
|
|
204
|
+
width: int = 1568
|
|
205
|
+
height: int = 1568
|
|
206
|
+
background_color: tuple[int, int, int] = (255, 255, 255)
|
|
207
|
+
text_color: tuple[int, int, int] = (0, 0, 0)
|
|
208
|
+
padding: int = 20
|
|
209
|
+
line_spacing: float = 1.2
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### `RenderedImage`
|
|
213
|
+
|
|
214
|
+
Represents a single rendered image.
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
class RenderedImage:
|
|
218
|
+
width: int
|
|
219
|
+
height: int
|
|
220
|
+
size_bytes: int
|
|
221
|
+
|
|
222
|
+
def png_bytes(self) -> bytes:
|
|
223
|
+
"""Get raw PNG bytes."""
|
|
224
|
+
|
|
225
|
+
def base64(self) -> str:
|
|
226
|
+
"""Get base64-encoded PNG for API integration."""
|
|
227
|
+
|
|
228
|
+
def save(self, path: str) -> None:
|
|
229
|
+
"""Save to file."""
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
## Performance
|
|
233
|
+
|
|
234
|
+
Typical compression ratios (depends on content):
|
|
235
|
+
- **Code**: 4-6x compression
|
|
236
|
+
- **Technical prose**: 5-8x compression
|
|
237
|
+
- **JSON/Structured data**: 3-5x compression
|
|
238
|
+
- **Natural language**: 4-7x compression
|
|
239
|
+
|
|
240
|
+
Rendering time: ~100-200ms per image on modern hardware.
|
|
241
|
+
|
|
242
|
+
## Contributing
|
|
243
|
+
|
|
244
|
+
Contributions welcome! Please open issues or PRs on GitHub.
|
|
245
|
+
|
|
246
|
+
## License
|
|
247
|
+
|
|
248
|
+
MIT License — see LICENSE file for details.
|
|
249
|
+
|
|
250
|
+
## Citation
|
|
251
|
+
|
|
252
|
+
If you use PixelPrompt in research, please cite:
|
|
253
|
+
|
|
254
|
+
```bibtex
|
|
255
|
+
@software{pixelprompt,
|
|
256
|
+
author = {Venturi, Gabriele},
|
|
257
|
+
title = {PixelPrompt: Compress LLM Context by Rendering Text as Images},
|
|
258
|
+
year = {2026},
|
|
259
|
+
url = {https://github.com/sinaptik-ai/pixelprompt}
|
|
260
|
+
}
|
|
261
|
+
```
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# PixelPrompt
|
|
2
|
+
|
|
3
|
+
Compress LLM context by rendering text as optimized images. Based on the research paper *"Pixels Beat Tokens: Multimodal LLMs See Better With Image Sources for Text-Rich VQA"*.
|
|
4
|
+
|
|
5
|
+
## Why PixelPrompt?
|
|
6
|
+
|
|
7
|
+
When working with LLMs, token counts directly impact cost and latency. PixelPrompt converts text content into visually optimized PNG images, achieving **4-8x compression** compared to raw text tokens, while maintaining or improving accuracy.
|
|
8
|
+
|
|
9
|
+
**Key benefits:**
|
|
10
|
+
- 🎯 **Significant token savings** — text rendered as images uses fewer tokens
|
|
11
|
+
- 📊 **Flexible formatting** — control font size, layout, and visual hierarchy
|
|
12
|
+
- 🔄 **Automatic splitting** — large content automatically split across multiple images
|
|
13
|
+
- 🎨 **Configurable rendering** — customize fonts, colors, background
|
|
14
|
+
- 🚀 **Easy integration** — simple API for any LLM workflow
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
uv pip install pixelprompt
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Or with pip:
|
|
23
|
+
```bash
|
|
24
|
+
pip install pixelprompt
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from pixelprompt import PixelPrompt
|
|
31
|
+
|
|
32
|
+
# Initialize with default settings
|
|
33
|
+
pxl = PixelPrompt()
|
|
34
|
+
|
|
35
|
+
# Render text as image(s)
|
|
36
|
+
text = "Your long context here..."
|
|
37
|
+
images = pxl.render(text)
|
|
38
|
+
|
|
39
|
+
# Use with Claude API
|
|
40
|
+
from anthropic import Anthropic
|
|
41
|
+
|
|
42
|
+
client = Anthropic()
|
|
43
|
+
message = client.messages.create(
|
|
44
|
+
model="claude-3-5-sonnet-20241022",
|
|
45
|
+
max_tokens=1024,
|
|
46
|
+
messages=[
|
|
47
|
+
{
|
|
48
|
+
"role": "user",
|
|
49
|
+
"content": [
|
|
50
|
+
{
|
|
51
|
+
"type": "text",
|
|
52
|
+
"text": "Analyze this document:"
|
|
53
|
+
},
|
|
54
|
+
*[
|
|
55
|
+
{
|
|
56
|
+
"type": "image",
|
|
57
|
+
"source": {
|
|
58
|
+
"type": "base64",
|
|
59
|
+
"media_type": "image/png",
|
|
60
|
+
"data": img.base64()
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
for img in images
|
|
64
|
+
],
|
|
65
|
+
{
|
|
66
|
+
"type": "text",
|
|
67
|
+
"text": "What are the key points?"
|
|
68
|
+
}
|
|
69
|
+
]
|
|
70
|
+
}
|
|
71
|
+
]
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
print(message.content[0].text)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Configuration
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from pixelprompt import PixelPrompt, RenderConfig
|
|
81
|
+
|
|
82
|
+
config = RenderConfig(
|
|
83
|
+
font_size=9, # Default: 9 (range: 6-20)
|
|
84
|
+
font_family="monospace", # Default: "monospace"
|
|
85
|
+
width=1568, # Image width in pixels (default: 1568)
|
|
86
|
+
height=1568, # Image height in pixels (default: 1568)
|
|
87
|
+
background_color=(255, 255, 255), # RGB tuple (default: white)
|
|
88
|
+
text_color=(0, 0, 0), # RGB tuple (default: black)
|
|
89
|
+
padding=20, # Padding in pixels (default: 20)
|
|
90
|
+
line_spacing=1.2, # Line height multiplier (default: 1.2)
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
pxl = PixelPrompt(config=config)
|
|
94
|
+
images = pxl.render(text)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Advanced Usage
|
|
98
|
+
|
|
99
|
+
### Analyze compression metrics
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from pixelprompt import estimate_tokens
|
|
103
|
+
|
|
104
|
+
text = "Your long context..."
|
|
105
|
+
original_tokens = estimate_tokens(text)
|
|
106
|
+
compressed_tokens = estimate_tokens(f"[Image with compressed content]")
|
|
107
|
+
|
|
108
|
+
compression_ratio = original_tokens / compressed_tokens
|
|
109
|
+
print(f"Compression: {compression_ratio:.1f}x")
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Handle large documents
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
# Automatically splits into multiple images if content exceeds limits
|
|
116
|
+
images = pxl.render(long_document)
|
|
117
|
+
print(f"Generated {len(images)} images")
|
|
118
|
+
|
|
119
|
+
# Access individual images
|
|
120
|
+
for i, img in enumerate(images):
|
|
121
|
+
img.save(f"page_{i}.png")
|
|
122
|
+
print(f"Image {i}: {img.width}x{img.height}, size: {img.size_bytes} bytes")
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Custom fonts
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
config = RenderConfig(
|
|
129
|
+
font_family="serif", # Options: "monospace", "serif", "sans-serif"
|
|
130
|
+
font_size=10,
|
|
131
|
+
)
|
|
132
|
+
pxl = PixelPrompt(config=config)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## API Reference
|
|
136
|
+
|
|
137
|
+
### `PixelPrompt`
|
|
138
|
+
|
|
139
|
+
Main class for rendering text to images.
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
class PixelPrompt:
|
|
143
|
+
def __init__(self, config: RenderConfig | None = None):
|
|
144
|
+
"""Initialize with optional configuration."""
|
|
145
|
+
|
|
146
|
+
def render(self, text: str) -> list[RenderedImage]:
|
|
147
|
+
"""
|
|
148
|
+
Render text to one or more PNG images.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
text: Text content to render
|
|
152
|
+
|
|
153
|
+
Returns:
|
|
154
|
+
List of RenderedImage objects
|
|
155
|
+
"""
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### `RenderConfig`
|
|
159
|
+
|
|
160
|
+
Configuration dataclass for rendering parameters.
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
@dataclass
|
|
164
|
+
class RenderConfig:
|
|
165
|
+
font_size: int = 9
|
|
166
|
+
font_family: str = "monospace"
|
|
167
|
+
width: int = 1568
|
|
168
|
+
height: int = 1568
|
|
169
|
+
background_color: tuple[int, int, int] = (255, 255, 255)
|
|
170
|
+
text_color: tuple[int, int, int] = (0, 0, 0)
|
|
171
|
+
padding: int = 20
|
|
172
|
+
line_spacing: float = 1.2
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### `RenderedImage`
|
|
176
|
+
|
|
177
|
+
Represents a single rendered image.
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
class RenderedImage:
|
|
181
|
+
width: int
|
|
182
|
+
height: int
|
|
183
|
+
size_bytes: int
|
|
184
|
+
|
|
185
|
+
def png_bytes(self) -> bytes:
|
|
186
|
+
"""Get raw PNG bytes."""
|
|
187
|
+
|
|
188
|
+
def base64(self) -> str:
|
|
189
|
+
"""Get base64-encoded PNG for API integration."""
|
|
190
|
+
|
|
191
|
+
def save(self, path: str) -> None:
|
|
192
|
+
"""Save to file."""
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
## Performance
|
|
196
|
+
|
|
197
|
+
Typical compression ratios (depends on content):
|
|
198
|
+
- **Code**: 4-6x compression
|
|
199
|
+
- **Technical prose**: 5-8x compression
|
|
200
|
+
- **JSON/Structured data**: 3-5x compression
|
|
201
|
+
- **Natural language**: 4-7x compression
|
|
202
|
+
|
|
203
|
+
Rendering time: ~100-200ms per image on modern hardware.
|
|
204
|
+
|
|
205
|
+
## Contributing
|
|
206
|
+
|
|
207
|
+
Contributions welcome! Please open issues or PRs on GitHub.
|
|
208
|
+
|
|
209
|
+
## License
|
|
210
|
+
|
|
211
|
+
MIT License — see LICENSE file for details.
|
|
212
|
+
|
|
213
|
+
## Citation
|
|
214
|
+
|
|
215
|
+
If you use PixelPrompt in research, please cite:
|
|
216
|
+
|
|
217
|
+
```bibtex
|
|
218
|
+
@software{pixelprompt,
|
|
219
|
+
author = {Venturi, Gabriele},
|
|
220
|
+
title = {PixelPrompt: Compress LLM Context by Rendering Text as Images},
|
|
221
|
+
year = {2026},
|
|
222
|
+
url = {https://github.com/sinaptik-ai/pixelprompt}
|
|
223
|
+
}
|
|
224
|
+
```
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pixelprompt"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Compress LLM context by rendering text as optimized images"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Gabriele Venturi", email = "gabriele@sinaptik.ai"},
|
|
14
|
+
]
|
|
15
|
+
keywords = ["llm", "context", "compression", "image", "token-optimization"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.8",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Topic :: Software Development :: Libraries",
|
|
27
|
+
"Topic :: Multimedia :: Graphics :: Graphics Conversion",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
dependencies = [
|
|
31
|
+
"pillow>=10.0.0",
|
|
32
|
+
"anthropic>=0.7.0",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
dev = [
|
|
37
|
+
"pytest>=7.0.0",
|
|
38
|
+
"pytest-cov>=4.0.0",
|
|
39
|
+
"black>=23.0.0",
|
|
40
|
+
"ruff>=0.1.0",
|
|
41
|
+
"mypy>=1.0.0",
|
|
42
|
+
]
|
|
43
|
+
docs = [
|
|
44
|
+
"mkdocs>=1.5.0",
|
|
45
|
+
"mkdocs-material>=9.0.0",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/sinaptik-ai/pixelprompt"
|
|
50
|
+
Documentation = "https://pixelprompt.readthedocs.io"
|
|
51
|
+
Repository = "https://github.com/sinaptik-ai/pixelprompt"
|
|
52
|
+
Issues = "https://github.com/sinaptik-ai/pixelprompt/issues"
|
|
53
|
+
|
|
54
|
+
[tool.hatch.build.targets.wheel]
|
|
55
|
+
packages = ["src/pixelprompt"]
|
|
56
|
+
|
|
57
|
+
[tool.black]
|
|
58
|
+
line-length = 100
|
|
59
|
+
target-version = ["py38"]
|
|
60
|
+
|
|
61
|
+
[tool.ruff]
|
|
62
|
+
line-length = 100
|
|
63
|
+
target-version = "py38"
|
|
64
|
+
select = ["E", "F", "W", "I"]
|
|
65
|
+
|
|
66
|
+
[tool.mypy]
|
|
67
|
+
python_version = "3.8"
|
|
68
|
+
warn_return_any = true
|
|
69
|
+
warn_unused_configs = true
|
|
70
|
+
disallow_untyped_defs = false
|
|
71
|
+
|
|
72
|
+
[tool.pytest.ini_options]
|
|
73
|
+
testpaths = ["tests"]
|
|
74
|
+
# "term-line" is not a pytest-cov report type and makes pytest abort while parsing
# its arguments; "term-missing" prints the terminal report with uncovered line numbers.
addopts = "--cov=src/pixelprompt --cov-report=term-missing"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PixelPrompt: Compress LLM context by rendering text as optimized images.
|
|
3
|
+
|
|
4
|
+
Based on research exploring multimodal LLM capabilities and token efficiency.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
__version__ = "0.1.0"
|
|
8
|
+
__author__ = "Gabriele Venturi"
|
|
9
|
+
__email__ = "gabriele@sinaptik.ai"
|
|
10
|
+
|
|
11
|
+
from .core import PixelPrompt, RenderConfig, RenderedImage
|
|
12
|
+
from .utils import estimate_tokens
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"PixelPrompt",
|
|
16
|
+
"RenderConfig",
|
|
17
|
+
"RenderedImage",
|
|
18
|
+
"estimate_tokens",
|
|
19
|
+
]
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core PixelPrompt implementation for rendering text as optimized images.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import base64
import io
from dataclasses import dataclass
from typing import List, Optional, Tuple

from PIL import Image, ImageDraw, ImageFont
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class RenderConfig:
    """Configuration for text rendering to images.

    This class only stores the settings. Range checks are applied by
    ``PixelPrompt._validate_config`` when a ``PixelPrompt`` is constructed.
    """

    font_size: int = 9
    """Font size in points (range: 6-20). Default: 9."""

    font_family: str = "monospace"
    """Font family: 'monospace', 'serif', or 'sans-serif'. Default: 'monospace'."""

    width: int = 1568
    """Image width in pixels. Default: 1568."""

    height: int = 1568
    """Image height in pixels. Default: 1568."""

    # ``typing.Tuple`` instead of ``tuple[...]``: dataclass annotations are evaluated
    # when the class body runs, and subscripting the builtin ``tuple`` raises
    # TypeError on Python 3.8, which this package declares as supported.
    background_color: Tuple[int, int, int] = (255, 255, 255)
    """Background color as (R, G, B) tuple. Default: white (255, 255, 255)."""

    text_color: Tuple[int, int, int] = (0, 0, 0)
    """Text color as (R, G, B) tuple. Default: black (0, 0, 0)."""

    padding: int = 20
    """Padding in pixels from image edges. Default: 20."""

    line_spacing: float = 1.2
    """Line height multiplier. Default: 1.2."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class RenderedImage:
    """Thin wrapper around one rendered PIL image.

    Exposes the image dimensions and PNG serialisations (raw bytes, base64
    text, file on disk). The PNG encoding is produced on every call; nothing
    is cached.
    """

    def __init__(self, image: Image.Image):
        """Store the PIL image that this object wraps."""
        self._image = image

    @property
    def width(self) -> int:
        """Width of the wrapped image, in pixels."""
        return self._image.width

    @property
    def height(self) -> int:
        """Height of the wrapped image, in pixels."""
        return self._image.height

    @property
    def size_bytes(self) -> int:
        """Length of the PNG encoding, in bytes (re-encodes the image)."""
        encoded = self.png_bytes()
        return len(encoded)

    def png_bytes(self) -> bytes:
        """Encode the image as PNG into memory and return the bytes."""
        stream = io.BytesIO()
        self._image.save(stream, format="PNG")
        return stream.getvalue()

    def base64(self) -> str:
        """Return the PNG encoding as base64 text, for API payloads."""
        # Inside the method, ``base64`` resolves to the imported module,
        # not to this method (class attributes are not in function scope).
        encoded = base64.b64encode(self.png_bytes())
        return encoded.decode("utf-8")

    def save(self, path: str) -> None:
        """Write the image to ``path`` in PNG format."""
        self._image.save(path, format="PNG")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class PixelPrompt:
|
|
80
|
+
"""
|
|
81
|
+
Renders text content as optimized PNG images for LLM context compression.
|
|
82
|
+
|
|
83
|
+
Example:
|
|
84
|
+
>>> pxl = PixelPrompt()
|
|
85
|
+
>>> images = pxl.render("Long context here...")
|
|
86
|
+
>>> for img in images:
|
|
87
|
+
... img.save("output.png")
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
def __init__(self, config: Optional[RenderConfig] = None):
    """
    Build a renderer from the given configuration.

    Args:
        config: Rendering settings. When omitted (or falsy), a default
            ``RenderConfig()`` is used.

    Raises:
        ValueError: Propagated from ``_validate_config`` when a setting
            is out of range.
    """
    if not config:
        config = RenderConfig()
    self.config = config

    # Validation runs before font loading, so an out-of-range font_size
    # is rejected before any font file is opened.
    self._validate_config()
    self._load_fonts()
|
|
100
|
+
|
|
101
|
+
def _validate_config(self) -> None:
|
|
102
|
+
"""Validate configuration values."""
|
|
103
|
+
if not 6 <= self.config.font_size <= 20:
|
|
104
|
+
raise ValueError("font_size must be between 6 and 20")
|
|
105
|
+
if self.config.font_family not in ("monospace", "serif", "sans-serif"):
|
|
106
|
+
raise ValueError("font_family must be 'monospace', 'serif', or 'sans-serif'")
|
|
107
|
+
if self.config.width < 256 or self.config.height < 256:
|
|
108
|
+
raise ValueError("width and height must be at least 256 pixels")
|
|
109
|
+
if self.config.padding < 0:
|
|
110
|
+
raise ValueError("padding must be non-negative")
|
|
111
|
+
if self.config.line_spacing <= 0:
|
|
112
|
+
raise ValueError("line_spacing must be positive")
|
|
113
|
+
|
|
114
|
+
def _load_fonts(self) -> None:
|
|
115
|
+
"""Load available fonts for the system."""
|
|
116
|
+
font_names = {
|
|
117
|
+
"monospace": ["DejaVuSansMono", "Courier New", "Liberation Mono"],
|
|
118
|
+
"sans-serif": ["DejaVuSans", "Arial", "Liberation Sans"],
|
|
119
|
+
"serif": ["DejaVuSerif", "Times New Roman", "Liberation Serif"],
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
family_fonts = font_names.get(self.config.font_family, font_names["monospace"])
|
|
123
|
+
self._font = self._find_font(family_fonts)
|
|
124
|
+
|
|
125
|
+
def _find_font(self, font_names: list[str]) -> ImageFont.FreeTypeFont:
|
|
126
|
+
"""
|
|
127
|
+
Find an available TrueType font from the list.
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
font_names: List of font names to try.
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
Loaded font or default fallback.
|
|
134
|
+
"""
|
|
135
|
+
# Common font paths on different systems
|
|
136
|
+
font_paths = [
|
|
137
|
+
"/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
|
|
138
|
+
"/System/Library/Fonts/Monaco.ttf",
|
|
139
|
+
"/Windows/Fonts/cour.ttf",
|
|
140
|
+
"/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf",
|
|
141
|
+
]
|
|
142
|
+
|
|
143
|
+
for path in font_paths:
|
|
144
|
+
try:
|
|
145
|
+
return ImageFont.truetype(path, self.config.font_size)
|
|
146
|
+
except (OSError, IOError):
|
|
147
|
+
continue
|
|
148
|
+
|
|
149
|
+
# Fallback to default font
|
|
150
|
+
return ImageFont.load_default()
|
|
151
|
+
|
|
152
|
+
def render(self, text: str) -> list[RenderedImage]:
|
|
153
|
+
"""
|
|
154
|
+
Render text to one or more PNG images.
|
|
155
|
+
|
|
156
|
+
Large texts are automatically split across multiple images if needed.
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
text: Text content to render.
|
|
160
|
+
|
|
161
|
+
Returns:
|
|
162
|
+
List of RenderedImage objects.
|
|
163
|
+
|
|
164
|
+
Raises:
|
|
165
|
+
ValueError: If text is empty.
|
|
166
|
+
"""
|
|
167
|
+
if not text or not text.strip():
|
|
168
|
+
raise ValueError("Text cannot be empty")
|
|
169
|
+
|
|
170
|
+
# Split text into chunks if needed
|
|
171
|
+
chunks = self._split_text(text)
|
|
172
|
+
|
|
173
|
+
# Render each chunk to an image
|
|
174
|
+
images = [self._render_chunk(chunk) for chunk in chunks]
|
|
175
|
+
|
|
176
|
+
return images
|
|
177
|
+
|
|
178
|
+
def _split_text(self, text: str) -> list[str]:
|
|
179
|
+
"""
|
|
180
|
+
Split text into chunks that fit on a single image.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
text: Text to split.
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
List of text chunks.
|
|
187
|
+
"""
|
|
188
|
+
# Calculate how many lines fit on one image
|
|
189
|
+
available_height = self.config.height - 2 * self.config.padding
|
|
190
|
+
line_height = int(self.config.font_size * self.config.line_spacing)
|
|
191
|
+
|
|
192
|
+
if line_height == 0:
|
|
193
|
+
line_height = self.config.font_size
|
|
194
|
+
|
|
195
|
+
max_lines = max(1, available_height // line_height)
|
|
196
|
+
|
|
197
|
+
# Split text into lines
|
|
198
|
+
lines = text.split("\n")
|
|
199
|
+
|
|
200
|
+
# Group lines into chunks
|
|
201
|
+
chunks = []
|
|
202
|
+
current_chunk = []
|
|
203
|
+
|
|
204
|
+
for line in lines:
|
|
205
|
+
current_chunk.append(line)
|
|
206
|
+
if len(current_chunk) >= max_lines:
|
|
207
|
+
chunks.append("\n".join(current_chunk))
|
|
208
|
+
current_chunk = []
|
|
209
|
+
|
|
210
|
+
if current_chunk:
|
|
211
|
+
chunks.append("\n".join(current_chunk))
|
|
212
|
+
|
|
213
|
+
return chunks if chunks else [text]
|
|
214
|
+
|
|
215
|
+
def _render_chunk(self, text: str) -> RenderedImage:
|
|
216
|
+
"""
|
|
217
|
+
Render a single text chunk to an image.
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
text: Text to render.
|
|
221
|
+
|
|
222
|
+
Returns:
|
|
223
|
+
RenderedImage object.
|
|
224
|
+
"""
|
|
225
|
+
# Create image with background color
|
|
226
|
+
image = Image.new(
|
|
227
|
+
"RGB",
|
|
228
|
+
(self.config.width, self.config.height),
|
|
229
|
+
self.config.background_color,
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
draw = ImageDraw.Draw(image)
|
|
233
|
+
|
|
234
|
+
# Draw text
|
|
235
|
+
x = self.config.padding
|
|
236
|
+
y = self.config.padding
|
|
237
|
+
line_height = int(self.config.font_size * self.config.line_spacing)
|
|
238
|
+
|
|
239
|
+
for line in text.split("\n"):
|
|
240
|
+
draw.text(
|
|
241
|
+
(x, y),
|
|
242
|
+
line,
|
|
243
|
+
fill=self.config.text_color,
|
|
244
|
+
font=self._font,
|
|
245
|
+
)
|
|
246
|
+
y += line_height
|
|
247
|
+
|
|
248
|
+
return RenderedImage(image)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for PixelPrompt.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def estimate_tokens(text: str, model: str = "claude-3-5-sonnet-20241022") -> int:
    """
    Approximate the number of tokens in ``text``.

    The estimate is a plain heuristic of one token per four characters,
    rounded down, with a floor of one token for any non-empty text. It does
    not call a tokenizer; use a token counting API for precise counts.

    Args:
        text: Text to estimate tokens for.
        model: Model name. Currently unused by the heuristic.

    Returns:
        Estimated token count; 0 for empty text.
    """
    char_count = len(text) if text else 0
    if char_count == 0:
        return 0

    # Integer division by four, but never report zero for non-empty text.
    approx = char_count // 4
    return approx if approx >= 1 else 1
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def estimate_compression_ratio(
    original_text: str, num_images: int, tokens_per_image: int = 2
) -> float:
    """
    Estimate compression ratio when rendering text as images.

    The ratio is the estimated token count of the text divided by the
    assumed token cost of the images.

    Args:
        original_text: Original text content.
        num_images: Number of images generated.
        tokens_per_image: Assumed token cost of one image. The default of 2
            is kept for backward compatibility and is a placeholder, not a
            measured value. NOTE(review): Anthropic's vision documentation
            describes image cost as roughly (width * height) / 750 tokens;
            pass a value derived from the actual image size for a realistic
            ratio -- confirm against the current documentation.

    Returns:
        Estimated compression ratio (original tokens / image tokens).
    """
    original_tokens = estimate_tokens(original_text)

    # Floor at 1 so that zero or negative inputs cannot divide by zero.
    image_tokens = max(1, num_images * tokens_per_image)

    return original_tokens / image_tokens
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Test suite for PixelPrompt."""
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Tests for PixelPrompt core functionality."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from pixelprompt import PixelPrompt, RenderConfig, RenderedImage
|
|
6
|
+
from pixelprompt.core import RenderedImage as RenderedImageClass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestRenderConfig:
    """Checks for the RenderConfig dataclass."""

    def test_default_config(self):
        """A config built without arguments carries the expected defaults."""
        defaults = RenderConfig()
        assert defaults.font_size == 9
        assert defaults.width == 1568
        assert defaults.height == 1568
        assert defaults.background_color == (255, 255, 255)
        assert defaults.text_color == (0, 0, 0)

    def test_custom_config(self):
        """Values passed to the constructor are stored unchanged."""
        overrides = {
            "font_size": 12,
            "width": 2048,
            "background_color": (240, 240, 240),
        }
        custom = RenderConfig(**overrides)
        assert custom.font_size == 12
        assert custom.width == 2048
        assert custom.background_color == (240, 240, 240)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TestPixelPrompt:
    """Checks for the PixelPrompt entry point."""

    def test_initialization(self):
        """Constructing without a config falls back to the default config."""
        renderer = PixelPrompt()
        assert renderer.config.font_size == 9
        assert renderer.config.width == 1568

    def test_custom_initialization(self):
        """A supplied config is the one exposed on the instance."""
        renderer = PixelPrompt(config=RenderConfig(font_size=12))
        assert renderer.config.font_size == 12

    def test_invalid_font_size(self):
        """A font size above the allowed range is rejected at construction."""
        too_large = RenderConfig(font_size=25)
        with pytest.raises(ValueError, match="font_size must be between 6 and 20"):
            PixelPrompt(config=too_large)

    def test_invalid_font_family(self):
        """An unknown font family is rejected at construction."""
        unknown_family = RenderConfig(font_family="invalid")
        with pytest.raises(ValueError, match="font_family must be"):
            PixelPrompt(config=unknown_family)

    def test_render_simple_text(self):
        """A single short line produces exactly one rendered image."""
        rendered = PixelPrompt().render("Hello, World!")
        assert len(rendered) == 1
        assert isinstance(rendered[0], RenderedImageClass)

    def test_render_multiline_text(self):
        """Text with several lines renders to at least one image."""
        rendered = PixelPrompt().render("Line 1\nLine 2\nLine 3")
        assert len(rendered) >= 1

    def test_render_empty_text_raises_error(self):
        """An empty string is rejected."""
        renderer = PixelPrompt()
        with pytest.raises(ValueError, match="Text cannot be empty"):
            renderer.render("")

    def test_render_whitespace_text_raises_error(self):
        """Text consisting only of whitespace is rejected."""
        renderer = PixelPrompt()
        with pytest.raises(ValueError, match="Text cannot be empty"):
            renderer.render(" \n ")

    def test_render_long_text_splits(self):
        """A thousand lines do not fit on one image and are split."""
        many_lines = "\n".join(f"Line {i}" for i in range(1000))
        rendered = PixelPrompt().render(many_lines)
        assert len(rendered) > 1
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class TestRenderedImage:
    """Test RenderedImage class."""

    # First eight bytes of every PNG stream.
    PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"

    def test_rendered_image_properties(self):
        """Test RenderedImage properties."""
        pxl = PixelPrompt()
        img = pxl.render("Test")[0]

        assert img.width == 1568
        assert img.height == 1568
        assert img.size_bytes > 0

    def test_png_bytes(self):
        """Test PNG bytes export."""
        pxl = PixelPrompt()
        img = pxl.render("Test")[0]

        png_bytes = img.png_bytes()
        assert isinstance(png_bytes, bytes)
        assert len(png_bytes) > 0
        assert png_bytes[:8] == self.PNG_SIGNATURE

    def test_base64(self):
        """Test that base64() returns strictly valid base64 of a PNG."""
        import base64

        pxl = PixelPrompt()
        img = pxl.render("Test")[0]

        base64_str = img.base64()
        assert isinstance(base64_str, str)
        assert len(base64_str) > 0

        # validate=True makes the decoder raise on characters outside the
        # base64 alphabet; without it such characters are silently discarded
        # and the check could never fail.
        decoded = base64.b64decode(base64_str, validate=True)
        assert decoded[:8] == self.PNG_SIGNATURE

    def test_save_image(self, tmp_path):
        """Test saving image to file."""
        pxl = PixelPrompt()
        img = pxl.render("Test")[0]

        output_path = tmp_path / "test.png"
        img.save(str(output_path))

        assert output_path.exists()
        assert output_path.stat().st_size > 0
        # The written file must be a PNG, not merely non-empty.
        assert output_path.read_bytes()[:8] == self.PNG_SIGNATURE
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class TestSplitText:
    """Test text splitting functionality."""

    def test_split_short_text(self):
        """Test that short text is returned as a single, unchanged chunk."""
        pxl = PixelPrompt()
        text = "Short text"
        chunks = pxl._split_text(text)
        assert chunks == [text]

    def test_split_respects_newlines(self):
        """Test that splitting happens on newlines and loses no content."""
        pxl = PixelPrompt()
        text = "Line 1\nLine 2\nLine 3"
        chunks = pxl._split_text(text)
        # A bare length check cannot fail; instead require that the chunks,
        # rejoined on newlines, reproduce the input exactly.
        assert len(chunks) >= 1
        assert "\n".join(chunks) == text
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Tests for PixelPrompt utility functions."""
|
|
2
|
+
|
|
3
|
+
from pixelprompt.utils import estimate_compression_ratio, estimate_tokens
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestEstimateTokens:
    """Checks for the character-based token estimate."""

    def test_empty_text(self):
        """Empty input is estimated at zero tokens."""
        assert estimate_tokens("") == 0

    def test_short_text(self):
        """Five characters divided by four rounds down to one token."""
        estimate = estimate_tokens("Hello")
        assert estimate == 1

    def test_longer_text(self):
        """One hundred characters are estimated at twenty-five tokens."""
        hundred_chars = "a" * 100
        assert estimate_tokens(hundred_chars) == 25

    def test_minimum_one_token(self):
        """Non-empty text shorter than four characters still counts."""
        estimate = estimate_tokens("a")
        assert estimate >= 1
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TestEstimateCompressionRatio:
    """Test compression ratio estimation."""

    def test_single_image_compression(self):
        """Test compression ratio with single image."""
        text = "a" * 100
        ratio = estimate_compression_ratio(text, 1)
        # 100 chars = ~25 tokens, 1 image = ~2 tokens
        assert ratio > 1

    def test_multiple_images_compression(self):
        """Test compression ratio with multiple images."""
        text = "a" * 100
        ratio = estimate_compression_ratio(text, 2)
        # 100 chars = ~25 tokens, 2 images = ~4 tokens
        assert ratio > 1

    def test_compression_improves_with_images(self):
        """Test that using more images for the same text lowers the ratio."""
        text = "a" * 1000
        ratio_1 = estimate_compression_ratio(text, 1)
        ratio_2 = estimate_compression_ratio(text, 2)
        # The image cost is in the denominator, so doubling the number of
        # images halves the ratio; fewer images means better compression.
        assert ratio_1 > ratio_2
        assert ratio_1 == 2 * ratio_2
|