python-token-killer 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/CHANGELOG.md +27 -0
  2. python_token_killer-0.1.1/PKG-INFO +302 -0
  3. python_token_killer-0.1.1/README.md +252 -0
  4. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/benchmarks/bench.py +20 -7
  5. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/benchmarks/samples/python_module.py +24 -27
  6. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/examples/claude_code_skill.py +6 -2
  7. python_token_killer-0.1.1/examples/langgraph_agent.py +257 -0
  8. python_token_killer-0.1.1/examples/log_triage.py +141 -0
  9. python_token_killer-0.1.1/examples/rag_pipeline.py +209 -0
  10. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/__init__.py +1 -1
  11. python_token_killer-0.1.1/tests/__init__.py +0 -0
  12. python_token_killer-0.1.1/tests/adversarial/__init__.py +0 -0
  13. python_token_killer-0.1.1/tests/adversarial/test_concurrency.py +70 -0
  14. python_token_killer-0.1.1/tests/adversarial/test_contracts.py +149 -0
  15. python_token_killer-0.1.1/tests/adversarial/test_edge_cases.py +150 -0
  16. python_token_killer-0.1.1/tests/adversarial/test_mutation.py +37 -0
  17. python_token_killer-0.1.1/tests/adversarial/test_performance.py +95 -0
  18. python_token_killer-0.1.1/tests/adversarial/test_regex.py +44 -0
  19. python_token_killer-0.1.1/tests/adversarial/test_types.py +183 -0
  20. python_token_killer-0.1.1/tests/conftest.py +7 -0
  21. python_token_killer-0.1.1/tests/real_world/__init__.py +0 -0
  22. python_token_killer-0.1.1/tests/real_world/test_infra.py +114 -0
  23. python_token_killer-0.1.1/tests/real_world/test_lint.py +41 -0
  24. python_token_killer-0.1.1/tests/real_world/test_pipelines.py +57 -0
  25. python_token_killer-0.1.1/tests/real_world/test_test_runners.py +107 -0
  26. python_token_killer-0.1.1/tests/real_world/test_vcs.py +62 -0
  27. python_token_killer-0.1.1/tests/unit/__init__.py +0 -0
  28. python_token_killer-0.1.1/tests/unit/test_api.py +50 -0
  29. python_token_killer-0.1.1/tests/unit/test_base.py +98 -0
  30. python_token_killer-0.1.1/tests/unit/test_code.py +157 -0
  31. python_token_killer-0.1.1/tests/unit/test_detection.py +94 -0
  32. python_token_killer-0.1.1/tests/unit/test_dict.py +132 -0
  33. python_token_killer-0.1.1/tests/unit/test_diff.py +82 -0
  34. python_token_killer-0.1.1/tests/unit/test_list.py +70 -0
  35. python_token_killer-0.1.1/tests/unit/test_log.py +92 -0
  36. python_token_killer-0.1.1/tests/unit/test_text.py +92 -0
  37. python_token_killer-0.1.0/PKG-INFO +0 -269
  38. python_token_killer-0.1.0/README.md +0 -219
  39. python_token_killer-0.1.0/examples/clean_api_response.py +0 -44
  40. python_token_killer-0.1.0/examples/langchain_middleware.py +0 -97
  41. python_token_killer-0.1.0/tests/test_adversarial.py +0 -983
  42. python_token_killer-0.1.0/tests/test_ptk.py +0 -1022
  43. python_token_killer-0.1.0/tests/test_real_world.py +0 -620
  44. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/.gitignore +0 -0
  45. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/CONTRIBUTING.md +0 -0
  46. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/LICENSE +0 -0
  47. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/benchmarks/samples/api_response.json +0 -0
  48. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/benchmarks/samples/server_log.txt +0 -0
  49. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/pyproject.toml +0 -0
  50. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/_base.py +0 -0
  51. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/_types.py +0 -0
  52. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/minimizers/__init__.py +0 -0
  53. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/minimizers/_code.py +0 -0
  54. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/minimizers/_dict.py +0 -0
  55. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/minimizers/_diff.py +0 -0
  56. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/minimizers/_list.py +0 -0
  57. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/minimizers/_log.py +0 -0
  58. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/minimizers/_text.py +0 -0
  59. {python_token_killer-0.1.0 → python_token_killer-0.1.1}/src/ptk/py.typed +0 -0
@@ -15,6 +15,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
15
15
 
16
16
  ---
17
17
 
18
+ ## [0.1.1] - 2026-04-12
19
+
20
+ ### Fixed
21
+
22
+ - Silent data loss in `_shorten_keys` when two keys map to the same abbreviation (e.g. `timestamp` and `created_at` both → `ts`). Second key now keeps its original name.
23
+ - `CodeMinimizer` corrupted URLs inside string literals (`https://` was stripped by the `//` comment regex). Replaced with a string-aware regex.
24
+ - `_shorten_dotted_keys` crashed with `TypeError` on non-string dict keys (`"." in 1`). Fixed with `isinstance(k, str)` guard.
25
+ - `DiffMinimizer` silently folded context in large diffs without emitting a fold marker.
26
+ - Markdown with `---` horizontal rule + `@@` mention was misdetected as diff.
27
+ - `_sample(n=1)` caused `ZeroDivisionError`.
28
+ - ALL CAPS words were abbreviated with wrong case (`IMPLEMENTATION` → `Impl` instead of `IMPL`).
29
+
30
+ ### Changed
31
+
32
+ - Test suite refactored from 3 monolithic files into 19 focused modules under `tests/unit/`, `tests/adversarial/`, `tests/real_world/`.
33
+ - Examples replaced with runnable, output-showing demos: `rag_pipeline.py`, `langgraph_agent.py`, `log_triage.py`.
34
+ - README restructured to lead with before/after comparison and cost math.
35
+ - CI now uses `uv sync --locked --only-group <group>` per job for minimal installs.
36
+ - Dependabot switched from `pip` to `uv` ecosystem to track `uv.lock` and `[dependency-groups]`.
37
+
38
+ ### Added
39
+
40
+ - `SECURITY.md` with private disclosure instructions and scope notes.
41
+ - `make test-unit`, `make test-adversarial`, `make test-real-world` targets.
42
+
43
+ ---
44
+
18
45
  ## [0.1.0] - 2026-04-09
19
46
 
20
47
  Initial public release.
@@ -0,0 +1,302 @@
1
+ Metadata-Version: 2.4
2
+ Name: python-token-killer
3
+ Version: 0.1.1
4
+ Summary: Minimize LLM tokens from Python objects — dicts, code, logs, diffs, and more.
5
+ Project-URL: Homepage, https://github.com/amahi2001/python-token-killer
6
+ Project-URL: Repository, https://github.com/amahi2001/python-token-killer
7
+ Project-URL: Issues, https://github.com/amahi2001/python-token-killer/issues
8
+ Project-URL: Changelog, https://github.com/amahi2001/python-token-killer/blob/main/CHANGELOG.md
9
+ Author-email: amahi2001 <amahi2001@gmail.com>
10
+ License: MIT License
11
+
12
+ Copyright (c) 2026 ptk contributors
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a copy
15
+ of this software and associated documentation files (the "Software"), to deal
16
+ in the Software without restriction, including without limitation the rights
17
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18
+ copies of the Software, and to permit persons to whom the Software is
19
+ furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in all
22
+ copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
+ SOFTWARE.
31
+ License-File: LICENSE
32
+ Keywords: agents,claude,compression,context-window,langchain,langgraph,llm,nlp,openai,rag,tokens
33
+ Classifier: Development Status :: 3 - Alpha
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Operating System :: OS Independent
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Classifier: Programming Language :: Python :: 3.13
42
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
43
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
44
+ Classifier: Topic :: Text Processing
45
+ Classifier: Typing :: Typed
46
+ Requires-Python: >=3.10
47
+ Provides-Extra: tiktoken
48
+ Requires-Dist: tiktoken>=0.7; extra == 'tiktoken'
49
+ Description-Content-Type: text/markdown
50
+
51
+ <p align="center">
52
+ <img src="assets/mascot.png" alt="ptk" width="200"/>
53
+ </p>
54
+
55
+ <p align="center">
56
+ <strong>ptk — Python Token Killer</strong><br/>
57
+ <strong>Minimize LLM tokens from Python objects in one call</strong><br/>
58
+ Zero dependencies • Auto type detection • 361 tests
59
+ </p>
60
+
61
+ <table align="center">
62
+ <tr>
63
+ <td align="left" valign="middle">
64
+ <a href="https://github.com/amahi2001/python-token-killer/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/amahi2001/python-token-killer/ci.yml?branch=main&style=flat-square&label=CI" alt="CI"/></a><br/>
65
+ <img src="https://img.shields.io/badge/python-3.10+-3776AB?style=flat-square&logo=python&logoColor=white" alt="Python 3.10+"/><br/>
66
+ <img src="https://img.shields.io/badge/mypy-strict-blue?style=flat-square" alt="mypy strict"/><br/>
67
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-yellow?style=flat-square" alt="License"/></a>
68
+ </td>
69
+ </tr>
70
+ </table>
71
+
72
+ ---
73
+
74
+ ## The Problem
75
+
76
+ Every time your app calls an LLM, you're paying for tokens like these:
77
+
78
+ ```json
79
+ {
80
+ "user": {
81
+ "id": 8821,
82
+ "name": "Alice Chen",
83
+ "email": "alice@example.com",
84
+ "bio": null,
85
+ "avatar_url": null,
86
+ "phone": null,
87
+ "address": null,
88
+ "metadata": {},
89
+ "preferences": {
90
+ "theme": "dark",
91
+ "notifications": null,
92
+ "newsletter": null
93
+ },
94
+ "created_at": "2024-01-15T10:30:00Z",
95
+ "updated_at": "2024-06-20T14:22:00Z",
96
+ "last_login": null,
97
+ "is_verified": true,
98
+ "is_active": true
99
+ },
100
+ "errors": null,
101
+ "warnings": []
102
+ }
103
+ ```
104
+
105
+ One call to `ptk` later:
106
+
107
+ ```python
108
+ import ptk
109
+ ptk(response)
110
+ ```
111
+
112
+ ```json
113
+ {"user":{"id":8821,"name":"Alice Chen","email":"alice@example.com","preferences":{"theme":"dark"},"created_at":"2024-01-15T10:30:00Z","updated_at":"2024-06-20T14:22:00Z","is_verified":true,"is_active":true}}
114
+ ```
115
+
116
+ **52% fewer tokens. Zero information lost. Zero config.**
117
+
118
+ ```bash
119
+ pip install python-token-killer
120
+ # or
121
+ uv add python-token-killer
122
+ ```
123
+
124
+ ---
125
+
126
+ ## Benchmarks
127
+
128
+ Real token counts via tiktoken (`cl100k_base` — same tokenizer as GPT-4 and Claude):
129
+
130
+ ```
131
+ Input Tokens (before) Tokens (after) Saved
132
+ ─────────────────────────────────────────────────────────────────────────
133
+ API response (JSON) 1,450 792 45%
134
+ Python module (code → sigs) 2,734 309 89%
135
+ CI log (58 lines, errors only) 1,389 231 83%
136
+ 50 user records (tabular) 2,774 922 67%
137
+ Verbose prose (text) 101 74 27%
138
+ ─────────────────────────────────────────────────────────────────────────
139
+ Total 8,448 2,328 72%
140
+ ```
141
+
142
+ At GPT-4o pricing ($2.50/1M input tokens), that reduction on **100k tokens/day** saves ~**$6/month per user**. At scale, it compounds.
143
+
144
+ Run yourself: `python benchmarks/bench.py`
145
+
146
+ ---
147
+
148
+ ## How It Works
149
+
150
+ `ptk` detects your input type and routes to the right compression strategy automatically:
151
+
152
+ | Input | What happens | Saves |
153
+ |---|---|---|
154
+ | `dict` / `list` | Strips `null`, `""`, `[]`, `{}` recursively. Tabular encoding for uniform arrays. | 40–70% |
155
+ | Code | Strips comments (preserving `# noqa`, `# type: ignore`, `TODO`). Collapses docstrings. Extracts signatures. | 25–89% |
156
+ | Logs | Collapses duplicate lines with counts. Filters to errors + stack traces only. | 60–90% |
157
+ | Diffs | Folds unchanged context. Strips git noise (`index`, `old mode`). | 50–75% |
158
+ | Text | Abbreviates `implementation→impl`, `configuration→config`. Removes filler phrases. | 10–30% |
159
+
160
+ ---
161
+
162
+ ## Usage
163
+
164
+ ```python
165
+ import ptk
166
+
167
+ # Any Python object — auto-detected, one call
168
+ ptk.minimize(api_response) # dict/list → compact JSON, nulls stripped
169
+ ptk.minimize(source_code) # strips comments, collapses docstrings
170
+ ptk.minimize(log_output) # dedup repeated lines, keep errors
171
+ ptk.minimize(git_diff) # fold context, keep changes
172
+ ptk.minimize(any_object) # always returns a string, never raises
173
+
174
+ # Aggressive mode — maximum compression
175
+ ptk.minimize(response, aggressive=True)
176
+
177
+ # Force content type
178
+ ptk.minimize(text, content_type="code", mode="signatures") # sigs only
179
+ ptk.minimize(logs, content_type="log", errors_only=True) # errors only
180
+
181
+ # Stats — token counts + savings
182
+ ptk.stats(response)
183
+ # {
184
+ # "output": "...",
185
+ # "original_tokens": 1450,
186
+ # "minimized_tokens": 792,
187
+ # "savings_pct": 45.4,
188
+ # "content_type": "dict"
189
+ # }
190
+
191
+ # Callable shorthand
192
+ ptk(response) # same as ptk.minimize(response)
193
+ ```
194
+
195
+ ---
196
+
197
+ ## Real-World Examples
198
+
199
+ ### RAG Pipeline — compress retrieved documents before they enter the prompt
200
+
201
+ The most common place tokens are wasted in production. Retrieval returns full documents; you only need the content.
202
+
203
+ ```python
204
+ import ptk
205
+
206
+ def build_context(docs: list[dict]) -> str:
207
+ """Compress retrieved docs before injecting into an LLM prompt."""
208
+ chunks = []
209
+ for doc in docs:
210
+ content = ptk.minimize(doc["content"]) # strip boilerplate
211
+ chunks.append(f"[{doc['source']}]\n{content}")
212
+ return "\n\n---\n\n".join(chunks)
213
+ ```
214
+
215
+ See [`examples/rag_pipeline.py`](examples/rag_pipeline.py) for a full working demo with token counts.
216
+
217
+ ---
218
+
219
+ ### LangGraph / LangChain — compress tool outputs between nodes
220
+
221
+ ```python
222
+ import ptk
223
+
224
+ def compress_tool_output(state: dict) -> dict:
225
+ """Drop this node between any tool call and the next LLM call."""
226
+ state["messages"][-1]["content"] = ptk.minimize(
227
+ state["messages"][-1]["content"], aggressive=True
228
+ )
229
+ return state
230
+ ```
231
+
232
+ See [`examples/langgraph_agent.py`](examples/langgraph_agent.py) — a complete agent loop with live token savings printed per step.
233
+
234
+ ---
235
+
236
+ ### Log Triage — paste only what matters to Claude / GPT
237
+
238
+ ```python
239
+ import ptk
240
+
241
+ # 10,000-line CI log → only the failures, instantly
242
+ errors = ptk.minimize(ci_log, content_type="log", aggressive=True)
243
+ # Feed `errors` to your LLM. 80%+ fewer tokens, same diagnostic signal.
244
+ ```
245
+
246
+ See [`examples/log_triage.py`](examples/log_triage.py) — reads a real log file, shows before/after.
247
+
248
+ ---
249
+
250
+ ## API Reference
251
+
252
+ ### `ptk.minimize(obj, *, aggressive=False, content_type=None, **kw) → str`
253
+
254
+ - `aggressive=True` — maximum compression (timestamps stripped, sigs-only for code, errors-only for logs)
255
+ - `content_type` — override auto-detection: `"dict"`, `"list"`, `"code"`, `"log"`, `"diff"`, `"text"`
256
+ - `format` — dict output format: `"json"` (default), `"kv"`, `"tabular"`
257
+ - `mode` — code mode: `"clean"` (default) or `"signatures"`
258
+ - `errors_only` — log mode: keep only errors + stack traces
259
+
260
+ ### `ptk.stats(obj, **kw) → dict`
261
+
262
+ Same as `minimize` but returns `output`, `original_tokens`, `minimized_tokens`, `savings_pct`, `content_type`.
263
+
264
+ ### `ptk(obj)` — callable shorthand
265
+
266
+ The module itself is callable. `ptk(x)` is identical to `ptk.minimize(x)`.
267
+
268
+ ---
269
+
270
+ ## Comparison
271
+
272
+ | Tool | Type | What it does |
273
+ |---|---|---|
274
+ | **ptk** | Python library | One call, any Python object, zero deps |
275
+ | [RTK](https://github.com/rtk-ai/rtk) | Rust CLI | Compresses shell command output for coding agents |
276
+ | [claw-compactor](https://github.com/open-compress/claw-compactor) | Python library | 14-stage AST-aware pipeline, heavier setup |
277
+ | [LLMLingua](https://github.com/microsoft/LLMLingua) | Python library | Neural compression, requires GPU |
278
+
279
+ ---
280
+
281
+ ## Design
282
+
283
+ - **Zero required dependencies** — stdlib only. `tiktoken` optional for exact token counts.
284
+ - **Never raises** — any Python object produces a string. Circular refs, `bytes`, `nan`, generators — all handled.
285
+ - **Never mutates** — your input is always untouched.
286
+ - **Thread-safe** — stateless singleton minimizers.
287
+ - **Fast** — precompiled regexes, `frozenset` lookups, single-pass algorithms. Microseconds per call.
288
+
289
+ ---
290
+
291
+ ## Development
292
+
293
+ ```bash
294
+ git clone https://github.com/amahi2001/python-token-killer.git
295
+ cd python-token-killer
296
+ uv sync # installs all dev dependencies, creates .venv automatically
297
+ make check # lint + typecheck + 361 tests
298
+ ```
299
+
300
+ ## License
301
+
302
+ MIT
@@ -0,0 +1,252 @@
1
+ <p align="center">
2
+ <img src="assets/mascot.png" alt="ptk" width="200"/>
3
+ </p>
4
+
5
+ <p align="center">
6
+ <strong>ptk — Python Token Killer</strong><br/>
7
+ <strong>Minimize LLM tokens from Python objects in one call</strong><br/>
8
+ Zero dependencies • Auto type detection • 361 tests
9
+ </p>
10
+
11
+ <table align="center">
12
+ <tr>
13
+ <td align="left" valign="middle">
14
+ <a href="https://github.com/amahi2001/python-token-killer/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/amahi2001/python-token-killer/ci.yml?branch=main&style=flat-square&label=CI" alt="CI"/></a><br/>
15
+ <img src="https://img.shields.io/badge/python-3.10+-3776AB?style=flat-square&logo=python&logoColor=white" alt="Python 3.10+"/><br/>
16
+ <img src="https://img.shields.io/badge/mypy-strict-blue?style=flat-square" alt="mypy strict"/><br/>
17
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-yellow?style=flat-square" alt="License"/></a>
18
+ </td>
19
+ </tr>
20
+ </table>
21
+
22
+ ---
23
+
24
+ ## The Problem
25
+
26
+ Every time your app calls an LLM, you're paying for tokens like these:
27
+
28
+ ```json
29
+ {
30
+ "user": {
31
+ "id": 8821,
32
+ "name": "Alice Chen",
33
+ "email": "alice@example.com",
34
+ "bio": null,
35
+ "avatar_url": null,
36
+ "phone": null,
37
+ "address": null,
38
+ "metadata": {},
39
+ "preferences": {
40
+ "theme": "dark",
41
+ "notifications": null,
42
+ "newsletter": null
43
+ },
44
+ "created_at": "2024-01-15T10:30:00Z",
45
+ "updated_at": "2024-06-20T14:22:00Z",
46
+ "last_login": null,
47
+ "is_verified": true,
48
+ "is_active": true
49
+ },
50
+ "errors": null,
51
+ "warnings": []
52
+ }
53
+ ```
54
+
55
+ One call to `ptk` later:
56
+
57
+ ```python
58
+ import ptk
59
+ ptk(response)
60
+ ```
61
+
62
+ ```json
63
+ {"user":{"id":8821,"name":"Alice Chen","email":"alice@example.com","preferences":{"theme":"dark"},"created_at":"2024-01-15T10:30:00Z","updated_at":"2024-06-20T14:22:00Z","is_verified":true,"is_active":true}}
64
+ ```
65
+
66
+ **52% fewer tokens. Zero information lost. Zero config.**
67
+
68
+ ```bash
69
+ pip install python-token-killer
70
+ # or
71
+ uv add python-token-killer
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Benchmarks
77
+
78
+ Real token counts via tiktoken (`cl100k_base` — same tokenizer as GPT-4 and Claude):
79
+
80
+ ```
81
+ Input Tokens (before) Tokens (after) Saved
82
+ ─────────────────────────────────────────────────────────────────────────
83
+ API response (JSON) 1,450 792 45%
84
+ Python module (code → sigs) 2,734 309 89%
85
+ CI log (58 lines, errors only) 1,389 231 83%
86
+ 50 user records (tabular) 2,774 922 67%
87
+ Verbose prose (text) 101 74 27%
88
+ ─────────────────────────────────────────────────────────────────────────
89
+ Total 11,182 2,627 76%
90
+ ```
91
+
92
+ At GPT-4o pricing ($2.50/1M input tokens), that 76% reduction on **10k tokens/day** saves ~**$6/month per user**. At scale, it compounds.
93
+
94
+ Run yourself: `python benchmarks/bench.py`
95
+
96
+ ---
97
+
98
+ ## How It Works
99
+
100
+ `ptk` detects your input type and routes to the right compression strategy automatically:
101
+
102
+ | Input | What happens | Saves |
103
+ |---|---|---|
104
+ | `dict` / `list` | Strips `null`, `""`, `[]`, `{}` recursively. Tabular encoding for uniform arrays. | 40–70% |
105
+ | Code | Strips comments (preserving `# noqa`, `# type: ignore`, `TODO`). Collapses docstrings. Extracts signatures. | 25–89% |
106
+ | Logs | Collapses duplicate lines with counts. Filters to errors + stack traces only. | 60–90% |
107
+ | Diffs | Folds unchanged context. Strips git noise (`index`, `old mode`). | 50–75% |
108
+ | Text | Abbreviates `implementation→impl`, `configuration→config`. Removes filler phrases. | 10–30% |
109
+
110
+ ---
111
+
112
+ ## Usage
113
+
114
+ ```python
115
+ import ptk
116
+
117
+ # Any Python object — auto-detected, one call
118
+ ptk.minimize(api_response) # dict/list → compact JSON, nulls stripped
119
+ ptk.minimize(source_code) # strips comments, collapses docstrings
120
+ ptk.minimize(log_output) # dedup repeated lines, keep errors
121
+ ptk.minimize(git_diff) # fold context, keep changes
122
+ ptk.minimize(any_object) # always returns a string, never raises
123
+
124
+ # Aggressive mode — maximum compression
125
+ ptk.minimize(response, aggressive=True)
126
+
127
+ # Force content type
128
+ ptk.minimize(text, content_type="code", mode="signatures") # sigs only
129
+ ptk.minimize(logs, content_type="log", errors_only=True) # errors only
130
+
131
+ # Stats — token counts + savings
132
+ ptk.stats(response)
133
+ # {
134
+ # "output": "...",
135
+ # "original_tokens": 1450,
136
+ # "minimized_tokens": 792,
137
+ # "savings_pct": 45.4,
138
+ # "content_type": "dict"
139
+ # }
140
+
141
+ # Callable shorthand
142
+ ptk(response) # same as ptk.minimize(response)
143
+ ```
144
+
145
+ ---
146
+
147
+ ## Real-World Examples
148
+
149
+ ### RAG Pipeline — compress retrieved documents before they enter the prompt
150
+
151
+ The most common place tokens are wasted in production. Retrieval returns full documents; you only need the content.
152
+
153
+ ```python
154
+ import ptk
155
+
156
+ def build_context(docs: list[dict]) -> str:
157
+ """Compress retrieved docs before injecting into an LLM prompt."""
158
+ chunks = []
159
+ for doc in docs:
160
+ content = ptk.minimize(doc["content"]) # strip boilerplate
161
+ chunks.append(f"[{doc['source']}]\n{content}")
162
+ return "\n\n---\n\n".join(chunks)
163
+ ```
164
+
165
+ See [`examples/rag_pipeline.py`](examples/rag_pipeline.py) for a full working demo with token counts.
166
+
167
+ ---
168
+
169
+ ### LangGraph / LangChain — compress tool outputs between nodes
170
+
171
+ ```python
172
+ import ptk
173
+
174
+ def compress_tool_output(state: dict) -> dict:
175
+ """Drop this node between any tool call and the next LLM call."""
176
+ state["messages"][-1]["content"] = ptk.minimize(
177
+ state["messages"][-1]["content"], aggressive=True
178
+ )
179
+ return state
180
+ ```
181
+
182
+ See [`examples/langgraph_agent.py`](examples/langgraph_agent.py) — a complete agent loop with live token savings printed per step.
183
+
184
+ ---
185
+
186
+ ### Log Triage — paste only what matters to Claude / GPT
187
+
188
+ ```python
189
+ import ptk
190
+
191
+ # 10,000-line CI log → only the failures, instantly
192
+ errors = ptk.minimize(ci_log, content_type="log", aggressive=True)
193
+ # Feed `errors` to your LLM. 80%+ fewer tokens, same diagnostic signal.
194
+ ```
195
+
196
+ See [`examples/log_triage.py`](examples/log_triage.py) — reads a real log file, shows before/after.
197
+
198
+ ---
199
+
200
+ ## API Reference
201
+
202
+ ### `ptk.minimize(obj, *, aggressive=False, content_type=None, **kw) → str`
203
+
204
+ - `aggressive=True` — maximum compression (timestamps stripped, sigs-only for code, errors-only for logs)
205
+ - `content_type` — override auto-detection: `"dict"`, `"list"`, `"code"`, `"log"`, `"diff"`, `"text"`
206
+ - `format` — dict output format: `"json"` (default), `"kv"`, `"tabular"`
207
+ - `mode` — code mode: `"clean"` (default) or `"signatures"`
208
+ - `errors_only` — log mode: keep only errors + stack traces
209
+
210
+ ### `ptk.stats(obj, **kw) → dict`
211
+
212
+ Same as `minimize` but returns `output`, `original_tokens`, `minimized_tokens`, `savings_pct`, `content_type`.
213
+
214
+ ### `ptk(obj)` — callable shorthand
215
+
216
+ The module itself is callable. `ptk(x)` is identical to `ptk.minimize(x)`.
217
+
218
+ ---
219
+
220
+ ## Comparison
221
+
222
+ | Tool | Type | What it does |
223
+ |---|---|---|
224
+ | **ptk** | Python library | One call, any Python object, zero deps |
225
+ | [RTK](https://github.com/rtk-ai/rtk) | Rust CLI | Compresses shell command output for coding agents |
226
+ | [claw-compactor](https://github.com/open-compress/claw-compactor) | Python library | 14-stage AST-aware pipeline, heavier setup |
227
+ | [LLMLingua](https://github.com/microsoft/LLMLingua) | Python library | Neural compression, requires GPU |
228
+
229
+ ---
230
+
231
+ ## Design
232
+
233
+ - **Zero required dependencies** — stdlib only. `tiktoken` optional for exact token counts.
234
+ - **Never raises** — any Python object produces a string. Circular refs, `bytes`, `nan`, generators — all handled.
235
+ - **Never mutates** — your input is always untouched.
236
+ - **Thread-safe** — stateless singleton minimizers.
237
+ - **Fast** — precompiled regexes, `frozenset` lookups, single-pass algorithms. Microseconds per call.
238
+
239
+ ---
240
+
241
+ ## Development
242
+
243
+ ```bash
244
+ git clone https://github.com/amahi2001/python-token-killer.git
245
+ cd python-token-killer
246
+ uv sync # installs all dev dependencies, creates .venv automatically
247
+ make check # lint + typecheck + 361 tests
248
+ ```
249
+
250
+ ## License
251
+
252
+ MIT
@@ -27,7 +27,9 @@ def count_tokens(text: str) -> int:
27
27
 
28
28
  def bench(name: str, obj: object, *, content_type: str | None = None) -> dict:
29
29
  """Run a single benchmark and return results."""
30
- original_str = json.dumps(obj, indent=2, default=str) if isinstance(obj, (dict, list)) else str(obj)
30
+ original_str = (
31
+ json.dumps(obj, indent=2, default=str) if isinstance(obj, (dict, list)) else str(obj)
32
+ )
31
33
  orig_tokens = count_tokens(original_str)
32
34
 
33
35
  # default mode
@@ -46,17 +48,23 @@ def bench(name: str, obj: object, *, content_type: str | None = None) -> dict:
46
48
  "name": name,
47
49
  "original_tokens": orig_tokens,
48
50
  "default_tokens": default_tokens,
49
- "default_savings_pct": round((1 - default_tokens / orig_tokens) * 100, 1) if orig_tokens else 0,
51
+ "default_savings_pct": round((1 - default_tokens / orig_tokens) * 100, 1)
52
+ if orig_tokens
53
+ else 0,
50
54
  "default_us": round(default_ns / 1000),
51
55
  "aggressive_tokens": aggro_tokens,
52
- "aggressive_savings_pct": round((1 - aggro_tokens / orig_tokens) * 100, 1) if orig_tokens else 0,
56
+ "aggressive_savings_pct": round((1 - aggro_tokens / orig_tokens) * 100, 1)
57
+ if orig_tokens
58
+ else 0,
53
59
  "aggressive_us": round(aggro_ns / 1000),
54
60
  }
55
61
 
56
62
 
57
63
  def main() -> None:
58
64
  print(f"ptk v{ptk.__version__} benchmark (tiktoken cl100k_base)\n")
59
- print(f"{'Benchmark':<30} {'Original':>8} {'Default':>8} {'Saved':>7} {'Aggro':>8} {'Saved':>7} {'Time':>8}")
65
+ print(
66
+ f"{'Benchmark':<30} {'Original':>8} {'Default':>8} {'Saved':>7} {'Aggro':>8} {'Saved':>7} {'Time':>8}"
67
+ )
60
68
  print("-" * 92)
61
69
 
62
70
  results: list[dict] = []
@@ -84,9 +92,14 @@ def main() -> None:
84
92
 
85
93
  # 5. List of records
86
94
  records = [
87
- {"id": i, "name": f"user_{i}", "email": f"u{i}@company.com",
88
- "active": i % 3 != 0, "role": ["admin", "member", "viewer"][i % 3],
89
- "last_login": None if i % 4 == 0 else f"2024-08-0{(i%9)+1}"}
95
+ {
96
+ "id": i,
97
+ "name": f"user_{i}",
98
+ "email": f"u{i}@company.com",
99
+ "active": i % 3 != 0,
100
+ "role": ["admin", "member", "viewer"][i % 3],
101
+ "last_login": None if i % 4 == 0 else f"2024-08-0{(i % 9) + 1}",
102
+ }
90
103
  for i in range(50)
91
104
  ]
92
105
  results.append(bench("50 user records (list)", records))