tokenimizer 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +626 -0
- package/dist/cli.js +30335 -0
- package/dist/registry/index.json +96 -0
- package/dist/registry/routing.json +43 -0
- package/dist/registry/skills/ask-less/aider.yml +4 -0
- package/dist/registry/skills/ask-less/claude-code.md +14 -0
- package/dist/registry/skills/ask-less/copilot.md +14 -0
- package/dist/registry/skills/ask-less/cursor.md +14 -0
- package/dist/registry/skills/ask-less/generic.md +14 -0
- package/dist/registry/skills/ask-less/windsurf.md +14 -0
- package/dist/registry/skills/caveman/aider.yml +5 -0
- package/dist/registry/skills/caveman/claude-code.md +11 -0
- package/dist/registry/skills/caveman/copilot.md +9 -0
- package/dist/registry/skills/caveman/cursor.md +11 -0
- package/dist/registry/skills/caveman/windsurf.md +9 -0
- package/dist/registry/skills/context-compressor/aider.yml +4 -0
- package/dist/registry/skills/context-compressor/claude-code.md +26 -0
- package/dist/registry/skills/context-compressor/generic.md +26 -0
- package/dist/registry/skills/cost-guardrails/aider.yml +4 -0
- package/dist/registry/skills/cost-guardrails/claude-code.md +29 -0
- package/dist/registry/skills/cost-guardrails/generic.md +23 -0
- package/dist/registry/skills/patch-only/aider.yml +4 -0
- package/dist/registry/skills/patch-only/claude-code.md +12 -0
- package/dist/registry/skills/patch-only/copilot.md +12 -0
- package/dist/registry/skills/patch-only/cursor.md +12 -0
- package/dist/registry/skills/patch-only/windsurf.md +12 -0
- package/dist/registry/skills/planner-executor/aider.yml +5 -0
- package/dist/registry/skills/planner-executor/claude-code.md +23 -0
- package/dist/registry/skills/planner-executor/copilot.md +22 -0
- package/dist/registry/skills/planner-executor/cursor.md +22 -0
- package/dist/registry/skills/planner-executor/generic.md +23 -0
- package/dist/registry/skills/planner-executor/windsurf.md +22 -0
- package/dist/registry/skills/repo-aware-context/aider.yml +5 -0
- package/dist/registry/skills/repo-aware-context/claude-code.md +23 -0
- package/dist/registry/skills/repo-aware-context/generic.md +21 -0
- package/package.json +68 -0
- package/scripts/prepare.mjs +12 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 ElOrlyman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="assets/logo.svg" alt="tokenimizer" width="280" />
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
# tokenimizer
|
|
6
|
+
|
|
7
|
+
The operating system for token-efficient AI workflows.
|
|
8
|
+
|
|
9
|
+
Install once per project. Every AI assistant you use — Claude Code, Copilot, Cursor, Windsurf,
|
|
10
|
+
Aider — reads the same instructions and immediately costs less to run.
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
npx tokenimizer init
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## How it works
|
|
19
|
+
|
|
20
|
+
tokenimizer operates across three complementary layers. You configure once; all three run
|
|
21
|
+
automatically from that point forward.
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
┌─────────────────────────────────────────────────────────────────────────────┐
|
|
25
|
+
│ YOUR PROJECT DIRECTORY │
|
|
26
|
+
│ │
|
|
27
|
+
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
|
28
|
+
│ │ LAYER 1 — SKILL FILES (always on, zero overhead) │ │
|
|
29
|
+
│ │ │ │
|
|
30
|
+
│ │ .claude/CLAUDE.md ← Claude Code reads this on start │ │
|
|
31
|
+
│ │ .cursorrules ← Cursor reads this on start │ │
|
|
32
|
+
│ │ .github/copilot-instructions.md ← Copilot reads this │ │
|
|
33
|
+
│ │ .windsurfrules ← Windsurf reads this on start │ │
|
|
34
|
+
│ │ .aider.conf.yml ← Aider reads this on start │ │
|
|
35
|
+
│ │ │ │
|
|
36
|
+
│ │ Each file contains fenced skill blocks installed by tokenimizer. │ │
|
|
37
|
+
│ │ The AI reads them every session. No process running. No cost. │ │
|
|
38
|
+
│ └─────────────────────────────────────────────────────────────────────┘ │
|
|
39
|
+
│ │
|
|
40
|
+
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
|
41
|
+
│ │ LAYER 2 — GIT HOOKS (event-driven, fires at git checkpoints) │ │
|
|
42
|
+
│ │ │ │
|
|
43
|
+
│ │ post-commit → regenerate context docs (project_summary, handoff)│ │
|
|
44
|
+
│ │ post-checkout → refresh handoff when you switch branches │ │
|
|
45
|
+
│ │ post-merge → rebuild repo memory index after a pull/merge │ │
|
|
46
|
+
│ │ pre-push → warn if index is stale (non-blocking) │ │
|
|
47
|
+
│ │ │ │
|
|
48
|
+
│ │ Hooks are non-blocking. They never fail a git operation. │ │
|
|
49
|
+
│ └─────────────────────────────────────────────────────────────────────┘ │
|
|
50
|
+
│ │
|
|
51
|
+
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
|
52
|
+
│ │ LAYER 3 — BACKGROUND WATCHER (optional, opt-in) │ │
|
|
53
|
+
│ │ │ │
|
|
54
|
+
│ │ tokenimizer watch → chokidar daemon watches src/** │ │
|
|
55
|
+
│ │ marks index stale when files change │ │
|
|
56
|
+
│ │ writes only to .tokenimizer/ — never │ │
|
|
57
|
+
│ │ touches your source files │ │
|
|
58
|
+
│ └─────────────────────────────────────────────────────────────────────┘ │
|
|
59
|
+
│ │
|
|
60
|
+
│ .tokenimizer/ │
|
|
61
|
+
│ ├── context/ ← lifecycle docs (user-maintained, commit these) │
|
|
62
|
+
│ │ ├── project_summary.md │
|
|
63
|
+
│ │ ├── architecture.md │
|
|
64
|
+
│ │ ├── session_summary.md │
|
|
65
|
+
│ │ ├── progress.md │
|
|
66
|
+
│ │ ├── handoff.md │
|
|
67
|
+
│ │ └── current_task.md │
|
|
68
|
+
│ ├── cache/ ← generated index (gitignored) │
|
|
69
|
+
│ │ ├── symbols.json │
|
|
70
|
+
│ │ ├── api-map.json │
|
|
71
|
+
│ │ ├── dependency-graph.json │
|
|
72
|
+
│ │ ├── architecture.json │
|
|
73
|
+
│ │ ├── conventions.md │
|
|
74
|
+
│ │ ├── glossary.md │
|
|
75
|
+
│ │ └── wiki.md │
|
|
76
|
+
│ ├── snapshots/ ← session checkpoints (gitignored) │
|
|
77
|
+
│ └── hooks/ ← hook scripts called by .git/hooks/ (gitignored) │
|
|
78
|
+
└─────────────────────────────────────────────────────────────────────────────┘
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Installation
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
# Interactive setup — detects all your AI assistants and installs skills
|
|
87
|
+
npx tokenimizer init
|
|
88
|
+
|
|
89
|
+
# Or install globally
|
|
90
|
+
npm install -g tokenimizer
|
|
91
|
+
tokenimizer init
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## The 7 token-saving skills
|
|
97
|
+
|
|
98
|
+
Skills are instruction blocks installed into your AI assistant config files. The AI reads them
|
|
99
|
+
on every session start. You install once; the savings apply automatically forever.
|
|
100
|
+
|
|
101
|
+
### Recommended — install these first
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
#### Caveman Mode `–65% output tokens`
|
|
106
|
+
|
|
107
|
+
The single highest-impact skill. Instructs the AI to cut all verbosity: no preambles, no
|
|
108
|
+
"Great question!", no trailing summaries of what it just did. Technical accuracy is
|
|
109
|
+
preserved — only padding is removed.
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
Before (caveman off):
|
|
113
|
+
"Great catch! Looking at auth.ts, I can see that on line 42 there is a
|
|
114
|
+
missing await keyword before the verifyToken() function call, which would
|
|
115
|
+
cause a Promise to be returned instead of the resolved value..."
|
|
116
|
+
|
|
117
|
+
After (caveman on):
|
|
118
|
+
"Bug in auth.ts:42 — missing await on verifyToken()"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
A typical code session generates roughly 2–3x fewer output tokens with this skill active.
|
|
122
|
+
|
|
123
|
+
Activate with: `/caveman` or "caveman mode"
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
#### Patch-Only Coding `–80% output tokens on edits`
|
|
128
|
+
|
|
129
|
+
Instructs the AI to output only the changed lines — never rewrite entire files. Uses unified
|
|
130
|
+
diff format, function-level replacements, or line-range patches with context markers.
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
Before (patch-only off):
|
|
134
|
+
[entire 300-line file repeated with one line changed]
|
|
135
|
+
|
|
136
|
+
After (patch-only on):
|
|
137
|
+
--- a/src/auth.ts
|
|
138
|
+
+++ b/src/auth.ts
|
|
139
|
+
@@ -40,7 +40,7 @@
|
|
140
|
+
const token = req.headers.authorization;
|
|
141
|
+
- const user = verifyToken(token);
|
|
142
|
+
+ const user = await verifyToken(token);
|
|
143
|
+
return user;
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
If the whole file must be rewritten, the AI states why first.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
#### Context Compressor `–70% input tokens on session restart`
|
|
151
|
+
|
|
152
|
+
Instructs the AI to compress long conversations into a structured ~500-token re-entry block
|
|
153
|
+
when asked. Paste it as the first message of a new session to restore full context at a
|
|
154
|
+
fraction of the cost.
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
/compress → produces:
|
|
158
|
+
|
|
159
|
+
## Session Snapshot — 2026-06-28
|
|
160
|
+
|
|
161
|
+
**Project:** tokenimizer v0.2.0 — token-efficient AI workflow CLI
|
|
162
|
+
**Stack:** TypeScript, Node.js, esbuild, commander, vitest
|
|
163
|
+
**What was decided:**
|
|
164
|
+
- Three-layer architecture: skill files + git hooks + background watcher
|
|
165
|
+
- No heavy deps: readline for UI, fast-glob + regex for indexing
|
|
166
|
+
**Current state:** v2 complete — 7 skills, 14 commands, 57 tests passing
|
|
167
|
+
**Next step:** Publish to npm
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Pair with `tokenimizer checkpoint` to save snapshots to `.tokenimizer/snapshots/`.
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
#### Repo-Aware Minimal Context `–40% input tokens`
|
|
175
|
+
|
|
176
|
+
Instructs the AI to load context progressively instead of pulling in entire repositories
|
|
177
|
+
upfront. The loading order is enforced:
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
1. project_summary.md (~500 tokens)
|
|
181
|
+
2. Relevant module (~200 tokens)
|
|
182
|
+
3. Specific files (only what's needed)
|
|
183
|
+
4. Specific functions (patch-level, not full file)
|
|
184
|
+
5. Solve
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
The AI must ask explicitly for more context if needed rather than pre-loading everything.
|
|
188
|
+
The skill also excludes node_modules, lockfiles, build outputs, and log files by default.
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
### Optional — situational
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
#### Ask-Less Mode `eliminates 1–3 roundtrips per task`
|
|
197
|
+
|
|
198
|
+
Instructs the AI to make a reasonable assumption, state it in one line, and proceed — instead
|
|
199
|
+
of asking clarifying questions before acting.
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
Format: "Assuming [X]. If wrong, correct me."
|
|
203
|
+
|
|
204
|
+
Only stops to ask when ambiguity would cause irreversible harm:
|
|
205
|
+
- Deleting files not mentioned
|
|
206
|
+
- Force-pushing git history
|
|
207
|
+
- Changing a public API contract
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
#### Planner / Executor Split `–20% input / –30% output`
|
|
213
|
+
|
|
214
|
+
Two-phase workflow that separates reasoning from mechanical execution:
|
|
215
|
+
|
|
216
|
+
```
|
|
217
|
+
PLAN phase → reason about the problem, produce a structured plan, write no code
|
|
218
|
+
EXECUTE phase → follow the plan mechanically, output only diffs and confirmations
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
This routes expensive reasoning to capable models and mechanical implementation to cheaper
|
|
222
|
+
ones. The plan phase output is compact and can be handed off between model tiers.
|
|
223
|
+
|
|
224
|
+
Trigger with: `/plan [task]` then `/execute`
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
#### Cost Guardrails `prevents runaway large operations`
|
|
229
|
+
|
|
230
|
+
Before executing any operation that touches more than 5 files or generates more than ~500
|
|
231
|
+
lines, the AI produces an estimate and asks for confirmation:
|
|
232
|
+
|
|
233
|
+
```
|
|
234
|
+
Cost estimate:
|
|
235
|
+
Scope: 8 files, ~420 lines affected
|
|
236
|
+
Input tokens: ~8,000–12,000 (estimate)
|
|
237
|
+
Output tokens: ~3,000–5,000 (estimate)
|
|
238
|
+
Risk: medium — modifies auth module
|
|
239
|
+
|
|
240
|
+
Proceed?
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Estimates are always labeled as estimates. Say "yes" or "just do it" to proceed.
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
## Token impact summary
|
|
248
|
+
|
|
249
|
+
| Skill | Input tokens | Output tokens | When it fires |
|
|
250
|
+
|---|---|---|---|
|
|
251
|
+
| Caveman Mode | — | **–65%** | Every response |
|
|
252
|
+
| Patch-Only Coding | — | **–80%** | Every code edit |
|
|
253
|
+
| Context Compressor | **–70%** | — | Session restart |
|
|
254
|
+
| Repo-Aware Context | **–40%** | — | Every new session |
|
|
255
|
+
| Ask-Less Mode | –15% | –10% | Before each task |
|
|
256
|
+
| Planner/Executor | –20% | –30% | Complex tasks |
|
|
257
|
+
| Cost Guardrails | –25% | –20% | Large operations |
|
|
258
|
+
|
|
259
|
+
All figures are estimates. Actual savings depend on your workflow, model, and task type.
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## The mechanics — why these numbers matter
|
|
264
|
+
|
|
265
|
+
### Output tokens cost 4–5x more than input tokens
|
|
266
|
+
|
|
267
|
+
Every major AI provider charges significantly more for generated output than for input context.
|
|
268
|
+
At current published rates:
|
|
269
|
+
|
|
270
|
+
| Provider / Model | Input | Output | Ratio |
|
|
271
|
+
|---|---|---|---|
|
|
272
|
+
| Claude Sonnet 4.6 | $3 / MTok | $15 / MTok | **5×** |
|
|
273
|
+
| Claude Opus 4.8 | $15 / MTok | $75 / MTok | **5×** |
|
|
274
|
+
| Claude Haiku 4.5 | $0.80 / MTok | $4 / MTok | **5×** |
|
|
275
|
+
| GPT-4o | $2.50 / MTok | $10 / MTok | **4×** |
|
|
276
|
+
| GPT-4o-mini | $0.15 / MTok | $0.60 / MTok | **4×** |
|
|
277
|
+
|
|
278
|
+
This means **output suppression skills (Caveman Mode, Patch-Only) have the highest dollar
|
|
279
|
+
return per token saved** — roughly 4–5× better ROI than the same reduction in input tokens.
|
|
280
|
+
|
|
281
|
+
### Where each skill attacks the bill
|
|
282
|
+
|
|
283
|
+
**Output token suppression (highest ROI):**
|
|
284
|
+
|
|
285
|
+
- **Caveman Mode** targets conversational filler — preambles, "Great question!", trailing
|
|
286
|
+
summaries, explanatory prose. These add zero technical value but can double response
|
|
287
|
+
length in a typical session. Removing them cuts output 50–70% while preserving precision.
|
|
288
|
+
|
|
289
|
+
- **Patch-Only Coding** targets code output. Without it, an AI editing three lines of a
|
|
290
|
+
300-line file often reprints the whole file "for context." With it, output is a function
|
|
291
|
+
replacement or a unified diff — the same information in ~5% of the tokens.
|
|
292
|
+
|
|
293
|
+
**Input token compression (second priority):**
|
|
294
|
+
|
|
295
|
+
- **Context Compressor** prevents you from carrying 50,000+ tokens of raw chat history into
|
|
296
|
+
a new session. A compressed snapshot restores full context at 300–600 tokens. Starting
|
|
297
|
+
fresh from a snapshot instead of a continued conversation saves ~70% of input tokens on
|
|
298
|
+
session restart.
|
|
299
|
+
|
|
300
|
+
- **Repo-Aware Minimal Context** blocks the aggressive context loading common in IDE agents
|
|
301
|
+
(reading lockfiles, entire directory trees, build artifacts). Enforcing progressive loading
|
|
302
|
+
(project summary → module → file → function) keeps the context window lean from the start.
|
|
303
|
+
|
|
304
|
+
---
|
|
305
|
+
|
|
306
|
+
## Trade-offs and when it is NOT worth it
|
|
307
|
+
|
|
308
|
+
tokenimizer has a real cost: every installed skill injects a static instruction block into
|
|
309
|
+
your AI assistant's config file. Installing all 7 skills adds approximately **800–1,000
|
|
310
|
+
tokens of permanent system context** to every session (measured from the actual skill file
|
|
311
|
+
sizes in `src/registry/skills/`).
|
|
312
|
+
|
|
313
|
+
If you run short, one-off sessions — ask two questions, close the tab — the instruction
|
|
314
|
+
overhead can exceed what you save on output. The break-even point is roughly:
|
|
315
|
+
|
|
316
|
+
> **Break-even:** cost of output tokens saved per session > cost of ~1,000 input tokens of overhead
|
|
317
|
+
|
|
318
|
+
For long coding sessions this break-even is crossed within minutes. For a single quick
|
|
319
|
+
lookup, it may never be crossed.
|
|
320
|
+
|
|
321
|
+
### Known caveats
|
|
322
|
+
|
|
323
|
+
**Skill toggles are more efficient than always-on**
|
|
324
|
+
|
|
325
|
+
Caveman Mode is blunt by design. It is the right default during intense coding and
|
|
326
|
+
refactoring, but it reduces explanatory quality for learning tasks or architectural
|
|
327
|
+
discussions where you want the AI to reason aloud. Use the toggle:
|
|
328
|
+
|
|
329
|
+
```
|
|
330
|
+
/caveman ← activate for focused implementation work
|
|
331
|
+
/normal ← deactivate when you need detailed explanations
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
**Patch-Only and native IDE tools can interact**
|
|
335
|
+
|
|
336
|
+
Claude Code and similar tools have built-in targeted edit primitives (they write specific
|
|
337
|
+
blocks, not whole files). The Patch-Only skill is compatible with these — it allows
|
|
338
|
+
function-level replacements as the primary format, not only strict unified diffs — but
|
|
339
|
+
on complex multi-file tasks the instruction overhead occasionally causes the model to
|
|
340
|
+
over-think the output format before acting. If you find this happening, uninstall just
|
|
341
|
+
the patch-only skill: `tokenimizer uninstall patch-only`.
|
|
342
|
+
|
|
343
|
+
**Context docs require maintenance**
|
|
344
|
+
|
|
345
|
+
`session_summary.md`, `progress.md`, and `current_task.md` are templates. If left
|
|
346
|
+
unfilled or stale they burn input tokens for no benefit. The git hooks (`post-commit`,
|
|
347
|
+
`post-checkout`) refresh the auto-generated docs automatically, but the user-maintained
|
|
348
|
+
files are yours to keep current.
|
|
349
|
+
|
|
350
|
+
---
|
|
351
|
+
|
|
352
|
+
## Is it worth it for your workflow?
|
|
353
|
+
|
|
354
|
+
| Profile | Worth it? | Reason |
|
|
355
|
+
|---|---|---|
|
|
356
|
+
| Heavy IDE agent usage (Claude Code, Cursor, Windsurf) | **Yes — high ROI** | Output tokens accumulate fast in long agentic sessions; caveman + patch-only cut the majority |
|
|
357
|
+
| Large codebase refactoring | **Yes — high ROI** | Progressive context loading prevents context window exhaustion; patch-only keeps diffs small |
|
|
358
|
+
| API cost-conscious teams | **Yes — high ROI** | Direct reduction in billable output tokens across Anthropic / OpenAI / Gemini keys |
|
|
359
|
+
| Learning new concepts or frameworks | **Selective** | Install context-compressor and repo-aware-context; skip caveman (you want the explanations) |
|
|
360
|
+
| Casual or beginner use | **Low ROI** | Terse caveman output reduces learning quality; short sessions don't amortize the overhead |
|
|
361
|
+
| Short one-off scripting | **Low ROI** | Static instruction overhead (~1,000 tokens) outweighs output savings on brief tasks |
|
|
362
|
+
|
|
363
|
+
**Recommended minimum setup** for most developers: install `caveman` and `context-compressor`,
|
|
364
|
+
skip the rest until you feel the pain they solve. Add skills one at a time so you can
|
|
365
|
+
measure their individual effect.
|
|
366
|
+
|
|
367
|
+
---
|
|
368
|
+
|
|
369
|
+
## Context lifecycle
|
|
370
|
+
|
|
371
|
+
`tokenimizer context init` generates six structured documents that tell the AI about your
|
|
372
|
+
project without loading the entire codebase:
|
|
373
|
+
|
|
374
|
+
| Document | Purpose | Target size |
|
|
375
|
+
|---|---|---|
|
|
376
|
+
| `project_summary.md` | Name, purpose, structure, key scripts | ~500 tokens |
|
|
377
|
+
| `architecture.md` | Source layout, config files present | ~1,000 tokens |
|
|
378
|
+
| `session_summary.md` | What you worked on this session | ~300 tokens |
|
|
379
|
+
| `progress.md` | Task list with completion status | ~400 tokens |
|
|
380
|
+
| `handoff.md` | Compact block for session restart | ~600 tokens |
|
|
381
|
+
| `current_task.md` | What you are working on right now | ~200 tokens |
|
|
382
|
+
|
|
383
|
+
Auto-generated docs (`project_summary`, `architecture`, `handoff`) are refreshed by
|
|
384
|
+
`tokenimizer context refresh` and by the `post-commit` git hook. User-maintained docs
|
|
385
|
+
(`session_summary`, `progress`, `current_task`) are templates — you fill them in.
|
|
386
|
+
|
|
387
|
+
These files live in `.tokenimizer/context/` and should be committed to your repo.
|
|
388
|
+
|
|
389
|
+
---
|
|
390
|
+
|
|
391
|
+
## Repository memory index
|
|
392
|
+
|
|
393
|
+
`tokenimizer index` scans your project with regex-based analysis (no AST parser required)
|
|
394
|
+
and produces LLM-optimized artifacts in `.tokenimizer/cache/`:
|
|
395
|
+
|
|
396
|
+
| Artifact | Contents |
|
|
397
|
+
|---|---|
|
|
398
|
+
| `symbols.json` | Every exported function, class, interface, type, and const with file:line |
|
|
399
|
+
| `api-map.json` | Public API surface — function signatures only |
|
|
400
|
+
| `dependency-graph.json` | Import map — which files import which |
|
|
401
|
+
| `architecture.json` | Folder structure with file counts and inferred module purposes |
|
|
402
|
+
| `conventions.md` | TypeScript strict flags, package manager, coding conventions |
|
|
403
|
+
| `glossary.md` | Domain terms extracted from README and docs |
|
|
404
|
+
| `wiki.md` | Auto-generated project wiki from README and docs |
|
|
405
|
+
|
|
406
|
+
The cache is gitignored. Rebuild with `tokenimizer index`. The background watcher
|
|
407
|
+
(`tokenimizer watch`) marks it stale automatically when source files change.
|
|
408
|
+
|
|
409
|
+
---
|
|
410
|
+
|
|
411
|
+
## Session compression workflow
|
|
412
|
+
|
|
413
|
+
```
|
|
414
|
+
During a long session:
|
|
415
|
+
|
|
416
|
+
User: /compress
|
|
417
|
+
AI: [produces Session Snapshot block — ~400 tokens]
|
|
418
|
+
|
|
419
|
+
tokenimizer checkpoint my-feature ← saves snapshot to .tokenimizer/snapshots/
|
|
420
|
+
|
|
421
|
+
Starting a new session:
|
|
422
|
+
|
|
423
|
+
tokenimizer handoff --stdout | pbcopy ← copy to clipboard (macOS; use xclip or clip elsewhere)
|
|
424
|
+
[paste as first message] ← AI has full context at ~400 tokens
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
Commands:
|
|
428
|
+
|
|
429
|
+
```bash
|
|
430
|
+
tokenimizer compress # print compressed block to terminal
|
|
431
|
+
tokenimizer checkpoint [label] # save snapshot to .tokenimizer/snapshots/
|
|
432
|
+
tokenimizer checkpoint --list # list all saved snapshots
|
|
433
|
+
tokenimizer handoff # print handoff block (optimized for pasting)
|
|
434
|
+
tokenimizer handoff --stdout # stdout only — pipe-friendly
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
---
|
|
438
|
+
|
|
439
|
+
## Model routing
|
|
440
|
+
|
|
441
|
+
`tokenimizer recommend-model` classifies a task description by complexity and
|
|
442
|
+
recommends the appropriate model tier for cost-efficiency:
|
|
443
|
+
|
|
444
|
+
```bash
|
|
445
|
+
tokenimizer recommend-model "fix typo in comment"
|
|
446
|
+
# Complexity: Trivial edit
|
|
447
|
+
# Recommendation: smallest available model / autocomplete
|
|
448
|
+
# Examples: claude-haiku-4-5, gpt-4o-mini, copilot inline
|
|
449
|
+
|
|
450
|
+
tokenimizer recommend-model "architect the new payment system"
|
|
451
|
+
# Complexity: Deep reasoning
|
|
452
|
+
# Recommendation: highest reasoning model available
|
|
453
|
+
# Examples: claude-opus-4-8, o3, claude-fable-5
|
|
454
|
+
|
|
455
|
+
tokenimizer recommend-model --list # show all 5 tiers
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
Routing rules are stored in `src/registry/routing.json` (bundled) and can be overridden
|
|
459
|
+
locally in `.tokenimizer/routing.json`. The config is community-maintained — model
|
|
460
|
+
capabilities and pricing change frequently enough that hardcoded rules become wrong.
|
|
461
|
+
|
|
462
|
+
---
|
|
463
|
+
|
|
464
|
+
## Full command reference
|
|
465
|
+
|
|
466
|
+
```bash
|
|
467
|
+
# Layer 1 — Skills
|
|
468
|
+
tokenimizer init # detect assistants + guided install wizard
|
|
469
|
+
tokenimizer install <skill-id> # install a skill to all detected assistants
|
|
470
|
+
tokenimizer uninstall <skill-id> # remove a skill cleanly
|
|
471
|
+
tokenimizer restore # rollback all changes to pre-install state
|
|
472
|
+
tokenimizer list # show skills + install status per assistant
|
|
473
|
+
tokenimizer doctor # health check — verify skills are intact
|
|
474
|
+
|
|
475
|
+
# Layer 2 — Git hooks
|
|
476
|
+
tokenimizer hooks install # write hooks to .git/hooks/
|
|
477
|
+
tokenimizer hooks uninstall # remove tokenimizer hooks only
|
|
478
|
+
tokenimizer hooks status # show which hooks are active
|
|
479
|
+
|
|
480
|
+
# Context lifecycle
|
|
481
|
+
tokenimizer context init # generate all 6 context docs
|
|
482
|
+
tokenimizer context refresh # regenerate auto-detected docs only
|
|
483
|
+
tokenimizer context status # show docs with token size estimates
|
|
484
|
+
|
|
485
|
+
# Repository index
|
|
486
|
+
tokenimizer index # build full repo memory cache
|
|
487
|
+
tokenimizer index --stale # show files changed since last index
|
|
488
|
+
|
|
489
|
+
# Session compression
|
|
490
|
+
tokenimizer compress # print compressed context block
|
|
491
|
+
tokenimizer checkpoint [label] # save named snapshot
|
|
492
|
+
tokenimizer checkpoint --list # list saved snapshots
|
|
493
|
+
tokenimizer handoff [--stdout] # generate session handoff block
|
|
494
|
+
|
|
495
|
+
# Model routing
|
|
496
|
+
tokenimizer recommend-model <task> # recommend model tier for a task
|
|
497
|
+
tokenimizer recommend-model --list # list all tiers
|
|
498
|
+
|
|
499
|
+
# Background watcher (Layer 3)
|
|
500
|
+
tokenimizer watch # start background watcher
|
|
501
|
+
tokenimizer watch --stop # stop watcher
|
|
502
|
+
tokenimizer watch --status # show watcher state
|
|
503
|
+
tokenimizer watch --stale # list files changed since last index
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
### Universal flags
|
|
507
|
+
|
|
508
|
+
```bash
|
|
509
|
+
--dry-run # preview changes, write nothing
|
|
510
|
+
--json # machine-readable JSON output (data → stdout, chrome suppressed)
|
|
511
|
+
--no-color # disable terminal colors
|
|
512
|
+
--no-emoji # disable emoji icons (falls back to ASCII)
|
|
513
|
+
```
|
|
514
|
+
|
|
515
|
+
---
|
|
516
|
+
|
|
517
|
+
## How skills are installed
|
|
518
|
+
|
|
519
|
+
tokenimizer uses section-fenced installs. It appends a marked block to each assistant's
|
|
520
|
+
config file and never overwrites your existing content:
|
|
521
|
+
|
|
522
|
+
```
|
|
523
|
+
<!-- tokenimizer:skill:caveman:begin -->
|
|
524
|
+
## Caveman Mode
|
|
525
|
+
|
|
526
|
+
Respond compressed. No preamble, no "Great question!", no trailing summary...
|
|
527
|
+
<!-- tokenimizer:skill:caveman:end -->
|
|
528
|
+
```
|
|
529
|
+
|
|
530
|
+
- **Install:** appends the block if not present
|
|
531
|
+
- **Uninstall:** removes only the fenced block, leaves everything else intact
|
|
532
|
+
- **Update:** replaces the block in-place
|
|
533
|
+
- **Conflict detection:** `tokenimizer doctor` checks installed skills against their expected content
|
|
534
|
+
|
|
535
|
+
Before any install, tokenimizer backs up the target file to `.tokenimizer/backups/`.
|
|
536
|
+
`tokenimizer restore` reverses everything.
|
|
537
|
+
|
|
538
|
+
---
|
|
539
|
+
|
|
540
|
+
## Supported AI assistants
|
|
541
|
+
|
|
542
|
+
| Assistant | Config file | Detected by |
|
|
543
|
+
|---|---|---|
|
|
544
|
+
| Claude Code | `.claude/CLAUDE.md` | `.claude/` directory or `claude` in PATH |
|
|
545
|
+
| Cursor | `.cursorrules` | `.cursor/` directory or `cursor` in PATH |
|
|
546
|
+
| GitHub Copilot | `.github/copilot-instructions.md` | `.github/copilot-instructions.md` or `gh` extension |
|
|
547
|
+
| Windsurf | `.windsurfrules` | `.windsurfrules` or `windsurf` in PATH |
|
|
548
|
+
| Aider | `.aider.conf.yml` | `.aider.conf.yml` or `aider` in PATH |
|
|
549
|
+
|
|
550
|
+
Each assistant gets a tailored version of each skill where one exists. Skills that
|
|
551
|
+
don't have a per-assistant file fall back to `generic.md`.
|
|
552
|
+
|
|
553
|
+
---
|
|
554
|
+
|
|
555
|
+
## Project layout
|
|
556
|
+
|
|
557
|
+
```
|
|
558
|
+
src/
|
|
559
|
+
├── cli.ts # entry point + command registration
|
|
560
|
+
├── commands/ # one file per command
|
|
561
|
+
│ ├── init.ts # 9-step interactive wizard
|
|
562
|
+
│ ├── install.ts
|
|
563
|
+
│ ├── context.ts # context init / refresh / status
|
|
564
|
+
│ ├── compress.ts # compress / checkpoint / handoff
|
|
565
|
+
│ ├── index-cmd.ts # index + stale check
|
|
566
|
+
│ ├── recommend-model.ts
|
|
567
|
+
│ └── watch.ts
|
|
568
|
+
├── context/ # v2 — context intelligence
|
|
569
|
+
│ ├── lifecycle.ts # generate/refresh context docs
|
|
570
|
+
│ ├── indexer.ts # repo memory builder (regex, no AST)
|
|
571
|
+
│ ├── compressor.ts # session compression + checkpoints
|
|
572
|
+
│ └── tokenizer.ts # character-based token estimation
|
|
573
|
+
├── core/
|
|
574
|
+
│ ├── detector.ts # AI assistant detection
|
|
575
|
+
│ ├── installer.ts # section-fenced skill install/uninstall
|
|
576
|
+
│ ├── backup.ts # backup + restore
|
|
577
|
+
│ ├── registry.ts # skill registry loader
|
|
578
|
+
│ ├── hooks/
|
|
579
|
+
│ │ ├── manager.ts # git hook file management
|
|
580
|
+
│ │ └── scripts.ts # hook script generation
|
|
581
|
+
│ └── watcher/
|
|
582
|
+
│ └── index.ts # chokidar background daemon
|
|
583
|
+
├── registry/
|
|
584
|
+
│ ├── index.json # skill manifest
|
|
585
|
+
│ ├── routing.json # model routing tiers
|
|
586
|
+
│ └── skills/
|
|
587
|
+
│ ├── caveman/ # claude-code.md, cursor.md, copilot.md, windsurf.md, aider.yml
|
|
588
|
+
│ ├── patch-only/
|
|
589
|
+
│ ├── ask-less/
|
|
590
|
+
│ ├── planner-executor/
|
|
591
|
+
│ ├── context-compressor/
|
|
592
|
+
│ ├── repo-aware-context/
|
|
593
|
+
│ └── cost-guardrails/
|
|
594
|
+
├── ui/
|
|
595
|
+
│ ├── theme.ts # colors + Windows emoji detection
|
|
596
|
+
│ ├── io.ts # stderr/stdout discipline + JSON mode
|
|
597
|
+
│ ├── render.ts # tables + welcome banner
|
|
598
|
+
│ └── checkbox.ts # interactive multi-select (readline only)
|
|
599
|
+
└── tests/
|
|
600
|
+
├── tokenizer.test.ts
|
|
601
|
+
├── lifecycle.test.ts
|
|
602
|
+
├── compressor.test.ts
|
|
603
|
+
├── indexer.test.ts
|
|
604
|
+
└── hooks.test.ts
|
|
605
|
+
```
|
|
606
|
+
|
|
607
|
+
---
|
|
608
|
+
|
|
609
|
+
## Development
|
|
610
|
+
|
|
611
|
+
```bash
|
|
612
|
+
npm run build # esbuild bundle → dist/cli.js
|
|
613
|
+
npm run build:watch # rebuild on change
|
|
614
|
+
npm test # vitest (57 tests)
|
|
615
|
+
node dist/cli.js # run local build
|
|
616
|
+
```
|
|
617
|
+
|
|
618
|
+
**Stack:** TypeScript, esbuild (single-file CJS bundle), commander, cli-table3, fast-glob,
|
|
619
|
+
picocolors, chokidar, zod. No inquirer, no chalk, no ora — all UI is built from Node's
|
|
620
|
+
built-in `readline`.
|
|
621
|
+
|
|
622
|
+
---
|
|
623
|
+
|
|
624
|
+
## License
|
|
625
|
+
|
|
626
|
+
MIT
|