@drewpayment/mink 0.12.0 → 0.13.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dashboard/out/404.html +1 -1
  2. package/dashboard/out/action-log.html +1 -1
  3. package/dashboard/out/action-log.txt +1 -1
  4. package/dashboard/out/activity.html +1 -1
  5. package/dashboard/out/activity.txt +1 -1
  6. package/dashboard/out/bugs.html +1 -1
  7. package/dashboard/out/bugs.txt +1 -1
  8. package/dashboard/out/capture.html +1 -1
  9. package/dashboard/out/capture.txt +1 -1
  10. package/dashboard/out/config.html +1 -1
  11. package/dashboard/out/config.txt +1 -1
  12. package/dashboard/out/daemon.html +1 -1
  13. package/dashboard/out/daemon.txt +1 -1
  14. package/dashboard/out/design.html +1 -1
  15. package/dashboard/out/design.txt +1 -1
  16. package/dashboard/out/discord.html +1 -1
  17. package/dashboard/out/discord.txt +1 -1
  18. package/dashboard/out/file-index.html +1 -1
  19. package/dashboard/out/file-index.txt +1 -1
  20. package/dashboard/out/index.html +1 -1
  21. package/dashboard/out/index.txt +1 -1
  22. package/dashboard/out/insights.html +1 -1
  23. package/dashboard/out/insights.txt +1 -1
  24. package/dashboard/out/learning.html +1 -1
  25. package/dashboard/out/learning.txt +1 -1
  26. package/dashboard/out/overview.html +1 -1
  27. package/dashboard/out/overview.txt +1 -1
  28. package/dashboard/out/scheduler.html +1 -1
  29. package/dashboard/out/scheduler.txt +1 -1
  30. package/dashboard/out/sync.html +1 -1
  31. package/dashboard/out/sync.txt +1 -1
  32. package/dashboard/out/tokens.html +1 -1
  33. package/dashboard/out/tokens.txt +1 -1
  34. package/dashboard/out/waste.html +1 -1
  35. package/dashboard/out/waste.txt +1 -1
  36. package/dashboard/out/wiki.html +1 -1
  37. package/dashboard/out/wiki.txt +1 -1
  38. package/dist/cli.bun.js +748 -10
  39. package/dist/cli.node.js +752 -12
  40. package/package.json +1 -1
  41. package/src/cli.ts +14 -0
  42. package/src/commands/init.ts +5 -1
  43. package/src/commands/post-read.ts +18 -0
  44. package/src/commands/post-tool.ts +48 -0
  45. package/src/commands/retrieve.ts +32 -0
  46. package/src/core/code-skeleton.ts +108 -0
  47. package/src/core/compress-tool-output.ts +127 -0
  48. package/src/core/compression.ts +81 -0
  49. package/src/core/hook-output.ts +42 -0
  50. package/src/core/output-compression.ts +252 -0
  51. package/src/core/token-estimate.ts +40 -0
  52. package/src/repositories/compression-cache-repo.ts +97 -0
  53. package/src/repositories/token-ledger-repo.ts +87 -0
  54. package/src/storage/schema.ts +50 -1
  55. package/src/types/compression.ts +29 -0
  56. package/src/types/config.ts +40 -0
  57. package/src/types/hook-input.ts +4 -0
  58. package/src/types/token-ledger.ts +33 -0
  59. /package/dashboard/out/_next/static/{Cr7-P-E43jbsBjy4hA6wH → Yl3F-J4CwvYf6yWG-SSmG}/_buildManifest.js +0 -0
  60. /package/dashboard/out/_next/static/{Cr7-P-E43jbsBjy4hA6wH → Yl3F-J4CwvYf6yWG-SSmG}/_ssgManifest.js +0 -0
@@ -0,0 +1,252 @@
1
+ // Tool-output compression engine (spec 21 §Content-Aware Compression).
2
+ //
3
+ // Pure, deterministic, dependency-free. Each compressor takes a tool output
4
+ // string and returns a smaller body plus a note of what was dropped, or null
5
+ // when it has nothing worth substituting. No I/O, no DB, no token counting and
6
+ // no retrieval token — omission markers carry only a generic "mink retrieve …" hint; the pipeline (compress-tool-output.ts) owns
7
+ // eligibility, the holdout, the min-savings gate, the cache, and the
8
+ // "mink retrieve" footer. Keeping this layer pure makes every strategy trivially
9
+ // testable and prompt-cache-stable (identical input → identical output).
10
+ //
11
+ // The "file" strategy does line-based signature extraction; spec 21's phase 3
12
+ // upgrades it to richer AST skeletons behind this same interface.
13
+
14
+ import type { ContentKind, CompressionResult } from "../types/compression";
15
+ import { extractCodeSkeleton } from "./code-skeleton";
16
+
17
+ // Tuning constants. Fixed (not config) so output is deterministic and stable.
18
+ const SEARCH_MAX_PER_FILE = 5;
19
+ const LOG_HEAD = 40;
20
+ const LOG_TAIL = 40;
21
+ const TEXT_HEAD = 30;
22
+ const TEXT_TAIL = 20;
23
+ const JSON_ARRAY_HEAD = 20;
24
+ const JSON_ARRAY_TAIL = 5;
25
+
26
+ // Strip ANSI CSI escape sequences (colour, cursor moves) — pure noise in logs.
27
// A CSI sequence is ESC (0x1B) + "[" + parameter bytes + intermediate bytes +
// one final byte. The ESC is spelled as an explicit \x1b escape so it cannot be
// lost by editors or renderers; without it the pattern would also eat ordinary
// bracketed text such as "[INFO]".
const ANSI = /\x1b\[[0-9;?]*[ -/]*[@-~]/g;

/**
 * Remove ANSI CSI escape sequences (colour, cursor movement) from `s`.
 *
 * @param s - raw tool output, possibly containing terminal escapes
 * @returns `s` with every CSI sequence deleted; all other text is untouched
 */
function stripAnsi(s: string): string {
  return s.replace(ANSI, "");
}
32
+
33
/**
 * Build the placeholder line that stands in for `n` dropped lines.
 *
 * @param n - number of lines that were omitted
 * @returns a single marker line, with "line" pluralised unless `n` is 1
 */
function omittedMarker(n: number): string {
  const noun = n === 1 ? "line" : "lines";
  return ` … ${n} ${noun} omitted — mink retrieve …`;
}
36
+
37
+ // Split into lines, dropping a single trailing empty line (from a final newline)
38
+ // so counts and windows aren't skewed by it.
39
/**
 * Split `content` on "\n", discarding one trailing empty segment if present.
 *
 * A final newline would otherwise produce a phantom empty last line and skew
 * line counts and head/tail windows. Only a single trailing empty segment is
 * removed; earlier blank lines are preserved.
 *
 * @returns the lines of `content` (an empty array for the empty string)
 */
function toLines(content: string): string[] {
  // split() always yields at least one segment, so indexing the last is safe.
  const parts = content.split("\n");
  const endsWithEmpty = parts[parts.length - 1] === "";
  return endsWithEmpty ? parts.slice(0, -1) : parts;
}
44
+
45
+ // ── Logs / command output ───────────────────────────────────────────────────
46
+ // Strip ANSI, collapse runs of identical lines, then keep a head+tail window.
47
/**
 * Compress log / command output.
 *
 * ANSI escapes are stripped, every run of consecutive identical lines is
 * folded into one `<line> (×N)` entry, and if more than LOG_HEAD + LOG_TAIL
 * entries remain only the first LOG_HEAD and last LOG_TAIL are kept around an
 * omission marker.
 *
 * @returns the compressed body plus a note, or null when the folded output
 *   fits the window and folding removed no lines
 */
function compressLog(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(stripAnsi(content));

  // Single pass: `runStart` marks the first line of the current run; the run
  // is flushed when the next line differs or the input ends.
  const collapsed: string[] = [];
  let runStart = 0;
  for (let end = 1; end <= lines.length; end++) {
    if (end < lines.length && lines[end] === lines[runStart]) continue;
    const runLength = end - runStart;
    const text = lines[runStart];
    collapsed.push(runLength > 1 ? `${text} (×${runLength})` : text);
    runStart = end;
  }

  const windowSize = LOG_HEAD + LOG_TAIL;
  if (collapsed.length > windowSize) {
    const omitted = collapsed.length - windowSize;
    const body = [
      ...collapsed.slice(0, LOG_HEAD),
      omittedMarker(omitted),
      ...collapsed.slice(collapsed.length - LOG_TAIL),
    ];
    return {
      compressed: body.join("\n"),
      omittedNote: `${omitted} of ${collapsed.length} log line(s) omitted (middle)`,
    };
  }

  // Fits the window: only worth substituting if folding actually removed lines.
  const folded = lines.length - collapsed.length;
  if (folded === 0) return null;
  return {
    compressed: collapsed.join("\n"),
    omittedNote: `collapsed ${folded} repeated line(s)`,
  };
}
77
+
78
+ // ── Search / match results ──────────────────────────────────────────────────
79
+ // Dedup exact lines and cap matches per file (the file prefix before the first
80
+ // colon), appending a per-file "+K more" tally.
81
/**
 * Compress search / match output.
 *
 * Exact duplicate lines are dropped (first occurrence wins), then at most
 * SEARCH_MAX_PER_FILE lines are kept per "file", where the file is the text
 * before the first colon (or the whole line when there is no colon past
 * column 0). One "+K more" tally line per capped file is appended at the end.
 *
 * @returns the compressed body plus a note, or null when nothing was capped
 *   or deduplicated
 */
function compressSearch(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(content);
  // A Set iterates in insertion order, i.e. the order of first occurrences.
  const unique = new Set(lines);
  const keptPerFile = new Map<string, number>();
  const cappedPerFile = new Map<string, number>();
  const out: string[] = [];

  for (const line of unique) {
    const sep = line.indexOf(":");
    const file = sep > 0 ? line.slice(0, sep) : line;
    const shown = keptPerFile.get(file) ?? 0;
    if (shown >= SEARCH_MAX_PER_FILE) {
      cappedPerFile.set(file, (cappedPerFile.get(file) ?? 0) + 1);
      continue;
    }
    keptPerFile.set(file, shown + 1);
    out.push(line);
  }

  let totalOmitted = 0;
  for (const [file, n] of cappedPerFile) {
    totalOmitted += n;
    out.push(` … +${n} more match(es) in ${file} — mink retrieve …`);
  }
  const dedupRemoved = lines.length - unique.size;

  // Nothing changed → not worth substituting.
  if (totalOmitted === 0 && dedupRemoved === 0) return null;

  const notes: string[] = [];
  if (totalOmitted > 0) notes.push(`${totalOmitted} match(es) capped`);
  if (dedupRemoved > 0) notes.push(`${dedupRemoved} duplicate(s) removed`);
  return { compressed: out.join("\n"), omittedNote: notes.join("; ") };
}
117
+
118
+ // ── Large file reads ────────────────────────────────────────────────────────
119
+ // Brace-aware structural skeleton (see code-skeleton.ts): declarations and class
120
+ // members with bodies elided. Falls back to a generic text window when the
121
+ // content has no recognisable structure.
122
/**
 * Compress a file read into a structural summary.
 *
 * The extension (text from the last "." onward, lower-cased) decides whether
 * the skeleton extractor runs in markdown mode. When the extractor yields
 * nothing, the content is handed to the generic text window instead.
 *
 * @param filePath - path used for the extension check and the summary header
 * @param content - full file content
 * @returns header + skeleton lines with a note, or whatever `compressText`
 *   returns for unstructured content
 */
function compressFile(
  filePath: string,
  content: string
): { compressed: string; omittedNote: string } | null {
  const ext = filePath.slice(filePath.lastIndexOf(".")).toLowerCase();
  const markdown = [".md", ".mdx", ".markdown"].includes(ext);
  const skeleton = extractCodeSkeleton(content, { markdown });

  if (skeleton) {
    const { lines, totalLines } = skeleton;
    const header = `${filePath} — structural summary (${lines.length} signature(s) of ${totalLines} lines)`;
    return {
      compressed: [header, ...lines].join("\n"),
      omittedNote: `bodies elided; ${totalLines} lines available via mink retrieve`,
    };
  }

  // No recognisable structure — fall back to a generic text window.
  return compressText(content);
}
143
+
144
+ // ── Structured data ─────────────────────────────────────────────────────────
145
+ // Recursively "crush" JSON: sample any over-long array (at any depth), recursing
146
+ // into the elements that are kept. Records how many elements were dropped.
147
/**
 * Recursively sample over-long arrays inside a parsed JSON value.
 *
 * Arrays longer than JSON_ARRAY_HEAD + JSON_ARRAY_TAIL keep their first
 * JSON_ARRAY_HEAD and last JSON_ARRAY_TAIL elements around a string marker;
 * kept elements and all object values are crushed recursively. Primitives and
 * null are returned unchanged.
 *
 * @param value - any JSON-compatible value
 * @returns the crushed copy and the total number of array elements dropped at
 *   every depth
 */
function crush(value: unknown): { value: unknown; omitted: number } {
  if (Array.isArray(value)) {
    let omitted = 0;
    // Crush one kept element, accumulating its nested omissions.
    const crushKept = (el: unknown): unknown => {
      const inner = crush(el);
      omitted += inner.omitted;
      return inner.value;
    };
    if (value.length <= JSON_ARRAY_HEAD + JSON_ARRAY_TAIL) {
      const all = value.map(crushKept);
      return { value: all, omitted };
    }
    const dropped = value.length - JSON_ARRAY_HEAD - JSON_ARRAY_TAIL;
    omitted += dropped;
    const head = value.slice(0, JSON_ARRAY_HEAD).map(crushKept);
    const tail = value.slice(value.length - JSON_ARRAY_TAIL).map(crushKept);
    const sampled = [...head, `… ${dropped} element(s) omitted — mink retrieve …`, ...tail];
    return { value: sampled, omitted };
  }

  if (typeof value !== "object" || value === null) {
    return { value, omitted: 0 };
  }

  let omitted = 0;
  const entries = Object.entries(value as Record<string, unknown>).map(
    ([key, child]): [string, unknown] => {
      const inner = crush(child);
      omitted += inner.omitted;
      return [key, inner.value];
    }
  );
  // Object.fromEntries defines own data properties, so a JSON key literally
  // named "__proto__" survives; plain `out[key] = …` on a `{}` literal would
  // invoke the prototype setter and silently drop the key from the output.
  return { value: Object.fromEntries(entries), omitted };
}
179
+
180
/**
 * Compress JSON output by sampling over-long arrays.
 *
 * @param content - text expected to be a JSON document
 * @returns the crushed document pretty-printed with 2-space indentation plus a
 *   note, or null when `content` does not parse or no array element was dropped
 */
function compressJson(content: string): { compressed: string; omittedNote: string } | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    // Not JSON after all — nothing to substitute.
    return null;
  }

  const crushed = crush(parsed);
  if (crushed.omitted === 0) return null;

  const compressed = JSON.stringify(crushed.value, null, 2);
  const omittedNote = `${crushed.omitted} array element(s) sampled out`;
  return { compressed, omittedNote };
}
194
+
195
+ // ── Generic text ────────────────────────────────────────────────────────────
196
/**
 * Compress generic text to a head + tail window.
 *
 * @returns the first TEXT_HEAD and last TEXT_TAIL lines around an omission
 *   marker plus a note, or null when the text has no more than
 *   TEXT_HEAD + TEXT_TAIL lines
 */
function compressText(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(content);
  const omitted = lines.length - TEXT_HEAD - TEXT_TAIL;
  if (omitted <= 0) return null;

  const windowed = [
    ...lines.slice(0, TEXT_HEAD),
    omittedMarker(omitted),
    ...lines.slice(lines.length - TEXT_TAIL),
  ];
  return {
    compressed: windowed.join("\n"),
    omittedNote: `${omitted} of ${lines.length} line(s) omitted (middle)`,
  };
}
207
+
208
+ // ── Routing ─────────────────────────────────────────────────────────────────
209
+
210
/**
 * Classify a tool output so the matching compressor can be selected.
 *
 * The tool name (case-insensitive) decides first: `read` → "file",
 * `grep`/`glob` → "search", `bash` → "log". For any other tool the content is
 * sniffed: if its first non-whitespace character is "{" or "[" and the whole
 * string parses as JSON it is "json". Otherwise a supplied `filePath` implies
 * "file", and everything else is "text".
 *
 * @param toolName - name of the tool that produced the output
 * @param content - the raw output
 * @param filePath - optional path associated with the output
 */
export function detectContentKind(
  toolName: string,
  content: string,
  filePath?: string
): ContentKind {
  switch (toolName.toLowerCase()) {
    case "read":
      return "file";
    case "grep":
    case "glob":
      return "search";
    case "bash":
      return "log";
  }

  // Generic / MCP output — only attempt a full parse when it looks like JSON.
  const firstChar = content.trimStart().charAt(0);
  const looksLikeJson = firstChar === "{" || firstChar === "[";
  if (looksLikeJson) {
    try {
      JSON.parse(content);
      return "json";
    } catch {
      // not JSON — fall through to the remaining heuristics
    }
  }

  // A file path with no tool hint still implies a file read.
  return filePath ? "file" : "text";
}
233
+
234
+ // Compress an output by its detected kind. Returns null when there is nothing
235
+ // worth substituting; the caller then passes the original through unchanged.
236
/**
 * Compress one tool output using the strategy for its detected kind.
 *
 * @param toolName - name of the tool that produced the output
 * @param content - the raw output
 * @param filePath - optional path; the file strategy labels its summary with
 *   it, falling back to the literal "file" when absent
 * @returns the detected kind with the compressed body and omission note, or
 *   null when the chosen strategy had nothing worth substituting (the caller
 *   then passes the original through unchanged)
 */
export function compressOutput(
  toolName: string,
  content: string,
  filePath?: string
): CompressionResult | null {
  const kind = detectContentKind(toolName, content, filePath);

  let body: { compressed: string; omittedNote: string } | null = null;
  if (kind === "search") {
    body = compressSearch(content);
  } else if (kind === "log") {
    body = compressLog(content);
  } else if (kind === "file") {
    body = compressFile(filePath ?? "file", content);
  } else if (kind === "json") {
    body = compressJson(content);
  } else if (kind === "text") {
    body = compressText(content);
  }

  if (!body) return null;
  const { compressed, omittedNote } = body;
  return { kind, compressed, omittedNote };
}
@@ -34,3 +34,43 @@ export function estimateTokens(content: string, filePath: string): number {
34
34
  }
35
35
  return Math.ceil(content.length / ratio);
36
36
  }
37
+
38
+ // A deterministic, dependency-free token counter that segments text the way a
39
+ // BPE tokenizer roughly would — on word, number, and punctuation boundaries —
40
+ // rather than dividing by a single flat character ratio. It is more faithful
41
+ // than `estimateTokens` (which exists for the file-index hot path and is pinned
42
+ // by exact-ratio tests), and crucially it does not need a file extension, so it
43
+ // can score arbitrary tool output (logs, search results, command output).
44
+ //
45
+ // It is intentionally NOT a real BPE vocabulary: Mink ships as a lean CLI with a
46
+ // single runtime dependency, and the compression-measurement use only needs a
47
+ // *consistent* estimator to compute an original-minus-compressed delta. The
48
+ // signature is stable, so a real BPE library can be dropped in behind it later
49
+ // without touching call sites.
50
+ //
51
+ // Segmentation: runs of ASCII letters and runs of digits each collapse to a
52
+ // handful of sub-word tokens; every other character (punctuation, symbols,
53
+ // non-ASCII, whitespace) is scored individually. Whitespace usually merges into
54
+ // an adjacent token in real tokenizers, so spaces and tabs cost nothing and only
55
+ // newlines count.
56
/**
 * Estimate the token count of arbitrary text without a tokenizer vocabulary.
 *
 * Scoring, applied left to right over UTF-16 code units:
 * - a run of ASCII letters costs `ceil(length / 4)`;
 * - a run of ASCII digits costs `ceil(length / 3)`;
 * - space, tab and carriage return cost nothing;
 * - every other code unit (newline, punctuation, symbols, non-ASCII) costs 1.
 *
 * @param text - text to score
 * @returns the estimated token count (0 for empty input)
 */
export function countTokens(text: string): number {
  if (!text) return 0;

  const isLetter = (c: number): boolean => (c >= 65 && c <= 90) || (c >= 97 && c <= 122);
  const isDigit = (c: number): boolean => c >= 48 && c <= 57;
  // Length of the run starting at `from` whose code units all satisfy `test`.
  const runLength = (from: number, test: (c: number) => boolean): number => {
    let end = from + 1;
    while (end < text.length && test(text.charCodeAt(end))) end++;
    return end - from;
  };

  let tokens = 0;
  let pos = 0;
  while (pos < text.length) {
    const code = text.charCodeAt(pos);
    if (isLetter(code)) {
      const len = runLength(pos, isLetter);
      tokens += Math.ceil(len / 4); // word splits into ~4-char sub-words
      pos += len;
    } else if (isDigit(code)) {
      const len = runLength(pos, isDigit);
      tokens += Math.ceil(len / 3); // digit runs tokenize more finely
      pos += len;
    } else {
      // Space (32), tab (9) and CR (13) merge into a neighbour — no cost.
      const free = code === 32 || code === 9 || code === 13;
      if (!free) tokens += 1;
      pos += 1;
    }
  }
  return tokens;
}
@@ -0,0 +1,97 @@
1
+ // Reversible-compression cache repository (spec 21 §Reversibility). Stores the
2
+ // byte-exact original of a compressed tool output keyed by a short retrieval
3
+ // token, with a TTL. `get` treats an expired row as a miss and evicts it lazily,
4
+ // so a stale token can never return partial or wrong content.
5
+ //
6
+ // This is a local cache — it is never injected into model context and is not
7
+ // part of the cross-device sync surface. device_id is recorded for audit only.
8
+
9
+ import { randomUUID } from "crypto";
10
+ import type { DbDriver } from "../storage/driver";
11
+ import type { CompressionCacheEntry, ContentKind } from "../types/compression";
12
+ import { openProjectDb } from "../storage/db";
13
+ import { getOrCreateDeviceId } from "../core/device";
14
+
15
+ export interface StoreInput {
16
+ toolName: string;
17
+ contentKind: ContentKind;
18
+ content: string;
19
+ retentionHours: number;
20
+ token?: string;
21
+ now?: Date;
22
+ }
23
+
24
/**
 * Repository over the `compression_cache` table: stores the original of a
 * compressed tool output under a short retrieval token with an expiry time.
 */
export class CompressionCacheRepo {
  // Latest instant whose ISO-8601 form still has a 4-digit year. Expiry is
  // compared as strings (here and in SQL), which is only order-preserving
  // while every timestamp shares the same "YYYY-…" layout.
  private static readonly MAX_EXPIRY_MS = Date.UTC(9999, 11, 31, 23, 59, 59, 999);

  constructor(private readonly db: DbDriver) {}

  /** Open the project database for `cwd` and wrap it in a repository. */
  static for(cwd: string): CompressionCacheRepo {
    return new CompressionCacheRepo(openProjectDb(cwd));
  }

  /** Short token of the form `mc-` + the first 8 characters of a random UUID. */
  static newToken(): string {
    return `mc-${randomUUID().slice(0, 8)}`;
  }

  /**
   * Store an original and return its retrieval token.
   *
   * An existing row with the same token is replaced.
   *
   * @param input - content and metadata; `token` and `now` default to a fresh
   *   token and the current time. `retentionHours` is clamped: negative or NaN
   *   means "expires immediately", and very large or infinite values are
   *   capped at the end of year 9999 instead of producing an invalid date.
   * @param deviceId - recorded on the row; defaults to this device's id
   * @returns the token the row was stored under
   */
  store(input: StoreInput, deviceId: string = getOrCreateDeviceId()): string {
    const token = input.token ?? CompressionCacheRepo.newToken();
    const now = input.now ?? new Date();
    const createdAt = now.toISOString();

    // Math.max(0, NaN) is NaN, and new Date(NaN).toISOString() throws, so a
    // mis-parsed config value must be neutralised before building the date.
    const hours = Number.isNaN(input.retentionHours) ? 0 : Math.max(0, input.retentionHours);
    const expiresMs = Math.min(
      now.getTime() + hours * 3_600_000,
      CompressionCacheRepo.MAX_EXPIRY_MS
    );
    const expiresAt = new Date(expiresMs).toISOString();

    this.db.prepare(`
      INSERT OR REPLACE INTO compression_cache
        (token, created_at, expires_at, tool_name, content_kind,
         content, size_bytes, device_id)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      token, createdAt, expiresAt, input.toolName, input.contentKind,
      input.content, Buffer.byteLength(input.content, "utf-8"), deviceId
    );
    return token;
  }

  /**
   * Look up a stored original.
   *
   * @param token - retrieval token returned by `store`
   * @param now - reference time for the expiry check (defaults to current time)
   * @returns the entry, or null if the token is unknown or its expiry is at or
   *   before `now`; an expired row is deleted on the way out
   */
  get(token: string, now: Date = new Date()): CompressionCacheEntry | null {
    const row = this.db
      .prepare("SELECT * FROM compression_cache WHERE token = ?")
      .get(token) as Record<string, unknown> | undefined;
    if (!row) return null;

    const expiresAt = String(row.expires_at);
    if (expiresAt <= now.toISOString()) {
      try {
        this.db.prepare("DELETE FROM compression_cache WHERE token = ?").run(token);
      } catch {
        // best effort — a failed eviction still reports a miss below
      }
      return null;
    }

    return {
      token: String(row.token),
      createdAt: String(row.created_at),
      expiresAt,
      toolName: String(row.tool_name),
      contentKind: String(row.content_kind) as ContentKind,
      content: String(row.content),
      sizeBytes: Number(row.size_bytes),
    };
  }

  /**
   * Delete every row whose expiry is at or before `now`.
   *
   * @returns the number of rows removed, as reported by the driver
   */
  evictExpired(now: Date = new Date()): number {
    const r = this.db
      .prepare("DELETE FROM compression_cache WHERE expires_at <= ?")
      .run(now.toISOString());
    return Number(r.changes);
  }

  /** Number of rows currently in the cache, expired or not. */
  count(): number {
    const row = this.db
      .prepare("SELECT COUNT(*) AS n FROM compression_cache")
      .get() as { n: number };
    return Number(row.n);
  }
}
+ }
@@ -21,6 +21,9 @@ import type {
21
21
  TokenLedger,
22
22
  LedgerSession,
23
23
  LifetimeCounters,
24
+ CompressionEvent,
25
+ CompressionEventInput,
26
+ CompressionLifetime,
24
27
  } from "../types/token-ledger";
25
28
  import type { SessionSummary } from "../types/session";
26
29
  import type { WasteFlag, WastePattern } from "../types/waste-detection";
@@ -233,6 +236,90 @@ export class TokenLedgerRepo {
233
236
  });
234
237
  }
235
238
 
239
+ // ── Compression measurement (spec 21) ────────────────────────────────
240
+
241
+ // Record one compression decision and fold it into this device's
242
+ // compression-lifetime aggregates, transactionally so the row and the
243
+ // aggregate never drift. measured savings credits compressed arms only —
244
+ // a holdout arm saves nothing by construction.
245
+ recordCompression(
246
+ event: CompressionEventInput,
247
+ deviceId: string = getOrCreateDeviceId()
248
+ ): void {
249
+ const id = event.id ?? crypto.randomUUID();
250
+ const createdAt = event.createdAt ?? new Date().toISOString();
251
+ const holdout = event.holdout ? 1 : 0;
252
+ const savings = event.holdout
253
+ ? 0
254
+ : Math.max(0, event.originalTokens - event.compressedTokens);
255
+ this.db.transaction(() => {
256
+ this.db.prepare(`
257
+ INSERT OR REPLACE INTO ledger_compressions
258
+ (id, created_at, tool_name, content_kind,
259
+ original_tokens, compressed_tokens, holdout, device_id)
260
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
261
+ `).run(
262
+ id, createdAt, event.toolName, event.contentKind,
263
+ event.originalTokens, event.compressedTokens, holdout, deviceId
264
+ );
265
+ this.db.prepare(`
266
+ INSERT INTO ledger_compression_lifetime
267
+ (device_id, total_events, total_holdout_events,
268
+ total_original_tokens, total_compressed_tokens, total_measured_savings)
269
+ VALUES (?, ?, ?, ?, ?, ?)
270
+ ON CONFLICT(device_id) DO UPDATE SET
271
+ total_events = ledger_compression_lifetime.total_events + excluded.total_events,
272
+ total_holdout_events = ledger_compression_lifetime.total_holdout_events + excluded.total_holdout_events,
273
+ total_original_tokens = ledger_compression_lifetime.total_original_tokens + excluded.total_original_tokens,
274
+ total_compressed_tokens = ledger_compression_lifetime.total_compressed_tokens + excluded.total_compressed_tokens,
275
+ total_measured_savings = ledger_compression_lifetime.total_measured_savings + excluded.total_measured_savings
276
+ `).run(
277
+ deviceId, 1, holdout,
278
+ event.originalTokens, event.compressedTokens, savings
279
+ );
280
+ });
281
+ }
282
+
283
+ // Project-wide compression aggregates — summed across every device's row.
284
+ compressionLifetime(): CompressionLifetime {
285
+ const row = this.db.prepare(`
286
+ SELECT
287
+ COALESCE(SUM(total_events), 0) AS totalEvents,
288
+ COALESCE(SUM(total_holdout_events), 0) AS totalHoldoutEvents,
289
+ COALESCE(SUM(total_original_tokens), 0) AS totalOriginalTokens,
290
+ COALESCE(SUM(total_compressed_tokens), 0) AS totalCompressedTokens,
291
+ COALESCE(SUM(total_measured_savings), 0) AS totalMeasuredSavings
292
+ FROM ledger_compression_lifetime
293
+ `).get() as Record<string, number> | undefined;
294
+ return {
295
+ totalEvents: Number(row?.totalEvents ?? 0),
296
+ totalHoldoutEvents: Number(row?.totalHoldoutEvents ?? 0),
297
+ totalOriginalTokens: Number(row?.totalOriginalTokens ?? 0),
298
+ totalCompressedTokens: Number(row?.totalCompressedTokens ?? 0),
299
+ totalMeasuredSavings: Number(row?.totalMeasuredSavings ?? 0),
300
+ };
301
+ }
302
+
303
+ // Recent compression events, newest first. Primarily for inspection/tests.
304
+ compressionEvents(limit = 100): CompressionEvent[] {
305
+ const rows = this.db.prepare(`
306
+ SELECT id, created_at, tool_name, content_kind,
307
+ original_tokens, compressed_tokens, holdout
308
+ FROM ledger_compressions
309
+ ORDER BY created_at DESC
310
+ LIMIT ?
311
+ `).all(limit) as Array<Record<string, unknown>>;
312
+ return rows.map((r) => ({
313
+ id: String(r.id),
314
+ createdAt: String(r.created_at),
315
+ toolName: String(r.tool_name),
316
+ contentKind: String(r.content_kind),
317
+ originalTokens: Number(r.original_tokens),
318
+ compressedTokens: Number(r.compressed_tokens),
319
+ holdout: Number(r.holdout) === 1,
320
+ }));
321
+ }
322
+
236
323
  // ── Helpers ───────────────────────────────────────────────────────────
237
324
 
238
325
  private insertSessionRow(
@@ -12,7 +12,7 @@
12
12
  // - `meta(key, value)` holds versioning + migration markers. Keep it small;
13
13
  // per-store counters live in `counters` and `ledger_lifetime`.
14
14
 
15
- export const SCHEMA_VERSION = 1;
15
+ export const SCHEMA_VERSION = 3;
16
16
 
17
17
  export const INITIAL_SCHEMA = `
18
18
  CREATE TABLE IF NOT EXISTS meta (
@@ -176,6 +176,55 @@ CREATE TABLE IF NOT EXISTS counters (
176
176
  file_index_hits INTEGER NOT NULL DEFAULT 0,
177
177
  file_index_misses INTEGER NOT NULL DEFAULT 0
178
178
  );
179
+
180
+ -- Tool-output compression measurement (spec 21). One row per compression
181
+ -- decision: either a compressed arm (compressed_tokens < original_tokens) or a
182
+ -- holdout arm (left uncompressed for control, compressed_tokens = original_tokens).
183
+ -- These are append-only telemetry, independent of session lifecycle, written at
184
+ -- the moment a tool output is processed. New table → applied to existing DBs via
185
+ -- IF NOT EXISTS on the next open.
186
+ CREATE TABLE IF NOT EXISTS ledger_compressions (
187
+ id TEXT PRIMARY KEY,
188
+ created_at TEXT NOT NULL,
189
+ tool_name TEXT NOT NULL,
190
+ content_kind TEXT NOT NULL,
191
+ original_tokens INTEGER NOT NULL DEFAULT 0,
192
+ compressed_tokens INTEGER NOT NULL DEFAULT 0,
193
+ holdout INTEGER NOT NULL DEFAULT 0,
194
+ device_id TEXT NOT NULL
195
+ );
196
+ CREATE INDEX IF NOT EXISTS idx_ledger_compressions_created ON ledger_compressions(created_at);
197
+ CREATE INDEX IF NOT EXISTS idx_ledger_compressions_device ON ledger_compressions(device_id);
198
+
199
+ -- Per-device compression aggregates, summed across devices like ledger_lifetime.
200
+ -- measured_savings only credits compressed arms (holdout arms save nothing by
201
+ -- construction), so the reported figure is a true measured delta, not an estimate.
202
+ CREATE TABLE IF NOT EXISTS ledger_compression_lifetime (
203
+ device_id TEXT PRIMARY KEY,
204
+ total_events INTEGER NOT NULL DEFAULT 0,
205
+ total_holdout_events INTEGER NOT NULL DEFAULT 0,
206
+ total_original_tokens INTEGER NOT NULL DEFAULT 0,
207
+ total_compressed_tokens INTEGER NOT NULL DEFAULT 0,
208
+ total_measured_savings INTEGER NOT NULL DEFAULT 0
209
+ );
210
+
211
+ -- Reversible-compression cache (spec 21 §Reversibility). When a tool output is
212
+ -- compressed, the original is stored here keyed by a short retrieval token and
213
+ -- embedded in the compressed result; "mink retrieve <token>" returns it
214
+ -- byte-exact. Rows expire after the configured retention window; an expired or
215
+ -- unknown token is a graceful miss. This is a local cache, not synced state, so
216
+ -- (unlike other tables) it carries no merge semantics beyond device_id for audit.
217
+ CREATE TABLE IF NOT EXISTS compression_cache (
218
+ token TEXT PRIMARY KEY,
219
+ created_at TEXT NOT NULL,
220
+ expires_at TEXT NOT NULL,
221
+ tool_name TEXT NOT NULL,
222
+ content_kind TEXT NOT NULL,
223
+ content TEXT NOT NULL,
224
+ size_bytes INTEGER NOT NULL,
225
+ device_id TEXT NOT NULL
226
+ );
227
+ CREATE INDEX IF NOT EXISTS idx_compression_cache_expires ON compression_cache(expires_at);
179
228
  `;
180
229
 
181
230
  export interface DriverForSchema {
@@ -0,0 +1,29 @@
1
+ // Tool-output compression types (spec 21). The decision/config types live in
2
+ // src/core/compression.ts; these describe the reversible cache and the engine's
3
+ // content-aware output.
4
+
5
/**
 * The kind of tool output that was detected. It selects the compressor and is
 * recorded on the ledger event for later analysis.
 */
export type ContentKind = "search" | "log" | "file" | "json" | "text";

/**
 * One stored original, retrievable via `mink retrieve <token>` until it
 * expires.
 */
export interface CompressionCacheEntry {
  /** Retrieval token the original is stored under. */
  token: string;
  /** When the entry was stored. */
  createdAt: string;
  /** When the entry stops being retrievable. */
  expiresAt: string;
  /** Name of the tool that produced the output. */
  toolName: string;
  /** Detected kind of the output. */
  contentKind: ContentKind;
  /** The original, uncompressed output. */
  content: string;
  /** Size of `content` in bytes. */
  sizeBytes: number;
}

/**
 * The result of compressing one output. A compressor returns null instead
 * when it has nothing worth substituting.
 */
export interface CompressionResult {
  /** Detected kind, i.e. which compressor produced this result. */
  kind: ContentKind;
  /**
   * The body the model will see, without the retrieval affordance (the
   * pipeline appends that).
   */
  compressed: string;
  /** Short summary of what was dropped. */
  omittedNote: string;
}
@@ -18,6 +18,11 @@ export interface GlobalConfig {
18
18
  "cli.auto-update-schedule"?: string;
19
19
  "cli.auto-update-package-manager"?: string;
20
20
  "projects.identity"?: string;
21
+ "compression.enabled"?: string;
22
+ "compression.threshold-tokens"?: string;
23
+ "compression.min-savings-ratio"?: string;
24
+ "compression.holdout-fraction"?: string;
25
+ "compression.retention-hours"?: string;
21
26
  }
22
27
 
23
28
  export type ConfigKey = keyof GlobalConfig & string;
@@ -179,6 +184,41 @@ export const CONFIG_KEYS: ConfigKeyMeta[] = [
179
184
  "Project identity strategy: path-derived (legacy) or git-remote (stable across machines)",
180
185
  scope: "shared",
181
186
  },
187
+ {
188
+ key: "compression.enabled",
189
+ default: "false",
190
+ envVar: "MINK_COMPRESSION_ENABLED",
191
+ description: "Enable tool-output compression (spec 21). Off until inline compression ships.",
192
+ scope: "shared",
193
+ },
194
+ {
195
+ key: "compression.threshold-tokens",
196
+ default: "800",
197
+ envVar: "MINK_COMPRESSION_THRESHOLD_TOKENS",
198
+ description: "Minimum estimated token size before a tool output is eligible for compression",
199
+ scope: "shared",
200
+ },
201
+ {
202
+ key: "compression.min-savings-ratio",
203
+ default: "0.25",
204
+ envVar: "MINK_COMPRESSION_MIN_SAVINGS_RATIO",
205
+ description: "Discard a compression attempt unless it saves at least this fraction of tokens",
206
+ scope: "shared",
207
+ },
208
+ {
209
+ key: "compression.holdout-fraction",
210
+ default: "0.1",
211
+ envVar: "MINK_COMPRESSION_HOLDOUT_FRACTION",
212
+ description: "Fraction of eligible outputs left uncompressed as a measured control group",
213
+ scope: "shared",
214
+ },
215
+ {
216
+ key: "compression.retention-hours",
217
+ default: "168",
218
+ envVar: "MINK_COMPRESSION_RETENTION_HOURS",
219
+ description: "How long compressed originals stay retrievable before eviction",
220
+ scope: "shared",
221
+ },
182
222
  ];
183
223
 
184
224
  const VALID_KEYS = new Set<string>(CONFIG_KEYS.map((k) => k.key));
@@ -19,6 +19,10 @@ export interface PostToolUseInput {
19
19
  // Edit tool
20
20
  old_string?: string;
21
21
  new_string?: string;
22
+ // Read tool — present for ranged reads; their output is a slice, so we
23
+ // don't substitute a whole-file summary for them (spec 21 edge case).
24
+ offset?: number;
25
+ limit?: number;
22
26
  };
23
27
  // Legacy / older hook payload shape — kept for backward compatibility.
24
28
  tool_output?: {