@drewpayment/mink 0.12.0 → 0.13.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/out/404.html +1 -1
- package/dashboard/out/action-log.html +1 -1
- package/dashboard/out/action-log.txt +1 -1
- package/dashboard/out/activity.html +1 -1
- package/dashboard/out/activity.txt +1 -1
- package/dashboard/out/bugs.html +1 -1
- package/dashboard/out/bugs.txt +1 -1
- package/dashboard/out/capture.html +1 -1
- package/dashboard/out/capture.txt +1 -1
- package/dashboard/out/config.html +1 -1
- package/dashboard/out/config.txt +1 -1
- package/dashboard/out/daemon.html +1 -1
- package/dashboard/out/daemon.txt +1 -1
- package/dashboard/out/design.html +1 -1
- package/dashboard/out/design.txt +1 -1
- package/dashboard/out/discord.html +1 -1
- package/dashboard/out/discord.txt +1 -1
- package/dashboard/out/file-index.html +1 -1
- package/dashboard/out/file-index.txt +1 -1
- package/dashboard/out/index.html +1 -1
- package/dashboard/out/index.txt +1 -1
- package/dashboard/out/insights.html +1 -1
- package/dashboard/out/insights.txt +1 -1
- package/dashboard/out/learning.html +1 -1
- package/dashboard/out/learning.txt +1 -1
- package/dashboard/out/overview.html +1 -1
- package/dashboard/out/overview.txt +1 -1
- package/dashboard/out/scheduler.html +1 -1
- package/dashboard/out/scheduler.txt +1 -1
- package/dashboard/out/sync.html +1 -1
- package/dashboard/out/sync.txt +1 -1
- package/dashboard/out/tokens.html +1 -1
- package/dashboard/out/tokens.txt +1 -1
- package/dashboard/out/waste.html +1 -1
- package/dashboard/out/waste.txt +1 -1
- package/dashboard/out/wiki.html +1 -1
- package/dashboard/out/wiki.txt +1 -1
- package/dist/cli.bun.js +748 -10
- package/dist/cli.node.js +752 -12
- package/package.json +1 -1
- package/src/cli.ts +14 -0
- package/src/commands/init.ts +5 -1
- package/src/commands/post-read.ts +18 -0
- package/src/commands/post-tool.ts +48 -0
- package/src/commands/retrieve.ts +32 -0
- package/src/core/code-skeleton.ts +108 -0
- package/src/core/compress-tool-output.ts +127 -0
- package/src/core/compression.ts +81 -0
- package/src/core/hook-output.ts +42 -0
- package/src/core/output-compression.ts +252 -0
- package/src/core/token-estimate.ts +40 -0
- package/src/repositories/compression-cache-repo.ts +97 -0
- package/src/repositories/token-ledger-repo.ts +87 -0
- package/src/storage/schema.ts +50 -1
- package/src/types/compression.ts +29 -0
- package/src/types/config.ts +40 -0
- package/src/types/hook-input.ts +4 -0
- package/src/types/token-ledger.ts +33 -0
- /package/dashboard/out/_next/static/{Cr7-P-E43jbsBjy4hA6wH → Yl3F-J4CwvYf6yWG-SSmG}/_buildManifest.js +0 -0
- /package/dashboard/out/_next/static/{Cr7-P-E43jbsBjy4hA6wH → Yl3F-J4CwvYf6yWG-SSmG}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
// Tool-output compression engine (spec 21 §Content-Aware Compression).
|
|
2
|
+
//
|
|
3
|
+
// Pure, deterministic, dependency-free. Each compressor takes a tool output
|
|
4
|
+
// string and returns a smaller body plus a note of what was dropped, or null
|
|
5
|
+
// when it has nothing worth substituting. No I/O, no DB, no token counting and
|
|
6
|
+
// no retrieval-affordance text — the pipeline (compress-tool-output.ts) owns
|
|
7
|
+
// eligibility, the holdout, the min-savings gate, the cache, and the
|
|
8
|
+
// "mink retrieve" footer. Keeping this layer pure makes every strategy trivially
|
|
9
|
+
// testable and prompt-cache-stable (identical input → identical output).
|
|
10
|
+
//
|
|
11
|
+
// The "file" strategy does line-based signature extraction; spec 21's phase 3
|
|
12
|
+
// upgrades it to richer AST skeletons behind this same interface.
|
|
13
|
+
|
|
14
|
+
import type { ContentKind, CompressionResult } from "../types/compression";
|
|
15
|
+
import { extractCodeSkeleton } from "./code-skeleton";
|
|
16
|
+
|
|
17
|
+
// Tuning constants. Fixed (not config) so output is deterministic and stable.
|
|
18
|
+
const SEARCH_MAX_PER_FILE = 5;
|
|
19
|
+
const LOG_HEAD = 40;
|
|
20
|
+
const LOG_TAIL = 40;
|
|
21
|
+
const TEXT_HEAD = 30;
|
|
22
|
+
const TEXT_TAIL = 20;
|
|
23
|
+
const JSON_ARRAY_HEAD = 20;
|
|
24
|
+
const JSON_ARRAY_TAIL = 5;
|
|
25
|
+
|
|
26
|
+
// Strip ANSI CSI escape sequences (colour, cursor moves) — pure noise in logs.
|
|
27
|
+
// A CSI sequence is ESC (0x1B) + "[" + parameter bytes + intermediate bytes +
// one final byte. Anchoring on the ESC byte is essential: without it the
// pattern also matches ordinary bracketed text such as "[INFO]" or "arr[i]"
// and silently corrupts log lines.
const ANSI = /\x1b\[[0-9;?]*[ -/]*[@-~]/g;

/**
 * Remove ANSI CSI escape sequences (colour, cursor movement) from a string.
 *
 * @param s - Raw text that may contain terminal escape sequences.
 * @returns `s` with every CSI sequence removed; all other characters,
 *   including plain "[" … "]" text, are left untouched.
 */
function stripAnsi(s: string): string {
  return s.replace(ANSI, "");
}
|
|
32
|
+
|
|
33
|
+
/**
 * Build the placeholder line that stands in for a run of elided lines.
 *
 * @param n - Number of lines that were dropped.
 * @returns A one-line marker with correct singular/plural wording.
 */
function omittedMarker(n: number): string {
  const noun = n === 1 ? "line" : "lines";
  return ` … ${n} ${noun} omitted — mink retrieve …`;
}
|
|
36
|
+
|
|
37
|
+
// Split into lines, dropping a single trailing empty line (from a final newline)
|
|
38
|
+
// so counts and windows aren't skewed by it.
|
|
39
|
+
/**
 * Split text on "\n", discarding the single empty element produced by a
 * trailing newline so line counts and head/tail windows are not skewed.
 *
 * @param content - Text to split.
 * @returns The lines of `content`; an empty string yields an empty array.
 */
function toLines(content: string): string[] {
  const parts = content.split("\n");
  const endsWithBlank = parts[parts.length - 1] === "";
  return endsWithBlank ? parts.slice(0, -1) : parts;
}
|
|
44
|
+
|
|
45
|
+
// ── Logs / command output ───────────────────────────────────────────────────
|
|
46
|
+
// Strip ANSI, collapse runs of identical lines, then keep a head+tail window.
|
|
47
|
+
/**
 * Compress log / command output.
 *
 * Steps: strip ANSI sequences, fold each run of identical adjacent lines into
 * a single "<line> (×N)" entry, then keep only the first LOG_HEAD and last
 * LOG_TAIL folded lines with an omission marker between them.
 *
 * @param content - Raw command output.
 * @returns The compressed body and a note describing what was dropped, or
 *   null when folding removed nothing and the output already fits the window.
 */
function compressLog(content: string): { compressed: string; omittedNote: string } | null {
  const raw = toLines(stripAnsi(content));

  // Fold runs of identical adjacent lines.
  const folded: string[] = [];
  for (let start = 0; start < raw.length; ) {
    let end = start + 1;
    while (end < raw.length && raw[end] === raw[start]) end++;
    const runLength = end - start;
    folded.push(runLength > 1 ? `${raw[start]} (×${runLength})` : raw[start]);
    start = end;
  }

  const windowSize = LOG_HEAD + LOG_TAIL;
  if (folded.length > windowSize) {
    // Too long even after folding: keep head + tail, elide the middle.
    const omitted = folded.length - windowSize;
    const kept = [
      ...folded.slice(0, LOG_HEAD),
      omittedMarker(omitted),
      ...folded.slice(folded.length - LOG_TAIL),
    ];
    return {
      compressed: kept.join("\n"),
      omittedNote: `${omitted} of ${folded.length} log line(s) omitted (middle)`,
    };
  }

  // Fits the window: only worth substituting if folding removed lines.
  const removed = raw.length - folded.length;
  if (removed === 0) return null;
  return {
    compressed: folded.join("\n"),
    omittedNote: `collapsed ${removed} repeated line(s)`,
  };
}
|
|
77
|
+
|
|
78
|
+
// ── Search / match results ──────────────────────────────────────────────────
|
|
79
|
+
// Dedup exact lines and cap matches per file (the file prefix before the first
|
|
80
|
+
// colon), appending a per-file "+K more" tally.
|
|
81
|
+
/**
 * Compress search / match results.
 *
 * Exact duplicate lines are dropped, and at most SEARCH_MAX_PER_FILE matches
 * are kept per file. The file key is the text before the first colon of a
 * line (a line with no colon is its own key). A "+K more" tally line is
 * appended for every file whose matches were capped.
 *
 * A leading Windows drive prefix ("C:\" or "C:/") is skipped when looking for
 * that colon; otherwise every match on the drive would be attributed to the
 * file "C" and share a single cap.
 *
 * @param content - Raw search output, one match per line.
 * @returns The compressed body and a note of what was capped or deduplicated,
 *   or null when nothing was removed.
 */
function compressSearch(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(content);
  const seen = new Set<string>();
  const perFile = new Map<string, number>();
  const omittedByFile = new Map<string, number>();
  const out: string[] = [];
  // Non-global regex, so .test() carries no state between lines.
  const drivePrefix = /^[A-Za-z]:[\\/]/;

  for (const line of lines) {
    if (seen.has(line)) continue;
    seen.add(line);
    // Start the colon search after a drive prefix so "C:\a.ts:12:x" keys on
    // "C:\a.ts" rather than "C".
    const searchFrom = drivePrefix.test(line) ? 2 : 0;
    const colon = line.indexOf(":", searchFrom);
    const file = colon > 0 ? line.slice(0, colon) : line;
    const count = perFile.get(file) ?? 0;
    if (count < SEARCH_MAX_PER_FILE) {
      perFile.set(file, count + 1);
      out.push(line);
    } else {
      omittedByFile.set(file, (omittedByFile.get(file) ?? 0) + 1);
    }
  }

  let totalOmitted = 0;
  for (const [file, n] of omittedByFile) {
    totalOmitted += n;
    out.push(` … +${n} more match(es) in ${file} — mink retrieve …`);
  }
  const dedupRemoved = lines.length - seen.size;

  // Nothing changed → not worth substituting.
  if (totalOmitted === 0 && dedupRemoved === 0) return null;

  const notes: string[] = [];
  if (totalOmitted > 0) notes.push(`${totalOmitted} match(es) capped`);
  if (dedupRemoved > 0) notes.push(`${dedupRemoved} duplicate(s) removed`);
  return { compressed: out.join("\n"), omittedNote: notes.join("; ") };
}
|
|
117
|
+
|
|
118
|
+
// ── Large file reads ────────────────────────────────────────────────────────
|
|
119
|
+
// Brace-aware structural skeleton (see code-skeleton.ts): declarations and class
|
|
120
|
+
// members with bodies elided. Falls back to a generic text window when the
|
|
121
|
+
// content has no recognisable structure.
|
|
122
|
+
/**
 * Compress a large file read into a structural summary.
 *
 * The content is handed to `extractCodeSkeleton`, with the `markdown` option
 * set for .md / .mdx / .markdown paths. When no skeleton comes back, the
 * generic head+tail text window is used instead.
 *
 * @param filePath - Path of the file that was read; used for the extension
 *   check and in the summary header.
 * @param content - Full file content.
 * @returns A header line followed by the skeleton lines, plus a note; or
 *   whatever `compressText` returns when there is no skeleton.
 */
function compressFile(
  filePath: string,
  content: string
): { compressed: string; omittedNote: string } | null {
  const extension = filePath.slice(filePath.lastIndexOf(".")).toLowerCase();
  const markdown = [".md", ".mdx", ".markdown"].includes(extension);

  const skeleton = extractCodeSkeleton(content, { markdown });
  // No recognisable structure — fall back to a generic text window.
  if (!skeleton) return compressText(content);

  const { lines: signatures, totalLines } = skeleton;
  const summary = [
    `${filePath} — structural summary (${signatures.length} signature(s) of ${totalLines} lines)`,
    ...signatures,
  ];
  return {
    compressed: summary.join("\n"),
    omittedNote: `bodies elided; ${totalLines} lines available via mink retrieve`,
  };
}
|
|
143
|
+
|
|
144
|
+
// ── Structured data ─────────────────────────────────────────────────────────
|
|
145
|
+
// Recursively "crush" JSON: sample any over-long array (at any depth), recursing
|
|
146
|
+
// into the elements that are kept. Records how many elements were dropped.
|
|
147
|
+
/**
 * Recursively sample over-long arrays inside a parsed JSON value.
 *
 * Any array longer than JSON_ARRAY_HEAD + JSON_ARRAY_TAIL (at any depth) is
 * reduced to its first JSON_ARRAY_HEAD and last JSON_ARRAY_TAIL elements with
 * a marker string between them. Kept elements and object property values are
 * crushed recursively. Primitives are returned unchanged.
 *
 * @param value - A value produced by `JSON.parse`.
 * @returns The crushed copy and the total number of array elements dropped
 *   across all depths.
 */
function crush(value: unknown): { value: unknown; omitted: number } {
  if (Array.isArray(value)) {
    let omitted = 0;
    const mapEl = (el: unknown): unknown => {
      const r = crush(el);
      omitted += r.omitted;
      return r.value;
    };
    if (value.length <= JSON_ARRAY_HEAD + JSON_ARRAY_TAIL) {
      return { value: value.map(mapEl), omitted };
    }
    const dropped = value.length - JSON_ARRAY_HEAD - JSON_ARRAY_TAIL;
    omitted += dropped;
    const out = [
      ...value.slice(0, JSON_ARRAY_HEAD).map(mapEl),
      `… ${dropped} element(s) omitted — mink retrieve …`,
      ...value.slice(value.length - JSON_ARRAY_TAIL).map(mapEl),
    ];
    return { value: out, omitted };
  }
  if (value && typeof value === "object") {
    let omitted = 0;
    const entries = Object.entries(value as Record<string, unknown>).map(
      ([key, child]): [string, unknown] => {
        const r = crush(child);
        omitted += r.omitted;
        return [key, r.value];
      }
    );
    // Object.fromEntries defines own data properties. Plain assignment
    // (`out[key] = …`) would route a "__proto__" key — which JSON.parse can
    // legitimately produce — through the prototype setter and drop the entry.
    return { value: Object.fromEntries(entries), omitted };
  }
  return { value, omitted: 0 };
}
|
|
179
|
+
|
|
180
|
+
/**
 * Compress structured JSON output by sampling its over-long arrays.
 *
 * @param content - Text expected to be valid JSON.
 * @returns The crushed value pretty-printed with two-space indentation and a
 *   note of how many array elements were dropped; null when `content` does
 *   not parse or no array needed sampling.
 */
function compressJson(content: string): { compressed: string; omittedNote: string } | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    // Not valid JSON: nothing to substitute.
    return null;
  }

  const crushed = crush(parsed);
  if (crushed.omitted === 0) return null;

  const compressed = JSON.stringify(crushed.value, null, 2);
  const omittedNote = `${crushed.omitted} array element(s) sampled out`;
  return { compressed, omittedNote };
}
|
|
194
|
+
|
|
195
|
+
// ── Generic text ────────────────────────────────────────────────────────────
|
|
196
|
+
/**
 * Compress generic text to a head+tail window.
 *
 * @param content - Arbitrary text.
 * @returns The first TEXT_HEAD and last TEXT_TAIL lines with an omission
 *   marker between them, plus a note; null when the text already fits.
 */
function compressText(content: string): { compressed: string; omittedNote: string } | null {
  const all = toLines(content);
  const omitted = all.length - (TEXT_HEAD + TEXT_TAIL);
  if (omitted <= 0) return null;

  const windowed = [
    ...all.slice(0, TEXT_HEAD),
    omittedMarker(omitted),
    ...all.slice(all.length - TEXT_TAIL),
  ];
  return {
    compressed: windowed.join("\n"),
    omittedNote: `${omitted} of ${all.length} line(s) omitted (middle)`,
  };
}
|
|
207
|
+
|
|
208
|
+
// ── Routing ─────────────────────────────────────────────────────────────────
|
|
209
|
+
|
|
210
|
+
/**
 * Classify a tool output so the matching compressor can be selected.
 *
 * The tool name (case-insensitive) decides first: "read" → file,
 * "grep"/"glob" → search, "bash" → log. For any other tool, content that
 * starts with "{" or "[" and parses as JSON is "json"; otherwise a non-empty
 * `filePath` implies "file", and everything else is "text".
 *
 * @param toolName - Name of the tool that produced the output.
 * @param content - The tool output.
 * @param filePath - Optional path associated with the output.
 * @returns The detected content kind.
 */
export function detectContentKind(
  toolName: string,
  content: string,
  filePath?: string
): ContentKind {
  switch (toolName.toLowerCase()) {
    case "read":
      return "file";
    case "grep":
    case "glob":
      return "search";
    case "bash":
      return "log";
  }

  // Generic / MCP output — sniff for JSON before paying for a full parse.
  const firstChar = content.trimStart().charAt(0);
  if (firstChar === "{" || firstChar === "[") {
    let isJson = true;
    try {
      JSON.parse(content);
    } catch {
      isJson = false;
    }
    if (isJson) return "json";
  }

  // A file path with no tool hint still implies a file read.
  return filePath ? "file" : "text";
}
|
|
233
|
+
|
|
234
|
+
// Compress an output by its detected kind. Returns null when there is nothing
|
|
235
|
+
// worth substituting; the caller then passes the original through unchanged.
|
|
236
|
+
/**
 * Compress a tool output using the compressor for its detected kind.
 *
 * @param toolName - Name of the tool that produced the output.
 * @param content - The tool output.
 * @param filePath - Optional path associated with the output; when absent the
 *   file compressor is given the literal label "file".
 * @returns The detected kind with the compressed body and omission note, or
 *   null when the chosen compressor had nothing worth substituting.
 */
export function compressOutput(
  toolName: string,
  content: string,
  filePath?: string
): CompressionResult | null {
  const kind = detectContentKind(toolName, content, filePath);

  // One lazily evaluated entry per kind; only the selected compressor runs.
  const compressors: Record<
    ContentKind,
    () => { compressed: string; omittedNote: string } | null
  > = {
    search: () => compressSearch(content),
    log: () => compressLog(content),
    file: () => compressFile(filePath ?? "file", content),
    json: () => compressJson(content),
    text: () => compressText(content),
  };

  const body = compressors[kind]();
  return body
    ? { kind, compressed: body.compressed, omittedNote: body.omittedNote }
    : null;
}
|
|
@@ -34,3 +34,43 @@ export function estimateTokens(content: string, filePath: string): number {
|
|
|
34
34
|
}
|
|
35
35
|
return Math.ceil(content.length / ratio);
|
|
36
36
|
}
|
|
37
|
+
|
|
38
|
+
// A deterministic, dependency-free token counter that segments text the way a
|
|
39
|
+
// BPE tokenizer roughly would — on word, number, and punctuation boundaries —
|
|
40
|
+
// rather than dividing by a single flat character ratio. It is more faithful
|
|
41
|
+
// than `estimateTokens` (which exists for the file-index hot path and is pinned
|
|
42
|
+
// by exact-ratio tests), and crucially it does not need a file extension, so it
|
|
43
|
+
// can score arbitrary tool output (logs, search results, command output).
|
|
44
|
+
//
|
|
45
|
+
// It is intentionally NOT a real BPE vocabulary: Mink ships as a lean CLI with a
|
|
46
|
+
// single runtime dependency, and the compression-measurement use only needs a
|
|
47
|
+
// *consistent* estimator to compute an original-minus-compressed delta. The
|
|
48
|
+
// signature is stable, so a real BPE library can be dropped in behind it later
|
|
49
|
+
// without touching call sites.
|
|
50
|
+
//
|
|
51
|
+
// Segmentation: runs of ASCII letters and runs of digits each collapse to a
|
|
52
|
+
// handful of sub-word tokens; every other character (punctuation, symbols,
|
|
53
|
+
// non-ASCII, whitespace) is scored individually. Whitespace usually merges into
|
|
54
|
+
// an adjacent token in real tokenizers, so spaces and tabs cost nothing and only
|
|
55
|
+
// newlines count.
|
|
56
|
+
/**
 * Estimate the token count of arbitrary text without a file extension.
 *
 * The text is segmented into runs of ASCII letters, runs of digits, and
 * single other characters, which are scored as follows:
 *   - letter run: one token per 4 characters, rounded up
 *   - digit run:  one token per 3 characters, rounded up
 *   - space, tab, carriage return: free
 *   - any other single character (newline, punctuation, non-ASCII): 1
 *
 * @param text - Text to score.
 * @returns The estimated token count; 0 for empty input.
 */
export function countTokens(text: string): number {
  if (!text) return 0;

  const isLetter = (c: string): boolean =>
    (c >= "A" && c <= "Z") || (c >= "a" && c <= "z");
  const isDigit = (c: string): boolean => c >= "0" && c <= "9";

  const costOf = (piece: string): number => {
    const lead = piece.charAt(0);
    if (isLetter(lead)) return Math.ceil(piece.length / 4);
    if (isDigit(lead)) return Math.ceil(piece.length / 3);
    // Horizontal whitespace merges into the adjacent token.
    if (piece === " " || piece === "\t" || piece === "\r") return 0;
    return 1;
  };

  const pieces = text.match(/[A-Za-z]+|[0-9]+|[^A-Za-z0-9]/g) ?? [];
  return pieces.reduce((total, piece) => total + costOf(piece), 0);
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
// Reversible-compression cache repository (spec 21 §Reversibility). Stores the
|
|
2
|
+
// byte-exact original of a compressed tool output keyed by a short retrieval
|
|
3
|
+
// token, with a TTL. `get` treats an expired row as a miss and evicts it lazily,
|
|
4
|
+
// so a stale token can never return partial or wrong content.
|
|
5
|
+
//
|
|
6
|
+
// This is a local cache — it is never injected into model context and is not
|
|
7
|
+
// part of the cross-device sync surface. device_id is recorded for audit only.
|
|
8
|
+
|
|
9
|
+
import { randomUUID } from "crypto";
|
|
10
|
+
import type { DbDriver } from "../storage/driver";
|
|
11
|
+
import type { CompressionCacheEntry, ContentKind } from "../types/compression";
|
|
12
|
+
import { openProjectDb } from "../storage/db";
|
|
13
|
+
import { getOrCreateDeviceId } from "../core/device";
|
|
14
|
+
|
|
15
|
+
export interface StoreInput {
  /** Name of the tool whose output is being cached. */
  toolName: string;
  /** Detected kind of the output. */
  contentKind: ContentKind;
  /** The original, uncompressed output to keep retrievable. */
  content: string;
  /** Time-to-live in hours, measured from `now`. */
  retentionHours: number;
  /** Explicit retrieval token; omit to have one generated. */
  token?: string;
  /** Creation instant; omit to use the current time. */
  now?: Date;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Repository over the `compression_cache` table: stores originals of
 * compressed tool outputs under a retrieval token with an expiry timestamp.
 */
export class CompressionCacheRepo {
  constructor(private readonly db: DbDriver) {}

  /** Open the project database for `cwd` and wrap it in a repository. */
  static for(cwd: string): CompressionCacheRepo {
    const db = openProjectDb(cwd);
    return new CompressionCacheRepo(db);
  }

  /**
   * Generate a retrieval token: "mc-" followed by the first eight characters
   * of a random UUID.
   */
  static newToken(): string {
    const suffix = randomUUID().slice(0, 8);
    return `mc-${suffix}`;
  }

  /**
   * Persist an original and return the token it was stored under.
   *
   * @param input - What to store and for how long. A negative
   *   `retentionHours` is clamped to zero.
   * @param deviceId - Recorded on the row; defaults to this device's id.
   * @returns The retrieval token (the supplied one, or a fresh one).
   */
  store(input: StoreInput, deviceId: string = getOrCreateDeviceId()): string {
    const { toolName, contentKind, content, retentionHours } = input;
    const token = input.token ?? CompressionCacheRepo.newToken();
    const created = input.now ?? new Date();
    const ttlMs = Math.max(0, retentionHours) * 3_600_000;
    const createdAt = created.toISOString();
    const expiresAt = new Date(created.getTime() + ttlMs).toISOString();
    const sizeBytes = Buffer.byteLength(content, "utf-8");

    const insert = this.db.prepare(`
      INSERT OR REPLACE INTO compression_cache
      (token, created_at, expires_at, tool_name, content_kind,
      content, size_bytes, device_id)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
    insert.run(
      token, createdAt, expiresAt, toolName, contentKind,
      content, sizeBytes, deviceId
    );
    return token;
  }

  /**
   * Look up a stored original.
   *
   * @param token - Retrieval token.
   * @param now - Instant to evaluate expiry against; defaults to now.
   * @returns The entry, or null when the token is unknown or its expiry
   *   timestamp is at or before `now`. An expired row is deleted on the way
   *   out; a failed delete is ignored and still reports a miss.
   */
  get(token: string, now: Date = new Date()): CompressionCacheEntry | null {
    const found = this.db
      .prepare("SELECT * FROM compression_cache WHERE token = ?")
      .get(token) as Record<string, unknown> | undefined;
    if (!found) return null;

    // Both sides are ISO-8601 strings, so string order is time order.
    const expiresAt = String(found.expires_at);
    const stillLive = expiresAt > now.toISOString();
    if (stillLive) {
      return {
        token: String(found.token),
        createdAt: String(found.created_at),
        expiresAt,
        toolName: String(found.tool_name),
        contentKind: String(found.content_kind) as ContentKind,
        content: String(found.content),
        sizeBytes: Number(found.size_bytes),
      };
    }

    try {
      this.db.prepare("DELETE FROM compression_cache WHERE token = ?").run(token);
    } catch {
      // best effort — a failed eviction still reports a miss below
    }
    return null;
  }

  /**
   * Delete every row whose expiry timestamp is at or before `now`.
   *
   * @returns The number of rows removed.
   */
  evictExpired(now: Date = new Date()): number {
    const cutoff = now.toISOString();
    const outcome = this.db
      .prepare("DELETE FROM compression_cache WHERE expires_at <= ?")
      .run(cutoff);
    return Number(outcome.changes);
  }

  /** Number of rows currently in the cache, expired or not. */
  count(): number {
    const result = this.db
      .prepare("SELECT COUNT(*) AS n FROM compression_cache")
      .get() as { n: number };
    return Number(result.n);
  }
}
|
|
@@ -21,6 +21,9 @@ import type {
|
|
|
21
21
|
TokenLedger,
|
|
22
22
|
LedgerSession,
|
|
23
23
|
LifetimeCounters,
|
|
24
|
+
CompressionEvent,
|
|
25
|
+
CompressionEventInput,
|
|
26
|
+
CompressionLifetime,
|
|
24
27
|
} from "../types/token-ledger";
|
|
25
28
|
import type { SessionSummary } from "../types/session";
|
|
26
29
|
import type { WasteFlag, WastePattern } from "../types/waste-detection";
|
|
@@ -233,6 +236,90 @@ export class TokenLedgerRepo {
|
|
|
233
236
|
});
|
|
234
237
|
}
|
|
235
238
|
|
|
239
|
+
// ── Compression measurement (spec 21) ────────────────────────────────
|
|
240
|
+
|
|
241
|
+
// Record one compression decision and fold it into this device's
|
|
242
|
+
// compression-lifetime aggregates, transactionally so the row and the
|
|
243
|
+
// aggregate never drift. measured savings credits compressed arms only —
|
|
244
|
+
// a holdout arm saves nothing by construction.
|
|
245
|
+
/**
 * Record one compression decision and add it to the device's lifetime
 * aggregates inside a single `db.transaction` callback.
 *
 * Measured savings are `originalTokens - compressedTokens`, floored at zero,
 * for a compressed arm and always zero for a holdout arm.
 *
 * @param event - The decision to record; `id` and `createdAt` are generated
 *   when absent.
 * @param deviceId - Device the event is attributed to; defaults to this
 *   device's id.
 */
recordCompression(
  event: CompressionEventInput,
  deviceId: string = getOrCreateDeviceId()
): void {
  const { toolName, contentKind, originalTokens, compressedTokens } = event;
  const id = event.id ?? crypto.randomUUID();
  const createdAt = event.createdAt ?? new Date().toISOString();
  const isHoldout = Boolean(event.holdout);
  const holdoutFlag = isHoldout ? 1 : 0;
  const measuredSavings = isHoldout
    ? 0
    : Math.max(0, originalTokens - compressedTokens);

  this.db.transaction(() => {
    // 1) The per-event row.
    const insertEvent = this.db.prepare(`
      INSERT OR REPLACE INTO ledger_compressions
      (id, created_at, tool_name, content_kind,
      original_tokens, compressed_tokens, holdout, device_id)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
    insertEvent.run(
      id, createdAt, toolName, contentKind,
      originalTokens, compressedTokens, holdoutFlag, deviceId
    );

    // 2) Upsert the per-device totals, adding this event's contribution.
    const foldIntoLifetime = this.db.prepare(`
      INSERT INTO ledger_compression_lifetime
      (device_id, total_events, total_holdout_events,
      total_original_tokens, total_compressed_tokens, total_measured_savings)
      VALUES (?, ?, ?, ?, ?, ?)
      ON CONFLICT(device_id) DO UPDATE SET
      total_events = ledger_compression_lifetime.total_events + excluded.total_events,
      total_holdout_events = ledger_compression_lifetime.total_holdout_events + excluded.total_holdout_events,
      total_original_tokens = ledger_compression_lifetime.total_original_tokens + excluded.total_original_tokens,
      total_compressed_tokens = ledger_compression_lifetime.total_compressed_tokens + excluded.total_compressed_tokens,
      total_measured_savings = ledger_compression_lifetime.total_measured_savings + excluded.total_measured_savings
    `);
    foldIntoLifetime.run(
      deviceId, 1, holdoutFlag,
      originalTokens, compressedTokens, measuredSavings
    );
  });
}
|
|
282
|
+
|
|
283
|
+
// Project-wide compression aggregates — summed across every device's row.
|
|
284
|
+
/**
 * Sum the compression aggregates over every row of
 * `ledger_compression_lifetime`.
 *
 * @returns The combined totals; every field is 0 when the table is empty.
 */
compressionLifetime(): CompressionLifetime {
  const totals = this.db.prepare(`
    SELECT
    COALESCE(SUM(total_events), 0) AS totalEvents,
    COALESCE(SUM(total_holdout_events), 0) AS totalHoldoutEvents,
    COALESCE(SUM(total_original_tokens), 0) AS totalOriginalTokens,
    COALESCE(SUM(total_compressed_tokens), 0) AS totalCompressedTokens,
    COALESCE(SUM(total_measured_savings), 0) AS totalMeasuredSavings
    FROM ledger_compression_lifetime
  `).get() as Record<string, number> | undefined;

  // Missing row or missing column both read as zero.
  const read = (column: string): number => Number(totals?.[column] ?? 0);

  return {
    totalEvents: read("totalEvents"),
    totalHoldoutEvents: read("totalHoldoutEvents"),
    totalOriginalTokens: read("totalOriginalTokens"),
    totalCompressedTokens: read("totalCompressedTokens"),
    totalMeasuredSavings: read("totalMeasuredSavings"),
  };
}
|
|
302
|
+
|
|
303
|
+
// Recent compression events, newest first. Primarily for inspection/tests.
|
|
304
|
+
/**
 * List recorded compression events, ordered by `created_at` descending.
 *
 * @param limit - Maximum number of events to return (default 100).
 * @returns The events with column names mapped to camelCase and `holdout`
 *   converted from its 0/1 column value to a boolean.
 */
compressionEvents(limit = 100): CompressionEvent[] {
  const statement = this.db.prepare(`
    SELECT id, created_at, tool_name, content_kind,
    original_tokens, compressed_tokens, holdout
    FROM ledger_compressions
    ORDER BY created_at DESC
    LIMIT ?
  `);
  const rows = statement.all(limit) as Array<Record<string, unknown>>;

  const events: CompressionEvent[] = [];
  for (const row of rows) {
    events.push({
      id: String(row.id),
      createdAt: String(row.created_at),
      toolName: String(row.tool_name),
      contentKind: String(row.content_kind),
      originalTokens: Number(row.original_tokens),
      compressedTokens: Number(row.compressed_tokens),
      holdout: Number(row.holdout) === 1,
    });
  }
  return events;
}
|
|
322
|
+
|
|
236
323
|
// ── Helpers ───────────────────────────────────────────────────────────
|
|
237
324
|
|
|
238
325
|
private insertSessionRow(
|
package/src/storage/schema.ts
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
// - `meta(key, value)` holds versioning + migration markers. Keep it small;
|
|
13
13
|
// per-store counters live in `counters` and `ledger_lifetime`.
|
|
14
14
|
|
|
15
|
-
export const SCHEMA_VERSION =
|
|
15
|
+
export const SCHEMA_VERSION = 3;
|
|
16
16
|
|
|
17
17
|
export const INITIAL_SCHEMA = `
|
|
18
18
|
CREATE TABLE IF NOT EXISTS meta (
|
|
@@ -176,6 +176,55 @@ CREATE TABLE IF NOT EXISTS counters (
|
|
|
176
176
|
file_index_hits INTEGER NOT NULL DEFAULT 0,
|
|
177
177
|
file_index_misses INTEGER NOT NULL DEFAULT 0
|
|
178
178
|
);
|
|
179
|
+
|
|
180
|
+
-- Tool-output compression measurement (spec 21). One row per compression
|
|
181
|
+
-- decision: either a compressed arm (compressed_tokens < original_tokens) or a
|
|
182
|
+
-- holdout arm (left uncompressed for control, compressed_tokens = original_tokens).
|
|
183
|
+
-- These are append-only telemetry, independent of session lifecycle, written at
|
|
184
|
+
-- the moment a tool output is processed. New table → applied to existing DBs via
|
|
185
|
+
-- IF NOT EXISTS on the next open.
|
|
186
|
+
CREATE TABLE IF NOT EXISTS ledger_compressions (
|
|
187
|
+
id TEXT PRIMARY KEY,
|
|
188
|
+
created_at TEXT NOT NULL,
|
|
189
|
+
tool_name TEXT NOT NULL,
|
|
190
|
+
content_kind TEXT NOT NULL,
|
|
191
|
+
original_tokens INTEGER NOT NULL DEFAULT 0,
|
|
192
|
+
compressed_tokens INTEGER NOT NULL DEFAULT 0,
|
|
193
|
+
holdout INTEGER NOT NULL DEFAULT 0,
|
|
194
|
+
device_id TEXT NOT NULL
|
|
195
|
+
);
|
|
196
|
+
CREATE INDEX IF NOT EXISTS idx_ledger_compressions_created ON ledger_compressions(created_at);
|
|
197
|
+
CREATE INDEX IF NOT EXISTS idx_ledger_compressions_device ON ledger_compressions(device_id);
|
|
198
|
+
|
|
199
|
+
-- Per-device compression aggregates, summed across devices like ledger_lifetime.
|
|
200
|
+
-- measured_savings only credits compressed arms (holdout arms save nothing by
|
|
201
|
+
-- construction), so the reported figure is a true measured delta, not an estimate.
|
|
202
|
+
CREATE TABLE IF NOT EXISTS ledger_compression_lifetime (
|
|
203
|
+
device_id TEXT PRIMARY KEY,
|
|
204
|
+
total_events INTEGER NOT NULL DEFAULT 0,
|
|
205
|
+
total_holdout_events INTEGER NOT NULL DEFAULT 0,
|
|
206
|
+
total_original_tokens INTEGER NOT NULL DEFAULT 0,
|
|
207
|
+
total_compressed_tokens INTEGER NOT NULL DEFAULT 0,
|
|
208
|
+
total_measured_savings INTEGER NOT NULL DEFAULT 0
|
|
209
|
+
);
|
|
210
|
+
|
|
211
|
+
-- Reversible-compression cache (spec 21 §Reversibility). When a tool output is
|
|
212
|
+
-- compressed, the original is stored here keyed by a short retrieval token and
|
|
213
|
+
-- embedded in the compressed result; "mink retrieve <token>" returns it
|
|
214
|
+
-- byte-exact. Rows expire after the configured retention window; an expired or
|
|
215
|
+
-- unknown token is a graceful miss. This is a local cache, not synced state, so
|
|
216
|
+
-- (unlike other tables) it carries no merge semantics beyond device_id for audit.
|
|
217
|
+
CREATE TABLE IF NOT EXISTS compression_cache (
|
|
218
|
+
token TEXT PRIMARY KEY,
|
|
219
|
+
created_at TEXT NOT NULL,
|
|
220
|
+
expires_at TEXT NOT NULL,
|
|
221
|
+
tool_name TEXT NOT NULL,
|
|
222
|
+
content_kind TEXT NOT NULL,
|
|
223
|
+
content TEXT NOT NULL,
|
|
224
|
+
size_bytes INTEGER NOT NULL,
|
|
225
|
+
device_id TEXT NOT NULL
|
|
226
|
+
);
|
|
227
|
+
CREATE INDEX IF NOT EXISTS idx_compression_cache_expires ON compression_cache(expires_at);
|
|
179
228
|
`;
|
|
180
229
|
|
|
181
230
|
export interface DriverForSchema {
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
// Tool-output compression types (spec 21). The decision/config types live in
|
|
2
|
+
// src/core/compression.ts; these describe the reversible cache and the engine's
|
|
3
|
+
// content-aware output.
|
|
4
|
+
|
|
5
|
+
// What kind of tool output we detected, which selects the compressor and is
|
|
6
|
+
// recorded on the ledger event for later analysis.
|
|
7
|
+
export type ContentKind =
  | "search"
  | "log"
  | "file"
  | "json"
  | "text";
|
|
8
|
+
|
|
9
|
+
// One stored original, retrievable byte-exact via `mink retrieve <token>` until
|
|
10
|
+
// it expires.
|
|
11
|
+
export interface CompressionCacheEntry {
  /** Retrieval token the original is stored under. */
  token: string;
  /** When the entry was stored (timestamp string). */
  createdAt: string;
  /** When the entry stops being retrievable (timestamp string). */
  expiresAt: string;
  /** Name of the tool that produced the output. */
  toolName: string;
  /** Detected kind of the stored output. */
  contentKind: ContentKind;
  /** The original, uncompressed output. */
  content: string;
  /** Size of `content` in bytes, as recorded when it was stored. */
  sizeBytes: number;
}
|
|
20
|
+
|
|
21
|
+
// The result of compressing one output. `compressed` is the body the model will
|
|
22
|
+
// see (sans retrieval affordance, which the pipeline appends); `omittedNote`
|
|
23
|
+
// summarises what was dropped. A compressor returns null when it has nothing
|
|
24
|
+
// worth substituting.
|
|
25
|
+
export interface CompressionResult {
  /** Detected kind of the output; identifies which compressor ran. */
  kind: ContentKind;
  /** Compressed body, without any retrieval footer. */
  compressed: string;
  /** Short human-readable summary of what was dropped. */
  omittedNote: string;
}
|
package/src/types/config.ts
CHANGED
|
@@ -18,6 +18,11 @@ export interface GlobalConfig {
|
|
|
18
18
|
"cli.auto-update-schedule"?: string;
|
|
19
19
|
"cli.auto-update-package-manager"?: string;
|
|
20
20
|
"projects.identity"?: string;
|
|
21
|
+
"compression.enabled"?: string;
|
|
22
|
+
"compression.threshold-tokens"?: string;
|
|
23
|
+
"compression.min-savings-ratio"?: string;
|
|
24
|
+
"compression.holdout-fraction"?: string;
|
|
25
|
+
"compression.retention-hours"?: string;
|
|
21
26
|
}
|
|
22
27
|
|
|
23
28
|
export type ConfigKey = keyof GlobalConfig & string;
|
|
@@ -179,6 +184,41 @@ export const CONFIG_KEYS: ConfigKeyMeta[] = [
|
|
|
179
184
|
"Project identity strategy: path-derived (legacy) or git-remote (stable across machines)",
|
|
180
185
|
scope: "shared",
|
|
181
186
|
},
|
|
187
|
+
{
|
|
188
|
+
key: "compression.enabled",
|
|
189
|
+
default: "false",
|
|
190
|
+
envVar: "MINK_COMPRESSION_ENABLED",
|
|
191
|
+
description: "Enable tool-output compression (spec 21). Off until inline compression ships.",
|
|
192
|
+
scope: "shared",
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
key: "compression.threshold-tokens",
|
|
196
|
+
default: "800",
|
|
197
|
+
envVar: "MINK_COMPRESSION_THRESHOLD_TOKENS",
|
|
198
|
+
description: "Minimum estimated token size before a tool output is eligible for compression",
|
|
199
|
+
scope: "shared",
|
|
200
|
+
},
|
|
201
|
+
{
|
|
202
|
+
key: "compression.min-savings-ratio",
|
|
203
|
+
default: "0.25",
|
|
204
|
+
envVar: "MINK_COMPRESSION_MIN_SAVINGS_RATIO",
|
|
205
|
+
description: "Discard a compression attempt unless it saves at least this fraction of tokens",
|
|
206
|
+
scope: "shared",
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
key: "compression.holdout-fraction",
|
|
210
|
+
default: "0.1",
|
|
211
|
+
envVar: "MINK_COMPRESSION_HOLDOUT_FRACTION",
|
|
212
|
+
description: "Fraction of eligible outputs left uncompressed as a measured control group",
|
|
213
|
+
scope: "shared",
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
key: "compression.retention-hours",
|
|
217
|
+
default: "168",
|
|
218
|
+
envVar: "MINK_COMPRESSION_RETENTION_HOURS",
|
|
219
|
+
description: "How long compressed originals stay retrievable before eviction",
|
|
220
|
+
scope: "shared",
|
|
221
|
+
},
|
|
182
222
|
];
|
|
183
223
|
|
|
184
224
|
const VALID_KEYS = new Set<string>(CONFIG_KEYS.map((k) => k.key));
|
package/src/types/hook-input.ts
CHANGED
|
@@ -19,6 +19,10 @@ export interface PostToolUseInput {
|
|
|
19
19
|
// Edit tool
|
|
20
20
|
old_string?: string;
|
|
21
21
|
new_string?: string;
|
|
22
|
+
// Read tool — present for ranged reads; their output is a slice, so we
|
|
23
|
+
// don't substitute a whole-file summary for them (spec 21 edge case).
|
|
24
|
+
offset?: number;
|
|
25
|
+
limit?: number;
|
|
22
26
|
};
|
|
23
27
|
// Legacy / older hook payload shape — kept for backward compatibility.
|
|
24
28
|
tool_output?: {
|