@drewpayment/mink 0.13.0-beta.1 → 0.13.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -20
- package/dashboard/out/404.html +1 -1
- package/dashboard/out/_next/static/U9AeObddt4LmJkKRZpEfy/_buildManifest.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/activity/page-c285fb9f63d9a82a.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/bugs/page-f3ba7d8f50a96568.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/capture/page-e004bec9af99a244.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/compression/page-21e1af119b3f81ff.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/config/page-d47fb6f588ccfd4b.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/daemon/page-52f913e751416717.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/design/page-53a76719b9af5830.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/discord/page-04502d12c4a96cf7.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/file-index/page-a1bd10e04bb219d9.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/insights/page-7367274963571b6b.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/learning/{page-b766adc79099adb4.js → page-4a03cf7b9a6106fd.js} +1 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/overview/page-38b8430b5c56e807.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/scheduler/page-510b78c9b0a61012.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/sync/page-b7215c2a29a7d7a7.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/tokens/page-1be7ed35a5c9bd39.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/waste/page-24a726e6d63f771a.js +1 -0
- package/dashboard/out/_next/static/chunks/app/(panels)/wiki/page-230d2d1cae6507a8.js +1 -0
- package/dashboard/out/_next/static/chunks/app/layout-70a6d18f8e464960.js +1 -0
- package/dashboard/out/action-log.html +1 -1
- package/dashboard/out/action-log.txt +4 -4
- package/dashboard/out/activity.html +1 -1
- package/dashboard/out/activity.txt +5 -5
- package/dashboard/out/bugs.html +1 -1
- package/dashboard/out/bugs.txt +5 -5
- package/dashboard/out/capture.html +1 -1
- package/dashboard/out/capture.txt +5 -5
- package/dashboard/out/compression.html +1 -0
- package/dashboard/out/compression.txt +24 -0
- package/dashboard/out/config.html +1 -1
- package/dashboard/out/config.txt +5 -5
- package/dashboard/out/daemon.html +1 -1
- package/dashboard/out/daemon.txt +5 -5
- package/dashboard/out/design.html +1 -1
- package/dashboard/out/design.txt +5 -5
- package/dashboard/out/discord.html +1 -1
- package/dashboard/out/discord.txt +5 -5
- package/dashboard/out/file-index.html +1 -1
- package/dashboard/out/file-index.txt +5 -5
- package/dashboard/out/index.html +1 -1
- package/dashboard/out/index.txt +4 -4
- package/dashboard/out/insights.html +1 -1
- package/dashboard/out/insights.txt +5 -5
- package/dashboard/out/learning.html +1 -1
- package/dashboard/out/learning.txt +5 -5
- package/dashboard/out/overview.html +1 -1
- package/dashboard/out/overview.txt +5 -5
- package/dashboard/out/scheduler.html +1 -1
- package/dashboard/out/scheduler.txt +5 -5
- package/dashboard/out/sync.html +1 -1
- package/dashboard/out/sync.txt +5 -5
- package/dashboard/out/tokens.html +1 -1
- package/dashboard/out/tokens.txt +5 -5
- package/dashboard/out/waste.html +1 -1
- package/dashboard/out/waste.txt +5 -5
- package/dashboard/out/wiki.html +1 -1
- package/dashboard/out/wiki.txt +5 -5
- package/dist/cli.bun.js +1300 -908
- package/dist/cli.node.js +1319 -928
- package/package.json +1 -1
- package/src/cli.ts +17 -20
- package/src/commands/init.ts +14 -123
- package/src/commands/post-read.ts +18 -0
- package/src/commands/post-tool.ts +48 -0
- package/src/commands/retrieve.ts +32 -0
- package/src/commands/status.ts +13 -1
- package/src/core/code-skeleton.ts +108 -0
- package/src/core/compress-tool-output.ts +127 -0
- package/src/core/compression.ts +81 -0
- package/src/core/dashboard-api.ts +20 -1
- package/src/core/dashboard-server.ts +3 -0
- package/src/core/hook-output.ts +42 -0
- package/src/core/output-compression.ts +252 -0
- package/src/core/token-estimate.ts +40 -0
- package/src/repositories/compression-cache-repo.ts +97 -0
- package/src/repositories/token-ledger-repo.ts +142 -0
- package/src/storage/schema.ts +50 -1
- package/src/types/compression.ts +29 -0
- package/src/types/config.ts +40 -0
- package/src/types/dashboard.ts +22 -1
- package/src/types/hook-input.ts +4 -0
- package/src/types/token-ledger.ts +55 -0
- package/dashboard/out/_next/static/UWfkbJY4zr9fSt7O-CAge/_buildManifest.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/activity/page-096a97ba539d5323.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/bugs/page-449d31c133432458.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/capture/page-c6617aa0a8a7333e.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/config/page-aa0a0623b3fdd0d8.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/daemon/page-7cd3fac2f5d87a0d.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/design/page-5304675c96b6793b.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/discord/page-9940dde80ba2a69e.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/file-index/page-ecd8a753614e981e.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/insights/page-7909d8beb8d8ef7a.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/overview/page-7a9e86dcde67d6a9.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/scheduler/page-a88f93204c9742a1.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/sync/page-8a9ad4c36aa6cb65.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/tokens/page-8dac7d50d4db2756.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/waste/page-bcf56144faf7d133.js +0 -1
- package/dashboard/out/_next/static/chunks/app/(panels)/wiki/page-a32fdbd0bf58b30b.js +0 -1
- package/dashboard/out/_next/static/chunks/app/layout-782cd26e0ccc4514.js +0 -1
- package/src/core/agent-detect.ts +0 -88
- package/src/core/agent-pi.ts +0 -314
- package/src/core/prompt.ts +0 -27
- /package/dashboard/out/_next/static/{UWfkbJY4zr9fSt7O-CAge → U9AeObddt4LmJkKRZpEfy}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
// Tool-output compression — configuration and decision logic (spec 21).
|
|
2
|
+
//
|
|
3
|
+
// This module is pure: it reads config and makes the eligibility / holdout /
|
|
4
|
+
// min-savings decisions. It never touches the database or the tool payload, so
|
|
5
|
+
// it is trivially testable. Phase 2 wires the actual compressors and the
|
|
6
|
+
// reversible cache on top of these decisions; Phase 1 ships the measurement
|
|
7
|
+
// instrument and leaves `enabled` off by default.
|
|
8
|
+
|
|
9
|
+
import { resolveConfigValue } from "./global-config";
|
|
10
|
+
import type { ConfigKey } from "../types/config";
|
|
11
|
+
|
|
12
|
+
/** Resolved settings for tool-output compression. */
export interface CompressionConfig {
  /** Master switch for compression. */
  enabled: boolean;
  /** Size threshold, in tokens, that an output must reach to be considered. */
  thresholdTokens: number;
  /** Minimum fraction of tokens a compression attempt must save to be kept. */
  minSavingsRatio: number;
  /** Fraction of events held out (left uncompressed) as a control arm. */
  holdoutFraction: number;
  /** Retention period, in hours (config key `compression.retention-hours`). */
  retentionHours: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Read a numeric config value and clamp it into `[min, max]`.
 *
 * Returns `fallback` when the value is absent, blank, or does not parse to a
 * finite number. Blank and null values are rejected explicitly because
 * `Number("")`, `Number("  ")` and `Number(null)` all evaluate to 0, which
 * would otherwise be accepted as a deliberate setting of zero.
 *
 * @param key      Config key to resolve.
 * @param fallback Value used when nothing usable is configured.
 * @param min      Inclusive lower bound applied to a parsed value.
 * @param max      Inclusive upper bound applied to a parsed value.
 */
function numberValue(key: ConfigKey, fallback: number, min: number, max: number): number {
  const raw: unknown = resolveConfigValue(key).value;
  if (raw === undefined || raw === null) return fallback;
  if (typeof raw === "string" && raw.trim() === "") return fallback;
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return fallback;
  return Math.min(max, Math.max(min, parsed));
}
|
|
26
|
+
|
|
27
|
+
/**
 * Resolve every compression setting from config, applying defaults and
 * bounds. `enabled` is true only when the stored value is the string "true".
 */
export function loadCompressionConfig(): CompressionConfig {
  const unbounded = Number.MAX_SAFE_INTEGER;
  const enabled = resolveConfigValue("compression.enabled").value === "true";
  const thresholdTokens = numberValue("compression.threshold-tokens", 800, 0, unbounded);
  const minSavingsRatio = numberValue("compression.min-savings-ratio", 0.25, 0, 1);
  const holdoutFraction = numberValue("compression.holdout-fraction", 0.1, 0, 1);
  const retentionHours = numberValue("compression.retention-hours", 168, 0, unbounded);
  return { enabled, thresholdTokens, minSavingsRatio, holdoutFraction, retentionHours };
}
|
|
36
|
+
|
|
37
|
+
// An output is eligible for compression only once it crosses the size threshold;
|
|
38
|
+
// small outputs are never touched (spec 21 §Eligibility).
|
|
39
|
+
/**
 * True when compression is switched on and the output has reached the
 * configured token threshold (the threshold itself counts as eligible).
 */
export function isEligible(originalTokens: number, config: CompressionConfig): boolean {
  if (!config.enabled) return false;
  return originalTokens >= config.thresholdTokens;
}
|
|
42
|
+
|
|
43
|
+
// A compression attempt is kept only if it saves at least the configured
|
|
44
|
+
// fraction of tokens; otherwise the original is used (spec 21 §Thresholds).
|
|
45
|
+
/**
 * True when the compressed form saves at least `config.minSavingsRatio` of the
 * original token count. A non-positive original never qualifies.
 */
export function meetsMinSavings(
  originalTokens: number,
  compressedTokens: number,
  config: CompressionConfig
): boolean {
  if (originalTokens <= 0) return false;
  const saved = originalTokens - compressedTokens;
  return saved / originalTokens >= config.minSavingsRatio;
}
|
|
54
|
+
|
|
55
|
+
/** Tokens saved by compression, floored at zero when the output grew. */
export function measuredSavings(originalTokens: number, compressedTokens: number): number {
  const delta = originalTokens - compressedTokens;
  return Math.max(0, delta);
}
|
|
58
|
+
|
|
59
|
+
// Deterministic FNV-1a hash → a stable fraction in [0, 1) for a given key. Used
|
|
60
|
+
// so holdout selection is stable per event: the same event always lands in the
|
|
61
|
+
// same arm, which keeps measurement from being double-counted (spec 21 edge
|
|
62
|
+
// case "Holdout selection must be stable for a given event").
|
|
63
|
+
/**
 * 32-bit FNV-1a over the key's UTF-16 code units, scaled into [0, 1).
 * The same key always yields the same fraction.
 */
function hashUnitInterval(key: string): number {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  let state = FNV_OFFSET_BASIS;
  for (let pos = 0; pos < key.length; pos += 1) {
    state = Math.imul(state ^ key.charCodeAt(pos), FNV_PRIME);
  }
  // Reinterpret as unsigned before dividing by 2^32.
  return (state >>> 0) / 2 ** 32;
}

/**
 * Decide whether an event is held out (left uncompressed as a control).
 *
 * The decision depends only on `eventKey` and `fraction`, so callers must pass
 * a key that is stable for the event (e.g. a hash of the original output)
 * rather than something that changes per call, such as a timestamp.
 *
 * @param eventKey Stable identifier for the event.
 * @param fraction Share of events to hold out; `<= 0` holds out nothing and
 *                 `>= 1` holds out everything.
 */
export function selectHoldout(eventKey: string, fraction: number): boolean {
  if (fraction <= 0) return false;
  return fraction >= 1 || hashUnitInterval(eventKey) < fraction;
}
|
|
@@ -80,7 +80,10 @@ import type {
|
|
|
80
80
|
WikiPanelPayload,
|
|
81
81
|
WikiNotePayload,
|
|
82
82
|
WikiTreeNode,
|
|
83
|
+
CompressionPayload,
|
|
83
84
|
} from "../types/dashboard";
|
|
85
|
+
import { TokenLedgerRepo } from "../repositories/token-ledger-repo";
|
|
86
|
+
import { loadCompressionConfig } from "./compression";
|
|
84
87
|
import { isDesignEvalReport } from "../types/design-eval";
|
|
85
88
|
import type { DesignEvalReport } from "../types/design-eval";
|
|
86
89
|
import type { FileIndex, FileIndexEntry } from "../types/file-index";
|
|
@@ -185,7 +188,7 @@ export function loadOverview(cwd: string): OverviewPayload {
|
|
|
185
188
|
checkJsonFile("scheduler-manifest.json", schedulerManifestPath(cwd)),
|
|
186
189
|
];
|
|
187
190
|
|
|
188
|
-
return { project, daemon, summary, stateFiles };
|
|
191
|
+
return { project, daemon, summary, compression: ledger.compression, stateFiles };
|
|
189
192
|
}
|
|
190
193
|
|
|
191
194
|
export function loadTokenLedgerPanel(cwd: string): TokenLedgerPayload {
|
|
@@ -194,6 +197,22 @@ export function loadTokenLedgerPanel(cwd: string): TokenLedgerPayload {
|
|
|
194
197
|
lifetime: ledger.lifetime,
|
|
195
198
|
sessions: ledger.sessions,
|
|
196
199
|
wasteFlags: ledger.wasteFlags ?? [],
|
|
200
|
+
compression: ledger.compression,
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// Dedicated Compression panel (spec 21, phase 4). Reads the measured
|
|
205
|
+
// compression aggregates, the holdout A/B split, per-kind/per-tool breakdowns,
|
|
206
|
+
// and recent events, plus whether compression is currently enabled.
|
|
207
|
+
export function loadCompressionPanel(cwd: string): CompressionPayload {
|
|
208
|
+
const repo = TokenLedgerRepo.for(cwd);
|
|
209
|
+
return {
|
|
210
|
+
enabled: loadCompressionConfig().enabled,
|
|
211
|
+
lifetime: repo.compressionLifetime(),
|
|
212
|
+
arms: repo.compressionArms(),
|
|
213
|
+
byKind: repo.compressionBreakdown("content_kind"),
|
|
214
|
+
byTool: repo.compressionBreakdown("tool_name"),
|
|
215
|
+
recent: repo.compressionEvents(50),
|
|
197
216
|
};
|
|
198
217
|
}
|
|
199
218
|
|
|
@@ -5,6 +5,7 @@ import { projectDir, designCapturesDir } from "./paths";
|
|
|
5
5
|
import {
|
|
6
6
|
loadOverview,
|
|
7
7
|
loadTokenLedgerPanel,
|
|
8
|
+
loadCompressionPanel,
|
|
8
9
|
loadFileIndexPanel,
|
|
9
10
|
loadSchedulerPanel,
|
|
10
11
|
loadLearningMemoryPanel,
|
|
@@ -525,6 +526,8 @@ export async function startDashboardServer(
|
|
|
525
526
|
return jsonResponse(loadOverview(resolvedCwd));
|
|
526
527
|
case "/api/token-ledger":
|
|
527
528
|
return jsonResponse(loadTokenLedgerPanel(resolvedCwd));
|
|
529
|
+
case "/api/compression":
|
|
530
|
+
return jsonResponse(loadCompressionPanel(resolvedCwd));
|
|
528
531
|
case "/api/file-index":
|
|
529
532
|
return jsonResponse(loadFileIndexPanel(resolvedCwd));
|
|
530
533
|
case "/api/scheduler":
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
// Helpers for PostToolUse hooks that replace a tool's result (spec 21). The
|
|
2
|
+
// replacement mechanism is Claude Code's `hookSpecificOutput.updatedToolOutput`
|
|
3
|
+
// (verified against the hooks reference): whatever JSON we print to stdout here
|
|
4
|
+
// substitutes the original output before the model sees it.
|
|
5
|
+
|
|
6
|
+
import type { PostToolUseInput } from "../types/hook-input";
|
|
7
|
+
|
|
8
|
+
// Best-effort extraction of the human-visible text from a PostToolUse payload,
|
|
9
|
+
// across the shapes Claude Code uses for different tools (Bash stdout, Grep
|
|
10
|
+
// content, MCP results). Returns null when no text is present, in which case the
|
|
11
|
+
// caller must not compress (there is nothing to safely capture or replace).
|
|
12
|
+
/**
 * Pull the visible text out of a PostToolUse payload.
 *
 * Lookup order on `tool_response`: string `content`, joined `text` parts of an
 * array `content`, non-empty `stdout`, string `text`, then `file.content`.
 * Falls back to `tool_output.content`, and returns null when none is a string.
 */
export function extractToolOutputText(input: PostToolUseInput): string | null {
  const response = input.tool_response as Record<string, unknown> | undefined;
  if (response) {
    if (typeof response.content === "string") return response.content;

    if (Array.isArray(response.content)) {
      let joined = "";
      for (const part of response.content as Array<{ text?: unknown }>) {
        if (part && typeof part.text === "string") joined += part.text;
      }
      if (joined.length > 0) return joined;
    }

    const stdout = response.stdout;
    if (typeof stdout === "string" && stdout.length > 0) return stdout;
    if (typeof response.text === "string") return response.text;

    const fileField = response.file as { content?: unknown } | undefined;
    if (fileField && typeof fileField.content === "string") return fileField.content;
  }

  const legacy = input.tool_output;
  return legacy && typeof legacy.content === "string" ? legacy.content : null;
}
|
|
31
|
+
|
|
32
|
+
// Print the replacement so Claude Code swaps it in for the original output.
|
|
33
|
+
/**
 * Write a PostToolUse hook response to stdout whose
 * `hookSpecificOutput.updatedToolOutput` carries `text`.
 */
export function emitUpdatedToolOutput(text: string): void {
  const payload = {
    hookSpecificOutput: {
      hookEventName: "PostToolUse",
      updatedToolOutput: text,
    },
  };
  process.stdout.write(JSON.stringify(payload));
}
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
// Tool-output compression engine (spec 21 §Content-Aware Compression).
|
|
2
|
+
//
|
|
3
|
+
// Pure, deterministic, dependency-free. Each compressor takes a tool output
|
|
4
|
+
// string and returns a smaller body plus a note of what was dropped, or null
|
|
5
|
+
// when it has nothing worth substituting. No I/O, no DB, no token counting and
|
|
6
|
+
// no retrieval-affordance text — the pipeline (compress-tool-output.ts) owns
|
|
7
|
+
// eligibility, the holdout, the min-savings gate, the cache, and the
|
|
8
|
+
// "mink retrieve" footer. Keeping this layer pure makes every strategy trivially
|
|
9
|
+
// testable and prompt-cache-stable (identical input → identical output).
|
|
10
|
+
//
|
|
11
|
+
// The "file" strategy does line-based signature extraction; spec 21's phase 3
|
|
12
|
+
// upgrades it to richer AST skeletons behind this same interface.
|
|
13
|
+
|
|
14
|
+
import type { ContentKind, CompressionResult } from "../types/compression";
|
|
15
|
+
import { extractCodeSkeleton } from "./code-skeleton";
|
|
16
|
+
|
|
17
|
+
// Tuning constants. Fixed (not config) so output is deterministic and stable.
|
|
18
|
+
const SEARCH_MAX_PER_FILE = 5;
|
|
19
|
+
const LOG_HEAD = 40;
|
|
20
|
+
const LOG_TAIL = 40;
|
|
21
|
+
const TEXT_HEAD = 30;
|
|
22
|
+
const TEXT_TAIL = 20;
|
|
23
|
+
const JSON_ARRAY_HEAD = 20;
|
|
24
|
+
const JSON_ARRAY_TAIL = 5;
|
|
25
|
+
|
|
26
|
+
// Strip ANSI CSI escape sequences (colour, cursor moves) — pure noise in logs.
|
|
27
|
+
// A CSI sequence is ESC "[" followed by parameter bytes (0-9 ; ?), optional
// intermediate bytes (space through "/"), and one final byte ("@" through "~").
// The ESC is spelled as \x1b so the anchor is explicit; without it the pattern
// would also consume ordinary bracketed text such as "[INFO]".
const ANSI = /\x1b\[[0-9;?]*[ -/]*[@-~]/g;

/**
 * Remove ANSI CSI escape sequences (colour, cursor movement) from `s`.
 * Text that merely contains square brackets is left untouched.
 */
function stripAnsi(s: string): string {
  return s.replace(ANSI, "");
}
|
|
32
|
+
|
|
33
|
+
/** Placeholder line standing in for `n` omitted lines. */
function omittedMarker(n: number): string {
  const noun = n === 1 ? "line" : "lines";
  return ` … ${n} ${noun} omitted — mink retrieve …`;
}
|
|
36
|
+
|
|
37
|
+
// Split into lines, dropping a single trailing empty line (from a final newline)
|
|
38
|
+
// so counts and windows aren't skewed by it.
|
|
39
|
+
/**
 * Split on "\n", discarding the single empty entry produced by a trailing
 * newline. Only one trailing empty entry is removed.
 */
function toLines(content: string): string[] {
  const pieces = content.split("\n");
  return pieces[pieces.length - 1] === "" ? pieces.slice(0, -1) : pieces;
}
|
|
44
|
+
|
|
45
|
+
// ── Logs / command output ───────────────────────────────────────────────────
|
|
46
|
+
// Strip ANSI, collapse runs of identical lines, then keep a head+tail window.
|
|
47
|
+
/**
 * Compress log-like output: strip ANSI sequences, fold each run of identical
 * consecutive lines into "<line> (×N)", then keep only the first LOG_HEAD and
 * last LOG_TAIL folded lines when more than that remain.
 *
 * Returns null when the output fits the window and folding removed nothing.
 */
function compressLog(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(stripAnsi(content));

  // Fold consecutive duplicates.
  const collapsed: string[] = [];
  for (let start = 0; start < lines.length; ) {
    const current = lines[start];
    let end = start + 1;
    while (end < lines.length && lines[end] === current) end += 1;
    const run = end - start;
    collapsed.push(run > 1 ? `${current} (×${run})` : current);
    start = end;
  }

  const windowSize = LOG_HEAD + LOG_TAIL;
  if (collapsed.length > windowSize) {
    const omitted = collapsed.length - windowSize;
    const kept = [
      ...collapsed.slice(0, LOG_HEAD),
      omittedMarker(omitted),
      ...collapsed.slice(collapsed.length - LOG_TAIL),
    ];
    return {
      compressed: kept.join("\n"),
      omittedNote: `${omitted} of ${collapsed.length} log line(s) omitted (middle)`,
    };
  }

  // Fits the window: only worth substituting if folding removed lines.
  const removed = lines.length - collapsed.length;
  if (removed === 0) return null;
  return {
    compressed: collapsed.join("\n"),
    omittedNote: `collapsed ${removed} repeated line(s)`,
  };
}
|
|
77
|
+
|
|
78
|
+
// ── Search / match results ──────────────────────────────────────────────────
|
|
79
|
+
// Dedup exact lines and cap matches per file (the file prefix before the first
|
|
80
|
+
// colon), appending a per-file "+K more" tally.
|
|
81
|
+
/**
 * Compress search output: drop exact duplicate lines and keep at most
 * SEARCH_MAX_PER_FILE lines per file, where the "file" is the text before the
 * first colon (or the whole line when there is no colon past column 0).
 * A "+N more" tally per capped file is appended after the kept lines.
 *
 * Returns null when nothing was capped or deduplicated.
 */
function compressSearch(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(content);
  const unique = new Set<string>();
  const keptPerFile = new Map<string, number>();
  const droppedPerFile = new Map<string, number>();
  const kept: string[] = [];

  for (const line of lines) {
    if (unique.has(line)) continue;
    unique.add(line);

    const sep = line.indexOf(":");
    const file = sep > 0 ? line.slice(0, sep) : line;
    const already = keptPerFile.get(file) ?? 0;
    if (already >= SEARCH_MAX_PER_FILE) {
      droppedPerFile.set(file, (droppedPerFile.get(file) ?? 0) + 1);
      continue;
    }
    keptPerFile.set(file, already + 1);
    kept.push(line);
  }

  let capped = 0;
  droppedPerFile.forEach((n, file) => {
    capped += n;
    kept.push(` … +${n} more match(es) in ${file} — mink retrieve …`);
  });
  const duplicates = lines.length - unique.size;

  // Nothing changed, so there is nothing worth substituting.
  if (capped === 0 && duplicates === 0) return null;

  const notes: string[] = [];
  if (capped > 0) notes.push(`${capped} match(es) capped`);
  if (duplicates > 0) notes.push(`${duplicates} duplicate(s) removed`);
  return { compressed: kept.join("\n"), omittedNote: notes.join("; ") };
}
|
|
117
|
+
|
|
118
|
+
// ── Large file reads ────────────────────────────────────────────────────────
|
|
119
|
+
// Brace-aware structural skeleton (see code-skeleton.ts): declarations and class
|
|
120
|
+
// members with bodies elided. Falls back to a generic text window when the
|
|
121
|
+
// content has no recognisable structure.
|
|
122
|
+
/**
 * Compress a file read into a structural summary built by
 * `extractCodeSkeleton`. Markdown mode is selected from the extension
 * (.md, .mdx, .markdown, case-insensitive). When no skeleton is produced the
 * content goes through `compressText` instead.
 */
function compressFile(
  filePath: string,
  content: string
): { compressed: string; omittedNote: string } | null {
  const dot = filePath.lastIndexOf(".");
  const ext = filePath.slice(dot).toLowerCase();
  const markdown = [".md", ".mdx", ".markdown"].includes(ext);

  const skeleton = extractCodeSkeleton(content, { markdown });
  if (!skeleton) return compressText(content);

  const { lines, totalLines } = skeleton;
  const header = `${filePath} — structural summary (${lines.length} signature(s) of ${totalLines} lines)`;
  return {
    compressed: [header, ...lines].join("\n"),
    omittedNote: `bodies elided; ${totalLines} lines available via mink retrieve`,
  };
}
|
|
143
|
+
|
|
144
|
+
// ── Structured data ─────────────────────────────────────────────────────────
|
|
145
|
+
// Recursively "crush" JSON: sample any over-long array (at any depth), recursing
|
|
146
|
+
// into the elements that are kept. Records how many elements were dropped.
|
|
147
|
+
/**
 * Recursively shrink a parsed JSON value. Any array longer than
 * JSON_ARRAY_HEAD + JSON_ARRAY_TAIL keeps only its head and tail elements with
 * a marker string between them; kept elements and object values are crushed
 * in turn. `omitted` is the total number of array elements dropped.
 */
function crush(value: unknown): { value: unknown; omitted: number } {
  if (Array.isArray(value)) {
    const keep = JSON_ARRAY_HEAD + JSON_ARRAY_TAIL;
    let omitted = 0;
    const crushAll = (items: unknown[]): unknown[] =>
      items.map((item) => {
        const inner = crush(item);
        omitted += inner.omitted;
        return inner.value;
      });

    if (value.length <= keep) {
      return { value: crushAll(value), omitted };
    }

    const dropped = value.length - keep;
    omitted += dropped;
    const head = crushAll(value.slice(0, JSON_ARRAY_HEAD));
    const tail = crushAll(value.slice(value.length - JSON_ARRAY_TAIL));
    const sampled = [...head, `… ${dropped} element(s) omitted — mink retrieve …`, ...tail];
    return { value: sampled, omitted };
  }

  if (value !== null && typeof value === "object") {
    const source = value as Record<string, unknown>;
    const result: Record<string, unknown> = {};
    let omitted = 0;
    for (const key of Object.keys(source)) {
      const inner = crush(source[key]);
      omitted += inner.omitted;
      result[key] = inner.value;
    }
    return { value: result, omitted };
  }

  // Primitives pass through untouched.
  return { value, omitted: 0 };
}
|
|
179
|
+
|
|
180
|
+
/**
 * Compress JSON text by sampling over-long arrays (see `crush`) and
 * re-serialising with two-space indentation. Returns null when the text does
 * not parse or when no array element was dropped.
 */
function compressJson(content: string): { compressed: string; omittedNote: string } | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    return null;
  }

  const crushed = crush(parsed);
  if (crushed.omitted === 0) return null;
  return {
    compressed: JSON.stringify(crushed.value, null, 2),
    omittedNote: `${crushed.omitted} array element(s) sampled out`,
  };
}
|
|
194
|
+
|
|
195
|
+
// ── Generic text ────────────────────────────────────────────────────────────
|
|
196
|
+
/**
 * Generic fallback: keep the first TEXT_HEAD and last TEXT_TAIL lines with an
 * omission marker between them. Returns null when the text already fits.
 */
function compressText(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(content);
  const total = lines.length;
  const omitted = total - TEXT_HEAD - TEXT_TAIL;
  if (omitted <= 0) return null;

  const windowed = lines
    .slice(0, TEXT_HEAD)
    .concat(omittedMarker(omitted), lines.slice(total - TEXT_TAIL));
  return {
    compressed: windowed.join("\n"),
    omittedNote: `${omitted} of ${total} line(s) omitted (middle)`,
  };
}
|
|
207
|
+
|
|
208
|
+
// ── Routing ─────────────────────────────────────────────────────────────────
|
|
209
|
+
|
|
210
|
+
/**
 * Classify a tool output so the matching compressor can be chosen.
 *
 * The tool name decides first (case-insensitive): read → "file",
 * grep/glob → "search", bash → "log". Any other tool is "json" when the
 * content starts with "{" or "[" and parses as JSON, "file" when a file path
 * was supplied, and "text" otherwise.
 */
export function detectContentKind(
  toolName: string,
  content: string,
  filePath?: string
): ContentKind {
  switch (toolName.toLowerCase()) {
    case "read":
      return "file";
    case "grep":
    case "glob":
      return "search";
    case "bash":
      return "log";
    default:
      break;
  }

  // Generic / MCP output: only attempt a parse when it looks like JSON.
  const firstChar = content.trimStart().charAt(0);
  if (firstChar === "{" || firstChar === "[") {
    try {
      JSON.parse(content);
      return "json";
    } catch {
      // not JSON, keep classifying
    }
  }

  return filePath ? "file" : "text";
}
|
|
233
|
+
|
|
234
|
+
// Compress an output by its detected kind. Returns null when there is nothing
|
|
235
|
+
// worth substituting; the caller then passes the original through unchanged.
|
|
236
|
+
/**
 * Detect the content kind and run the matching compressor. Returns null when
 * the compressor has nothing worth substituting, in which case the caller
 * should pass the original through unchanged.
 */
export function compressOutput(
  toolName: string,
  content: string,
  filePath?: string
): CompressionResult | null {
  const kind = detectContentKind(toolName, content, filePath);
  const outcome =
    kind === "search"
      ? compressSearch(content)
      : kind === "log"
        ? compressLog(content)
        : kind === "file"
          ? compressFile(filePath ?? "file", content)
          : kind === "json"
            ? compressJson(content)
            : kind === "text"
              ? compressText(content)
              : null;
  if (!outcome) return null;
  return { kind, compressed: outcome.compressed, omittedNote: outcome.omittedNote };
}
|
|
@@ -34,3 +34,43 @@ export function estimateTokens(content: string, filePath: string): number {
|
|
|
34
34
|
}
|
|
35
35
|
return Math.ceil(content.length / ratio);
|
|
36
36
|
}
|
|
37
|
+
|
|
38
|
+
// A deterministic, dependency-free token counter that segments text the way a
|
|
39
|
+
// BPE tokenizer roughly would — on word, number, and punctuation boundaries —
|
|
40
|
+
// rather than dividing by a single flat character ratio. It is more faithful
|
|
41
|
+
// than `estimateTokens` (which exists for the file-index hot path and is pinned
|
|
42
|
+
// by exact-ratio tests), and crucially it does not need a file extension, so it
|
|
43
|
+
// can score arbitrary tool output (logs, search results, command output).
|
|
44
|
+
//
|
|
45
|
+
// It is intentionally NOT a real BPE vocabulary: Mink ships as a lean CLI with a
|
|
46
|
+
// single runtime dependency, and the compression-measurement use only needs a
|
|
47
|
+
// *consistent* estimator to compute an original-minus-compressed delta. The
|
|
48
|
+
// signature is stable, so a real BPE library can be dropped in behind it later
|
|
49
|
+
// without touching call sites.
|
|
50
|
+
//
|
|
51
|
+
// Segmentation: runs of ASCII letters and runs of digits each collapse to a
|
|
52
|
+
// handful of sub-word tokens; every other character (punctuation, symbols,
|
|
53
|
+
// non-ASCII, whitespace) is scored individually. Whitespace usually merges into
|
|
54
|
+
// an adjacent token in real tokenizers, so spaces and tabs cost nothing and only
|
|
55
|
+
// newlines count.
|
|
56
|
+
/**
 * Estimate a token count without a tokenizer vocabulary.
 *
 * Scoring: a run of ASCII letters costs ceil(length / 4); a run of ASCII
 * digits costs ceil(length / 3); space, tab and carriage return are free;
 * every other UTF-16 code unit (newline, punctuation, symbols, non-ASCII)
 * costs 1.
 */
export function countTokens(text: string): number {
  if (!text) return 0;

  const isLetter = (c: number): boolean => (c >= 65 && c <= 90) || (c >= 97 && c <= 122);
  const isDigit = (c: number): boolean => c >= 48 && c <= 57;

  const end = text.length;
  let tokens = 0;
  let pos = 0;
  while (pos < end) {
    const code = text.charCodeAt(pos);
    const letterRun = isLetter(code);
    if (letterRun || isDigit(code)) {
      // Consume the whole run of the same character class.
      const sameClass = letterRun ? isLetter : isDigit;
      let stop = pos + 1;
      while (stop < end && sameClass(text.charCodeAt(stop))) stop += 1;
      tokens += Math.ceil((stop - pos) / (letterRun ? 4 : 3));
      pos = stop;
      continue;
    }
    // Space (32), tab (9) and CR (13) merge into a neighbouring token.
    if (code !== 32 && code !== 9 && code !== 13) tokens += 1;
    pos += 1;
  }
  return tokens;
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
// Reversible-compression cache repository (spec 21 §Reversibility). Stores the
|
|
2
|
+
// byte-exact original of a compressed tool output keyed by a short retrieval
|
|
3
|
+
// token, with a TTL. `get` treats an expired row as a miss and evicts it lazily,
|
|
4
|
+
// so a stale token can never return partial or wrong content.
|
|
5
|
+
//
|
|
6
|
+
// This is a local cache — it is never injected into model context and is not
|
|
7
|
+
// part of the cross-device sync surface. device_id is recorded for audit only.
|
|
8
|
+
|
|
9
|
+
import { randomUUID } from "crypto";
|
|
10
|
+
import type { DbDriver } from "../storage/driver";
|
|
11
|
+
import type { CompressionCacheEntry, ContentKind } from "../types/compression";
|
|
12
|
+
import { openProjectDb } from "../storage/db";
|
|
13
|
+
import { getOrCreateDeviceId } from "../core/device";
|
|
14
|
+
|
|
15
|
+
/** Arguments for `CompressionCacheRepo.store`. */
export interface StoreInput {
  /** Name of the tool that produced the output. */
  toolName: string;
  /** Detected kind of the output. */
  contentKind: ContentKind;
  /** The original, uncompressed output to keep for retrieval. */
  content: string;
  /** Hours until the entry expires, counted from `now`. */
  retentionHours: number;
  /** Retrieval token to store under; a fresh one is generated when omitted. */
  token?: string;
  /** Clock override for the creation time; defaults to the current time. */
  now?: Date;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Repository over the `compression_cache` table: stores the original of a
 * compressed tool output under a short retrieval token with an expiry time.
 */
export class CompressionCacheRepo {
  constructor(private readonly db: DbDriver) {}

  /** Open the project database for `cwd` and wrap it in a repository. */
  static for(cwd: string): CompressionCacheRepo {
    return new CompressionCacheRepo(openProjectDb(cwd));
  }

  /**
   * Generate a retrieval token: "mc-" plus the first 8 characters of a UUID.
   * NOTE(review): `store` uses INSERT OR REPLACE, so two entries that draw the
   * same token would overwrite one another; confirm this is acceptable.
   */
  static newToken(): string {
    return `mc-${randomUUID().slice(0, 8)}`;
  }

  /**
   * Compute the ISO-8601 expiry for an entry created at `now`.
   *
   * The result is capped at 9999-12-31T23:59:59.999Z. Beyond the Date range
   * `toISOString()` throws a RangeError, and years past 9999 serialise with a
   * "+YYYYYY" prefix that breaks the string comparison used for expiry checks.
   * Negative retention counts as 0, and so does NaN.
   */
  private static expiryIso(now: Date, retentionHours: number): string {
    const hours = Number.isNaN(retentionHours) ? 0 : Math.max(0, retentionHours);
    const latest = Date.UTC(9999, 11, 31, 23, 59, 59, 999);
    const expiryMs = Math.min(now.getTime() + hours * 3_600_000, latest);
    return new Date(expiryMs).toISOString();
  }

  /**
   * Store an original and return its retrieval token. An existing row with the
   * same token is replaced.
   *
   * @param input    What to store and for how long.
   * @param deviceId Device identifier written alongside the row.
   * @returns The token the entry was stored under.
   */
  store(input: StoreInput, deviceId: string = getOrCreateDeviceId()): string {
    const token = input.token ?? CompressionCacheRepo.newToken();
    const now = input.now ?? new Date();
    const createdAt = now.toISOString();
    const expiresAt = CompressionCacheRepo.expiryIso(now, input.retentionHours);
    this.db.prepare(`
      INSERT OR REPLACE INTO compression_cache
        (token, created_at, expires_at, tool_name, content_kind,
         content, size_bytes, device_id)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      token, createdAt, expiresAt, input.toolName, input.contentKind,
      input.content, Buffer.byteLength(input.content, "utf-8"), deviceId
    );
    return token;
  }

  /**
   * Return the stored original, or null if the token is unknown or expired.
   * An expired row is deleted on the way out (lazy eviction); an entry whose
   * expiry equals `now` already counts as expired.
   */
  get(token: string, now: Date = new Date()): CompressionCacheEntry | null {
    const row = this.db
      .prepare("SELECT * FROM compression_cache WHERE token = ?")
      .get(token) as Record<string, unknown> | undefined;
    if (!row) return null;

    // Both sides are ISO-8601 UTC strings, so string order is time order.
    const expiresAt = String(row.expires_at);
    if (expiresAt <= now.toISOString()) {
      try {
        this.db.prepare("DELETE FROM compression_cache WHERE token = ?").run(token);
      } catch {
        // best effort — a failed eviction still reports a miss below
      }
      return null;
    }

    return {
      token: String(row.token),
      createdAt: String(row.created_at),
      expiresAt,
      toolName: String(row.tool_name),
      contentKind: String(row.content_kind) as ContentKind,
      content: String(row.content),
      sizeBytes: Number(row.size_bytes),
    };
  }

  /** Delete every row whose expiry is at or before `now`; returns the count removed. */
  evictExpired(now: Date = new Date()): number {
    const r = this.db
      .prepare("DELETE FROM compression_cache WHERE expires_at <= ?")
      .run(now.toISOString());
    return Number(r.changes);
  }

  /** Number of rows currently in the cache, expired or not. */
  count(): number {
    const row = this.db
      .prepare("SELECT COUNT(*) AS n FROM compression_cache")
      .get() as { n: number };
    return Number(row.n);
  }
}
|