@drewpayment/mink 0.13.0-beta.1 → 0.13.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/README.md +4 -20
  2. package/dashboard/out/404.html +1 -1
  3. package/dashboard/out/_next/static/U9AeObddt4LmJkKRZpEfy/_buildManifest.js +1 -0
  4. package/dashboard/out/_next/static/chunks/app/(panels)/activity/page-c285fb9f63d9a82a.js +1 -0
  5. package/dashboard/out/_next/static/chunks/app/(panels)/bugs/page-f3ba7d8f50a96568.js +1 -0
  6. package/dashboard/out/_next/static/chunks/app/(panels)/capture/page-e004bec9af99a244.js +1 -0
  7. package/dashboard/out/_next/static/chunks/app/(panels)/compression/page-21e1af119b3f81ff.js +1 -0
  8. package/dashboard/out/_next/static/chunks/app/(panels)/config/page-d47fb6f588ccfd4b.js +1 -0
  9. package/dashboard/out/_next/static/chunks/app/(panels)/daemon/page-52f913e751416717.js +1 -0
  10. package/dashboard/out/_next/static/chunks/app/(panels)/design/page-53a76719b9af5830.js +1 -0
  11. package/dashboard/out/_next/static/chunks/app/(panels)/discord/page-04502d12c4a96cf7.js +1 -0
  12. package/dashboard/out/_next/static/chunks/app/(panels)/file-index/page-a1bd10e04bb219d9.js +1 -0
  13. package/dashboard/out/_next/static/chunks/app/(panels)/insights/page-7367274963571b6b.js +1 -0
  14. package/dashboard/out/_next/static/chunks/app/(panels)/learning/{page-b766adc79099adb4.js → page-4a03cf7b9a6106fd.js} +1 -1
  15. package/dashboard/out/_next/static/chunks/app/(panels)/overview/page-38b8430b5c56e807.js +1 -0
  16. package/dashboard/out/_next/static/chunks/app/(panels)/scheduler/page-510b78c9b0a61012.js +1 -0
  17. package/dashboard/out/_next/static/chunks/app/(panels)/sync/page-b7215c2a29a7d7a7.js +1 -0
  18. package/dashboard/out/_next/static/chunks/app/(panels)/tokens/page-1be7ed35a5c9bd39.js +1 -0
  19. package/dashboard/out/_next/static/chunks/app/(panels)/waste/page-24a726e6d63f771a.js +1 -0
  20. package/dashboard/out/_next/static/chunks/app/(panels)/wiki/page-230d2d1cae6507a8.js +1 -0
  21. package/dashboard/out/_next/static/chunks/app/layout-70a6d18f8e464960.js +1 -0
  22. package/dashboard/out/action-log.html +1 -1
  23. package/dashboard/out/action-log.txt +4 -4
  24. package/dashboard/out/activity.html +1 -1
  25. package/dashboard/out/activity.txt +5 -5
  26. package/dashboard/out/bugs.html +1 -1
  27. package/dashboard/out/bugs.txt +5 -5
  28. package/dashboard/out/capture.html +1 -1
  29. package/dashboard/out/capture.txt +5 -5
  30. package/dashboard/out/compression.html +1 -0
  31. package/dashboard/out/compression.txt +24 -0
  32. package/dashboard/out/config.html +1 -1
  33. package/dashboard/out/config.txt +5 -5
  34. package/dashboard/out/daemon.html +1 -1
  35. package/dashboard/out/daemon.txt +5 -5
  36. package/dashboard/out/design.html +1 -1
  37. package/dashboard/out/design.txt +5 -5
  38. package/dashboard/out/discord.html +1 -1
  39. package/dashboard/out/discord.txt +5 -5
  40. package/dashboard/out/file-index.html +1 -1
  41. package/dashboard/out/file-index.txt +5 -5
  42. package/dashboard/out/index.html +1 -1
  43. package/dashboard/out/index.txt +4 -4
  44. package/dashboard/out/insights.html +1 -1
  45. package/dashboard/out/insights.txt +5 -5
  46. package/dashboard/out/learning.html +1 -1
  47. package/dashboard/out/learning.txt +5 -5
  48. package/dashboard/out/overview.html +1 -1
  49. package/dashboard/out/overview.txt +5 -5
  50. package/dashboard/out/scheduler.html +1 -1
  51. package/dashboard/out/scheduler.txt +5 -5
  52. package/dashboard/out/sync.html +1 -1
  53. package/dashboard/out/sync.txt +5 -5
  54. package/dashboard/out/tokens.html +1 -1
  55. package/dashboard/out/tokens.txt +5 -5
  56. package/dashboard/out/waste.html +1 -1
  57. package/dashboard/out/waste.txt +5 -5
  58. package/dashboard/out/wiki.html +1 -1
  59. package/dashboard/out/wiki.txt +5 -5
  60. package/dist/cli.bun.js +1300 -908
  61. package/dist/cli.node.js +1319 -928
  62. package/package.json +1 -1
  63. package/src/cli.ts +17 -20
  64. package/src/commands/init.ts +14 -123
  65. package/src/commands/post-read.ts +18 -0
  66. package/src/commands/post-tool.ts +48 -0
  67. package/src/commands/retrieve.ts +32 -0
  68. package/src/commands/status.ts +13 -1
  69. package/src/core/code-skeleton.ts +108 -0
  70. package/src/core/compress-tool-output.ts +127 -0
  71. package/src/core/compression.ts +81 -0
  72. package/src/core/dashboard-api.ts +20 -1
  73. package/src/core/dashboard-server.ts +3 -0
  74. package/src/core/hook-output.ts +42 -0
  75. package/src/core/output-compression.ts +252 -0
  76. package/src/core/token-estimate.ts +40 -0
  77. package/src/repositories/compression-cache-repo.ts +97 -0
  78. package/src/repositories/token-ledger-repo.ts +142 -0
  79. package/src/storage/schema.ts +50 -1
  80. package/src/types/compression.ts +29 -0
  81. package/src/types/config.ts +40 -0
  82. package/src/types/dashboard.ts +22 -1
  83. package/src/types/hook-input.ts +4 -0
  84. package/src/types/token-ledger.ts +55 -0
  85. package/dashboard/out/_next/static/UWfkbJY4zr9fSt7O-CAge/_buildManifest.js +0 -1
  86. package/dashboard/out/_next/static/chunks/app/(panels)/activity/page-096a97ba539d5323.js +0 -1
  87. package/dashboard/out/_next/static/chunks/app/(panels)/bugs/page-449d31c133432458.js +0 -1
  88. package/dashboard/out/_next/static/chunks/app/(panels)/capture/page-c6617aa0a8a7333e.js +0 -1
  89. package/dashboard/out/_next/static/chunks/app/(panels)/config/page-aa0a0623b3fdd0d8.js +0 -1
  90. package/dashboard/out/_next/static/chunks/app/(panels)/daemon/page-7cd3fac2f5d87a0d.js +0 -1
  91. package/dashboard/out/_next/static/chunks/app/(panels)/design/page-5304675c96b6793b.js +0 -1
  92. package/dashboard/out/_next/static/chunks/app/(panels)/discord/page-9940dde80ba2a69e.js +0 -1
  93. package/dashboard/out/_next/static/chunks/app/(panels)/file-index/page-ecd8a753614e981e.js +0 -1
  94. package/dashboard/out/_next/static/chunks/app/(panels)/insights/page-7909d8beb8d8ef7a.js +0 -1
  95. package/dashboard/out/_next/static/chunks/app/(panels)/overview/page-7a9e86dcde67d6a9.js +0 -1
  96. package/dashboard/out/_next/static/chunks/app/(panels)/scheduler/page-a88f93204c9742a1.js +0 -1
  97. package/dashboard/out/_next/static/chunks/app/(panels)/sync/page-8a9ad4c36aa6cb65.js +0 -1
  98. package/dashboard/out/_next/static/chunks/app/(panels)/tokens/page-8dac7d50d4db2756.js +0 -1
  99. package/dashboard/out/_next/static/chunks/app/(panels)/waste/page-bcf56144faf7d133.js +0 -1
  100. package/dashboard/out/_next/static/chunks/app/(panels)/wiki/page-a32fdbd0bf58b30b.js +0 -1
  101. package/dashboard/out/_next/static/chunks/app/layout-782cd26e0ccc4514.js +0 -1
  102. package/src/core/agent-detect.ts +0 -88
  103. package/src/core/agent-pi.ts +0 -314
  104. package/src/core/prompt.ts +0 -27
  105. /package/dashboard/out/_next/static/{UWfkbJY4zr9fSt7O-CAge → U9AeObddt4LmJkKRZpEfy}/_ssgManifest.js +0 -0
@@ -0,0 +1,81 @@
1
+ // Tool-output compression — configuration and decision logic (spec 21).
2
+ //
3
+ // This module is pure: it reads config and makes the eligibility / holdout /
4
+ // min-savings decisions. It never touches the database or the tool payload, so
5
+ // it is trivially testable. Phase 2 wires the actual compressors and the
6
+ // reversible cache on top of these decisions; Phase 1 ships the measurement
7
+ // instrument and leaves `enabled` off by default.
8
+
9
+ import { resolveConfigValue } from "./global-config";
10
+ import type { ConfigKey } from "../types/config";
11
+
12
// Resolved tool-output compression settings, as returned by
// `loadCompressionConfig`.
export interface CompressionConfig {
  // Master switch: `isEligible` returns false for every output while this is off.
  enabled: boolean;
  // Minimum original token count at which an output becomes eligible.
  thresholdTokens: number;
  // Minimum fraction of tokens (0–1) a compression attempt must save to be kept.
  minSavingsRatio: number;
  // Fraction (0–1) of events left uncompressed as a control — presumably the
  // value callers pass to `selectHoldout`; confirm against the pipeline.
  holdoutFraction: number;
  // Retention period in hours — presumably the TTL of the reversible cache;
  // confirm against the caller.
  retentionHours: number;
}
19
+
20
// Resolve a numeric config value and clamp it to [min, max].
//
// Returns `fallback` when the resolved value is missing, blank, or not a finite
// number. Blank and null values need an explicit check: `Number("")`,
// `Number("   ")` and `Number(null)` all coerce to 0, which would otherwise be
// accepted as a real setting (e.g. a threshold of 0 makes every output eligible).
function numberValue(key: ConfigKey, fallback: number, min: number, max: number): number {
  const raw: unknown = resolveConfigValue(key).value;
  if (raw === null || raw === undefined) return fallback;
  if (typeof raw === "string" && raw.trim() === "") return fallback;
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return fallback;
  return Math.min(max, Math.max(min, parsed));
}
26
+
27
// Build the full compression config from resolved config values. `enabled` is
// true only for the exact string "true"; each numeric setting is clamped to its
// valid range and falls back to its default when unusable.
export function loadCompressionConfig(): CompressionConfig {
  const enabled = resolveConfigValue("compression.enabled").value === "true";
  const thresholdTokens = numberValue(
    "compression.threshold-tokens",
    800,
    0,
    Number.MAX_SAFE_INTEGER
  );
  const minSavingsRatio = numberValue("compression.min-savings-ratio", 0.25, 0, 1);
  const holdoutFraction = numberValue("compression.holdout-fraction", 0.1, 0, 1);
  const retentionHours = numberValue(
    "compression.retention-hours",
    168,
    0,
    Number.MAX_SAFE_INTEGER
  );
  return { enabled, thresholdTokens, minSavingsRatio, holdoutFraction, retentionHours };
}
36
+
37
// Eligibility gate (spec 21 §Eligibility): compression must be switched on and
// the output must have reached the size threshold. Anything smaller is left
// untouched.
export function isEligible(originalTokens: number, config: CompressionConfig): boolean {
  if (!config.enabled) return false;
  return originalTokens >= config.thresholdTokens;
}
42
+
43
// Min-savings gate (spec 21 §Thresholds): a compression attempt is only worth
// keeping when the fraction of tokens it removes reaches the configured ratio.
// A non-positive original size can never qualify.
export function meetsMinSavings(
  originalTokens: number,
  compressedTokens: number,
  config: CompressionConfig
): boolean {
  if (originalTokens <= 0) return false;
  const tokensSaved = originalTokens - compressedTokens;
  const savedFraction = tokensSaved / originalTokens;
  return savedFraction >= config.minSavingsRatio;
}
54
+
55
// Tokens saved by a compression, floored at zero so an output that grew is
// reported as no savings rather than a negative number.
export function measuredSavings(originalTokens: number, compressedTokens: number): number {
  const delta = originalTokens - compressedTokens;
  return Math.max(delta, 0);
}
58
+
59
// Map a string key to a stable fraction in [0, 1) using a 32-bit FNV-1a hash
// over the key's UTF-16 code units. The same key always yields the same
// fraction, which is what keeps holdout selection stable per event and stops a
// measurement from being double-counted (spec 21 edge case "Holdout selection
// must be stable for a given event").
function hashUnitInterval(key: string): number {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;

  let hash = FNV_OFFSET_BASIS;
  for (let idx = 0; idx < key.length; idx += 1) {
    // XOR in the next code unit, then multiply with 32-bit wraparound.
    hash = Math.imul(hash ^ key.charCodeAt(idx), FNV_PRIME);
  }

  // Reinterpret as unsigned 32-bit and scale into [0, 1).
  return (hash >>> 0) / 2 ** 32;
}
72
+
73
// Decide whether an event belongs to the holdout arm (left uncompressed as a
// control). The decision depends only on `eventKey`, so callers must supply a
// key that is stable for the event — e.g. a hash of the original output — and
// never something that varies between invocations such as a timestamp.
export function selectHoldout(eventKey: string, fraction: number): boolean {
  // Degenerate fractions short-circuit without hashing.
  if (fraction >= 1) return true;
  if (fraction <= 0) return false;
  const position = hashUnitInterval(eventKey);
  return position < fraction;
}
@@ -80,7 +80,10 @@ import type {
80
80
  WikiPanelPayload,
81
81
  WikiNotePayload,
82
82
  WikiTreeNode,
83
+ CompressionPayload,
83
84
  } from "../types/dashboard";
85
+ import { TokenLedgerRepo } from "../repositories/token-ledger-repo";
86
+ import { loadCompressionConfig } from "./compression";
84
87
  import { isDesignEvalReport } from "../types/design-eval";
85
88
  import type { DesignEvalReport } from "../types/design-eval";
86
89
  import type { FileIndex, FileIndexEntry } from "../types/file-index";
@@ -185,7 +188,7 @@ export function loadOverview(cwd: string): OverviewPayload {
185
188
  checkJsonFile("scheduler-manifest.json", schedulerManifestPath(cwd)),
186
189
  ];
187
190
 
188
- return { project, daemon, summary, stateFiles };
191
+ return { project, daemon, summary, compression: ledger.compression, stateFiles };
189
192
  }
190
193
 
191
194
  export function loadTokenLedgerPanel(cwd: string): TokenLedgerPayload {
@@ -194,6 +197,22 @@ export function loadTokenLedgerPanel(cwd: string): TokenLedgerPayload {
194
197
  lifetime: ledger.lifetime,
195
198
  sessions: ledger.sessions,
196
199
  wasteFlags: ledger.wasteFlags ?? [],
200
+ compression: ledger.compression,
201
+ };
202
+ }
203
+
204
// Payload for the dedicated Compression panel (spec 21, phase 4): whether
// compression is currently enabled, the measured lifetime aggregates, the
// holdout A/B arms, breakdowns by content kind and by tool, and the 50 most
// recent events.
export function loadCompressionPanel(cwd: string): CompressionPayload {
  const ledger = TokenLedgerRepo.for(cwd);
  const enabled = loadCompressionConfig().enabled;
  const lifetime = ledger.compressionLifetime();
  const arms = ledger.compressionArms();
  const byKind = ledger.compressionBreakdown("content_kind");
  const byTool = ledger.compressionBreakdown("tool_name");
  const recent = ledger.compressionEvents(50);
  return { enabled, lifetime, arms, byKind, byTool, recent };
}
199
218
 
@@ -5,6 +5,7 @@ import { projectDir, designCapturesDir } from "./paths";
5
5
  import {
6
6
  loadOverview,
7
7
  loadTokenLedgerPanel,
8
+ loadCompressionPanel,
8
9
  loadFileIndexPanel,
9
10
  loadSchedulerPanel,
10
11
  loadLearningMemoryPanel,
@@ -525,6 +526,8 @@ export async function startDashboardServer(
525
526
  return jsonResponse(loadOverview(resolvedCwd));
526
527
  case "/api/token-ledger":
527
528
  return jsonResponse(loadTokenLedgerPanel(resolvedCwd));
529
+ case "/api/compression":
530
+ return jsonResponse(loadCompressionPanel(resolvedCwd));
528
531
  case "/api/file-index":
529
532
  return jsonResponse(loadFileIndexPanel(resolvedCwd));
530
533
  case "/api/scheduler":
@@ -0,0 +1,42 @@
1
+ // Helpers for PostToolUse hooks that replace a tool's result (spec 21). The
2
+ // replacement mechanism is Claude Code's `hookSpecificOutput.updatedToolOutput`
3
+ // (verified against the hooks reference): whatever JSON we print to stdout here
4
+ // substitutes the original output before the model sees it.
5
+
6
+ import type { PostToolUseInput } from "../types/hook-input";
7
+
8
// Pull the human-visible text out of a PostToolUse payload, trying in order the
// shapes Claude Code uses for different tools: `tool_response.content` (string,
// or an array of `{ text }` parts), `tool_response.stdout`,
// `tool_response.text`, `tool_response.file.content`, then the
// `tool_output.content` field. Returns null when none of them carries text —
// the caller must then leave the output alone, because there is nothing it can
// safely capture or replace.
export function extractToolOutputText(input: PostToolUseInput): string | null {
  const response = input.tool_response as Record<string, unknown> | undefined;

  if (response) {
    const { content } = response;
    if (typeof content === "string") return content;

    if (Array.isArray(content)) {
      // Concatenate every non-empty string `text` part, skipping anything else.
      const pieces: string[] = [];
      for (const part of content as Array<{ text?: unknown }>) {
        if (part && typeof part.text === "string" && part.text.length > 0) {
          pieces.push(part.text);
        }
      }
      if (pieces.length > 0) return pieces.join("");
    }

    // Empty stdout is skipped so the later shapes still get a chance.
    const { stdout } = response;
    if (typeof stdout === "string" && stdout.length > 0) return stdout;

    if (typeof response.text === "string") return response.text;

    const fileField = response.file as { content?: unknown } | undefined;
    if (fileField && typeof fileField.content === "string") return fileField.content;
  }

  const output = input.tool_output;
  if (output && typeof output.content === "string") return output.content;

  return null;
}
31
+
32
// Write the replacement to stdout as a single JSON document (no trailing
// newline) so Claude Code swaps it in for the tool's original output.
export function emitUpdatedToolOutput(text: string): void {
  const payload = {
    hookSpecificOutput: {
      hookEventName: "PostToolUse",
      updatedToolOutput: text,
    },
  };
  process.stdout.write(JSON.stringify(payload));
}
@@ -0,0 +1,252 @@
1
+ // Tool-output compression engine (spec 21 §Content-Aware Compression).
2
+ //
3
+ // Pure, deterministic, dependency-free. Each compressor takes a tool output
4
+ // string and returns a smaller body plus a note of what was dropped, or null
5
+ // when it has nothing worth substituting. No I/O, no DB, no token counting and
6
+ // no retrieval-affordance text — the pipeline (compress-tool-output.ts) owns
7
+ // eligibility, the holdout, the min-savings gate, the cache, and the
8
+ // "mink retrieve" footer. Keeping this layer pure makes every strategy trivially
9
+ // testable and prompt-cache-stable (identical input → identical output).
10
+ //
11
+ // The "file" strategy does line-based signature extraction; spec 21's phase 3
12
+ // upgrades it to richer AST skeletons behind this same interface.
13
+
14
+ import type { ContentKind, CompressionResult } from "../types/compression";
15
+ import { extractCodeSkeleton } from "./code-skeleton";
16
+
17
// Tuning constants. Fixed (not config) so output is deterministic and stable.
// Search results: maximum matches kept per file prefix before the rest are tallied.
const SEARCH_MAX_PER_FILE = 5;
// Logs: number of (collapsed) lines kept at the start and end of the window.
const LOG_HEAD = 40;
const LOG_TAIL = 40;
// Generic text: number of lines kept at the start and end of the window.
const TEXT_HEAD = 30;
const TEXT_TAIL = 20;
// JSON: number of elements kept at the start and end of an over-long array.
const JSON_ARRAY_HEAD = 20;
const JSON_ARRAY_TAIL = 5;
25
+
26
// Strip ANSI CSI escape sequences (colour, cursor moves) — pure noise in logs.
//
// A CSI sequence is ESC `[`, then parameter bytes (`0-9 ; ?`), then optional
// intermediate bytes (space through `/`), then one final byte (`@` through `~`).
// The ESC byte is written as an explicit `\x1b` escape: without that anchor the
// pattern matches any `[` followed by a final byte and mangles ordinary text
// such as `[INFO]` or `arr[i]`.
const ANSI = /\x1b\[[0-9;?]*[ -/]*[@-~]/g;

// Return `s` with every ANSI CSI escape sequence removed; all other text,
// including literal square brackets, is left untouched.
function stripAnsi(s: string): string {
  return s.replace(ANSI, "");
}
32
+
33
// Placeholder line inserted where `n` lines were cut from the middle of an
// output; "line" is singular only when exactly one line was omitted.
function omittedMarker(n: number): string {
  const noun = n === 1 ? "line" : "lines";
  return ` … ${n} ${noun} omitted — mink retrieve …`;
}
36
+
37
// Split on "\n" and discard one trailing empty entry (the artefact of a final
// newline) so line counts and head/tail windows are not skewed by it. Only a
// single trailing entry is dropped; interior and additional blank lines stay.
function toLines(content: string): string[] {
  const parts = content.split("\n");
  const endsWithEmpty = parts[parts.length - 1] === "";
  return endsWithEmpty ? parts.slice(0, -1) : parts;
}
44
+
45
// ── Logs / command output ───────────────────────────────────────────────────
// Three steps: strip ANSI escapes, fold each run of identical consecutive lines
// into one "<line> (×N)" entry, then keep only a head + tail window when the
// folded log is still longer than the window. Returns null when the log fits
// the window and folding removed nothing, i.e. there is nothing to substitute.
function compressLog(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(stripAnsi(content));

  // Fold consecutive duplicates.
  const collapsed: string[] = [];
  for (let start = 0; start < lines.length; ) {
    let end = start + 1;
    while (end < lines.length && lines[end] === lines[start]) end += 1;
    const repeats = end - start;
    collapsed.push(repeats > 1 ? `${lines[start]} (×${repeats})` : lines[start]);
    start = end;
  }

  const windowSize = LOG_HEAD + LOG_TAIL;
  if (collapsed.length > windowSize) {
    // Too long even after folding: keep the ends and mark the gap.
    const omitted = collapsed.length - windowSize;
    const windowed = [
      ...collapsed.slice(0, LOG_HEAD),
      omittedMarker(omitted),
      ...collapsed.slice(collapsed.length - LOG_TAIL),
    ];
    return {
      compressed: windowed.join("\n"),
      omittedNote: `${omitted} of ${collapsed.length} log line(s) omitted (middle)`,
    };
  }

  // Fits the window: only worth substituting if folding actually removed lines.
  const removed = lines.length - collapsed.length;
  if (removed === 0) return null;
  return {
    compressed: collapsed.join("\n"),
    omittedNote: `collapsed ${removed} repeated line(s)`,
  };
}
77
+
78
// ── Search / match results ──────────────────────────────────────────────────
// Drops exact duplicate lines and keeps at most SEARCH_MAX_PER_FILE matches per
// file, where the "file" is the text before the first colon (or the whole line
// when there is no colon past position 0). Capped files get a "+K more" tally
// line appended after all kept matches. Returns null when nothing was dropped.
function compressSearch(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(content);
  const unique = new Set<string>();
  const keptPerFile = new Map<string, number>();
  const cappedPerFile = new Map<string, number>();
  const kept: string[] = [];
  let duplicates = 0;

  for (const line of lines) {
    if (unique.has(line)) {
      duplicates += 1;
      continue;
    }
    unique.add(line);

    const separator = line.indexOf(":");
    const fileKey = separator > 0 ? line.slice(0, separator) : line;
    const keptSoFar = keptPerFile.get(fileKey) ?? 0;

    if (keptSoFar >= SEARCH_MAX_PER_FILE) {
      cappedPerFile.set(fileKey, (cappedPerFile.get(fileKey) ?? 0) + 1);
      continue;
    }
    keptPerFile.set(fileKey, keptSoFar + 1);
    kept.push(line);
  }

  // Tally lines follow the kept matches, in the order files first hit the cap.
  let capped = 0;
  for (const [fileKey, extra] of cappedPerFile) {
    capped += extra;
    kept.push(` … +${extra} more match(es) in ${fileKey} — mink retrieve …`);
  }

  // Nothing changed → not worth substituting.
  if (capped === 0 && duplicates === 0) return null;

  const notes: string[] = [];
  if (capped > 0) notes.push(`${capped} match(es) capped`);
  if (duplicates > 0) notes.push(`${duplicates} duplicate(s) removed`);
  return { compressed: kept.join("\n"), omittedNote: notes.join("; ") };
}
117
+
118
// ── Large file reads ────────────────────────────────────────────────────────
// Summarises a file as a structural skeleton (see code-skeleton.ts) under a
// one-line header naming the file and its size. Markdown handling is selected
// by extension. When no skeleton can be extracted the generic text window is
// used instead.
function compressFile(
  filePath: string,
  content: string
): { compressed: string; omittedNote: string } | null {
  const extension = filePath.slice(filePath.lastIndexOf(".")).toLowerCase();
  const markdown = [".md", ".mdx", ".markdown"].includes(extension);

  const skeleton = extractCodeSkeleton(content, { markdown });
  // No recognisable structure — fall back to a generic text window.
  if (!skeleton) return compressText(content);

  const { lines: signatures, totalLines } = skeleton;
  const header = `${filePath} — structural summary (${signatures.length} signature(s) of ${totalLines} lines)`;
  return {
    compressed: [header, ...signatures].join("\n"),
    omittedNote: `bodies elided; ${totalLines} lines available via mink retrieve`,
  };
}
143
+
144
// ── Structured data ─────────────────────────────────────────────────────────
// Recursively shrink a parsed JSON value. Any array longer than
// JSON_ARRAY_HEAD + JSON_ARRAY_TAIL, at any depth, is reduced to its head and
// tail with a marker string in between; the elements that survive are crushed
// in turn. `omitted` is the total number of array elements dropped across the
// whole tree. Primitives and null pass through unchanged.
function crush(value: unknown): { value: unknown; omitted: number } {
  if (Array.isArray(value)) {
    let omitted = 0;
    // Crush a slice of elements, folding their drop counts into `omitted`.
    const crushAll = (items: unknown[]): unknown[] =>
      items.map((item) => {
        const inner = crush(item);
        omitted += inner.omitted;
        return inner.value;
      });

    const keep = JSON_ARRAY_HEAD + JSON_ARRAY_TAIL;
    if (value.length <= keep) {
      const kept = crushAll(value);
      return { value: kept, omitted };
    }

    const dropped = value.length - keep;
    omitted += dropped;
    const head = crushAll(value.slice(0, JSON_ARRAY_HEAD));
    const tail = crushAll(value.slice(value.length - JSON_ARRAY_TAIL));
    const sampled = [...head, `… ${dropped} element(s) omitted — mink retrieve …`, ...tail];
    return { value: sampled, omitted };
  }

  if (value && typeof value === "object") {
    let omitted = 0;
    const crushed: Record<string, unknown> = {};
    for (const [key, child] of Object.entries(value as Record<string, unknown>)) {
      const inner = crush(child);
      omitted += inner.omitted;
      crushed[key] = inner.value;
    }
    return { value: crushed, omitted };
  }

  return { value, omitted: 0 };
}
179
+
180
// Parse `content` as JSON and sample its over-long arrays. Returns null when
// the content is not valid JSON or when no array element was dropped; otherwise
// the crushed value re-serialised with two-space indentation.
function compressJson(content: string): { compressed: string; omittedNote: string } | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    return null;
  }

  const crushed = crush(parsed);
  if (crushed.omitted === 0) return null;

  return {
    compressed: JSON.stringify(crushed.value, null, 2),
    omittedNote: `${crushed.omitted} array element(s) sampled out`,
  };
}
194
+
195
// ── Generic text ────────────────────────────────────────────────────────────
// Keep the first TEXT_HEAD and last TEXT_TAIL lines with an omission marker in
// between. Returns null when the text already fits inside that window.
function compressText(content: string): { compressed: string; omittedNote: string } | null {
  const lines = toLines(content);
  const windowSize = TEXT_HEAD + TEXT_TAIL;
  if (lines.length <= windowSize) return null;

  const omitted = lines.length - windowSize;
  const windowed = lines
    .slice(0, TEXT_HEAD)
    .concat(omittedMarker(omitted), lines.slice(lines.length - TEXT_TAIL));

  return {
    compressed: windowed.join("\n"),
    omittedNote: `${omitted} of ${lines.length} line(s) omitted (middle)`,
  };
}
207
+
208
+ // ── Routing ─────────────────────────────────────────────────────────────────
209
+
210
// Classify a tool output so the matching compressor can be chosen. Known tool
// names decide first (case-insensitively): Read → "file", Grep/Glob → "search",
// Bash → "log". Anything else is sniffed: content that starts with `{` or `[`
// and parses as JSON is "json"; otherwise a supplied file path means "file",
// and everything left over is "text".
export function detectContentKind(
  toolName: string,
  content: string,
  filePath?: string
): ContentKind {
  switch (toolName.toLowerCase()) {
    case "read":
      return "file";
    case "grep":
    case "glob":
      return "search";
    case "bash":
      return "log";
  }

  // Generic / MCP output — only attempt a full parse when it looks like JSON.
  const firstChar = content.trimStart().charAt(0);
  if (firstChar === "{" || firstChar === "[") {
    try {
      JSON.parse(content);
      return "json";
    } catch {
      // not JSON — fall through
    }
  }

  // A file path with no tool hint still implies a file read.
  return filePath ? "file" : "text";
}
233
+
234
// Entry point: detect the content kind, run the matching compressor, and tag
// the result with that kind. Returns null when the compressor found nothing
// worth substituting, in which case the caller passes the original through
// unchanged.
export function compressOutput(
  toolName: string,
  content: string,
  filePath?: string
): CompressionResult | null {
  const kind = detectContentKind(toolName, content, filePath);

  const runCompressor = (): { compressed: string; omittedNote: string } | null => {
    switch (kind) {
      case "search":
        return compressSearch(content);
      case "log":
        return compressLog(content);
      case "file":
        return compressFile(filePath ?? "file", content);
      case "json":
        return compressJson(content);
      case "text":
        return compressText(content);
      default:
        return null;
    }
  };

  const outcome = runCompressor();
  if (!outcome) return null;
  return { kind, compressed: outcome.compressed, omittedNote: outcome.omittedNote };
}
@@ -34,3 +34,43 @@ export function estimateTokens(content: string, filePath: string): number {
34
34
  }
35
35
  return Math.ceil(content.length / ratio);
36
36
  }
37
+
38
// Deterministic, dependency-free token estimate for arbitrary text. Unlike
// `estimateTokens` it needs no file extension, so it can score any tool output
// (logs, search results, command output).
//
// This is deliberately not a real BPE vocabulary: the compression measurement
// only needs a *consistent* estimator to compute an original-minus-compressed
// delta, and the stable signature lets a real tokenizer be swapped in later
// without touching call sites.
//
// Scoring, over UTF-16 code units:
//   - a run of ASCII letters costs ceil(length / 4)
//   - a run of ASCII digits costs ceil(length / 3)
//   - space, tab and carriage return cost nothing (they merge into a neighbour)
//   - every other code unit — newline, punctuation, symbols, non-ASCII — costs 1
export function countTokens(text: string): number {
  if (!text) return 0;

  const isLetter = (code: number): boolean =>
    (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
  const isDigit = (code: number): boolean => code >= 48 && code <= 57;

  // Index just past the run of same-class characters that starts at `from`.
  const runEnd = (from: number, inClass: (code: number) => boolean): number => {
    let end = from + 1;
    while (end < text.length && inClass(text.charCodeAt(end))) end += 1;
    return end;
  };

  let total = 0;
  let pos = 0;
  while (pos < text.length) {
    const code = text.charCodeAt(pos);
    if (isLetter(code)) {
      const end = runEnd(pos, isLetter);
      total += Math.ceil((end - pos) / 4); // word splits into ~4-char sub-words
      pos = end;
    } else if (isDigit(code)) {
      const end = runEnd(pos, isDigit);
      total += Math.ceil((end - pos) / 3); // digit runs tokenize more finely
      pos = end;
    } else {
      const freeWhitespace = code === 0x20 || code === 0x09 || code === 0x0d;
      if (!freeWhitespace) total += 1;
      pos += 1;
    }
  }
  return total;
}
@@ -0,0 +1,97 @@
1
+ // Reversible-compression cache repository (spec 21 §Reversibility). Stores the
2
+ // byte-exact original of a compressed tool output keyed by a short retrieval
3
+ // token, with a TTL. `get` treats an expired row as a miss and evicts it lazily,
4
+ // so a stale token can never return partial or wrong content.
5
+ //
6
+ // This is a local cache — it is never injected into model context and is not
7
+ // part of the cross-device sync surface. device_id is recorded for audit only.
8
+
9
+ import { randomUUID } from "crypto";
10
+ import type { DbDriver } from "../storage/driver";
11
+ import type { CompressionCacheEntry, ContentKind } from "../types/compression";
12
+ import { openProjectDb } from "../storage/db";
13
+ import { getOrCreateDeviceId } from "../core/device";
14
+
15
// Arguments accepted by `CompressionCacheRepo.store`.
export interface StoreInput {
  // Name of the tool that produced the output; written to the `tool_name` column.
  toolName: string;
  // Detected content kind; written to the `content_kind` column.
  contentKind: ContentKind;
  // The original text to cache. Its UTF-8 byte length is stored alongside it.
  content: string;
  // Hours until the entry expires, counted from `now`; negative values count as 0.
  retentionHours: number;
  // Explicit retrieval token; when omitted, `store` generates one with `newToken()`.
  token?: string;
  // Clock override used for `created_at` / `expires_at`; defaults to the current time.
  now?: Date;
}
23
+
24
// Repository over the `compression_cache` table: stores the original of a
// compressed tool output under a short retrieval token with an expiry time, and
// hands it back only while it has not expired.
export class CompressionCacheRepo {
  // Latest expiry that may be stored: 9999-12-31T23:59:59.999Z. Expiry is
  // compared as an ISO-8601 string, which only orders correctly while every
  // timestamp uses the plain four-digit-year form. Beyond year 9999
  // `toISOString()` switches to "+YYYYYY-…" (which sorts *before* any normal
  // timestamp, so the row would look expired at once), and beyond the Date
  // range it throws RangeError.
  private static readonly MAX_EXPIRY_MS = Date.UTC(9999, 11, 31, 23, 59, 59, 999);

  constructor(private readonly db: DbDriver) {}

  // Open the repository for the project database at `cwd`.
  static for(cwd: string): CompressionCacheRepo {
    return new CompressionCacheRepo(openProjectDb(cwd));
  }

  // Short, unambiguous token the model can paste into `mink retrieve`:
  // "mc-" followed by the first 8 characters of a random UUID.
  static newToken(): string {
    return `mc-${randomUUID().slice(0, 8)}`;
  }

  // Store an original and return its retrieval token. An existing row with the
  // same token is replaced.
  //
  // The expiry is `now + retentionHours`, with the retention sanitised first:
  // NaN and negative values count as 0 (expires immediately), and anything that
  // would land past MAX_EXPIRY_MS — including Infinity — is capped there, so a
  // very large configured retention means "effectively never" instead of
  // throwing or producing a timestamp that compares as already expired.
  store(input: StoreInput, deviceId: string = getOrCreateDeviceId()): string {
    const token = input.token ?? CompressionCacheRepo.newToken();
    const now = input.now ?? new Date();
    const createdAt = now.toISOString();

    const retentionHours = Number.isNaN(input.retentionHours)
      ? 0
      : Math.max(0, input.retentionHours);
    const expiresAtMs = Math.min(
      CompressionCacheRepo.MAX_EXPIRY_MS,
      now.getTime() + retentionHours * 3_600_000
    );
    const expiresAt = new Date(expiresAtMs).toISOString();

    this.db.prepare(`
      INSERT OR REPLACE INTO compression_cache
        (token, created_at, expires_at, tool_name, content_kind,
         content, size_bytes, device_id)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      token, createdAt, expiresAt, input.toolName, input.contentKind,
      input.content, Buffer.byteLength(input.content, "utf-8"), deviceId
    );
    return token;
  }

  // Return the stored original, or null if the token is unknown or expired.
  // An expired row is deleted on the way out (lazy eviction).
  get(token: string, now: Date = new Date()): CompressionCacheEntry | null {
    const row = this.db
      .prepare("SELECT * FROM compression_cache WHERE token = ?")
      .get(token) as Record<string, unknown> | undefined;
    if (!row) return null;

    // ISO-8601 UTC strings in the same format order lexicographically by time.
    const expiresAt = String(row.expires_at);
    if (expiresAt <= now.toISOString()) {
      try {
        this.db.prepare("DELETE FROM compression_cache WHERE token = ?").run(token);
      } catch {
        // best effort — a failed eviction still reports a miss below
      }
      return null;
    }

    return {
      token: String(row.token),
      createdAt: String(row.created_at),
      expiresAt,
      toolName: String(row.tool_name),
      contentKind: String(row.content_kind) as ContentKind,
      content: String(row.content),
      sizeBytes: Number(row.size_bytes),
    };
  }

  // Delete every row whose TTL has elapsed. Returns the count removed.
  evictExpired(now: Date = new Date()): number {
    const result = this.db
      .prepare("DELETE FROM compression_cache WHERE expires_at <= ?")
      .run(now.toISOString());
    return Number(result.changes);
  }

  // Total number of rows currently in the cache, expired or not.
  count(): number {
    const row = this.db
      .prepare("SELECT COUNT(*) AS n FROM compression_cache")
      .get() as { n: number };
    return Number(row.n);
  }
}