@drewpayment/mink 0.11.0 → 0.12.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/out/404.html +1 -1
- package/dashboard/out/action-log.html +1 -1
- package/dashboard/out/action-log.txt +1 -1
- package/dashboard/out/activity.html +1 -1
- package/dashboard/out/activity.txt +1 -1
- package/dashboard/out/bugs.html +1 -1
- package/dashboard/out/bugs.txt +1 -1
- package/dashboard/out/capture.html +1 -1
- package/dashboard/out/capture.txt +1 -1
- package/dashboard/out/config.html +1 -1
- package/dashboard/out/config.txt +1 -1
- package/dashboard/out/daemon.html +1 -1
- package/dashboard/out/daemon.txt +1 -1
- package/dashboard/out/design.html +1 -1
- package/dashboard/out/design.txt +1 -1
- package/dashboard/out/discord.html +1 -1
- package/dashboard/out/discord.txt +1 -1
- package/dashboard/out/file-index.html +1 -1
- package/dashboard/out/file-index.txt +1 -1
- package/dashboard/out/index.html +1 -1
- package/dashboard/out/index.txt +1 -1
- package/dashboard/out/insights.html +1 -1
- package/dashboard/out/insights.txt +1 -1
- package/dashboard/out/learning.html +1 -1
- package/dashboard/out/learning.txt +1 -1
- package/dashboard/out/overview.html +1 -1
- package/dashboard/out/overview.txt +1 -1
- package/dashboard/out/scheduler.html +1 -1
- package/dashboard/out/scheduler.txt +1 -1
- package/dashboard/out/sync.html +1 -1
- package/dashboard/out/sync.txt +1 -1
- package/dashboard/out/tokens.html +1 -1
- package/dashboard/out/tokens.txt +1 -1
- package/dashboard/out/waste.html +1 -1
- package/dashboard/out/waste.txt +1 -1
- package/dashboard/out/wiki.html +1 -1
- package/dashboard/out/wiki.txt +1 -1
- package/dist/cli.bun.js +90615 -0
- package/dist/cli.js +27 -92464
- package/dist/cli.node.js +93945 -0
- package/package.json +13 -2
- package/scripts/build.mjs +50 -0
- package/scripts/cli-shim.mjs +39 -0
- package/src/commands/bug-search.ts +2 -4
- package/src/commands/detect-waste.ts +24 -32
- package/src/commands/post-read.ts +10 -11
- package/src/commands/post-write.ts +13 -19
- package/src/commands/pre-read.ts +19 -24
- package/src/commands/scan.ts +103 -40
- package/src/commands/status.ts +45 -26
- package/src/core/bug-memory.ts +32 -34
- package/src/core/dashboard-api.ts +44 -22
- package/src/core/index-store.ts +23 -0
- package/src/core/paths.ts +7 -0
- package/src/core/scanner.ts +8 -4
- package/src/core/state-aggregator.ts +64 -7
- package/src/core/state-counters.ts +11 -31
- package/src/core/sync-merge-drivers.ts +164 -1
- package/src/core/sync.ts +9 -0
- package/src/core/token-ledger.ts +50 -4
- package/src/repositories/bug-memory-repo.ts +268 -0
- package/src/repositories/counters-repo.ts +88 -0
- package/src/repositories/file-index-repo.ts +238 -0
- package/src/repositories/token-ledger-repo.ts +412 -0
- package/src/storage/db.ts +121 -0
- package/src/storage/driver.bun.ts +99 -0
- package/src/storage/driver.node.ts +107 -0
- package/src/storage/driver.ts +76 -0
- package/src/storage/migrate-json.ts +415 -0
- package/src/storage/schema.ts +207 -0
- package/src/types/file-index.ts +9 -0
- /package/dashboard/out/_next/static/{I7QxkFr_LXY4BWGjogOs1 → 7bx94K8a7-O53mwi7UoEu}/_buildManifest.js +0 -0
- /package/dashboard/out/_next/static/{I7QxkFr_LXY4BWGjogOs1 → 7bx94K8a7-O53mwi7UoEu}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
// Bug-memory repository. Wraps the bug_memory + bug_tags + bug_related +
|
|
2
|
+
// bug_memory_fts tables in mink.db. The CLI surface (loadBugMemory,
|
|
3
|
+
// findDuplicate, lookupBugsForFile, searchBugs, hasBugForFileInSession)
|
|
4
|
+
// is preserved by the thin wrapper in src/core/bug-memory.ts; this file
|
|
5
|
+
// is where the SQLite queries live.
|
|
6
|
+
//
|
|
7
|
+
// Search uses FTS5 (porter+unicode61 tokenization) so the per-query cost
|
|
8
|
+
// stays sublinear in bug count. The score-vs-false-positive guards from
|
|
9
|
+
// the v1 Jaccard implementation are preserved: a 0.3 score threshold,
|
|
10
|
+
// file-path or tag-overlap match required when score is borderline,
|
|
11
|
+
// same-file matches get a 0.2 boost.
|
|
12
|
+
|
|
13
|
+
import { randomUUID } from "crypto";
|
|
14
|
+
import type { DbDriver } from "../storage/driver";
|
|
15
|
+
import type { BugEntry, BugMemory, SimilarityMatch } from "../types/bug-memory";
|
|
16
|
+
import { openProjectDb } from "../storage/db";
|
|
17
|
+
import { getOrCreateDeviceId } from "../core/device";
|
|
18
|
+
|
|
19
|
+
// Raw shape of one `bug_memory` row as returned by `SELECT *`. Columns are
// snake_case; hydrate() maps them onto the camelCase BugEntry fields and
// attaches tags / related ids from their own tables.
interface BugRow {
  id: string;
  // Both timestamps are written as ISO strings (Date#toISOString) by
  // add() and incrementOccurrence().
  created_at: string;
  last_seen_at: string;
  error_message: string;
  file_path: string;
  // NULL when no line number was recorded; hydrate() surfaces it as undefined.
  line_number: number | null;
  root_cause: string;
  fix_description: string;
  // Starts at 1 on insert and is bumped by incrementOccurrence().
  occurrence_count: number;
}
|
|
30
|
+
|
|
31
|
+
// Lower-cases `text`, splits it on runs of non-word characters (`\W+`) and
// returns the distinct non-empty pieces. Used by searchBugs() to test
// whether a bug's tag appears among the query's words.
function tokenize(text: string): Set<string> {
  const words = new Set<string>();
  for (const piece of text.toLowerCase().split(/\W+/)) {
    // split() yields "" at the edges when the text starts or ends with a
    // separator (or is empty) — those are not tokens.
    if (piece.length > 0) words.add(piece);
  }
  return words;
}
|
|
36
|
+
|
|
37
|
+
// Repository over the bug_memory, bug_tags, bug_related and bug_memory_fts
// tables. Every method issues its queries through the injected DbDriver;
// rows are converted to the public BugEntry shape by hydrate().
export class BugMemoryRepo {
  constructor(private readonly db: DbDriver) {}

  // Convenience factory: opens the project database for `cwd` and wraps it.
  static for(cwd: string): BugMemoryRepo {
    return new BugMemoryRepo(openProjectDb(cwd));
  }

  // ── Insert / upsert ────────────────────────────────────────────────────

  // Detect an exact-text duplicate of (errorMessage, filePath). Mirrors
  // the v1 `findDuplicate` semantics — same (errorMessage, filePath)
  // pair counts as a re-occurrence of the same bug. Returns null when no
  // such row exists.
  findDuplicate(errorMessage: string, filePath: string): BugEntry | null {
    const row = this.db
      .prepare(
        "SELECT * FROM bug_memory WHERE error_message = ? AND file_path = ? LIMIT 1"
      )
      .get(errorMessage, filePath);
    if (!row) return null;
    return this.hydrate(row as unknown as BugRow);
  }

  // Records a bug. If an entry with the same (errorMessage, filePath)
  // already exists its occurrence counter is bumped and the refreshed entry
  // is returned; otherwise a new row plus its tag / related-id rows are
  // inserted inside one transaction.
  //
  // Throws if the freshly inserted row cannot be read back (an invariant
  // violation — previously this returned null typed as BugEntry).
  add(
    fields: Omit<BugEntry, "id" | "createdAt" | "lastSeenAt" | "occurrenceCount">
  ): BugEntry {
    const existing = this.findDuplicate(fields.errorMessage, fields.filePath);
    if (existing) {
      this.incrementOccurrence(existing.id);
      // Re-read so the caller sees the bumped count / last_seen_at; fall
      // back to the pre-increment copy if the row vanished in between.
      return this.lookup(existing.id) ?? existing;
    }

    const id = `bug-${randomUUID().slice(0, 8)}`;
    const now = new Date().toISOString();
    const deviceId = getOrCreateDeviceId();

    this.db.transaction(() => {
      this.db.prepare(`
        INSERT INTO bug_memory
          (id, created_at, last_seen_at, error_message, file_path, line_number,
           root_cause, fix_description, occurrence_count, device_id)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, 1, ?)
      `).run(
        id, now, now,
        fields.errorMessage, fields.filePath, fields.lineNumber ?? null,
        fields.rootCause, fields.fixDescription, deviceId
      );
      const insertTag = this.db.prepare(
        "INSERT OR IGNORE INTO bug_tags (bug_id, tag) VALUES (?, ?)"
      );
      for (const tag of fields.tags ?? []) insertTag.run(id, tag);
      const insertRelated = this.db.prepare(
        "INSERT OR IGNORE INTO bug_related (bug_id, related_bug_id) VALUES (?, ?)"
      );
      for (const rel of fields.relatedBugIds ?? []) insertRelated.run(id, rel);
    });

    const created = this.lookup(id);
    if (!created) {
      throw new Error(`bug_memory row ${id} could not be read back after insert`);
    }
    return created;
  }

  // Bumps occurrence_count by one and stamps last_seen_at with the current
  // time. A no-op at the SQL level when `id` does not exist.
  incrementOccurrence(id: string): void {
    const now = new Date().toISOString();
    this.db.prepare(
      "UPDATE bug_memory SET occurrence_count = occurrence_count + 1, last_seen_at = ? WHERE id = ?"
    ).run(now, id);
  }

  // ── Read ───────────────────────────────────────────────────────────────

  // Fetches one entry by id, or null when there is no such row.
  lookup(id: string): BugEntry | null {
    const row = this.db
      .prepare("SELECT * FROM bug_memory WHERE id = ?")
      .get(id);
    if (!row) return null;
    return this.hydrate(row as unknown as BugRow);
  }

  // All entries recorded against `filePath`, most recently seen first.
  lookupForFile(filePath: string): BugEntry[] {
    const rows = this.db
      .prepare(
        "SELECT * FROM bug_memory WHERE file_path = ? ORDER BY last_seen_at DESC"
      )
      .all(filePath);
    return (rows as unknown as BugRow[]).map((r) => this.hydrate(r));
  }

  // Every entry in the table, most recently seen first.
  listAll(): BugEntry[] {
    const rows = this.db
      .prepare("SELECT * FROM bug_memory ORDER BY last_seen_at DESC")
      .all();
    return (rows as unknown as BugRow[]).map((r) => this.hydrate(r));
  }

  // Number of rows in bug_memory.
  count(): number {
    const row = this.db.prepare("SELECT COUNT(*) AS n FROM bug_memory").get();
    return Number((row as { n: number }).n);
  }

  // True when at least one bug for `filePath` was created at or after
  // `sessionStartIso` (string comparison on the stored ISO timestamps).
  hasBugForFileInSession(filePath: string, sessionStartIso: string): boolean {
    const row = this.db
      .prepare(
        "SELECT 1 FROM bug_memory WHERE file_path = ? AND created_at >= ? LIMIT 1"
      )
      .get(filePath, sessionStartIso);
    // `!= null` rather than `!== undefined`: a driver that reports "no row"
    // as null would otherwise make this return true for every file. The
    // other readers in this class already treat any falsy result as a miss.
    return row != null;
  }

  // ── Search (FTS5) ──────────────────────────────────────────────────────

  // Preserves the v1 contract: scores in (0, 1+) range, 0.3 threshold,
  // file-path/tag boost. FTS5's bm25 returns negative scores where a
  // numerically lower (more negative) value is a better match; the raw
  // value is squashed into (0, 1] via `1 / (1 + abs(bm25))`. The boost for
  // same-file matches stays at +0.2 and the same false-positive guards
  // (require file-path or tag overlap when borderline) apply.
  //
  // Returns [] for blank queries, for queries buildFtsQuery() rejects, and
  // when the MATCH statement throws. Results are sorted by descending score.
  searchBugs(
    query: string,
    options?: { filePath?: string }
  ): SimilarityMatch[] {
    if (query.trim().length === 0) return [];

    // FTS5 MATCH requires escaped phrase quoting for queries with
    // punctuation; buildFtsQuery() phrase-quotes the whole input.
    const ftsQuery = buildFtsQuery(query);
    if (ftsQuery === null) return [];

    type FtsRow = { bug_id: string; bm25: number };
    let ftsRows: FtsRow[] = [];
    try {
      ftsRows = this.db
        .prepare(
          "SELECT bug_id, bm25(bug_memory_fts) AS bm25 FROM bug_memory_fts WHERE bug_memory_fts MATCH ? ORDER BY bm25"
        )
        .all(ftsQuery) as unknown as FtsRow[];
    } catch {
      // FTS query parse error — fall back silently to no matches.
      return [];
    }

    const queryTokens = tokenize(query);
    const results: SimilarityMatch[] = [];

    for (const row of ftsRows) {
      const entry = this.lookup(row.bug_id);
      if (!entry) continue;

      // NOTE(review): per the FTS5 docs a better match has a *larger* bm25
      // magnitude, so this normalization assigns stronger FTS matches a
      // lower ftsScore. Kept as-is because changing it shifts which rows
      // clear the 0.3 threshold — confirm intended ranking before altering.
      const ftsScore = 1 / (1 + Math.abs(row.bm25));
      const matchReasons: string[] = ["fts"];

      // Exact substring boost (matches v1 behavior).
      let score = ftsScore;
      if (entry.errorMessage.length > 0 && entry.errorMessage.includes(query)) {
        score += 1.0;
        matchReasons.unshift("exact_error_match");
      }

      const fileMatch = options?.filePath ? entry.filePath === options.filePath : false;
      const tagMatch = entry.tags.some((tag) => queryTokens.has(tag.toLowerCase()));

      // Same false-positive guard as v1: when the score is borderline
      // (<= 0.3), only keep matches that also satisfy file-path or
      // tag-overlap.
      if (score <= 0.3 && !fileMatch && !tagMatch) continue;

      if (fileMatch) {
        score += 0.2;
        matchReasons.push("file_path");
      }
      if (tagMatch) matchReasons.push("tags");

      // A borderline row that survived only via tagMatch still has to end
      // up above the threshold to be reported.
      if (score > 0.3) {
        results.push({ entry, score, matchReasons });
      }
    }

    return results.sort((a, b) => b.score - a.score);
  }

  // ── Helpers ────────────────────────────────────────────────────────────

  // Converts a bug_memory row into a BugEntry, loading its tags and related
  // bug ids (both alphabetically ordered) with two extra queries.
  private hydrate(row: BugRow): BugEntry {
    const tags = this.db
      .prepare("SELECT tag FROM bug_tags WHERE bug_id = ? ORDER BY tag")
      .all(row.id)
      .map((r) => (r as { tag: string }).tag);
    const relatedBugIds = this.db
      .prepare(
        "SELECT related_bug_id FROM bug_related WHERE bug_id = ? ORDER BY related_bug_id"
      )
      .all(row.id)
      .map((r) => (r as { related_bug_id: string }).related_bug_id);
    return {
      id: row.id,
      createdAt: row.created_at,
      lastSeenAt: row.last_seen_at,
      errorMessage: row.error_message,
      filePath: row.file_path,
      // SQL NULL becomes an absent (undefined) line number.
      lineNumber: row.line_number ?? undefined,
      rootCause: row.root_cause,
      fixDescription: row.fix_description,
      tags,
      occurrenceCount: row.occurrence_count,
      relatedBugIds,
    };
  }

  // Convert the entire repo to the legacy BugMemory snapshot shape. Used
  // by callers (dashboard, status) that still expect `{ entries, nextId }`.
  snapshot(): BugMemory {
    return {
      entries: this.listAll(),
      // nextId was only used by the in-memory generator; new ids come
      // from randomUUID, so any value > current count is safe.
      nextId: this.count() + 1,
    };
  }
}
|
|
255
|
+
|
|
256
|
+
// Turns arbitrary user input into an FTS5 MATCH expression. FTS5's grammar
// gives colons, parens, quotes, etc. operator meaning, so the whole input
// is wrapped in one double-quoted phrase. Embedded double quotes are
// replaced by spaces first, since they would terminate the phrase.
// Returns null when nothing but whitespace and/or quotes was supplied.
function buildFtsQuery(raw: string): string | null {
  // Swapping quotes for spaces before trimming gives the same result as
  // trim → replace → trim: the outer whitespace is removed either way.
  const phraseBody = raw.split('"').join(" ").trim();
  return phraseBody.length > 0 ? `"${phraseBody}"` : null;
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
// Per-device hit/miss counters for the file index. Replaces the legacy
|
|
2
|
+
// `.mink-state-counters.json` file (read by the dashboard and by
|
|
3
|
+
// `mink status`) with a SQLite-backed table that's queryable per device
|
|
4
|
+
// or aggregated across all devices in a single SQL statement.
|
|
5
|
+
|
|
6
|
+
import type { DbDriver } from "../storage/driver";
|
|
7
|
+
import { openProjectDb } from "../storage/db";
|
|
8
|
+
import { getOrCreateDeviceId } from "../core/device";
|
|
9
|
+
|
|
10
|
+
// Upsert that adds one file-index hit for a device: inserts a (1, 0) row
// the first time a device_id is seen, otherwise bumps file_index_hits on
// the existing row. Takes a single bound parameter — the device id.
const INCREMENT_HIT = `
  INSERT INTO counters (device_id, file_index_hits, file_index_misses)
  VALUES (?, 1, 0)
  ON CONFLICT(device_id) DO UPDATE SET
    file_index_hits = counters.file_index_hits + 1
`;

// Miss-side twin of INCREMENT_HIT: inserts a (0, 1) row for a new device,
// otherwise bumps file_index_misses. Same single bound parameter.
const INCREMENT_MISS = `
  INSERT INTO counters (device_id, file_index_hits, file_index_misses)
  VALUES (?, 0, 1)
  ON CONFLICT(device_id) DO UPDATE SET
    file_index_misses = counters.file_index_misses + 1
`;
|
|
23
|
+
|
|
24
|
+
// Repository over the `counters` table: one row per device_id holding that
// device's file-index hit and miss totals.
export class CountersRepo {
  constructor(private readonly db: DbDriver) {}

  // Convenience factory: opens the project database for `cwd` and wraps it.
  static for(cwd: string): CountersRepo {
    const projectDb = openProjectDb(cwd);
    return new CountersRepo(projectDb);
  }

  // Adds one hit for `deviceId` (defaults to this machine's device id).
  incrementHit(deviceId: string = getOrCreateDeviceId()): void {
    const bump = this.db.prepare(INCREMENT_HIT);
    bump.run(deviceId);
  }

  // Adds one miss for `deviceId` (defaults to this machine's device id).
  incrementMiss(deviceId: string = getOrCreateDeviceId()): void {
    const bump = this.db.prepare(INCREMENT_MISS);
    bump.run(deviceId);
  }

  // Hit + miss counts for a single device; both are zero when the device
  // has no row yet. Callers wanting a project-wide figure use totals().
  forDevice(deviceId: string = getOrCreateDeviceId()): { hits: number; misses: number } {
    const found = this.db
      .prepare(
        "SELECT file_index_hits, file_index_misses FROM counters WHERE device_id = ?"
      )
      .get(deviceId);
    if (found) {
      const counts = found as { file_index_hits: number; file_index_misses: number };
      return {
        hits: Number(counts.file_index_hits),
        misses: Number(counts.file_index_misses),
      };
    }
    return { hits: 0, misses: 0 };
  }

  // Hit + miss counts summed over every device. COALESCE makes the sums
  // zero (rather than NULL) when the table is empty.
  totals(): { hits: number; misses: number } {
    const summed = this.db
      .prepare(
        "SELECT COALESCE(SUM(file_index_hits), 0) AS h, COALESCE(SUM(file_index_misses), 0) AS m FROM counters"
      )
      .get();
    if (summed) {
      const sums = summed as { h: number; m: number };
      return { hits: Number(sums.h), misses: Number(sums.m) };
    }
    return { hits: 0, misses: 0 };
  }

  // Every device's counts, keyed by device_id.
  perDevice(): Record<string, { hits: number; misses: number }> {
    const byDevice: Record<string, { hits: number; misses: number }> = {};
    const allRows = this.db
      .prepare(
        "SELECT device_id, file_index_hits, file_index_misses FROM counters"
      )
      .all();
    for (const raw of allRows) {
      const { device_id, file_index_hits, file_index_misses } = raw as {
        device_id: string;
        file_index_hits: number;
        file_index_misses: number;
      };
      byDevice[device_id] = {
        hits: Number(file_index_hits),
        misses: Number(file_index_misses),
      };
    }
    return byDevice;
  }
}
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
// File-index repository. Wraps the file_index table in `mink.db` behind a
|
|
2
|
+
// stable function-based API that the wrapper in src/core/index-store.ts
|
|
3
|
+
// delegates to. Hook hot paths (pre-read, post-write) call exactly one
|
|
4
|
+
// method per hook invocation — no full-index load.
|
|
5
|
+
//
|
|
6
|
+
// All writes attribute the calling device via device_id so the cross-device
|
|
7
|
+
// sync merge driver (mink-db-merge) can reconcile origin. Counters that
|
|
8
|
+
// were previously kept in file-index.json's header (lifetimeHits /
|
|
9
|
+
// lifetimeMisses) live in the `counters` table indexed by device_id.
|
|
10
|
+
|
|
11
|
+
import type { DbDriver } from "../storage/driver";
|
|
12
|
+
import type { FileIndexEntry, StalenessReport } from "../types/file-index";
|
|
13
|
+
import { openProjectDb } from "../storage/db";
|
|
14
|
+
import { getOrCreateDeviceId } from "../core/device";
|
|
15
|
+
|
|
16
|
+
// Raw shape of one `file_index` row as selected by lookupEntry() and
// listAll(). rowToEntry() exposes only the first five columns; the rest
// are bookkeeping used by the repo itself.
interface FileIndexRow {
  file_path: string;
  description: string;
  estimated_tokens: number;
  last_modified: string;
  last_indexed: string;
  // File mtime in milliseconds; written as 0 when the caller supplies none.
  mtime_ms: number;
  // NULL until a caller supplies a hash / size on upsert.
  content_hash: string | null;
  size_bytes: number | null;
  // Device that performed the most recent write of this row.
  device_id: string;
}
|
|
27
|
+
|
|
28
|
+
// Projects a file_index row onto the public FileIndexEntry shape, renaming
// snake_case columns to camelCase. mtime_ms, content_hash, size_bytes and
// device_id are intentionally not part of the result.
function rowToEntry(row: FileIndexRow): FileIndexEntry {
  const {
    file_path,
    description,
    estimated_tokens,
    last_modified,
    last_indexed,
  } = row;
  return {
    filePath: file_path,
    description,
    estimatedTokens: estimated_tokens,
    lastModified: last_modified,
    lastIndexed: last_indexed,
  };
}
|
|
37
|
+
|
|
38
|
+
// Single-statement upsert for file_index, keyed on file_path. Takes nine
// bound parameters in column order. On conflict the incoming row overwrites
// description, estimated_tokens, last_modified, last_indexed, mtime_ms and
// device_id unconditionally; content_hash and size_bytes are only replaced
// when the incoming value is non-NULL (COALESCE keeps the stored one).
// NOTE(review): there is no last_modified comparison here — the newest
// write always wins regardless of timestamps.
const UPSERT_SQL = `
  INSERT INTO file_index
    (file_path, description, estimated_tokens, last_modified, last_indexed,
     mtime_ms, content_hash, size_bytes, device_id)
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
  ON CONFLICT(file_path) DO UPDATE SET
    description = excluded.description,
    estimated_tokens = excluded.estimated_tokens,
    last_modified = excluded.last_modified,
    last_indexed = excluded.last_indexed,
    mtime_ms = excluded.mtime_ms,
    content_hash = COALESCE(excluded.content_hash, file_index.content_hash),
    size_bytes = COALESCE(excluded.size_bytes, file_index.size_bytes),
    device_id = excluded.device_id
`;
|
|
57
|
+
|
|
58
|
+
// Optional per-write metadata accepted by FileIndexRepo.upsert() and
// upsertMany(). Every field may be omitted.
export interface UpsertOptions {
  // File mtime in milliseconds; stored as 0 when omitted.
  mtimeMs?: number;
  // Omitted or null leaves any previously stored hash untouched.
  contentHash?: string | null;
  // Omitted or null leaves any previously stored size untouched.
  sizeBytes?: number | null;
  // Device to attribute the write to; defaults to getOrCreateDeviceId().
  deviceId?: string;
}
|
|
64
|
+
|
|
65
|
+
// Minimal read-only view of the file index: a single-path lookup.
// FileIndexRepo implements it.
export interface IndexLookup {
  // Returns the entry stored for `filePath`, or null when it is not indexed.
  lookupEntry(filePath: string): FileIndexEntry | null;
}
|
|
68
|
+
|
|
69
|
+
// Repository over the `file_index` table (plus the `last_scan_timestamp`
// key in `meta`). All queries go through the injected DbDriver.
export class FileIndexRepo implements IndexLookup {
  constructor(private readonly db: DbDriver) {}

  // Convenience factory: opens the project database for `cwd` and wraps it.
  static for(cwd: string): FileIndexRepo {
    return new FileIndexRepo(openProjectDb(cwd));
  }

  // Inserts or overwrites the row for `entry.filePath`. The write is
  // attributed to `opts.deviceId`, falling back to this machine's device id.
  // mtimeMs is floored before storing so it compares equal to the floored
  // value staleSet() checks against (filesystem mtimes can be fractional).
  upsert(entry: FileIndexEntry, opts: UpsertOptions = {}): void {
    const deviceId = opts.deviceId ?? getOrCreateDeviceId();
    this.db.prepare(UPSERT_SQL).run(
      entry.filePath,
      entry.description,
      entry.estimatedTokens,
      entry.lastModified,
      entry.lastIndexed,
      Math.floor(opts.mtimeMs ?? 0),
      opts.contentHash ?? null,
      opts.sizeBytes ?? null,
      deviceId
    );
  }

  // Bulk upsert — used by `mink scan` to push hundreds-to-thousands of
  // entries in a single transaction with one prepared statement. A no-op
  // for an empty list. mtimeMs is floored exactly as in upsert().
  upsertMany(entries: Array<{ entry: FileIndexEntry; opts?: UpsertOptions }>): void {
    if (entries.length === 0) return;
    const defaultDevice = getOrCreateDeviceId();
    const stmt = this.db.prepare(UPSERT_SQL);
    this.db.transaction(() => {
      for (const { entry, opts } of entries) {
        stmt.run(
          entry.filePath,
          entry.description,
          entry.estimatedTokens,
          entry.lastModified,
          entry.lastIndexed,
          Math.floor(opts?.mtimeMs ?? 0),
          opts?.contentHash ?? null,
          opts?.sizeBytes ?? null,
          opts?.deviceId ?? defaultDevice
        );
      }
    });
  }

  // Returns the indexed entry for `filePath`, or null when it is not indexed.
  lookupEntry(filePath: string): FileIndexEntry | null {
    const row = this.db
      .prepare(
        "SELECT file_path, description, estimated_tokens, last_modified, last_indexed, mtime_ms, content_hash, size_bytes, device_id FROM file_index WHERE file_path = ?"
      )
      .get(filePath);
    if (!row) return null;
    return rowToEntry(row as unknown as FileIndexRow);
  }

  // Deletes the row for `filePath`, if any.
  remove(filePath: string): void {
    this.db.prepare("DELETE FROM file_index WHERE file_path = ?").run(filePath);
  }

  // Remove every entry that's NOT in `keep`. Used by `mink scan` to prune
  // orphaned entries for files that have been deleted from disk. The keep
  // list is staged into a temp table inside a transaction (one bound
  // parameter per insert, so no large IN list is needed); the DELETE then
  // runs as a separate statement after that transaction. Returns the number
  // of rows deleted. An empty `keep` removes every entry.
  retainOnly(keep: Iterable<string>): number {
    const keepArr = [...keep];
    this.db.transaction(() => {
      this.db.exec("CREATE TEMP TABLE IF NOT EXISTS _retain (path TEXT PRIMARY KEY)");
      // Clear leftovers from a previous call on the same connection.
      this.db.exec("DELETE FROM _retain");
      const stmt = this.db.prepare("INSERT OR IGNORE INTO _retain VALUES (?)");
      for (const p of keepArr) stmt.run(p);
    });
    const r = this.db
      .prepare("DELETE FROM file_index WHERE file_path NOT IN (SELECT path FROM _retain)")
      .run();
    return Number(r.changes);
  }

  // Total count of indexed files.
  totalFiles(): number {
    const row = this.db.prepare("SELECT COUNT(*) AS n FROM file_index").get();
    return Number((row as { n: number }).n);
  }

  // Bulk list — used by analytics, dashboard, and `mink status`. Stays
  // off the hook hot path. Returns rows already shaped to the public
  // FileIndexEntry type, ordered by file path.
  listAll(): FileIndexEntry[] {
    const rows = this.db
      .prepare(
        "SELECT file_path, description, estimated_tokens, last_modified, last_indexed, mtime_ms, content_hash, size_bytes, device_id FROM file_index ORDER BY file_path"
      )
      .all();
    return rows.map((r) => rowToEntry(r as unknown as FileIndexRow));
  }

  // For Phase 5's incremental scan. Returns the subset of `scanned` that
  // needs re-extraction: paths with no stored row, and paths whose stored
  // mtime differs from the floored scanned mtime. Issues one point lookup
  // per scanned file through a single prepared statement.
  staleSet(scanned: Array<{ relativePath: string; mtimeMs: number }>): string[] {
    if (scanned.length === 0) return [];
    const stmt = this.db.prepare(
      "SELECT mtime_ms FROM file_index WHERE file_path = ?"
    );
    const stale: string[] = [];
    for (const f of scanned) {
      const row = stmt.get(f.relativePath);
      if (!row) {
        stale.push(f.relativePath); // never seen before
        continue;
      }
      const storedMs = Number((row as { mtime_ms: number }).mtime_ms);
      if (storedMs !== Math.floor(f.mtimeMs)) {
        stale.push(f.relativePath);
      }
    }
    return stale;
  }

  // Content-hash lookup — used by `mink scan` to detect touch-without-edit
  // cases (mtime changed, content didn't) and skip re-extraction in that
  // case. Returns null both when the path is not indexed and when no hash
  // has been stored for it.
  contentHashFor(filePath: string): string | null {
    const row = this.db
      .prepare("SELECT content_hash FROM file_index WHERE file_path = ?")
      .get(filePath);
    if (!row) return null;
    const hash = (row as { content_hash: string | null }).content_hash;
    return hash ?? null;
  }

  // Mirrors checkStaleness() under the JSON store: which files are on disk
  // but not in the index (missing), and which are in the index but absent
  // from disk (orphaned). isStale is true when either list is non-empty.
  checkStaleness(scannedRelativePaths: string[]): StalenessReport {
    const scannedSet = new Set(scannedRelativePaths);
    const allIndexed = this.db
      .prepare("SELECT file_path FROM file_index")
      .all()
      .map((r) => (r as { file_path: string }).file_path);
    const indexedSet = new Set(allIndexed);
    const missingFromIndex = scannedRelativePaths.filter((p) => !indexedSet.has(p));
    const orphanedEntries = allIndexed.filter((p) => !scannedSet.has(p));
    return {
      missingFromIndex,
      orphanedEntries,
      isStale: missingFromIndex.length > 0 || orphanedEntries.length > 0,
    };
  }

  // Header analogues. lastScanTimestamp is the only header field that's
  // genuinely a project-wide state value; hit/miss counters live in the
  // counters table and are per-device. Stored in the meta table under the
  // key 'last_scan_timestamp'.
  setLastScanTimestamp(iso: string): void {
    this.db
      .prepare(
        "INSERT INTO meta (key, value) VALUES ('last_scan_timestamp', ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value"
      )
      .run(iso);
  }

  // Returns the stored last-scan timestamp, or "" when none has been set.
  getLastScanTimestamp(): string {
    const row = this.db
      .prepare("SELECT value FROM meta WHERE key = 'last_scan_timestamp'")
      .get();
    if (!row) return "";
    return String((row as { value: string }).value);
  }
}
|