botholomew 0.16.4 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -41
- package/package.json +4 -9
- package/src/chat/agent.ts +37 -40
- package/src/chat/session.ts +10 -10
- package/src/cli.ts +0 -2
- package/src/commands/capabilities.ts +35 -33
- package/src/commands/context.ts +133 -221
- package/src/commands/init.ts +22 -1
- package/src/commands/mcpx.ts +21 -8
- package/src/commands/nuke.ts +52 -15
- package/src/commands/prepare.ts +16 -13
- package/src/config/loader.ts +1 -8
- package/src/config/schemas.ts +6 -0
- package/src/constants.ts +16 -32
- package/src/init/index.ts +52 -27
- package/src/mcpx/client.ts +21 -5
- package/src/mem/client.ts +33 -0
- package/src/{context → prompts}/capabilities.ts +11 -7
- package/src/schedules/store.ts +1 -1
- package/src/tasks/store.ts +1 -1
- package/src/threads/store.ts +1 -1
- package/src/tools/capabilities/refresh.ts +1 -1
- package/src/tools/membot/adapter.ts +111 -0
- package/src/tools/membot/copy.ts +59 -0
- package/src/tools/membot/count_lines.ts +53 -0
- package/src/tools/membot/edit.ts +72 -0
- package/src/tools/membot/exists.ts +54 -0
- package/src/tools/membot/index.ts +26 -0
- package/src/tools/{context → membot}/pipe.ts +34 -32
- package/src/tools/registry.ts +6 -37
- package/src/tools/tool.ts +6 -8
- package/src/tui/App.tsx +3 -4
- package/src/tui/components/ContextPanel.tsx +109 -226
- package/src/tui/components/HelpPanel.tsx +2 -2
- package/src/tui/components/StatusBar.tsx +0 -6
- package/src/tui/components/ThreadPanel.tsx +8 -7
- package/src/tui/wrapDetail.ts +11 -0
- package/src/worker/heartbeat.ts +0 -20
- package/src/worker/index.ts +13 -13
- package/src/worker/llm.ts +7 -9
- package/src/worker/prompt.ts +25 -13
- package/src/worker/spawn.ts +1 -1
- package/src/worker/tick.ts +10 -9
- package/src/commands/db.ts +0 -119
- package/src/commands/with-db.ts +0 -22
- package/src/context/chunker.ts +0 -275
- package/src/context/embedder-impl.ts +0 -100
- package/src/context/embedder.ts +0 -9
- package/src/context/fetcher-errors.ts +0 -8
- package/src/context/fetcher.ts +0 -515
- package/src/context/locks.ts +0 -146
- package/src/context/markdown-converter.ts +0 -186
- package/src/context/reindex.ts +0 -198
- package/src/context/store.ts +0 -841
- package/src/context/url-utils.ts +0 -25
- package/src/db/connection.ts +0 -255
- package/src/db/doctor.ts +0 -235
- package/src/db/embeddings.ts +0 -317
- package/src/db/query.ts +0 -56
- package/src/db/schema.ts +0 -93
- package/src/db/sql/1-core_tables.sql +0 -53
- package/src/db/sql/10-dedupe_context_items.sql +0 -26
- package/src/db/sql/11-rebuild_hnsw.sql +0 -8
- package/src/db/sql/12-workers.sql +0 -66
- package/src/db/sql/13-drive-paths.sql +0 -47
- package/src/db/sql/14-drop_hnsw_index.sql +0 -8
- package/src/db/sql/15-fts_index.sql +0 -8
- package/src/db/sql/16-source_url.sql +0 -7
- package/src/db/sql/17-worker_log_path.sql +0 -3
- package/src/db/sql/18-reset_embeddings_for_local.sql +0 -39
- package/src/db/sql/19-disk_backed_index.sql +0 -36
- package/src/db/sql/2-logging_tables.sql +0 -24
- package/src/db/sql/20-drop_db_tables_for_files.sql +0 -19
- package/src/db/sql/3-daemon_state.sql +0 -5
- package/src/db/sql/4-unique_context_path.sql +0 -1
- package/src/db/sql/5-reset_embeddings_for_openai.sql +0 -1
- package/src/db/sql/6-vss_index.sql +0 -7
- package/src/db/sql/7-drop_embeddings_fk.sql +0 -23
- package/src/db/sql/8-task_output.sql +0 -1
- package/src/db/sql/9-source-type.sql +0 -1
- package/src/tools/context/read-large-result.ts +0 -33
- package/src/tools/dir/create.ts +0 -47
- package/src/tools/dir/size.ts +0 -77
- package/src/tools/dir/tree.ts +0 -124
- package/src/tools/file/copy.ts +0 -73
- package/src/tools/file/count-lines.ts +0 -54
- package/src/tools/file/delete.ts +0 -83
- package/src/tools/file/edit.ts +0 -76
- package/src/tools/file/exists.ts +0 -33
- package/src/tools/file/info.ts +0 -66
- package/src/tools/file/move.ts +0 -66
- package/src/tools/file/read.ts +0 -67
- package/src/tools/file/write.ts +0 -58
- package/src/tools/search/fuse.ts +0 -96
- package/src/tools/search/index.ts +0 -127
- package/src/tools/search/regexp.ts +0 -82
- package/src/tools/search/semantic.ts +0 -167
- /package/src/{db → utils}/uuid.ts +0 -0
package/src/context/url-utils.ts
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/**
 * Reports whether `input` parses as a URL whose scheme is http or https.
 *
 * @param input - Candidate string to test.
 * @returns `true` only when the `URL` constructor accepts the input and the
 *   resulting protocol is `http:` or `https:`; `false` for every other
 *   scheme and for anything the constructor rejects.
 */
export function isUrl(input: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(input);
  } catch {
    // The URL constructor throws on unparseable input; treat that as "not a URL".
    return false;
  }
  return ["http:", "https:"].includes(parsed.protocol);
}
|
|
12
|
-
|
|
13
|
-
/**
 * Reduces an HTML string to its text content.
 *
 * Order matters: comments and whole `<script>` / `<style>` blocks are removed
 * first (so their bodies never reach the output), then every remaining tag,
 * and finally whitespace is normalized.
 *
 * End tags are matched leniently (`</script >`, `</style\n>`), since
 * whitespace is permitted after the tag name in an end tag; requiring an
 * exact `</script>` lets the script body leak into the output.
 *
 * This is a text-extraction helper, not a sanitizer: do not rely on it to
 * make untrusted HTML safe to render.
 *
 * @param html - Raw HTML markup.
 * @returns The text with runs of spaces/tabs collapsed to one space, runs of
 *   three or more newlines collapsed to two, and surrounding whitespace trimmed.
 */
export function stripHtmlTags(html: string): string {
  return html
    .replace(/<!--[\s\S]*?-->/g, "") // remove comments (may contain ">")
    .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, "") // remove script blocks
    .replace(/<style\b[\s\S]*?<\/style\b[^>]*>/gi, "") // remove style blocks
    .replace(/<[^>]*>/g, "") // remove all remaining tags
    .replace(/[ \t]+/g, " ") // collapse horizontal whitespace
    .replace(/\n{3,}/g, "\n\n") // collapse excessive newlines
    .trim();
}
|
package/src/db/connection.ts
DELETED
|
@@ -1,255 +0,0 @@
|
|
|
1
|
-
import { DuckDBInstance } from "@duckdb/node-api";
|
|
2
|
-
|
|
3
|
-
/** Values accepted as bound SQL parameters; `number[]` is used for vector columns. */
type SqlParam = string | number | boolean | null | number[];

/**
 * Thin wrapper around a DuckDB connection that exposes a query interface
 * similar to bun:sqlite. Every query method rewrites `?N` placeholders to
 * `$N` (via `translateParams`) and JSON-encodes array parameters (via
 * `flattenParams`) before handing the statement to DuckDB.
 */
export class DbConnection {
  // biome-ignore lint/suspicious/noExplicitAny: DuckDB internal types
  private conn: any;
  // biome-ignore lint/suspicious/noExplicitAny: DuckDB internal types
  private readonly ownedInstance: any;
  private readonly dbPath: string;
  private closed = false;

  /**
   * @param conn - Open DuckDB connection to wrap.
   * @param ownedInstance - DuckDB instance this connection exclusively owns,
   *   or a falsy value when the instance is shared through the process-local
   *   cache and must be released with `releaseInstance` instead.
   * @param dbPath - Path the instance was opened with (cache key on release).
   */
  constructor(
    // biome-ignore lint/suspicious/noExplicitAny: DuckDB internal types
    conn: any,
    // biome-ignore lint/suspicious/noExplicitAny: DuckDB internal types
    ownedInstance: any,
    dbPath: string,
  ) {
    this.conn = conn;
    this.ownedInstance = ownedInstance;
    this.dbPath = dbPath;
  }

  /** Execute raw SQL with no return value and no parameter translation. */
  async exec(sql: string): Promise<void> {
    await this.conn.run(sql);
  }

  /** Run a query and return the first row (converted by `convertRow`), or null. */
  async queryGet<T = Record<string, unknown>>(
    sql: string,
    ...params: SqlParam[]
  ): Promise<T | null> {
    const translated = translateParams(sql);
    const result = await this.conn.runAndReadAll(
      translated,
      flattenParams(params),
    );
    const rows = await result.getRowObjectsJS();
    return (rows[0] ? convertRow(rows[0]) : null) as T | null;
  }

  /** Run a query and return all rows, each converted by `convertRow`. */
  async queryAll<T = Record<string, unknown>>(
    sql: string,
    ...params: SqlParam[]
  ): Promise<T[]> {
    const translated = translateParams(sql);
    const result = await this.conn.runAndReadAll(
      translated,
      flattenParams(params),
    );
    const rows = await result.getRowObjectsJS();
    return rows.map(convertRow) as T[];
  }

  /** Run a mutation and return the number of changed rows. */
  async queryRun(
    sql: string,
    ...params: SqlParam[]
  ): Promise<{ changes: number }> {
    const translated = translateParams(sql);
    const result = await this.conn.run(translated, flattenParams(params));
    return { changes: result.rowsChanged };
  }

  /**
   * Disconnect and release this connection's share of the DuckDB instance.
   * An owned instance is closed directly; a shared one is handed back to the
   * cache via `releaseInstance`. Calling `close()` more than once is a no-op.
   *
   * The instance is released even when disconnecting throws: otherwise a
   * failed disconnect would leave the instance (and its refcount) held for
   * the rest of the process. The disconnect error still propagates.
   */
  close(): void {
    if (this.closed) return;
    this.closed = true;
    try {
      this.conn.disconnectSync();
    } finally {
      if (this.ownedInstance) {
        this.ownedInstance.closeSync();
      } else {
        releaseInstance(this.dbPath);
      }
    }
  }
}
|
|
91
|
-
|
|
92
|
-
/**
 * Convert DuckDB row values to JS-friendly types:
 * - BigInt → number (safe for counts and IDs; values beyond
 *   Number.MAX_SAFE_INTEGER lose precision)
 * - Date → ISO string (matches our TEXT column convention); an invalid Date
 *   is passed through untouched because `toISOString()` would throw on it
 * - Everything else, including nested arrays/objects, is left as-is
 *
 * @param row - One row object as returned by DuckDB.
 * @returns A new object with the same keys; the input is not mutated.
 */
// biome-ignore lint/suspicious/noExplicitAny: row values are dynamic
function convertRow(row: Record<string, any>): Record<string, unknown> {
  const out: Record<string, unknown> = {};
  for (const [key, val] of Object.entries(row)) {
    if (typeof val === "bigint") {
      out[key] = Number(val);
    } else if (val instanceof Date && !Number.isNaN(val.getTime())) {
      out[key] = val.toISOString();
    } else {
      out[key] = val;
    }
  }
  return out;
}
|
|
110
|
-
|
|
111
|
-
/**
 * Rewrites every `?N` placeholder (question mark followed by digits) to the
 * `$N` form DuckDB expects. A bare `?` with no digits is left untouched.
 */
function translateParams(sql: string): string {
  return sql.replace(/\?(\d+)/g, (_match, index: string) => `$${index}`);
}
|
|
115
|
-
|
|
116
|
-
/**
 * Prepares bound parameters for DuckDB: array values (used for vector
 * columns) are serialized to JSON strings, every other value is passed
 * through unchanged. Returns a new array in the same order.
 */
function flattenParams(params: SqlParam[]): SqlParam[] {
  const flattened: SqlParam[] = [];
  for (const param of params) {
    flattened.push(Array.isArray(param) ? JSON.stringify(param) : param);
  }
  return flattened;
}
|
|
120
|
-
|
|
121
|
-
/**
 * Process-local, reference-counted cache of open DuckDB instances, keyed by
 * database path.
 *
 * Rationale (per the original design notes): DuckDB holds its file lock at
 * the instance level, so the instance itself — not merely the connection —
 * has to be closed before another process can take the writer lock, while
 * opening two instances on the same file from one process is unsafe. Sharing
 * one instance between overlapping `getConnection` calls and closing it when
 * the last connection goes away satisfies both constraints.
 *
 * `:memory:` paths never enter this cache; each caller gets its own
 * isolated in-memory database.
 */
type CachedInstance = {
  // biome-ignore lint/suspicious/noExplicitAny: DuckDB internal types
  instance: any;
  /** Number of live connections currently sharing `instance`. */
  refCount: number;
};

/** Fully opened instances that at least one connection still references. */
const instanceCache: Map<string, CachedInstance> = new Map();

/** Instance creations still in flight, so concurrent callers can await the same one. */
const pendingInstance: Map<string, Promise<CachedInstance>> = new Map();
|
|
142
|
-
|
|
143
|
-
/**
 * True when `path` designates an in-memory DuckDB database, i.e. it begins
 * with the `:memory:` marker (the bare marker itself included).
 */
function isMemoryPath(path: string): boolean {
  // The exact-match case is already covered by the prefix test.
  return path.startsWith(":memory:");
}
|
|
146
|
-
|
|
147
|
-
/**
 * Returns the cached DuckDB instance for `dbPath`, taking one reference on it.
 *
 * Three cases, checked in order:
 *  1. An instance is already cached — bump its refCount and return it.
 *  2. Another caller is currently creating it — await that same promise,
 *     then bump the refCount.
 *  3. Otherwise create it (refCount starts at 1), publishing the in-flight
 *     promise in `pendingInstance` until creation settles.
 *
 * Every successful call must eventually be balanced by `releaseInstance`.
 */
async function acquireSharedInstance(dbPath: string): Promise<CachedInstance> {
  const live = instanceCache.get(dbPath);
  if (live !== undefined) {
    live.refCount += 1;
    return live;
  }

  const opening = pendingInstance.get(dbPath);
  if (opening !== undefined) {
    const shared = await opening;
    shared.refCount += 1;
    return shared;
  }

  const openInstance = async (): Promise<CachedInstance> => {
    const instance = await DuckDBInstance.create(dbPath);
    const entry: CachedInstance = { instance, refCount: 1 };
    instanceCache.set(dbPath, entry);
    return entry;
  };

  const creation = openInstance();
  pendingInstance.set(dbPath, creation);
  try {
    return await creation;
  } finally {
    // Whether creation succeeded or failed, it is no longer in flight.
    pendingInstance.delete(dbPath);
  }
}
|
|
172
|
-
|
|
173
|
-
/**
 * Drops one reference on the cached instance for `dbPath`. When the count
 * reaches zero the entry is evicted from the cache and the instance closed.
 * Unknown paths are ignored.
 */
function releaseInstance(dbPath: string): void {
  const entry = instanceCache.get(dbPath);
  if (entry === undefined) return;

  entry.refCount -= 1;
  if (entry.refCount > 0) return;

  // Evict before closing so no new caller can pick up a closing instance.
  instanceCache.delete(dbPath);
  entry.instance.closeSync();
}
|
|
182
|
-
|
|
183
|
-
/**
 * Opens a connection on `instance` and loads the `fts` extension on it.
 * If loading fails the freshly opened connection is disconnected before the
 * error is rethrown, so a failed setup never leaves a connection behind.
 */
// biome-ignore lint/suspicious/noExplicitAny: DuckDB internal types
async function openFtsConnection(instance: any): Promise<any> {
  const conn = await instance.connect();
  try {
    // INSTALL is a no-op after the first successful install (the extension
    // is persisted to the user's DuckDB extension directory). LOAD is
    // cheap per connection.
    await conn.run("INSTALL fts; LOAD fts;");
    return conn;
  } catch (err) {
    try {
      conn.disconnectSync();
    } catch {
      // Best-effort cleanup: the setup failure below is the error that matters.
    }
    throw err;
  }
}

/**
 * Opens a `DbConnection` for `dbPath` with the `fts` extension loaded.
 *
 * - `:memory:` paths (the default when `dbPath` is omitted) get a private
 *   DuckDB instance that the returned connection owns and closes.
 * - File paths share one instance per process through the refcounted cache.
 *
 * If connecting or loading `fts` fails, everything acquired so far is given
 * back (the private instance is closed, or the shared reference released)
 * before the error propagates.
 *
 * @param dbPath - Database file path, or omitted for an in-memory database.
 * @returns A connection the caller must `close()`.
 */
export async function getConnection(dbPath?: string): Promise<DbConnection> {
  const path = dbPath ?? ":memory:";

  if (isMemoryPath(path)) {
    const instance = await DuckDBInstance.create(path);
    try {
      const conn = await openFtsConnection(instance);
      return new DbConnection(conn, instance, path);
    } catch (err) {
      // Nobody else can reach this private instance; close it or it leaks.
      instance.closeSync();
      throw err;
    }
  }

  const cached = await acquireSharedInstance(path);
  try {
    const conn = await openFtsConnection(cached.instance);
    return new DbConnection(conn, null, path);
  } catch (err) {
    releaseInstance(path);
    throw err;
  }
}
|
|
206
|
-
|
|
207
|
-
/**
 * Runs `fn` against a freshly opened connection to `dbPath` and always
 * closes that connection afterwards, whether `fn` resolves or rejects.
 * Opening goes through `withRetry`, so a lock conflict with another process
 * is retried rather than failing on the first attempt.
 *
 * Use one `withDb` per logical operation and keep non-database work (LLM
 * calls, network I/O, filesystem walks) outside of `fn`: per the original
 * design notes, the file lock is only given up once every overlapping
 * connection in this process has closed, so a long-lived connection keeps
 * other processes waiting.
 *
 * @param dbPath - Database file path.
 * @param fn - Work to perform with the open connection.
 * @returns Whatever `fn` resolves to.
 */
export async function withDb<T>(
  dbPath: string,
  fn: (conn: DbConnection) => Promise<T>,
): Promise<T> {
  const connection = await withRetry(() => getConnection(dbPath));
  try {
    const outcome = await fn(connection);
    return outcome;
  } finally {
    connection.close();
  }
}
|
|
228
|
-
|
|
229
|
-
/**
 * Runs `fn`, retrying with exponential backoff while it fails with a DuckDB
 * file-lock conflict ("Conflicting lock is held…"). Any other error
 * propagates immediately.
 *
 * @param fn - Operation to attempt.
 * @param maxRetries - Maximum total number of attempts, the first one
 *   included. Values below 1 (or NaN) are treated as 1 so that `fn` is always
 *   attempted at least once instead of the call failing without running it.
 * @returns Whatever `fn` resolves to.
 * @throws The last lock-conflict error once all attempts are used up, or the
 *   first error that is not a lock conflict.
 */
export async function withRetry<T>(
  fn: () => Promise<T>,
  maxRetries = 8,
): Promise<T> {
  const attempts = maxRetries >= 1 ? Math.ceil(maxRetries) : 1;
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      if (!isLockConflict(err)) throw err;
      if (attempt >= attempts - 1) throw err;
      // Waits between attempts: 100, 200, 400, 800, 1600, 3200, 6400 ms.
      // With the default of 8 attempts that is 7 waits, ~12.7s in total;
      // no wait follows the final attempt.
      await Bun.sleep(100 * 2 ** attempt);
    }
  }
}

/**
 * Heuristic: does `err` look like a DuckDB file-lock conflict? Matches on the
 * message text ("Conflicting lock" or "could not be set"); non-Error values
 * are stringified first.
 */
function isLockConflict(err: unknown): boolean {
  const msg = err instanceof Error ? err.message : String(err);
  return msg.includes("Conflicting lock") || msg.includes("could not be set");
}
|
package/src/db/doctor.ts
DELETED
|
@@ -1,235 +0,0 @@
|
|
|
1
|
-
import { mkdir, rename, rm, stat } from "node:fs/promises";
|
|
2
|
-
import { dirname, join } from "node:path";
|
|
3
|
-
import { withDb } from "./connection.ts";
|
|
4
|
-
|
|
5
|
-
/**
 * Tables checked for primary-key index integrity, each paired with its
 * single-column primary key.
 *
 * The probe exercises the key with a self-update (`SET pk = pk WHERE
 * pk = ...`). Per the original design notes, DuckDB still walks the index
 * for that update, which surfaces "Failed to delete all rows from index"
 * FATAL errors when the index has drifted from the row data. `_migrations`
 * is deliberately left out: it is small, append-only, and rebuilding it
 * would defeat its purpose.
 */
export const PROBE_TABLES: readonly { name: string; pk: string }[] = [
  { name: "context_index", pk: "path" },
];

/** Outcome of probing one table. */
export type ProbeStatus = "ok" | "empty" | "missing" | "corrupt";

/** Verdict for a single probed table. */
export interface ProbeResult {
  /** Name of the table that was probed. */
  table: string;
  status: ProbeStatus;
  /** Detail message when status is corrupt or missing. */
  message?: string;
}
|
|
25
|
-
|
|
26
|
-
/**
 * Probes one table for primary-key index corruption in a child Bun process.
 *
 * A child process is used because (per the original design notes) a corrupt
 * PK index in DuckDB surfaces as a Bun panic — a C++ exception unwinding past
 * the NAPI boundary — which would otherwise take the doctor down with it.
 * The child prints a one-word verdict on stdout (`OK`, `EMPTY`, `MISSING:…`,
 * `CORRUPT:…`) and exits.
 *
 * NOTE(review): the script imports `@duckdb/node-api` by bare specifier and
 * no `cwd` is passed to `Bun.spawn`, so module resolution presumably depends
 * on the child's working directory — confirm this matches the intent of
 * picking up the same install as the parent.
 *
 * @param dbPath - Database file to open in the child.
 * @param table - Table name; interpolated into SQL unquoted, so it must be a
 *   trusted identifier.
 * @param pk - Primary-key column name; same trust requirement as `table`.
 * @returns The verdict. Any child output that is not a recognized verdict is
 *   reported as `corrupt` along with the child's exit code.
 */
export async function probeTable(
  dbPath: string,
  table: string,
  pk: string,
): Promise<ProbeResult> {
  const script = `
    const { DuckDBInstance } = await import("@duckdb/node-api");
    const dbPath = ${JSON.stringify(dbPath)};
    const table = ${JSON.stringify(table)};
    const pk = ${JSON.stringify(pk)};
    let inst;
    try {
      inst = await DuckDBInstance.create(dbPath);
    } catch (e) {
      process.stdout.write("MISSING:" + (e?.message ?? String(e)));
      process.exit(0);
    }
    const c = await inst.connect();
    try {
      const r = await c.runAndReadAll(\`SELECT \${pk} FROM \${table} LIMIT 1\`);
      if (r.getRows().length === 0) {
        process.stdout.write("EMPTY");
        process.exit(0);
      }
    } catch (e) {
      const msg = String(e?.message ?? e);
      // Table doesn't exist yet (e.g., schema older than this doctor) — not
      // a corruption signal, just skip it.
      if (msg.includes("does not exist") || msg.includes("Catalog Error")) {
        process.stdout.write("MISSING:" + msg);
        process.exit(0);
      }
      process.stdout.write("CORRUPT:" + msg);
      process.exit(2);
    }
    try {
      await c.run(\`UPDATE \${table} SET \${pk} = \${pk} WHERE \${pk} = (SELECT \${pk} FROM \${table} LIMIT 1)\`);
      process.stdout.write("OK");
      process.exit(0);
    } catch (e) {
      process.stdout.write("CORRUPT:" + (e?.message ?? String(e)));
      process.exit(2);
    }
  `;

  // The child's stderr is discarded: when the probe panics, Bun prints a
  // multi-line crash banner there that would otherwise leak into the table
  // output through the fallback message. The exit code carries what we need.
  const child = Bun.spawn(["bun", "-e", script], {
    stdio: ["ignore", "pipe", "ignore"],
  });
  const [stdout, exitCode] = await Promise.all([
    new Response(child.stdout).text(),
    child.exited,
  ]);

  // Recognized verdicts, checked in this order: [stdout prefix, status,
  // whether the text after the prefix is a detail message].
  const verdicts: ReadonlyArray<[string, ProbeStatus, boolean]> = [
    ["OK", "ok", false],
    ["EMPTY", "empty", false],
    ["MISSING:", "missing", true],
    ["CORRUPT:", "corrupt", true],
  ];
  for (const [prefix, status, hasDetail] of verdicts) {
    if (!stdout.startsWith(prefix)) continue;
    if (!hasDetail) return { table, status };
    return {
      table,
      status,
      message: firstLine(stdout.slice(prefix.length)),
    };
  }

  // No verdict on stdout (e.g. a Bun panic killed the child). Treat any
  // unrecognized exit as corruption — better to flag for repair than to
  // silently miss a problem.
  return {
    table,
    status: "corrupt",
    message: `child exited with code ${exitCode} (likely native panic)`,
  };
}
|
|
118
|
-
|
|
119
|
-
/**
 * Probes every table listed in `PROBE_TABLES`, one after another, and
 * returns the verdicts in list order.
 *
 * The probes are deliberately sequential rather than parallel: per the
 * original design notes this cooperates with DuckDB's per-process file lock
 * and keeps a panic from affecting more than one probe at a time.
 */
export async function probeAllTables(dbPath: string): Promise<ProbeResult[]> {
  const verdicts: ProbeResult[] = [];
  for (const spec of PROBE_TABLES) {
    const verdict = await probeTable(dbPath, spec.name, spec.pk);
    verdicts.push(verdict);
  }
  return verdicts;
}
|
|
131
|
-
|
|
132
|
-
/** Summary of a completed repair. */
export interface RepairResult {
  /** Where the pre-repair database file was moved. */
  backupDbPath: string;
  /** Directory the export was written to (removed again after a successful import). */
  exportDir: string;
  /** Wall-clock duration of the whole repair, in milliseconds. */
  durationMs: number;
}

/**
 * Rebuilds the database at `dbPath` by exporting it and importing the export
 * into a fresh file.
 *
 * Per the original design notes, EXPORT DATABASE reads through sequential
 * scans rather than PK indexes, so it survives the kind of index corruption
 * that breaks UPDATE/DELETE, and IMPORT DATABASE rebuilds every index from
 * the data, restoring write integrity.
 *
 * Steps:
 *   1. CHECKPOINT (best-effort) to flush the WAL.
 *   2. EXPORT DATABASE into `.export-<timestamp>` next to the database file.
 *   3. Move the database file (and its `.wal`, when present) to
 *      `<dbPath>.bak-<timestamp>`.
 *   4. Open a fresh database at the original path and IMPORT DATABASE.
 *   5. Remove the export directory (best-effort). If step 4 throws, this
 *      step is never reached and the export stays on disk as insurance.
 *
 * The caller is responsible for ensuring no other process holds the DB
 * (no running workers, no chat session, no TUI).
 *
 * @param dbPath - Path of the database file to repair.
 * @returns Backup location, export directory path and elapsed time.
 */
export async function repairDatabase(dbPath: string): Promise<RepairResult> {
  const startedAt = Date.now();
  const parentDir = dirname(dbPath);
  await mkdir(parentDir, { recursive: true });

  // Filesystem-friendly timestamp: ISO string with ":" and "." replaced and
  // the trailing "Z" dropped.
  const isoNow = new Date().toISOString();
  const stamp = isoNow.replace(/[:.]/g, "-").replace(/Z$/, "");

  const exportDir = join(parentDir, `.export-${stamp}`);
  const backupDbPath = `${dbPath}.bak-${stamp}`;
  // Single quotes are doubled so the path is a valid SQL string literal.
  const quotedExportDir = exportDir.replace(/'/g, "''");

  await withDb(dbPath, async (conn) => {
    try {
      await conn.exec("CHECKPOINT");
    } catch {
      // CHECKPOINT can fail on an already-invalidated DB; the EXPORT
      // below is what actually matters.
    }
    await conn.exec(`EXPORT DATABASE '${quotedExportDir}'`);
  });

  await rename(dbPath, backupDbPath);
  const walPath = `${dbPath}.wal`;
  if (await pathExists(walPath)) {
    await rename(walPath, `${backupDbPath}.wal`);
  }

  await withDb(dbPath, async (conn) => {
    await conn.exec(`IMPORT DATABASE '${quotedExportDir}'`);
  });

  // Best-effort cleanup of the export dir. A failure here is harmless — the
  // rebuilt database and the backup are both in place.
  try {
    await rm(exportDir, { recursive: true, force: true });
  } catch {
    // ignore
  }

  return { backupDbPath, exportDir, durationMs: Date.now() - startedAt };
}
|
|
203
|
-
|
|
204
|
-
/**
 * Resolves to `true` when `stat` succeeds on `p`, and to `false` when it
 * fails for any reason (not just "no such file").
 */
async function pathExists(p: string): Promise<boolean> {
  return stat(p).then(
    () => true,
    () => false,
  );
}
|
|
212
|
-
|
|
213
|
-
/**
 * Returns the first line of `s` after trimming surrounding whitespace,
 * i.e. everything before the first "\n" of the trimmed text.
 */
function firstLine(s: string): string {
  const [head] = s.trim().split("\n", 1);
  return head ?? "";
}
|
|
218
|
-
|
|
219
|
-
/**
 * Tests whether `pid` refers to a live process by sending it signal 0.
 *
 * Returns `false` for a falsy or non-positive pid, and whenever the signal
 * call throws — ESRCH (no such process) as well as any other error,
 * including EPERM, which is conservatively treated as "not ours, not
 * relevant". The doctor's safety gate uses this to tell workers that are
 * really running from rows still marked `status = 'running'` because the
 * worker crashed before flipping its row to `stopped` or `dead`.
 */
export function isPidAlive(pid: number): boolean {
  const usable = Boolean(pid) && pid >= 1;
  if (!usable) return false;

  try {
    // Signal 0 performs the existence/permission check without delivering a signal.
    process.kill(pid, 0);
  } catch {
    return false;
  }
  return true;
}
|