botholomew 0.16.4 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/README.md +46 -41
  2. package/package.json +4 -9
  3. package/src/chat/agent.ts +37 -40
  4. package/src/chat/session.ts +10 -10
  5. package/src/cli.ts +0 -2
  6. package/src/commands/capabilities.ts +35 -33
  7. package/src/commands/context.ts +133 -221
  8. package/src/commands/init.ts +22 -1
  9. package/src/commands/mcpx.ts +21 -8
  10. package/src/commands/nuke.ts +52 -15
  11. package/src/commands/prepare.ts +16 -13
  12. package/src/config/loader.ts +1 -8
  13. package/src/config/schemas.ts +6 -0
  14. package/src/constants.ts +16 -32
  15. package/src/init/index.ts +52 -27
  16. package/src/mcpx/client.ts +21 -5
  17. package/src/mem/client.ts +33 -0
  18. package/src/{context → prompts}/capabilities.ts +11 -7
  19. package/src/schedules/store.ts +1 -1
  20. package/src/tasks/store.ts +1 -1
  21. package/src/threads/store.ts +1 -1
  22. package/src/tools/capabilities/refresh.ts +1 -1
  23. package/src/tools/membot/adapter.ts +111 -0
  24. package/src/tools/membot/copy.ts +59 -0
  25. package/src/tools/membot/count_lines.ts +53 -0
  26. package/src/tools/membot/edit.ts +72 -0
  27. package/src/tools/membot/exists.ts +54 -0
  28. package/src/tools/membot/index.ts +26 -0
  29. package/src/tools/{context → membot}/pipe.ts +34 -32
  30. package/src/tools/registry.ts +6 -37
  31. package/src/tools/tool.ts +6 -8
  32. package/src/tui/App.tsx +3 -4
  33. package/src/tui/components/ContextPanel.tsx +109 -226
  34. package/src/tui/components/HelpPanel.tsx +2 -2
  35. package/src/tui/components/StatusBar.tsx +0 -6
  36. package/src/tui/components/ThreadPanel.tsx +8 -7
  37. package/src/tui/wrapDetail.ts +11 -0
  38. package/src/worker/heartbeat.ts +0 -20
  39. package/src/worker/index.ts +13 -13
  40. package/src/worker/llm.ts +7 -9
  41. package/src/worker/prompt.ts +25 -13
  42. package/src/worker/spawn.ts +1 -1
  43. package/src/worker/tick.ts +10 -9
  44. package/src/commands/db.ts +0 -119
  45. package/src/commands/with-db.ts +0 -22
  46. package/src/context/chunker.ts +0 -275
  47. package/src/context/embedder-impl.ts +0 -100
  48. package/src/context/embedder.ts +0 -9
  49. package/src/context/fetcher-errors.ts +0 -8
  50. package/src/context/fetcher.ts +0 -515
  51. package/src/context/locks.ts +0 -146
  52. package/src/context/markdown-converter.ts +0 -186
  53. package/src/context/reindex.ts +0 -198
  54. package/src/context/store.ts +0 -841
  55. package/src/context/url-utils.ts +0 -25
  56. package/src/db/connection.ts +0 -255
  57. package/src/db/doctor.ts +0 -235
  58. package/src/db/embeddings.ts +0 -317
  59. package/src/db/query.ts +0 -56
  60. package/src/db/schema.ts +0 -93
  61. package/src/db/sql/1-core_tables.sql +0 -53
  62. package/src/db/sql/10-dedupe_context_items.sql +0 -26
  63. package/src/db/sql/11-rebuild_hnsw.sql +0 -8
  64. package/src/db/sql/12-workers.sql +0 -66
  65. package/src/db/sql/13-drive-paths.sql +0 -47
  66. package/src/db/sql/14-drop_hnsw_index.sql +0 -8
  67. package/src/db/sql/15-fts_index.sql +0 -8
  68. package/src/db/sql/16-source_url.sql +0 -7
  69. package/src/db/sql/17-worker_log_path.sql +0 -3
  70. package/src/db/sql/18-reset_embeddings_for_local.sql +0 -39
  71. package/src/db/sql/19-disk_backed_index.sql +0 -36
  72. package/src/db/sql/2-logging_tables.sql +0 -24
  73. package/src/db/sql/20-drop_db_tables_for_files.sql +0 -19
  74. package/src/db/sql/3-daemon_state.sql +0 -5
  75. package/src/db/sql/4-unique_context_path.sql +0 -1
  76. package/src/db/sql/5-reset_embeddings_for_openai.sql +0 -1
  77. package/src/db/sql/6-vss_index.sql +0 -7
  78. package/src/db/sql/7-drop_embeddings_fk.sql +0 -23
  79. package/src/db/sql/8-task_output.sql +0 -1
  80. package/src/db/sql/9-source-type.sql +0 -1
  81. package/src/tools/context/read-large-result.ts +0 -33
  82. package/src/tools/dir/create.ts +0 -47
  83. package/src/tools/dir/size.ts +0 -77
  84. package/src/tools/dir/tree.ts +0 -124
  85. package/src/tools/file/copy.ts +0 -73
  86. package/src/tools/file/count-lines.ts +0 -54
  87. package/src/tools/file/delete.ts +0 -83
  88. package/src/tools/file/edit.ts +0 -76
  89. package/src/tools/file/exists.ts +0 -33
  90. package/src/tools/file/info.ts +0 -66
  91. package/src/tools/file/move.ts +0 -66
  92. package/src/tools/file/read.ts +0 -67
  93. package/src/tools/file/write.ts +0 -58
  94. package/src/tools/search/fuse.ts +0 -96
  95. package/src/tools/search/index.ts +0 -127
  96. package/src/tools/search/regexp.ts +0 -82
  97. package/src/tools/search/semantic.ts +0 -167
  98. package/src/{db → utils}/uuid.ts +0 -0
package/src/db/embeddings.ts DELETED
@@ -1,317 +0,0 @@
1
- import { EMBEDDING_DIMENSION } from "../constants.ts";
2
- import type { DbConnection } from "./connection.ts";
3
-
4
- if (!Number.isInteger(EMBEDDING_DIMENSION) || EMBEDDING_DIMENSION <= 0) {
5
- throw new Error(`Invalid EMBEDDING_DIMENSION: ${EMBEDDING_DIMENSION}`);
6
- }
7
-
8
- /**
9
- * Disk-backed search index over `<projectDir>/context/`. One row per
10
- * `(path, chunk_index)`; `content_hash` is the file-level sha256 so the
11
- * reindex algorithm can detect adds, updates, and removals in one pass.
12
- */
13
- export interface IndexedChunk {
14
- path: string;
15
- chunk_index: number;
16
- content_hash: string;
17
- chunk_content: string;
18
- embedding: number[];
19
- mtime_ms: number;
20
- size_bytes: number;
21
- indexed_at: Date;
22
- }
23
-
24
- interface IndexRow {
25
- path: string;
26
- chunk_index: number;
27
- content_hash: string;
28
- chunk_content: string;
29
- embedding: number[] | null;
30
- mtime_ms: number;
31
- size_bytes: number;
32
- indexed_at: string;
33
- }
34
-
35
- function rowToChunk(row: IndexRow): IndexedChunk {
36
- return {
37
- path: row.path,
38
- chunk_index: row.chunk_index,
39
- content_hash: row.content_hash,
40
- chunk_content: row.chunk_content,
41
- embedding: row.embedding ?? [],
42
- mtime_ms: Number(row.mtime_ms),
43
- size_bytes: Number(row.size_bytes),
44
- indexed_at: new Date(row.indexed_at),
45
- };
46
- }
47
-
48
- export interface ChunkInput {
49
- chunk_index: number;
50
- chunk_content: string;
51
- embedding: number[];
52
- }
53
-
54
- /**
55
- * Replace all rows for `path` with the supplied chunks. The file-level
56
- * `content_hash` / `mtime_ms` / `size_bytes` are stored on every row so a
57
- * subsequent reindex can short-circuit by comparing just those columns.
58
- */
59
- export async function upsertChunksForPath(
60
- conn: DbConnection,
61
- params: {
62
- path: string;
63
- contentHash: string;
64
- mtimeMs: number;
65
- sizeBytes: number;
66
- chunks: ChunkInput[];
67
- },
68
- ): Promise<void> {
69
- await conn.queryRun("DELETE FROM context_index WHERE path = ?1", params.path);
70
- for (const c of params.chunks) {
71
- await conn.queryRun(
72
- `INSERT INTO context_index
73
- (path, chunk_index, content_hash, chunk_content, embedding, mtime_ms, size_bytes, indexed_at)
74
- VALUES (?1, ?2, ?3, ?4, ?5::FLOAT[${EMBEDDING_DIMENSION}], ?6, ?7, current_timestamp::VARCHAR)`,
75
- params.path,
76
- c.chunk_index,
77
- params.contentHash,
78
- c.chunk_content,
79
- c.embedding,
80
- params.mtimeMs,
81
- params.sizeBytes,
82
- );
83
- }
84
- }
85
-
86
- export async function deleteIndexedPath(
87
- conn: DbConnection,
88
- path: string,
89
- ): Promise<number> {
90
- const result = await conn.queryRun(
91
- "DELETE FROM context_index WHERE path = ?1",
92
- path,
93
- );
94
- return result.changes;
95
- }
96
-
97
- /**
98
- * Remove every indexed entry whose path equals `prefix` or lives beneath
99
- * `prefix/`. Used when a folder is deleted from `context/` and we need to
100
- * drop all child entries in one shot.
101
- */
102
- export async function deleteIndexedPathsUnder(
103
- conn: DbConnection,
104
- prefix: string,
105
- ): Promise<number> {
106
- const result = await conn.queryRun(
107
- "DELETE FROM context_index WHERE path = ?1 OR path LIKE ?2",
108
- prefix,
109
- `${prefix}/%`,
110
- );
111
- return result.changes;
112
- }
113
-
114
- export interface IndexedPathSummary {
115
- path: string;
116
- content_hash: string;
117
- mtime_ms: number;
118
- size_bytes: number;
119
- chunk_count: number;
120
- }
121
-
122
- export async function listIndexedPaths(
123
- conn: DbConnection,
124
- ): Promise<IndexedPathSummary[]> {
125
- const rows = await conn.queryAll<{
126
- path: string;
127
- content_hash: string;
128
- mtime_ms: number;
129
- size_bytes: number;
130
- chunk_count: number;
131
- }>(
132
- `SELECT path,
133
- ANY_VALUE(content_hash) AS content_hash,
134
- ANY_VALUE(mtime_ms) AS mtime_ms,
135
- ANY_VALUE(size_bytes) AS size_bytes,
136
- COUNT(*) AS chunk_count
137
- FROM context_index
138
- GROUP BY path
139
- ORDER BY path ASC`,
140
- );
141
- return rows.map((r) => ({
142
- path: r.path,
143
- content_hash: r.content_hash,
144
- mtime_ms: Number(r.mtime_ms),
145
- size_bytes: Number(r.size_bytes),
146
- chunk_count: Number(r.chunk_count),
147
- }));
148
- }
149
-
150
- export async function getIndexedPath(
151
- conn: DbConnection,
152
- path: string,
153
- ): Promise<IndexedPathSummary | null> {
154
- const row = await conn.queryGet<{
155
- path: string;
156
- content_hash: string;
157
- mtime_ms: number;
158
- size_bytes: number;
159
- chunk_count: number;
160
- }>(
161
- `SELECT path,
162
- ANY_VALUE(content_hash) AS content_hash,
163
- ANY_VALUE(mtime_ms) AS mtime_ms,
164
- ANY_VALUE(size_bytes) AS size_bytes,
165
- COUNT(*) AS chunk_count
166
- FROM context_index
167
- WHERE path = ?1
168
- GROUP BY path`,
169
- path,
170
- );
171
- if (!row) return null;
172
- return {
173
- path: row.path,
174
- content_hash: row.content_hash,
175
- mtime_ms: Number(row.mtime_ms),
176
- size_bytes: Number(row.size_bytes),
177
- chunk_count: Number(row.chunk_count),
178
- };
179
- }
180
-
181
- export interface SearchResult extends IndexedChunk {
182
- score: number;
183
- }
184
-
185
- /**
186
- * Vector similarity over `context_index.embedding`. Returns chunks sorted by
187
- * cosine similarity (higher = closer). Skips rows whose embedding is NULL.
188
- */
189
- export async function searchSemantic(
190
- conn: DbConnection,
191
- queryEmbedding: number[],
192
- limit = 10,
193
- ): Promise<SearchResult[]> {
194
- const rows = await conn.queryAll<IndexRow & { distance: number }>(
195
- `SELECT *, array_cosine_distance(embedding, ?1::FLOAT[${EMBEDDING_DIMENSION}]) AS distance
196
- FROM context_index
197
- WHERE embedding IS NOT NULL
198
- ORDER BY distance ASC
199
- LIMIT ?2`,
200
- queryEmbedding,
201
- limit,
202
- );
203
- return rows.map((row) => ({
204
- ...rowToChunk(row),
205
- score: 1 - row.distance,
206
- }));
207
- }
208
-
209
- /**
210
- * BM25 keyword search over (chunk_content, path). The FTS index is rebuilt
211
- * lazily by `rebuildSearchIndex`. Returns null-scoring rows filtered out.
212
- */
213
- export async function searchKeyword(
214
- conn: DbConnection,
215
- query: string,
216
- limit = 10,
217
- ): Promise<SearchResult[]> {
218
- // The FTS index is created with `path` as input_id (see
219
- // rebuildSearchIndex), so match_bm25's first argument must be the path
220
- // value, not rowid. Passing rowid silently returns no hits — searchHybrid
221
- // would then degrade to semantic-only.
222
- const rows = await conn.queryAll<IndexRow & { score: number }>(
223
- `SELECT context_index.*,
224
- fts_main_context_index.match_bm25(context_index.path, ?1) AS score
225
- FROM context_index
226
- WHERE fts_main_context_index.match_bm25(context_index.path, ?1) IS NOT NULL
227
- ORDER BY score DESC
228
- LIMIT ?2`,
229
- query,
230
- limit,
231
- );
232
- return rows.map((row) => ({ ...rowToChunk(row), score: Number(row.score) }));
233
- }
234
-
235
- /**
236
- * Reciprocal-rank fusion of semantic + keyword results, deduped by
237
- * (path, chunk_index).
238
- */
239
- export async function searchHybrid(
240
- conn: DbConnection,
241
- query: string,
242
- queryEmbedding: number[],
243
- limit = 10,
244
- ): Promise<SearchResult[]> {
245
- const k = 60;
246
- const [semantic, keyword] = await Promise.all([
247
- searchSemantic(conn, queryEmbedding, 100),
248
- searchKeyword(conn, query, 100).catch(() => [] as SearchResult[]),
249
- ]);
250
-
251
- const scores = new Map<string, { chunk: IndexedChunk; score: number }>();
252
- const key = (c: IndexedChunk) => `${c.path}::${c.chunk_index}`;
253
-
254
- for (let i = 0; i < semantic.length; i++) {
255
- const c = semantic[i];
256
- if (!c) continue;
257
- const existing = scores.get(key(c));
258
- const rrf = 1 / (k + i + 1);
259
- if (existing) existing.score += rrf;
260
- else scores.set(key(c), { chunk: c, score: rrf });
261
- }
262
- for (let i = 0; i < keyword.length; i++) {
263
- const c = keyword[i];
264
- if (!c) continue;
265
- const existing = scores.get(key(c));
266
- const rrf = 1 / (k + i + 1);
267
- if (existing) existing.score += rrf;
268
- else scores.set(key(c), { chunk: c, score: rrf });
269
- }
270
- const merged = [...scores.values()].sort((a, b) => b.score - a.score);
271
- return merged.slice(0, limit).map((m) => ({ ...m.chunk, score: m.score }));
272
- }
273
-
274
- /**
275
- * Rebuild the FTS index over (chunk_content, path). DuckDB's FTS index is a
276
- * snapshot — it does not update incrementally on INSERT/UPDATE/DELETE, so any
277
- * batch writer must call this once its transaction commits.
278
- *
279
- * The trailing CHECKPOINT is load-bearing (see history): `overwrite = 1`
280
- * writes a `DROP SCHEMA fts_main_context_index` record into the WAL; without
281
- * the checkpoint, replay on the next open can fail with "Cannot drop entry
282
- * 'fts_main_context_index' because there are entries that depend on it".
283
- */
284
- export async function rebuildSearchIndex(conn: DbConnection): Promise<void> {
285
- // Skip if the table doesn't exist yet (e.g., fresh tests with an empty
286
- // schema). The FTS extension errors out on a missing table.
287
- const exists = await conn.queryGet<{ name: string }>(
288
- "SELECT table_name AS name FROM information_schema.tables WHERE table_name = 'context_index'",
289
- );
290
- if (!exists) return;
291
- await conn.exec(
292
- "PRAGMA create_fts_index('context_index', 'path', 'chunk_content', 'path', overwrite = 1)",
293
- );
294
- await conn.exec("CHECKPOINT");
295
- }
296
-
297
- export async function indexStats(conn: DbConnection): Promise<{
298
- paths: number;
299
- chunks: number;
300
- embedded: number;
301
- }> {
302
- const row = await conn.queryGet<{
303
- paths: number;
304
- chunks: number;
305
- embedded: number;
306
- }>(
307
- `SELECT COUNT(DISTINCT path) AS paths,
308
- COUNT(*) AS chunks,
309
- COUNT(embedding) AS embedded
310
- FROM context_index`,
311
- );
312
- return {
313
- paths: Number(row?.paths ?? 0),
314
- chunks: Number(row?.chunks ?? 0),
315
- embedded: Number(row?.embedded ?? 0),
316
- };
317
- }
package/src/db/query.ts DELETED
@@ -1,56 +0,0 @@
1
- type SqlParam = string | number | null;
2
-
3
- /**
4
- * Validate that a value is a positive integer, suitable for use in
5
- * LIMIT / OFFSET clauses that must be interpolated into SQL strings.
6
- */
7
- export function sanitizeInt(val: number): number {
8
- if (!Number.isInteger(val) || val <= 0) {
9
- throw new Error(`Expected a positive integer, got: ${val}`);
10
- }
11
- return val;
12
- }
13
-
14
- /**
15
- * Build a WHERE clause from column-value pairs.
16
- * Entries with `undefined` values are skipped.
17
- */
18
- export function buildWhereClause(filters: [string, SqlParam | undefined][]): {
19
- where: string;
20
- params: SqlParam[];
21
- } {
22
- const conditions: string[] = [];
23
- const params: SqlParam[] = [];
24
-
25
- for (const [col, val] of filters) {
26
- if (val !== undefined) {
27
- params.push(val);
28
- conditions.push(`${col} = ?${params.length}`);
29
- }
30
- }
31
-
32
- const where =
33
- conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
34
- return { where, params };
35
- }
36
-
37
- /**
38
- * Build SET clauses for an UPDATE from column-value pairs.
39
- * Entries with `undefined` values are skipped.
40
- */
41
- export function buildSetClauses(fields: [string, SqlParam | undefined][]): {
42
- setClauses: string[];
43
- params: SqlParam[];
44
- } {
45
- const setClauses: string[] = [];
46
- const params: SqlParam[] = [];
47
-
48
- for (const [col, val] of fields) {
49
- if (val !== undefined) {
50
- params.push(val);
51
- setClauses.push(`${col} = ?${params.length}`);
52
- }
53
- }
54
-
55
- return { setClauses, params };
56
- }
package/src/db/schema.ts DELETED
@@ -1,93 +0,0 @@
1
- import { readdirSync, readFileSync } from "node:fs";
2
- import { join } from "node:path";
3
- import { logger } from "../utils/logger.ts";
4
- import type { DbConnection } from "./connection.ts";
5
- import { rebuildSearchIndex } from "./embeddings.ts";
6
-
7
- interface Migration {
8
- id: number;
9
- name: string;
10
- sql: string;
11
- }
12
-
13
- const sqlDir = join(import.meta.dir, "sql");
14
-
15
- function loadMigrations(): Migration[] {
16
- const files = readdirSync(sqlDir).filter((f) => f.endsWith(".sql"));
17
-
18
- const migrations = files.map((file) => {
19
- const match = file.match(/^(\d+)-(.+)\.sql$/);
20
- if (!match) throw new Error(`Invalid migration filename: ${file}`);
21
- const id = match[1];
22
- const name = match[2];
23
- if (!id || !name) throw new Error(`Invalid migration filename: ${file}`);
24
- return {
25
- id: parseInt(id, 10),
26
- name,
27
- sql: readFileSync(join(sqlDir, file), "utf-8"),
28
- };
29
- });
30
-
31
- // Sort by numeric id so `12-` runs after `2-`, not between `11-` and `2-`.
32
- return migrations.sort((a, b) => a.id - b.id);
33
- }
34
-
35
- export async function migrate(db: DbConnection): Promise<void> {
36
- // Create migrations tracking table
37
- await db.exec(`
38
- CREATE TABLE IF NOT EXISTS _migrations (
39
- id INTEGER PRIMARY KEY,
40
- name TEXT NOT NULL,
41
- applied_at TEXT DEFAULT (current_timestamp::VARCHAR)
42
- )
43
- `);
44
-
45
- // Get already-applied migrations
46
- const rows = await db.queryAll<{ id: number }>("SELECT id FROM _migrations");
47
- const applied = new Set(rows.map((row) => row.id));
48
-
49
- // Run pending migrations in order
50
- const pending = loadMigrations().filter((m) => !applied.has(m.id));
51
- if (pending.length > 0) {
52
- logger.info(
53
- `applying ${pending.length} migration${pending.length === 1 ? "" : "s"}`,
54
- );
55
- }
56
-
57
- let appliedAny = false;
58
- for (const migration of pending) {
59
- logger.info(` ${migration.id}. ${migration.name}`);
60
-
61
- // Split on semicolons and run each statement individually
62
- const statements = migration.sql
63
- .split(";")
64
- .map((s) => s.trim())
65
- .filter((s) => s.length > 0);
66
-
67
- for (const statement of statements) {
68
- await db.exec(statement);
69
- }
70
-
71
- await db.queryRun(
72
- "INSERT INTO _migrations (id, name) VALUES (?1, ?2)",
73
- migration.id,
74
- migration.name,
75
- );
76
- appliedAny = true;
77
- }
78
-
79
- // Flush the WAL so the next open has no schema entries to replay. DuckDB's
80
- // WAL replay of ALTER TABLE re-binds all column defaults on the target
81
- // table, and our CREATE TABLE defaults use `current_timestamp::VARCHAR` —
82
- // which cannot be resolved during replay (no default database attached yet),
83
- // crashing the process on reopen.
84
- if (appliedAny) {
85
- await db.exec("CHECKPOINT");
86
- }
87
-
88
- // Ensure the FTS index exists. Migration 18 drops it (it can't recreate it
89
- // in the same SQL run without DuckDB rejecting the dependency commit), and
90
- // fresh DBs need it created at least once. `overwrite = 1` makes this
91
- // idempotent for DBs that already have a healthy FTS index.
92
- await rebuildSearchIndex(db);
93
- }
package/src/db/sql/1-core_tables.sql DELETED
@@ -1,53 +0,0 @@
1
- CREATE TABLE tasks (
2
- id TEXT PRIMARY KEY,
3
- name TEXT NOT NULL,
4
- description TEXT NOT NULL DEFAULT '',
5
- priority TEXT NOT NULL DEFAULT 'medium' CHECK(priority IN ('low', 'medium', 'high')),
6
- status TEXT NOT NULL DEFAULT 'pending' CHECK(status IN ('pending', 'in_progress', 'failed', 'complete', 'waiting')),
7
- waiting_reason TEXT,
8
- claimed_by TEXT,
9
- claimed_at TEXT,
10
- blocked_by TEXT NOT NULL DEFAULT '[]',
11
- context_ids TEXT NOT NULL DEFAULT '[]',
12
- created_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
13
- updated_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR)
14
- );
15
-
16
- CREATE TABLE schedules (
17
- id TEXT PRIMARY KEY,
18
- name TEXT NOT NULL,
19
- description TEXT NOT NULL DEFAULT '',
20
- frequency TEXT NOT NULL,
21
- last_run_at TEXT,
22
- enabled BOOLEAN NOT NULL DEFAULT true,
23
- created_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
24
- updated_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR)
25
- );
26
-
27
- CREATE TABLE context_items (
28
- id TEXT PRIMARY KEY,
29
- title TEXT NOT NULL,
30
- description TEXT NOT NULL DEFAULT '',
31
- content TEXT,
32
- content_blob BLOB,
33
- mime_type TEXT NOT NULL DEFAULT 'text/plain',
34
- is_textual BOOLEAN NOT NULL DEFAULT true,
35
- source_path TEXT,
36
- context_path TEXT NOT NULL,
37
- indexed_at TEXT,
38
- created_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
39
- updated_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR)
40
- );
41
-
42
- CREATE TABLE embeddings (
43
- id TEXT PRIMARY KEY,
44
- context_item_id TEXT NOT NULL REFERENCES context_items(id),
45
- chunk_index INTEGER NOT NULL,
46
- chunk_content TEXT,
47
- title TEXT NOT NULL,
48
- description TEXT NOT NULL DEFAULT '',
49
- source_path TEXT,
50
- embedding FLOAT[1536],
51
- created_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
52
- UNIQUE(context_item_id, chunk_index)
53
- );
package/src/db/sql/10-dedupe_context_items.sql DELETED
@@ -1,26 +0,0 @@
1
- -- Older DBs could accumulate duplicate rows in context_items with the same
2
- -- context_path: migration 4's CREATE UNIQUE INDEX IF NOT EXISTS silently left
3
- -- the index metadata in place without enforcing it when duplicates predated
4
- -- the migration. The resulting "corrupt" unique index triggers a native
5
- -- crash in @duckdb/node-api on UPDATE ... RETURNING. Rebuild cleanly.
6
- DROP INDEX IF EXISTS idx_context_items_context_path;
7
-
8
- DELETE FROM embeddings WHERE context_item_id IN (
9
- SELECT id FROM (
10
- SELECT id, ROW_NUMBER() OVER (
11
- PARTITION BY context_path
12
- ORDER BY updated_at DESC, id DESC
13
- ) AS rn FROM context_items
14
- ) WHERE rn > 1
15
- );
16
-
17
- DELETE FROM context_items WHERE id IN (
18
- SELECT id FROM (
19
- SELECT id, ROW_NUMBER() OVER (
20
- PARTITION BY context_path
21
- ORDER BY updated_at DESC, id DESC
22
- ) AS rn FROM context_items
23
- ) WHERE rn > 1
24
- );
25
-
26
- CREATE UNIQUE INDEX idx_context_items_context_path ON context_items(context_path);
package/src/db/sql/11-rebuild_hnsw.sql DELETED
@@ -1,8 +0,0 @@
1
- -- Historical: this migration used to drop and recreate the HNSW index
2
- -- to clean up an internally-inconsistent state after native-side crashes
3
- -- during embedding writes. HNSW is now gone (see migration 14) and the
4
- -- VSS extension is no longer loaded at connection time, so the original
5
- -- DDL would fail on fresh DBs. Kept as a no-op to preserve migration
6
- -- numbering for existing databases that have already recorded id 11 in
7
- -- _migrations.
8
- SELECT 1;
package/src/db/sql/12-workers.sql DELETED
@@ -1,66 +0,0 @@
1
- -- Worker agents: replaces the PID-file + OS-watchdog single-daemon model
2
- -- with multiple in-DB registered workers that heartbeat and can be reaped.
3
-
4
- CREATE TABLE workers (
5
- id TEXT PRIMARY KEY,
6
- pid INTEGER NOT NULL,
7
- hostname TEXT NOT NULL,
8
- mode TEXT NOT NULL CHECK(mode IN ('persist', 'once')),
9
- task_id TEXT,
10
- status TEXT NOT NULL CHECK(status IN ('running', 'stopped', 'dead')),
11
- started_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
12
- last_heartbeat_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
13
- stopped_at TEXT
14
- );
15
-
16
- CREATE INDEX idx_workers_status_heartbeat ON workers(status, last_heartbeat_at);
17
-
18
- -- Schedule claim columns: only one worker evaluates a schedule per window.
19
- ALTER TABLE schedules ADD COLUMN claimed_by TEXT;
20
- ALTER TABLE schedules ADD COLUMN claimed_at TEXT;
21
-
22
- -- Rewrite threads.type values: daemon_tick → worker_tick. The existing
23
- -- CHECK constraint forbids the new value, so we rebuild both threads and
24
- -- interactions (whose FK to threads would block a DROP). Dropping the FK
25
- -- follows the 7-drop_embeddings_fk.sql precedent.
26
- CREATE TABLE threads_backup AS SELECT * FROM threads;
27
- CREATE TABLE interactions_backup AS SELECT * FROM interactions;
28
-
29
- DROP TABLE interactions;
30
- DROP TABLE threads;
31
-
32
- CREATE TABLE threads (
33
- id TEXT PRIMARY KEY,
34
- type TEXT NOT NULL CHECK(type IN ('worker_tick', 'chat_session')),
35
- task_id TEXT,
36
- title TEXT NOT NULL DEFAULT '',
37
- started_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
38
- ended_at TEXT,
39
- metadata TEXT
40
- );
41
-
42
- CREATE TABLE interactions (
43
- id TEXT PRIMARY KEY,
44
- thread_id TEXT NOT NULL,
45
- sequence INTEGER NOT NULL,
46
- role TEXT NOT NULL CHECK(role IN ('user', 'assistant', 'system', 'tool')),
47
- kind TEXT NOT NULL CHECK(kind IN ('message', 'thinking', 'tool_use', 'tool_result', 'context_update', 'status_change')),
48
- content TEXT NOT NULL,
49
- tool_name TEXT,
50
- tool_input TEXT,
51
- duration_ms INTEGER,
52
- token_count INTEGER,
53
- created_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
54
- UNIQUE(thread_id, sequence)
55
- );
56
-
57
- INSERT INTO threads
58
- SELECT id,
59
- CASE WHEN type = 'daemon_tick' THEN 'worker_tick' ELSE type END,
60
- task_id, title, started_at, ended_at, metadata
61
- FROM threads_backup;
62
-
63
- INSERT INTO interactions SELECT * FROM interactions_backup;
64
-
65
- DROP TABLE threads_backup;
66
- DROP TABLE interactions_backup;
package/src/db/sql/13-drive-paths.sql DELETED
@@ -1,47 +0,0 @@
1
- -- Milestone 10: collapse `source_path` + `context_path` + `source_type` into a
2
- -- single `(drive, path)` identity pair. Pre-1.0, no backwards-compat promise —
3
- -- we wipe context_items + embeddings and have the user re-add their content.
4
- --
5
- -- DuckDB's ALTER TABLE support is thin (no SET NOT NULL, flaky DROP COLUMN with
6
- -- existing indexes), so this is a table rebuild. Order matters: drop indexes
7
- -- first, then the old tables, then recreate with the new shape.
8
-
9
- DELETE FROM embeddings;
10
- DELETE FROM context_items;
11
-
12
- DROP INDEX IF EXISTS idx_embeddings_cosine;
13
- DROP INDEX IF EXISTS idx_context_items_context_path;
14
-
15
- DROP TABLE embeddings;
16
- DROP TABLE context_items;
17
-
18
- CREATE TABLE context_items (
19
- id TEXT PRIMARY KEY,
20
- title TEXT NOT NULL,
21
- description TEXT NOT NULL DEFAULT '',
22
- content TEXT,
23
- content_blob BLOB,
24
- mime_type TEXT NOT NULL DEFAULT 'text/plain',
25
- is_textual BOOLEAN NOT NULL DEFAULT true,
26
- drive TEXT NOT NULL,
27
- path TEXT NOT NULL,
28
- indexed_at TEXT,
29
- created_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
30
- updated_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR)
31
- );
32
-
33
- CREATE UNIQUE INDEX idx_context_items_drive_path ON context_items(drive, path);
34
-
35
- CREATE TABLE embeddings (
36
- id TEXT PRIMARY KEY,
37
- context_item_id TEXT NOT NULL,
38
- chunk_index INTEGER NOT NULL,
39
- chunk_content TEXT,
40
- title TEXT NOT NULL,
41
- description TEXT NOT NULL DEFAULT '',
42
- embedding FLOAT[1536],
43
- created_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
44
- UNIQUE(context_item_id, chunk_index)
45
- );
46
-
47
- CHECKPOINT;
package/src/db/sql/14-drop_hnsw_index.sql DELETED
@@ -1,8 +0,0 @@
1
- -- HNSW has caused two separate corruption modes in this project: the
2
- -- "Duplicate keys not allowed in high-level wrappers" failure addressed by
3
- -- migration 11, and a second mode where the index silently returns zero rows
4
- -- for cosine top-K queries (its stored SQL loses the `WITH (metric = 'cosine')`
5
- -- clause). At our scale a linear scan of array_cosine_distance is plenty fast
6
- -- and array_cosine_distance is a core DuckDB function — no VSS extension
7
- -- required. Drop the index and move on.
8
- DROP INDEX IF EXISTS idx_embeddings_cosine;
package/src/db/sql/15-fts_index.sql DELETED
@@ -1,8 +0,0 @@
1
- -- Keyword search uses DuckDB's FTS extension for BM25 ranking over
2
- -- chunk_content and title. The index is a snapshot and must be rebuilt
3
- -- after any write to the embeddings table. rebuildSearchIndex() in
4
- -- src/db/embeddings.ts is the single entry point and is called from the
5
- -- ingest transaction. overwrite = 1 makes this PRAGMA idempotent, which
6
- -- also gives us a first-run rebuild for users upgrading from a DB that
7
- -- never had FTS.
8
- PRAGMA create_fts_index('embeddings', 'id', 'chunk_content', 'title', overwrite = 1);
package/src/db/sql/16-source_url.sql DELETED
@@ -1,7 +0,0 @@
1
- -- Issue #145: preserve the original URL that produced each context item so
2
- -- `context refresh` can re-fetch loss-lessly for service-specific drives
3
- -- (google-docs, github, ...). Nullable — local-origin drives (disk, agent,
4
- -- tool writes) leave it NULL and use their own refresh path. Legacy rows
5
- -- ingested before this column existed also leave it NULL and surface a
6
- -- "re-add from URL" error on refresh.
7
- ALTER TABLE context_items ADD COLUMN source_url TEXT;