botholomew 0.12.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/README.md +91 -68
  2. package/package.json +3 -3
  3. package/src/chat/agent.ts +42 -82
  4. package/src/chat/session.ts +29 -25
  5. package/src/commands/capabilities.ts +1 -1
  6. package/src/commands/context.ts +177 -926
  7. package/src/commands/db.ts +9 -13
  8. package/src/commands/init.ts +4 -1
  9. package/src/commands/nuke.ts +57 -90
  10. package/src/commands/schedule.ts +103 -124
  11. package/src/commands/skill.ts +2 -2
  12. package/src/commands/task.ts +86 -95
  13. package/src/commands/thread.ts +107 -112
  14. package/src/commands/worker.ts +88 -88
  15. package/src/constants.ts +93 -16
  16. package/src/context/capabilities.ts +10 -10
  17. package/src/context/fetcher.ts +9 -10
  18. package/src/context/reindex.ts +189 -0
  19. package/src/context/store.ts +630 -0
  20. package/src/db/doctor.ts +1 -8
  21. package/src/db/embeddings.ts +227 -175
  22. package/src/db/sql/19-disk_backed_index.sql +36 -0
  23. package/src/db/sql/20-drop_db_tables_for_files.sql +19 -0
  24. package/src/fs/atomic.ts +217 -0
  25. package/src/fs/compat.ts +86 -0
  26. package/src/fs/sandbox.ts +279 -0
  27. package/src/init/index.ts +69 -52
  28. package/src/init/templates.ts +1 -1
  29. package/src/mcpx/client.ts +1 -1
  30. package/src/schedules/schema.ts +19 -0
  31. package/src/schedules/store.ts +296 -0
  32. package/src/skills/commands.ts +1 -3
  33. package/src/tasks/schema.ts +47 -0
  34. package/src/tasks/store.ts +486 -0
  35. package/src/threads/store.ts +559 -0
  36. package/src/tools/capabilities/refresh.ts +42 -21
  37. package/src/tools/context/pipe.ts +15 -71
  38. package/src/tools/context/update-beliefs.ts +3 -3
  39. package/src/tools/context/update-goals.ts +3 -3
  40. package/src/tools/dir/create.ts +26 -23
  41. package/src/tools/dir/size.ts +46 -17
  42. package/src/tools/dir/tree.ts +73 -279
  43. package/src/tools/file/copy.ts +50 -24
  44. package/src/tools/file/count-lines.ts +34 -10
  45. package/src/tools/file/delete.ts +44 -23
  46. package/src/tools/file/edit.ts +39 -14
  47. package/src/tools/file/exists.ts +12 -26
  48. package/src/tools/file/info.ts +25 -85
  49. package/src/tools/file/move.ts +39 -24
  50. package/src/tools/file/read.ts +32 -80
  51. package/src/tools/file/write.ts +14 -91
  52. package/src/tools/registry.ts +3 -7
  53. package/src/tools/schedule/create.ts +2 -2
  54. package/src/tools/schedule/list.ts +7 -3
  55. package/src/tools/search/fuse.ts +12 -33
  56. package/src/tools/search/index.ts +36 -43
  57. package/src/tools/search/regexp.ts +29 -17
  58. package/src/tools/search/semantic.ts +137 -51
  59. package/src/tools/skill/delete.ts +1 -1
  60. package/src/tools/skill/list.ts +1 -1
  61. package/src/tools/skill/write.ts +1 -1
  62. package/src/tools/task/create.ts +41 -16
  63. package/src/tools/task/delete.ts +3 -3
  64. package/src/tools/task/list.ts +6 -3
  65. package/src/tools/task/update.ts +31 -9
  66. package/src/tools/task/view.ts +6 -6
  67. package/src/tools/thread/list.ts +2 -2
  68. package/src/tools/thread/search.ts +208 -0
  69. package/src/tools/thread/view.ts +50 -5
  70. package/src/tools/worker/spawn.ts +28 -14
  71. package/src/tui/App.tsx +12 -19
  72. package/src/tui/components/ContextPanel.tsx +83 -316
  73. package/src/tui/components/SchedulePanel.tsx +34 -48
  74. package/src/tui/components/StatusBar.tsx +15 -15
  75. package/src/tui/components/TaskPanel.tsx +34 -38
  76. package/src/tui/components/ThreadPanel.tsx +29 -38
  77. package/src/tui/components/WorkerPanel.tsx +21 -19
  78. package/src/tui/markdown.ts +2 -8
  79. package/src/types/file-imports.d.ts +9 -0
  80. package/src/utils/title.ts +5 -7
  81. package/src/utils/v7-date.ts +47 -0
  82. package/src/worker/heartbeat.ts +46 -24
  83. package/src/worker/index.ts +13 -15
  84. package/src/worker/llm.ts +30 -37
  85. package/src/worker/prompt.ts +19 -41
  86. package/src/worker/schedules.ts +48 -69
  87. package/src/worker/spawn.ts +11 -11
  88. package/src/worker/tick.ts +39 -43
  89. package/src/workers/store.ts +247 -0
  90. package/src/commands/tools.ts +0 -367
  91. package/src/context/describer.ts +0 -140
  92. package/src/context/drives.ts +0 -110
  93. package/src/context/ingest.ts +0 -162
  94. package/src/context/refresh.ts +0 -183
  95. package/src/db/context.ts +0 -637
  96. package/src/db/daemon-state.ts +0 -6
  97. package/src/db/reembed.ts +0 -113
  98. package/src/db/schedules.ts +0 -213
  99. package/src/db/tasks.ts +0 -347
  100. package/src/db/threads.ts +0 -276
  101. package/src/db/workers.ts +0 -212
  102. package/src/tools/context/list-drives.ts +0 -36
  103. package/src/tools/context/refresh.ts +0 -165
  104. package/src/tools/context/search.ts +0 -54
@@ -1,11 +1,8 @@
1
1
  // Capabilities tools
2
2
  import { capabilitiesRefreshTool } from "./capabilities/refresh.ts";
3
3
  // Context tools
4
- import { contextListDrivesTool } from "./context/list-drives.ts";
5
4
  import { pipeToContextTool } from "./context/pipe.ts";
6
5
  import { readLargeResultTool } from "./context/read-large-result.ts";
7
- import { contextRefreshTool } from "./context/refresh.ts";
8
- import { contextSearchTool } from "./context/search.ts";
9
6
  import { updateBeliefsTool } from "./context/update-beliefs.ts";
10
7
  import { updateGoalsTool } from "./context/update-goals.ts";
11
8
  // Context — directory operations
@@ -50,6 +47,7 @@ import { viewTaskTool } from "./task/view.ts";
50
47
  import { waitTaskTool } from "./task/wait.ts";
51
48
  // Thread tools
52
49
  import { listThreadsTool } from "./thread/list.ts";
50
+ import { searchThreadsTool } from "./thread/search.ts";
53
51
  import { viewThreadTool } from "./thread/view.ts";
54
52
  import { registerTool } from "./tool.ts";
55
53
  // Worker tools
@@ -66,8 +64,7 @@ export function registerAllTools(): void {
66
64
  registerTool(listTasksTool);
67
65
  registerTool(viewTaskTool);
68
66
 
69
- // Context
70
- registerTool(contextListDrivesTool);
67
+ // Context (file/dir + self-reflection)
71
68
  registerTool(contextCreateDirTool);
72
69
  registerTool(contextTreeTool);
73
70
  registerTool(contextDirSizeTool);
@@ -80,8 +77,6 @@ export function registerAllTools(): void {
80
77
  registerTool(contextInfoTool);
81
78
  registerTool(contextExistsTool);
82
79
  registerTool(contextCountLinesTool);
83
- registerTool(contextSearchTool);
84
- registerTool(contextRefreshTool);
85
80
  registerTool(updateBeliefsTool);
86
81
  registerTool(updateGoalsTool);
87
82
  registerTool(readLargeResultTool);
@@ -108,6 +103,7 @@ export function registerAllTools(): void {
108
103
  // Thread
109
104
  registerTool(listThreadsTool);
110
105
  registerTool(viewThreadTool);
106
+ registerTool(searchThreadsTool);
111
107
 
112
108
  // MCP
113
109
  registerTool(mcpListToolsTool);
@@ -1,5 +1,5 @@
1
1
  import { z } from "zod";
2
- import { createSchedule } from "../../db/schedules.ts";
2
+ import { createSchedule } from "../../schedules/store.ts";
3
3
  import { logger } from "../../utils/logger.ts";
4
4
  import type { ToolDefinition } from "../tool.ts";
5
5
 
@@ -28,7 +28,7 @@ export const createScheduleTool = {
28
28
  inputSchema,
29
29
  outputSchema,
30
30
  execute: async (input, ctx) => {
31
- const schedule = await createSchedule(ctx.conn, {
31
+ const schedule = await createSchedule(ctx.projectDir, {
32
32
  name: input.name,
33
33
  description: input.description,
34
34
  frequency: input.frequency,
@@ -1,9 +1,11 @@
1
1
  import { z } from "zod";
2
- import { listSchedules } from "../../db/schedules.ts";
2
+ import { listSchedules } from "../../schedules/store.ts";
3
3
  import type { ToolDefinition } from "../tool.ts";
4
4
 
5
5
  const inputSchema = z.object({
6
6
  enabled: z.boolean().optional().describe("Filter by enabled status"),
7
+ limit: z.number().optional().describe("Max number of schedules to return"),
8
+ offset: z.number().optional().describe("Skip first N schedules"),
7
9
  });
8
10
 
9
11
  const outputSchema = z.object({
@@ -27,8 +29,10 @@ export const listSchedulesTool = {
27
29
  inputSchema,
28
30
  outputSchema,
29
31
  execute: async (input, ctx) => {
30
- const schedules = await listSchedules(ctx.conn, {
32
+ const schedules = await listSchedules(ctx.projectDir, {
31
33
  enabled: input.enabled,
34
+ limit: input.limit,
35
+ offset: input.offset,
32
36
  });
33
37
  return {
34
38
  schedules: schedules.map((s) => ({
@@ -36,7 +40,7 @@ export const listSchedulesTool = {
36
40
  name: s.name,
37
41
  frequency: s.frequency,
38
42
  enabled: s.enabled,
39
- last_run_at: s.last_run_at?.toISOString() ?? null,
43
+ last_run_at: s.last_run_at,
40
44
  })),
41
45
  count: schedules.length,
42
46
  is_error: false,
@@ -2,8 +2,6 @@ import type { RegexpHit } from "./regexp.ts";
2
2
  import type { SemanticHit } from "./semantic.ts";
3
3
 
4
4
  export interface FusedMatch {
5
- ref: string;
6
- drive: string;
7
5
  path: string;
8
6
  line: number | null;
9
7
  content: string;
@@ -16,17 +14,14 @@ export interface FusedMatch {
16
14
  const SNIPPET_MAX = 300;
17
15
 
18
16
  /**
19
- * Reciprocal rank fusion of regexp line hits and semantic chunk hits.
17
+ * Reciprocal rank fusion of regexp line hits and semantic file hits.
20
18
  *
21
- * Each regexp hit becomes its own row. If the file (drive + path) also has a
22
- * semantic hit, the regexp row picks up that semantic side's RRF contribution
23
- * and is tagged `match_type: "both"` — exact-line + semantic agreement is
24
- * the strongest signal.
19
+ * Each regexp hit becomes its own row. If the same file also has a semantic
20
+ * hit, the regexp row picks up that semantic side's RRF contribution and is
21
+ * tagged `match_type: "both"` — exact-line + semantic agreement is the
22
+ * strongest signal.
25
23
  *
26
- * Semantic hits are emitted as their own rows only for paths with no regexp
27
- * hit; otherwise the regexp row already represents that file (and is more
28
- * locatable). This keeps the result list focused without losing pure
29
- * semantic matches in files the regexp didn't touch.
24
+ * Semantic hits emit their own rows only for paths with no regexp hit.
30
25
  */
31
26
  export function fuseRRF(
32
27
  regexpHits: RegexpHit[],
@@ -42,26 +37,21 @@ export function fuseRRF(
42
37
  for (let i = 0; i < semanticHits.length; i++) {
43
38
  const hit = semanticHits[i];
44
39
  if (!hit) continue;
45
- const key = pathKey(hit.drive, hit.path);
46
- if (key == null) continue;
47
- const existing = bestSemByPath.get(key);
40
+ const existing = bestSemByPath.get(hit.path);
48
41
  if (!existing || i < existing.rank) {
49
- bestSemByPath.set(key, { rank: i, score: hit.score, hit });
42
+ bestSemByPath.set(hit.path, { rank: i, score: hit.score, hit });
50
43
  }
51
44
  }
52
45
 
53
46
  const regexpPaths = new Set<string>();
54
- for (const hit of regexpHits) {
55
- regexpPaths.add(pathKey(hit.drive, hit.path) ?? "");
56
- }
47
+ for (const hit of regexpHits) regexpPaths.add(hit.path);
57
48
 
58
49
  const fused: FusedMatch[] = [];
59
50
 
60
51
  for (let i = 0; i < regexpHits.length; i++) {
61
52
  const rx = regexpHits[i];
62
53
  if (!rx) continue;
63
- const key = pathKey(rx.drive, rx.path) ?? "";
64
- const sem = bestSemByPath.get(key);
54
+ const sem = bestSemByPath.get(rx.path);
65
55
  let score = 1 / (k + i + 1);
66
56
  let matchType: FusedMatch["match_type"] = "regexp";
67
57
  let semanticScore: number | null = null;
@@ -71,8 +61,6 @@ export function fuseRRF(
71
61
  semanticScore = round(sem.score);
72
62
  }
73
63
  fused.push({
74
- ref: rx.ref,
75
- drive: rx.drive,
76
64
  path: rx.path,
77
65
  line: rx.line,
78
66
  content: rx.content,
@@ -86,14 +74,10 @@ export function fuseRRF(
86
74
  for (let i = 0; i < semanticHits.length; i++) {
87
75
  const sem = semanticHits[i];
88
76
  if (!sem) continue;
89
- const key = pathKey(sem.drive, sem.path);
90
- if (key == null) continue;
91
- if (regexpPaths.has(key)) continue;
77
+ if (regexpPaths.has(sem.path)) continue;
92
78
  const score = 1 / (k + i + 1);
93
79
  fused.push({
94
- ref: sem.ref,
95
- drive: sem.drive ?? "",
96
- path: sem.path ?? "",
80
+ path: sem.path,
97
81
  line: null,
98
82
  content: sem.chunk_content.slice(0, SNIPPET_MAX),
99
83
  context_lines: [],
@@ -107,11 +91,6 @@ export function fuseRRF(
107
91
  return fused.slice(0, options.limit);
108
92
  }
109
93
 
110
- function pathKey(drive: string | null, path: string | null): string | null {
111
- if (!drive || !path) return null;
112
- return `${drive}:${path}`;
113
- }
114
-
115
94
  function round(n: number): number {
116
95
  return Math.round(n * 10000) / 10000;
117
96
  }
@@ -1,16 +1,10 @@
1
1
  import { z } from "zod";
2
- import {
3
- listContextItems,
4
- listContextItemsByPrefix,
5
- } from "../../db/context.ts";
6
2
  import type { ToolDefinition } from "../tool.ts";
7
3
  import { fuseRRF } from "./fuse.ts";
8
4
  import { runRegexp } from "./regexp.ts";
9
5
  import { runSemantic } from "./semantic.ts";
10
6
 
11
7
  const MatchSchema = z.object({
12
- ref: z.string(),
13
- drive: z.string(),
14
8
  path: z.string(),
15
9
  line: z.number().nullable(),
16
10
  content: z.string(),
@@ -25,22 +19,20 @@ const inputSchema = z.object({
25
19
  .string()
26
20
  .optional()
27
21
  .describe(
28
- "Natural-language query for semantic + keyword (BM25) hybrid search. Provide alongside `pattern` for the strongest signal — chunks matched by both methods are boosted via reciprocal rank fusion.",
22
+ "Natural-language query for semantic search. Provide alongside `pattern` for the strongest signal — files matched by both methods float to the top via reciprocal rank fusion.",
29
23
  ),
30
24
  pattern: z
31
- .string()
32
- .optional()
33
- .describe("Regex pattern for exact text search across context contents."),
34
- drive: z
35
25
  .string()
36
26
  .optional()
37
27
  .describe(
38
- "Restrict to a single drive (applies to both `query` and `pattern`).",
28
+ "Regex pattern for exact text search across file contents under context/.",
39
29
  ),
40
- path: z
30
+ scope: z
41
31
  .string()
42
32
  .optional()
43
- .describe("Directory prefix within the drive. Requires `drive`."),
33
+ .describe(
34
+ "Restrict search to a sub-directory under context/ (e.g. 'notes' to only search context/notes/...).",
35
+ ),
44
36
  glob: z
45
37
  .string()
46
38
  .optional()
@@ -66,12 +58,13 @@ const outputSchema = z.object({
66
58
  is_error: z.boolean(),
67
59
  error_type: z.string().optional(),
68
60
  message: z.string().optional(),
61
+ next_action_hint: z.string().optional(),
69
62
  });
70
63
 
71
64
  export const searchTool = {
72
65
  name: "search",
73
66
  description:
74
- "[[ bash equivalent command: grep -r ]] Hybrid search over indexed context. At least one of `query` (natural language → semantic + BM25) or `pattern` (regex over file contents) is required. Pass both for the strongest signal: results matched by both methods float to the top via reciprocal rank fusion. Scoping (`drive`, `path`, `glob`) applies to both sides.",
67
+ "[[ bash equivalent command: grep -r ]] Hybrid search over files under context/. At least one of `query` (natural language → semantic) or `pattern` (regex over file contents) is required. Pass both for the strongest signal: results matched by both methods float to the top via reciprocal rank fusion. Scoping (`scope`, `glob`) applies to both sides. Note: while a persistent index sidecar is being rebuilt, semantic search re-embeds files on every call — keep result sets small.",
75
68
  group: "search",
76
69
  inputSchema,
77
70
  outputSchema,
@@ -85,43 +78,43 @@ export const searchTool = {
85
78
  "Provide at least one of `query` (natural language) or `pattern` (regex). Pass both to fuse semantic and exact-match signals.",
86
79
  };
87
80
  }
88
- if (input.path && !input.drive) {
89
- return {
90
- matches: [],
91
- is_error: true,
92
- error_type: "invalid_arguments",
93
- message:
94
- "`path` requires `drive` — call context_list_drives to see which drives exist, then pass `drive` alongside `path`.",
95
- };
81
+
82
+ // Validate the regex up front so a malformed pattern returns a
83
+ // structured error instead of bubbling SyntaxError. Match the shape
84
+ // of search_threads' invalid_regex response so the agent can recover
85
+ // identically across both tools.
86
+ if (input.pattern) {
87
+ try {
88
+ new RegExp(input.pattern, input.ignore_case ? "i" : "");
89
+ } catch (err) {
90
+ return {
91
+ matches: [],
92
+ is_error: true,
93
+ error_type: "invalid_regex",
94
+ message: `Could not compile pattern: ${err instanceof Error ? err.message : String(err)}`,
95
+ next_action_hint:
96
+ "Double-check the regex; remember `.` is a metacharacter — escape it as `\\.` for a literal dot.",
97
+ };
98
+ }
96
99
  }
97
100
 
98
101
  const limit = input.max_results ?? 20;
99
102
 
100
103
  const regexpHits = input.pattern
101
- ? runRegexp(
102
- input.drive
103
- ? await listContextItemsByPrefix(
104
- ctx.conn,
105
- input.drive,
106
- input.path ?? "/",
107
- { recursive: true },
108
- )
109
- : await listContextItems(ctx.conn),
110
- {
111
- pattern: input.pattern,
112
- glob: input.glob,
113
- ignore_case: input.ignore_case,
114
- context: input.context,
115
- max_results: 100,
116
- },
117
- )
104
+ ? await runRegexp(ctx.projectDir, {
105
+ pattern: input.pattern,
106
+ scope: input.scope,
107
+ glob: input.glob,
108
+ ignore_case: input.ignore_case,
109
+ context: input.context,
110
+ max_results: 100,
111
+ })
118
112
  : [];
119
113
 
120
114
  const semanticHits = input.query
121
- ? await runSemantic(ctx, {
115
+ ? await runSemantic(ctx.projectDir, ctx.config, ctx.dbPath, {
122
116
  query: input.query,
123
- drive: input.drive,
124
- path: input.path,
117
+ scope: input.scope,
125
118
  glob: input.glob,
126
119
  limit: 100,
127
120
  })
@@ -1,9 +1,6 @@
1
- import { formatDriveRef } from "../../context/drives.ts";
2
- import type { ContextItem } from "../../db/context.ts";
1
+ import { listContextDir, readContextFile } from "../../context/store.ts";
3
2
 
4
3
  export interface RegexpHit {
5
- ref: string;
6
- drive: string;
7
4
  path: string;
8
5
  line: number;
9
6
  content: string;
@@ -12,43 +9,58 @@ export interface RegexpHit {
12
9
 
13
10
  export interface RegexpOptions {
14
11
  pattern: string;
12
+ /** Optional path under context/ to scope the walk (default: whole tree). */
13
+ scope?: string;
15
14
  glob?: string;
16
15
  ignore_case?: boolean;
17
16
  context?: number;
18
17
  max_results?: number;
19
18
  }
20
19
 
21
- export function runRegexp(
22
- items: ContextItem[],
20
+ /**
21
+ * Walk every textual file under `context/` (or `context/<scope>/`) and run
22
+ * `pattern` against each line. Cheap because tools opt into reading content
23
+ * only for files whose names match an optional glob.
24
+ */
25
+ export async function runRegexp(
26
+ projectDir: string,
23
27
  options: RegexpOptions,
24
- ): RegexpHit[] {
28
+ ): Promise<RegexpHit[]> {
25
29
  const flags = options.ignore_case ? "gi" : "g";
26
30
  const regex = new RegExp(options.pattern, flags);
27
31
  const globRegex = options.glob ? globToRegex(options.glob) : null;
28
32
  const contextLines = options.context ?? 0;
29
33
  const maxResults = options.max_results ?? 100;
30
34
 
31
- const hits: RegexpHit[] = [];
32
-
33
- for (const item of items) {
34
- if (item.content == null) continue;
35
+ const entries = await listContextDir(projectDir, options.scope ?? "", {
36
+ recursive: true,
37
+ });
35
38
 
39
+ const hits: RegexpHit[] = [];
40
+ for (const entry of entries) {
41
+ if (entry.is_directory) continue;
42
+ if (!entry.is_textual) continue;
36
43
  if (globRegex) {
37
- const filename = item.path.split("/").pop() ?? "";
44
+ const filename = entry.path.split("/").pop() ?? "";
38
45
  if (!globRegex.test(filename)) continue;
39
46
  }
40
47
 
41
- const lines = item.content.split("\n");
48
+ let content: string;
49
+ try {
50
+ content = await readContextFile(projectDir, entry.path);
51
+ } catch {
52
+ continue;
53
+ }
54
+ const lines = content.split("\n");
42
55
  for (let i = 0; i < lines.length; i++) {
43
56
  regex.lastIndex = 0;
44
57
  const line = lines[i];
45
- if (line !== undefined && regex.test(line)) {
58
+ if (line === undefined) continue;
59
+ if (regex.test(line)) {
46
60
  const start = Math.max(0, i - contextLines);
47
61
  const end = Math.min(lines.length, i + contextLines + 1);
48
62
  hits.push({
49
- ref: formatDriveRef(item),
50
- drive: item.drive,
51
- path: item.path,
63
+ path: entry.path,
52
64
  line: i + 1,
53
65
  content: line,
54
66
  context_lines: lines.slice(start, end),
@@ -1,81 +1,167 @@
1
- import { formatDriveRef } from "../../context/drives.ts";
2
- import { embedSingle } from "../../context/embedder.ts";
3
- import { type HybridSearchResult, hybridSearch } from "../../db/embeddings.ts";
4
- import type { ToolContext } from "../tool.ts";
1
+ import type { BotholomewConfig } from "../../config/schemas.ts";
2
+ import { embed, embedSingle } from "../../context/embedder.ts";
3
+ import { listContextDir, readContextFile } from "../../context/store.ts";
4
+ import { withDb } from "../../db/connection.ts";
5
+ import { indexStats, searchSemantic } from "../../db/embeddings.ts";
5
6
  import { globToRegex } from "./regexp.ts";
6
7
 
7
8
  export interface SemanticHit {
8
- ref: string;
9
- drive: string | null;
10
- path: string | null;
11
- context_item_id: string;
9
+ path: string;
12
10
  chunk_index: number;
13
- title: string;
14
11
  chunk_content: string;
15
12
  score: number;
16
13
  }
17
14
 
18
15
  export interface SemanticOptions {
19
16
  query: string;
20
- drive?: string;
21
- path?: string;
17
+ scope?: string;
22
18
  glob?: string;
23
19
  limit?: number;
24
20
  }
25
21
 
22
+ // On-the-fly fallback (used when the index sidecar is empty / stale).
23
+ // One chunk per file truncated to MAX_CHARS; the indexed path is much faster
24
+ // and supports proper chunking via `botholomew context reindex`.
25
+ const MAX_CHARS = 4_000;
26
+ const MAX_FILES_TO_EMBED = 200;
27
+
26
28
  /**
27
- * Run the embedding + hybrid-search pipeline. Scoping (`drive` / `path` /
28
- * `glob`) is applied as a *post-filter* on results so the caller gets
29
- * consistent behavior whether they used the regex side, the semantic side,
30
- * or both.
29
+ * Semantic search over `context/`. Prefers the persistent index sidecar
30
+ * (`context_index` table, populated by `botholomew context reindex`) when
31
+ * it has rows. Falls back to embedding files on the fly so a fresh project
32
+ * still gets useful results before the user has reindexed once.
31
33
  */
32
34
  export async function runSemantic(
33
- ctx: ToolContext,
35
+ projectDir: string,
36
+ config: Required<BotholomewConfig>,
37
+ dbPath: string | null,
34
38
  options: SemanticOptions,
35
39
  ): Promise<SemanticHit[]> {
36
- const queryVec = await embedSingle(options.query, ctx.config);
37
- const results = await hybridSearch(
38
- ctx.conn,
39
- options.query,
40
- queryVec,
41
- options.limit ?? 100,
40
+ if (dbPath) {
41
+ const indexed = await tryIndexedSearch(dbPath, config, options);
42
+ if (indexed) return indexed;
43
+ }
44
+ return runOnTheFly(projectDir, config, options);
45
+ }
46
+
47
+ async function tryIndexedSearch(
48
+ dbPath: string,
49
+ config: Required<BotholomewConfig>,
50
+ options: SemanticOptions,
51
+ ): Promise<SemanticHit[] | null> {
52
+ let stats: Awaited<ReturnType<typeof indexStats>>;
53
+ try {
54
+ stats = await withDb(dbPath, indexStats);
55
+ } catch {
56
+ return null;
57
+ }
58
+ if (stats.embedded === 0) return null;
59
+
60
+ const queryVec = await embedSingle(options.query, config);
61
+ const limit = options.limit ?? 100;
62
+ const rows = await withDb(dbPath, (conn) =>
63
+ searchSemantic(conn, queryVec, limit * 4),
42
64
  );
43
65
 
44
- return results.filter((r) => matchesScope(r, options)).map(toHit);
66
+ const globRegex = options.glob ? globToRegex(options.glob) : null;
67
+ const scope = options.scope
68
+ ? options.scope.endsWith("/")
69
+ ? options.scope
70
+ : `${options.scope}/`
71
+ : null;
72
+
73
+ const filtered: SemanticHit[] = [];
74
+ for (const r of rows) {
75
+ if (scope && !r.path.startsWith(scope) && r.path !== options.scope) {
76
+ continue;
77
+ }
78
+ if (globRegex) {
79
+ const filename = r.path.split("/").pop() ?? "";
80
+ if (!globRegex.test(filename)) continue;
81
+ }
82
+ filtered.push({
83
+ path: r.path,
84
+ chunk_index: r.chunk_index,
85
+ chunk_content: r.chunk_content,
86
+ score: r.score,
87
+ });
88
+ if (filtered.length >= limit) break;
89
+ }
90
+ return filtered;
45
91
  }
46
92
 
47
- function matchesScope(
48
- result: HybridSearchResult,
93
+ async function runOnTheFly(
94
+ projectDir: string,
95
+ config: Required<BotholomewConfig>,
49
96
  options: SemanticOptions,
50
- ): boolean {
51
- if (options.drive && result.drive !== options.drive) return false;
52
- if (options.path && result.path) {
53
- const prefix = options.path.endsWith("/")
54
- ? options.path
55
- : `${options.path}/`;
56
- if (result.path !== options.path && !result.path.startsWith(prefix)) {
57
- return false;
97
+ ): Promise<SemanticHit[]> {
98
+ const entries = await listContextDir(projectDir, options.scope ?? "", {
99
+ recursive: true,
100
+ });
101
+ const globRegex = options.glob ? globToRegex(options.glob) : null;
102
+
103
+ const candidates: Array<{ path: string; content: string }> = [];
104
+ for (const entry of entries) {
105
+ if (entry.is_directory) continue;
106
+ if (!entry.is_textual) continue;
107
+ if (globRegex) {
108
+ const filename = entry.path.split("/").pop() ?? "";
109
+ if (!globRegex.test(filename)) continue;
110
+ }
111
+ let content: string;
112
+ try {
113
+ content = await readContextFile(projectDir, entry.path);
114
+ } catch {
115
+ continue;
58
116
  }
117
+ if (content.trim().length === 0) continue;
118
+ candidates.push({
119
+ path: entry.path,
120
+ content: content.slice(0, MAX_CHARS),
121
+ });
122
+ if (candidates.length >= MAX_FILES_TO_EMBED) break;
59
123
  }
60
- if (options.glob && result.path) {
61
- const filename = result.path.split("/").pop() ?? "";
62
- if (!globToRegex(options.glob).test(filename)) return false;
124
+
125
+ if (candidates.length === 0) return [];
126
+
127
+ const [queryVec, fileVecs] = await Promise.all([
128
+ embedSingle(options.query, config),
129
+ embed(
130
+ candidates.map((c) => c.content),
131
+ config,
132
+ ),
133
+ ]);
134
+
135
+ const limit = options.limit ?? 100;
136
+ const scored: SemanticHit[] = [];
137
+ for (let i = 0; i < candidates.length; i++) {
138
+ const c = candidates[i];
139
+ const v = fileVecs[i];
140
+ if (!c || !v) continue;
141
+ const score = cosine(queryVec, v);
142
+ scored.push({
143
+ path: c.path,
144
+ chunk_index: 0,
145
+ chunk_content: c.content,
146
+ score,
147
+ });
63
148
  }
64
- return true;
149
+ scored.sort((a, b) => b.score - a.score);
150
+ return scored.slice(0, limit);
65
151
  }
66
152
 
67
- function toHit(r: HybridSearchResult): SemanticHit {
68
- return {
69
- ref:
70
- r.drive && r.path
71
- ? formatDriveRef({ drive: r.drive, path: r.path })
72
- : r.context_item_id,
73
- drive: r.drive,
74
- path: r.path,
75
- context_item_id: r.context_item_id,
76
- chunk_index: r.chunk_index,
77
- title: r.title,
78
- chunk_content: r.chunk_content ?? "",
79
- score: r.score,
80
- };
153
+ function cosine(a: number[], b: number[]): number {
154
+ let dot = 0;
155
+ let na = 0;
156
+ let nb = 0;
157
+ const len = Math.min(a.length, b.length);
158
+ for (let i = 0; i < len; i++) {
159
+ const ai = a[i] ?? 0;
160
+ const bi = b[i] ?? 0;
161
+ dot += ai * bi;
162
+ na += ai * ai;
163
+ nb += bi * bi;
164
+ }
165
+ const denom = Math.sqrt(na) * Math.sqrt(nb);
166
+ return denom === 0 ? 0 : dot / denom;
81
167
  }
@@ -19,7 +19,7 @@ const outputSchema = z.object({
19
19
  export const skillDeleteTool = {
20
20
  name: "skill_delete",
21
21
  description:
22
- "[[ bash equivalent command: rm ]] Delete a skill file (user-defined slash command) by name. The file is removed from .botholomew/skills/. Returns a not_found error with the list of available names when the skill doesn't exist.",
22
+ "[[ bash equivalent command: rm ]] Delete a skill file (user-defined slash command) by name. The file is removed from skills/. Returns a not_found error with the list of available names when the skill doesn't exist.",
23
23
  group: "skill",
24
24
  inputSchema,
25
25
  outputSchema,
@@ -33,7 +33,7 @@ const outputSchema = z.object({
33
33
  export const skillListTool = {
34
34
  name: "skill_list",
35
35
  description:
36
- "[[ bash equivalent command: ls ]] List skills (user-defined slash commands) loaded from .botholomew/skills/. Returns name, description, argument names, and file path for each.",
36
+ "[[ bash equivalent command: ls ]] List skills (user-defined slash commands) loaded from skills/. Returns name, description, argument names, and file path for each.",
37
37
  group: "skill",
38
38
  inputSchema,
39
39
  outputSchema,
@@ -67,7 +67,7 @@ const outputSchema = z.object({
67
67
  export const skillWriteTool = {
68
68
  name: "skill_write",
69
69
  description:
70
- "[[ bash equivalent command: tee ]] Create or overwrite a skill file (user-defined slash command) at .botholomew/skills/<name>.md. Fails with path_conflict when the file exists unless on_conflict='overwrite'. Reserved names (help, skills, clear, exit) are rejected. The generated file is parsed to validate before being written.",
70
+ "[[ bash equivalent command: tee ]] Create or overwrite a skill file (user-defined slash command) at skills/<name>.md. Fails with path_conflict when the file exists unless on_conflict='overwrite'. Reserved names (help, skills, clear, exit) are rejected. The generated file is parsed to validate before being written.",
71
71
  group: "skill",
72
72
  inputSchema,
73
73
  outputSchema,