botholomew 0.8.10 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/chat/agent.ts +5 -3
- package/src/commands/context.ts +223 -373
- package/src/commands/tools.ts +100 -11
- package/src/context/describer.ts +3 -118
- package/src/context/drives.ts +110 -0
- package/src/context/fetcher.ts +11 -1
- package/src/context/ingest.ts +13 -10
- package/src/context/refresh.ts +39 -24
- package/src/context/url-utils.ts +0 -23
- package/src/db/context.ts +195 -119
- package/src/db/embeddings.ts +35 -16
- package/src/db/sql/13-drive-paths.sql +49 -0
- package/src/tools/context/list-drives.ts +36 -0
- package/src/tools/context/refresh.ts +41 -23
- package/src/tools/context/search.ts +8 -3
- package/src/tools/dir/create.ts +14 -11
- package/src/tools/dir/size.ts +3 -2
- package/src/tools/dir/tree.ts +57 -17
- package/src/tools/file/copy.ts +14 -8
- package/src/tools/file/count-lines.ts +6 -3
- package/src/tools/file/delete.ts +12 -5
- package/src/tools/file/edit.ts +5 -3
- package/src/tools/file/exists.ts +25 -3
- package/src/tools/file/info.ts +90 -18
- package/src/tools/file/move.ts +15 -16
- package/src/tools/file/read.ts +79 -5
- package/src/tools/file/write.ts +29 -12
- package/src/tools/registry.ts +2 -2
- package/src/tools/search/grep.ts +44 -11
- package/src/tools/search/semantic.ts +7 -3
- package/src/tui/components/ContextPanel.tsx +73 -35
- package/src/tui/markdown.ts +2 -3
- package/src/worker/prompt.ts +3 -2
- package/src/tools/dir/list.ts +0 -89
package/src/context/refresh.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { McpxClient } from "@evantahler/mcpx";
|
|
|
2
2
|
import type { BotholomewConfig } from "../config/schemas.ts";
|
|
3
3
|
import type { DbConnection } from "../db/connection.ts";
|
|
4
4
|
import { type ContextItem, updateContextItem } from "../db/context.ts";
|
|
5
|
+
import { formatDriveRef } from "./drives.ts";
|
|
5
6
|
import { fetchUrl } from "./fetcher.ts";
|
|
6
7
|
import {
|
|
7
8
|
type PreparedIngestion,
|
|
@@ -13,9 +14,9 @@ export type RefreshItemStatus = "updated" | "unchanged" | "missing" | "error";
|
|
|
13
14
|
|
|
14
15
|
export interface RefreshItemResult {
|
|
15
16
|
id: string;
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
drive: string;
|
|
18
|
+
path: string;
|
|
19
|
+
ref: string;
|
|
19
20
|
status: RefreshItemStatus;
|
|
20
21
|
error?: string;
|
|
21
22
|
}
|
|
@@ -40,9 +41,16 @@ export interface RefreshOptions {
|
|
|
40
41
|
type IngestEmbedFn = (texts: string[]) => Promise<number[][]>;
|
|
41
42
|
|
|
42
43
|
/**
|
|
43
|
-
* Refresh a batch of context items: re-read
|
|
44
|
-
* content, and re-embed only the items that changed.
|
|
45
|
-
*
|
|
44
|
+
* Refresh a batch of context items: re-read from origin, diff, update
|
|
45
|
+
* content, and re-embed only the items that changed.
|
|
46
|
+
*
|
|
47
|
+
* Dispatches on `drive`:
|
|
48
|
+
* disk → read from filesystem
|
|
49
|
+
* agent → skip (no external origin)
|
|
50
|
+
* other → re-fetch as a URL (the path is either a full URL for `url` drive
|
|
51
|
+
* or an origin-specific identifier that fetchUrl can re-derive via
|
|
52
|
+
* the MCP agent; for now this only refreshes items stored under
|
|
53
|
+
* `url:/<full-url>`)
|
|
46
54
|
*/
|
|
47
55
|
export async function refreshContextItems(
|
|
48
56
|
conn: DbConnection,
|
|
@@ -52,36 +60,44 @@ export async function refreshContextItems(
|
|
|
52
60
|
opts: RefreshOptions = {},
|
|
53
61
|
embedFn?: IngestEmbedFn,
|
|
54
62
|
): Promise<RefreshResult> {
|
|
55
|
-
const
|
|
56
|
-
(i): i is ContextItem & { source_path: string } => !!i.source_path,
|
|
57
|
-
);
|
|
63
|
+
const refreshable = items.filter((i) => i.drive !== "agent");
|
|
58
64
|
|
|
59
65
|
const results: RefreshItemResult[] = [];
|
|
60
66
|
const toReembed: string[] = [];
|
|
61
67
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
opts.onItemProgress?.(idx, sourced.length);
|
|
68
|
+
for (const [idx, item] of refreshable.entries()) {
|
|
69
|
+
opts.onItemProgress?.(idx, refreshable.length);
|
|
65
70
|
const base = {
|
|
66
71
|
id: item.id,
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
72
|
+
drive: item.drive,
|
|
73
|
+
path: item.path,
|
|
74
|
+
ref: formatDriveRef(item),
|
|
70
75
|
};
|
|
71
76
|
|
|
72
77
|
try {
|
|
73
78
|
let content: string;
|
|
74
79
|
|
|
75
|
-
if (item.
|
|
76
|
-
const
|
|
77
|
-
content = fetched.content;
|
|
78
|
-
} else {
|
|
79
|
-
const bunFile = Bun.file(item.source_path);
|
|
80
|
+
if (item.drive === "disk") {
|
|
81
|
+
const bunFile = Bun.file(item.path);
|
|
80
82
|
if (!(await bunFile.exists())) {
|
|
81
83
|
results.push({ ...base, status: "missing" });
|
|
82
84
|
continue;
|
|
83
85
|
}
|
|
84
86
|
content = await bunFile.text();
|
|
87
|
+
} else if (item.drive === "url") {
|
|
88
|
+
const url = item.path.startsWith("/") ? item.path.slice(1) : item.path;
|
|
89
|
+
const fetched = await fetchUrl(url, config, mcpxClient);
|
|
90
|
+
content = fetched.content;
|
|
91
|
+
} else {
|
|
92
|
+
// Service-specific drives (google-docs, github, etc.) — only
|
|
93
|
+
// refreshable when the original URL can be reconstructed. For now,
|
|
94
|
+
// we punt: mark as error so the user knows to re-add from URL.
|
|
95
|
+
results.push({
|
|
96
|
+
...base,
|
|
97
|
+
status: "error",
|
|
98
|
+
error: `Refresh not implemented for drive '${item.drive}' — re-add from the original URL.`,
|
|
99
|
+
});
|
|
100
|
+
continue;
|
|
85
101
|
}
|
|
86
102
|
|
|
87
103
|
if (content === item.content) {
|
|
@@ -100,17 +116,16 @@ export async function refreshContextItems(
|
|
|
100
116
|
});
|
|
101
117
|
}
|
|
102
118
|
}
|
|
103
|
-
opts.onItemProgress?.(
|
|
119
|
+
opts.onItemProgress?.(refreshable.length, refreshable.length);
|
|
104
120
|
|
|
105
121
|
const updated = results.filter((r) => r.status === "updated").length;
|
|
106
122
|
const unchanged = results.filter((r) => r.status === "unchanged").length;
|
|
107
123
|
const missing = results.filter((r) => r.status === "missing").length;
|
|
108
124
|
|
|
109
|
-
// Phase 2: re-embed changed items. Skip cleanly if no OpenAI key.
|
|
110
125
|
const hasEmbedder = !!embedFn || !!config.openai_api_key;
|
|
111
126
|
if (toReembed.length === 0 || !hasEmbedder) {
|
|
112
127
|
return {
|
|
113
|
-
checked:
|
|
128
|
+
checked: refreshable.length,
|
|
114
129
|
updated,
|
|
115
130
|
unchanged,
|
|
116
131
|
missing,
|
|
@@ -147,7 +162,7 @@ export async function refreshContextItems(
|
|
|
147
162
|
}
|
|
148
163
|
|
|
149
164
|
return {
|
|
150
|
-
checked:
|
|
165
|
+
checked: refreshable.length,
|
|
151
166
|
updated,
|
|
152
167
|
unchanged,
|
|
153
168
|
missing,
|
package/src/context/url-utils.ts
CHANGED
|
@@ -10,29 +10,6 @@ export function isUrl(input: string): boolean {
|
|
|
10
10
|
}
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
/**
|
|
14
|
-
* Derives a virtual context path from a URL.
|
|
15
|
-
* Example: `https://docs.google.com/document/d/abc123/edit` → `/{prefix}/docs.google.com/document-d-abc123.md`
|
|
16
|
-
*/
|
|
17
|
-
export function urlToContextPath(url: string, prefix: string): string {
|
|
18
|
-
const parsed = new URL(url);
|
|
19
|
-
const hostname = parsed.hostname;
|
|
20
|
-
const pathname = parsed.pathname
|
|
21
|
-
.replace(/\/+$/, "") // strip trailing slashes
|
|
22
|
-
.replace(/^\/+/, "") // strip leading slashes
|
|
23
|
-
.replace(/[^a-zA-Z0-9\-_.]/g, "-") // slugify
|
|
24
|
-
.replace(/-{2,}/g, "-"); // collapse repeated dashes
|
|
25
|
-
|
|
26
|
-
const slug = pathname ? `${hostname}/${pathname}` : hostname;
|
|
27
|
-
const full = `${prefix.replace(/\/+$/, "")}/${slug}.md`;
|
|
28
|
-
|
|
29
|
-
if (full.length > 120) {
|
|
30
|
-
return `${full.slice(0, 117 - 3)}.md`;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
return full;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
13
|
/**
|
|
37
14
|
* Strips HTML tags from a string, removing script/style blocks first,
|
|
38
15
|
* then all remaining tags, and collapsing whitespace.
|