membot 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/membot.md +137 -0
- package/.cursor/rules/membot.mdc +137 -0
- package/README.md +131 -0
- package/package.json +83 -24
- package/patches/@huggingface%2Ftransformers@4.2.0.patch +137 -0
- package/scripts/apply-transformers-patch.sh +35 -0
- package/src/cli.ts +72 -0
- package/src/commands/check-update.ts +69 -0
- package/src/commands/mcpx.ts +112 -0
- package/src/commands/reindex.ts +53 -0
- package/src/commands/serve.ts +58 -0
- package/src/commands/skill.ts +131 -0
- package/src/commands/upgrade.ts +220 -0
- package/src/config/loader.ts +100 -0
- package/src/config/schemas.ts +39 -0
- package/src/constants.ts +42 -0
- package/src/context.ts +80 -0
- package/src/db/blobs.ts +53 -0
- package/src/db/chunks.ts +176 -0
- package/src/db/connection.ts +173 -0
- package/src/db/files.ts +325 -0
- package/src/db/migrations/001-init.ts +63 -0
- package/src/db/migrations/002-fts.ts +12 -0
- package/src/db/migrations.ts +45 -0
- package/src/errors.ts +87 -0
- package/src/ingest/chunker.ts +117 -0
- package/src/ingest/converter/docx.ts +15 -0
- package/src/ingest/converter/html.ts +20 -0
- package/src/ingest/converter/image.ts +71 -0
- package/src/ingest/converter/index.ts +119 -0
- package/src/ingest/converter/llm.ts +66 -0
- package/src/ingest/converter/ocr.ts +51 -0
- package/src/ingest/converter/pdf.ts +38 -0
- package/src/ingest/converter/text.ts +8 -0
- package/src/ingest/describer.ts +72 -0
- package/src/ingest/embedder.ts +98 -0
- package/src/ingest/fetcher.ts +280 -0
- package/src/ingest/ingest.ts +444 -0
- package/src/ingest/local-reader.ts +64 -0
- package/src/ingest/search-text.ts +18 -0
- package/src/ingest/source-resolver.ts +186 -0
- package/src/mcp/instructions.ts +34 -0
- package/src/mcp/server.ts +101 -0
- package/src/mount/commander.ts +174 -0
- package/src/mount/mcp.ts +111 -0
- package/src/mount/zod-to-cli.ts +158 -0
- package/src/operations/add.ts +69 -0
- package/src/operations/diff.ts +105 -0
- package/src/operations/index.ts +38 -0
- package/src/operations/info.ts +95 -0
- package/src/operations/list.ts +87 -0
- package/src/operations/move.ts +83 -0
- package/src/operations/prune.ts +80 -0
- package/src/operations/read.ts +102 -0
- package/src/operations/refresh.ts +72 -0
- package/src/operations/remove.ts +35 -0
- package/src/operations/search.ts +72 -0
- package/src/operations/tree.ts +103 -0
- package/src/operations/types.ts +81 -0
- package/src/operations/versions.ts +78 -0
- package/src/operations/write.ts +77 -0
- package/src/output/formatter.ts +68 -0
- package/src/output/logger.ts +114 -0
- package/src/output/progress.ts +78 -0
- package/src/output/tty.ts +91 -0
- package/src/refresh/runner.ts +296 -0
- package/src/refresh/scheduler.ts +54 -0
- package/src/sdk.ts +27 -0
- package/src/search/hybrid.ts +100 -0
- package/src/search/keyword.ts +62 -0
- package/src/search/semantic.ts +56 -0
- package/src/types/text-modules.d.ts +9 -0
- package/src/update/background.ts +73 -0
- package/src/update/cache.ts +40 -0
- package/src/update/checker.ts +117 -0
- package/.claude/settings.local.json +0 -7
- package/CLAUDE.md +0 -139
- package/docs/plan.md +0 -905
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { getCurrent, getVersion } from "../db/files.ts";
|
|
3
|
+
import { HelpfulError } from "../errors.ts";
|
|
4
|
+
import { colors } from "../output/formatter.ts";
|
|
5
|
+
import { defineOperation } from "./types.ts";
|
|
6
|
+
|
|
7
|
+
export const diffOperation = defineOperation({
|
|
8
|
+
name: "membot_diff",
|
|
9
|
+
cliName: "diff",
|
|
10
|
+
description: `Return a unified diff between two versions of a file. \`a\` is required; \`b\` defaults to the current version. Both \`a\` and \`b\` are version_id timestamps from membot_versions. Use to understand what a refresh actually changed before deciding to act on the new content.`,
|
|
11
|
+
inputSchema: z.object({
|
|
12
|
+
logical_path: z.string().describe("Path of the file"),
|
|
13
|
+
a: z.string().describe("Older version_id"),
|
|
14
|
+
b: z.string().optional().describe("Newer version_id; default current"),
|
|
15
|
+
}),
|
|
16
|
+
outputSchema: z.object({
|
|
17
|
+
logical_path: z.string(),
|
|
18
|
+
a: z.string(),
|
|
19
|
+
b: z.string(),
|
|
20
|
+
diff: z.string(),
|
|
21
|
+
}),
|
|
22
|
+
cli: { positional: ["logical_path", "a", "b"] },
|
|
23
|
+
console_formatter: (result) => {
|
|
24
|
+
const header = `${colors.bold(result.logical_path)} ${colors.dim(`${result.a} → ${result.b}`)}`;
|
|
25
|
+
if (!result.diff.trim()) return `${header}\n${colors.dim("(no changes)")}`;
|
|
26
|
+
const body = result.diff
|
|
27
|
+
.split("\n")
|
|
28
|
+
.map((line) => {
|
|
29
|
+
if (line.startsWith("---") || line.startsWith("+++") || line.startsWith("@@")) return colors.cyan(line);
|
|
30
|
+
if (line.startsWith("+")) return colors.green(line);
|
|
31
|
+
if (line.startsWith("-")) return colors.red(line);
|
|
32
|
+
return line;
|
|
33
|
+
})
|
|
34
|
+
.join("\n");
|
|
35
|
+
return `${header}\n${body}`;
|
|
36
|
+
},
|
|
37
|
+
handler: async (input, ctx) => {
|
|
38
|
+
const aRow = await getVersion(ctx.db, input.logical_path, input.a);
|
|
39
|
+
const bRow = input.b
|
|
40
|
+
? await getVersion(ctx.db, input.logical_path, input.b)
|
|
41
|
+
: await getCurrent(ctx.db, input.logical_path);
|
|
42
|
+
if (!aRow || !bRow) {
|
|
43
|
+
throw new HelpfulError({
|
|
44
|
+
kind: "not_found",
|
|
45
|
+
message: `couldn't load both versions for diff (${aRow ? "" : "a missing"} ${bRow ? "" : "b missing"})`.trim(),
|
|
46
|
+
hint: `Run \`membot versions ${input.logical_path}\` to list valid version_ids.`,
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
const diff = unifiedDiff(aRow.content ?? "", bRow.content ?? "", input.a, bRow.version_id);
|
|
50
|
+
return { logical_path: input.logical_path, a: aRow.version_id, b: bRow.version_id, diff };
|
|
51
|
+
},
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Produce a minimal unified diff between two strings using a simple LCS
|
|
56
|
+
* algorithm. We don't pull in a diff library because the volumes are small
|
|
57
|
+
* and the output format is a stable convenience for humans/agents reading
|
|
58
|
+
* what changed across versions.
|
|
59
|
+
*/
|
|
60
|
+
function unifiedDiff(a: string, b: string, aLabel: string, bLabel: string): string {
|
|
61
|
+
const aLines = a.split("\n");
|
|
62
|
+
const bLines = b.split("\n");
|
|
63
|
+
const out: string[] = [`--- ${aLabel}`, `+++ ${bLabel}`];
|
|
64
|
+
|
|
65
|
+
// Simple line-by-line walk using LCS.
|
|
66
|
+
const lcs = lcsTable(aLines, bLines);
|
|
67
|
+
let i = 0;
|
|
68
|
+
let j = 0;
|
|
69
|
+
const ops: { kind: "=" | "-" | "+"; line: string }[] = [];
|
|
70
|
+
while (i < aLines.length && j < bLines.length) {
|
|
71
|
+
if (aLines[i] === bLines[j]) {
|
|
72
|
+
ops.push({ kind: "=", line: aLines[i]! });
|
|
73
|
+
i++;
|
|
74
|
+
j++;
|
|
75
|
+
} else if ((lcs[i + 1]?.[j] ?? 0) >= (lcs[i]?.[j + 1] ?? 0)) {
|
|
76
|
+
ops.push({ kind: "-", line: aLines[i]! });
|
|
77
|
+
i++;
|
|
78
|
+
} else {
|
|
79
|
+
ops.push({ kind: "+", line: bLines[j]! });
|
|
80
|
+
j++;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
while (i < aLines.length) ops.push({ kind: "-", line: aLines[i++]! });
|
|
84
|
+
while (j < bLines.length) ops.push({ kind: "+", line: bLines[j++]! });
|
|
85
|
+
|
|
86
|
+
for (const op of ops) {
|
|
87
|
+
out.push(`${op.kind === "=" ? " " : op.kind}${op.line}`);
|
|
88
|
+
}
|
|
89
|
+
return out.join("\n");
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/** Build the LCS dynamic-programming table for two arrays. */
|
|
93
|
+
function lcsTable(a: string[], b: string[]): number[][] {
|
|
94
|
+
const m = a.length;
|
|
95
|
+
const n = b.length;
|
|
96
|
+
const t: number[][] = Array.from({ length: m + 1 }, () => new Array<number>(n + 1).fill(0));
|
|
97
|
+
for (let i = m - 1; i >= 0; i--) {
|
|
98
|
+
const ti = t[i]!;
|
|
99
|
+
const tin = t[i + 1]!;
|
|
100
|
+
for (let j = n - 1; j >= 0; j--) {
|
|
101
|
+
ti[j] = a[i] === b[j] ? (tin[j + 1] ?? 0) + 1 : Math.max(tin[j] ?? 0, ti[j + 1] ?? 0);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
return t;
|
|
105
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { addOperation } from "./add.ts";
|
|
2
|
+
import { diffOperation } from "./diff.ts";
|
|
3
|
+
import { infoOperation } from "./info.ts";
|
|
4
|
+
import { listOperation } from "./list.ts";
|
|
5
|
+
import { moveOperation } from "./move.ts";
|
|
6
|
+
import { pruneOperation } from "./prune.ts";
|
|
7
|
+
import { readOperation } from "./read.ts";
|
|
8
|
+
import { refreshOperation } from "./refresh.ts";
|
|
9
|
+
import { removeOperation } from "./remove.ts";
|
|
10
|
+
import { searchOperation } from "./search.ts";
|
|
11
|
+
import { treeOperation } from "./tree.ts";
|
|
12
|
+
import type { Operation } from "./types.ts";
|
|
13
|
+
import { versionsOperation } from "./versions.ts";
|
|
14
|
+
import { writeOperation } from "./write.ts";
|
|
15
|
+
|
|
16
|
+
/**
 * Ordered registry of every Operation. The CLI and the MCP server both
 * iterate this list and call the appropriate mount adapter, so a new tool
 * is added by writing one file under `operations/` and appending it here
 * (remember to add the matching import above).
 *
 * Order influences `--help` output and MCP `tools/list` ordering.
 */
// biome-ignore lint/suspicious/noExplicitAny: heterogenous Operation generics — registry stays open-ended on purpose
export const OPERATIONS: Operation<any, any>[] = [
  addOperation,
  listOperation,
  treeOperation,
  readOperation,
  searchOperation,
  infoOperation,
  versionsOperation,
  diffOperation,
  writeOperation,
  moveOperation,
  removeOperation,
  refreshOperation,
  pruneOperation,
];
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { getCurrent, getVersion } from "../db/files.ts";
|
|
3
|
+
import { HelpfulError } from "../errors.ts";
|
|
4
|
+
import { colors } from "../output/formatter.ts";
|
|
5
|
+
import { defineOperation } from "./types.ts";
|
|
6
|
+
|
|
7
|
+
// Metadata-inspection operation: surfaces everything known about one
// version of a file *except* its content (use membot_read for that).
export const infoOperation = defineOperation({
  name: "membot_info",
  cliName: "info",
  description: `Inspect metadata for a file: source (local path or URL), fetcher used, refresh schedule, last refresh status, all sha256 digests, and whether the requested version is the current one. Does NOT return file content — use membot_read for that. Use this to decide whether a refresh is worth forcing or whether to trust a cached row.`,
  inputSchema: z.object({
    logical_path: z.string().describe("Path to inspect"),
    version: z.string().optional().describe("Specific version_id; default current"),
  }),
  outputSchema: z.object({
    logical_path: z.string(),
    version_id: z.string(),
    version_is_current: z.boolean(),
    source_type: z.string(),
    source_path: z.string().nullable(),
    source_sha256: z.string().nullable(),
    blob_sha256: z.string().nullable(),
    content_sha256: z.string().nullable(),
    mime_type: z.string().nullable(),
    size_bytes: z.number().nullable(),
    description: z.string().nullable(),
    fetcher: z.string().nullable(),
    fetcher_server: z.string().nullable(),
    fetcher_tool: z.string().nullable(),
    fetcher_args: z.record(z.string(), z.unknown()).nullable(),
    refresh_frequency_sec: z.number().nullable(),
    refreshed_at: z.string().nullable(),
    last_refresh_status: z.string().nullable(),
    change_note: z.string().nullable(),
    created_at: z.string(),
    tombstone: z.boolean(),
  }),
  cli: { positional: ["logical_path"] },
  // Key/value console view. Row order below is deliberate and part of the
  // human-facing contract; fetcher_* and change_note rows only appear when set.
  console_formatter: (result) => {
    // fmt: aligned "key   value" row (22-char key column); yn: colorized
    // yes/no; orDash: dim dash placeholder for null values.
    const fmt = (k: string, v: string): string => `${colors.dim(k.padEnd(22))}${v}`;
    const yn = (b: boolean): string => (b ? colors.green("yes") : colors.dim("no"));
    const orDash = (s: string | null): string => s ?? colors.dim("-");
    const lines: string[] = [];
    const head = `${colors.cyan(result.logical_path)} ${colors.dim(`@ ${result.version_id}`)}`;
    lines.push(result.tombstone ? `${head} ${colors.red("[tombstoned]")}` : head);
    lines.push(fmt("current", yn(result.version_is_current)));
    lines.push(fmt("source_type", orDash(result.source_type)));
    lines.push(fmt("source_path", orDash(result.source_path)));
    lines.push(fmt("mime_type", orDash(result.mime_type)));
    lines.push(fmt("size_bytes", result.size_bytes !== null ? String(result.size_bytes) : colors.dim("-")));
    lines.push(fmt("description", orDash(result.description)));
    lines.push(fmt("content_sha256", orDash(result.content_sha256)));
    lines.push(fmt("blob_sha256", orDash(result.blob_sha256)));
    lines.push(fmt("source_sha256", orDash(result.source_sha256)));
    if (result.fetcher) lines.push(fmt("fetcher", result.fetcher));
    if (result.fetcher_server) lines.push(fmt("fetcher_server", result.fetcher_server));
    if (result.fetcher_tool) lines.push(fmt("fetcher_tool", result.fetcher_tool));
    if (result.fetcher_args) lines.push(fmt("fetcher_args", JSON.stringify(result.fetcher_args)));
    lines.push(
      fmt(
        "refresh_frequency",
        result.refresh_frequency_sec !== null ? `${result.refresh_frequency_sec}s` : colors.dim("-"),
      ),
    );
    lines.push(fmt("refreshed_at", orDash(result.refreshed_at)));
    lines.push(
      fmt(
        "last_refresh_status",
        // failed → red; ok/fresh → green; anything else (including null) → plain/dash.
        result.last_refresh_status === "failed"
          ? colors.red(result.last_refresh_status)
          : result.last_refresh_status === "ok" || result.last_refresh_status === "fresh"
            ? colors.green(result.last_refresh_status)
            : orDash(result.last_refresh_status),
      ),
    );
    if (result.change_note) lines.push(fmt("change_note", result.change_note));
    lines.push(fmt("created_at", result.created_at));
    return lines.join("\n");
  },
  handler: async (input, ctx) => {
    // Always fetch the current row — even when a specific version is
    // requested — because version_is_current is computed against it.
    const cur = await getCurrent(ctx.db, input.logical_path);
    const row = input.version ? await getVersion(ctx.db, input.logical_path, input.version) : cur;
    if (!row) {
      throw new HelpfulError({
        kind: "not_found",
        message: `no version of ${input.logical_path}${input.version ? ` at ${input.version}` : ""}`,
        hint: `Run \`membot versions ${input.logical_path}\` to list versions, or \`membot ls\` for paths.`,
      });
    }
    // NOTE(review): `...row` spreads every column of the DB row (including
    // any not declared above, e.g. content) — presumably defineOperation
    // validates/strips against outputSchema; confirm.
    return {
      ...row,
      version_is_current: !!cur && cur.version_id === row.version_id,
    };
  },
});
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { listCurrent } from "../db/files.ts";
|
|
3
|
+
import { colors, renderTable } from "../output/formatter.ts";
|
|
4
|
+
import { defineOperation } from "./types.ts";
|
|
5
|
+
|
|
6
|
+
// `ls`-style listing of current versions (one row per logical_path).
export const listOperation = defineOperation({
  name: "membot_list",
  cliName: "ls",
  bashEquivalent: "ls",
  description: `List current files under an optional prefix, with size, mime type, refresh frequency, and last refresh status. Returns one row per logical_path (current version only). Pair with membot_tree for shape, membot_search for content-based discovery.`,
  inputSchema: z.object({
    prefix: z.string().optional().describe("Only show paths starting with this prefix"),
    limit: z.number().default(1000).describe("Max rows to return"),
    offset: z.number().default(0).describe("Skip this many rows (paginate)"),
  }),
  outputSchema: z.object({
    entries: z.array(
      z.object({
        logical_path: z.string(),
        version_id: z.string(),
        size_bytes: z.number().nullable(),
        mime_type: z.string().nullable(),
        refresh_frequency_sec: z.number().nullable(),
        last_refresh_status: z.string().nullable(),
        refreshed_at: z.string().nullable(),
        description: z.string().nullable(),
      }),
    ),
    count: z.number(),
  }),
  cli: { positional: ["prefix"] },
  // Console view shows only PATH/SIZE/MIME/STATUS; the remaining fields
  // (refresh schedule, timestamps, description) are JSON-output-only.
  console_formatter: (result) => {
    if (result.entries.length === 0) return colors.dim("(no entries)");
    const rows = result.entries.map((e) => [
      e.logical_path,
      e.size_bytes !== null ? formatSize(e.size_bytes) : "-",
      e.mime_type ?? "-",
      e.last_refresh_status ?? "-",
    ]);
    const table = renderTable(["PATH", "SIZE", "MIME", "STATUS"], rows, {
      columnStyles: [colors.cyan, colors.dim, colors.dim, statusStyle],
    });
    return `${table}\n${colors.dim(`${result.count} ${result.count === 1 ? "entry" : "entries"}`)}`;
  },
  handler: async (input, ctx) => {
    const rows = await listCurrent(ctx.db, {
      prefix: input.prefix,
      limit: input.limit,
      offset: input.offset,
    });
    // NOTE: count is the number of rows returned after limit/offset are
    // applied, not the total number of matching paths.
    return {
      entries: rows.map((r) => ({
        logical_path: r.logical_path,
        version_id: r.version_id,
        size_bytes: r.size_bytes,
        mime_type: r.mime_type,
        refresh_frequency_sec: r.refresh_frequency_sec,
        last_refresh_status: r.last_refresh_status,
        refreshed_at: r.refreshed_at,
        description: r.description,
      })),
      count: rows.length,
    };
  },
});
|
|
66
|
+
|
|
67
|
+
/** Map refresh status → semantic color. `failed` red, `stale`/`partial` yellow, `ok` green, anything else plain. */
|
|
68
|
+
function statusStyle(s: string): string {
|
|
69
|
+
const trimmed = s.trim();
|
|
70
|
+
if (trimmed === "failed" || trimmed === "error") return colors.red(s);
|
|
71
|
+
if (trimmed === "stale" || trimmed === "partial") return colors.yellow(s);
|
|
72
|
+
if (trimmed === "ok" || trimmed === "fresh") return colors.green(s);
|
|
73
|
+
return s;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/** Format a byte count in human units. 1024 boundary, two-digit precision past KB. */
|
|
77
|
+
function formatSize(bytes: number): string {
|
|
78
|
+
if (bytes < 1024) return `${bytes}B`;
|
|
79
|
+
const units = ["KB", "MB", "GB", "TB"];
|
|
80
|
+
let i = -1;
|
|
81
|
+
let n = bytes;
|
|
82
|
+
while (n >= 1024 && i < units.length - 1) {
|
|
83
|
+
n /= 1024;
|
|
84
|
+
i++;
|
|
85
|
+
}
|
|
86
|
+
return `${n.toFixed(n >= 100 ? 0 : 1)}${units[i]}`;
|
|
87
|
+
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { insertChunksForVersion, listChunksForVersion, rebuildFts } from "../db/chunks.ts";
|
|
3
|
+
import { getCurrent, insertVersion, millisIso, tombstone } from "../db/files.ts";
|
|
4
|
+
import { HelpfulError } from "../errors.ts";
|
|
5
|
+
import { buildSearchText } from "../ingest/search-text.ts";
|
|
6
|
+
import { colors } from "../output/formatter.ts";
|
|
7
|
+
import { defineOperation } from "./types.ts";
|
|
8
|
+
|
|
9
|
+
// Rename a logical_path by writing a new version under the destination,
// carrying chunks/embeddings over, then tombstoning the source.
export const moveOperation = defineOperation({
  name: "membot_move",
  cliName: "mv",
  bashEquivalent: "mv",
  description: `Rename a logical_path. Creates one new version under the new path with full content carried over and tombstones the old path. History remains queryable under both names via membot_versions.`,
  inputSchema: z.object({
    from_logical_path: z.string().describe("Source path"),
    to_logical_path: z.string().describe("Destination path"),
  }),
  outputSchema: z.object({
    from_logical_path: z.string(),
    to_logical_path: z.string(),
    new_version_id: z.string(),
  }),
  cli: { positional: ["from_logical_path", "to_logical_path"] },
  console_formatter: (result) =>
    `${colors.green("✓")} ${colors.cyan(result.from_logical_path)} → ${colors.cyan(result.to_logical_path)} ${colors.dim(`@ ${result.new_version_id}`)}`,
  handler: async (input, ctx) => {
    // Source must have a live (non-tombstoned) current version …
    const cur = await getCurrent(ctx.db, input.from_logical_path);
    if (!cur) {
      throw new HelpfulError({
        kind: "not_found",
        message: `${input.from_logical_path} doesn't exist (or is tombstoned)`,
        hint: "Run `membot ls` to see paths.",
      });
    }
    // … and the destination must not, to avoid silently shadowing a file.
    if (await getCurrent(ctx.db, input.to_logical_path)) {
      throw new HelpfulError({
        kind: "conflict",
        message: `${input.to_logical_path} already has a current version`,
        hint: "Pick a different destination or `membot rm` the existing one first.",
      });
    }
    // Step 1: copy every column of the current row into a brand-new version
    // under the destination path (version_id = now).
    const newVersion = millisIso(Date.now());
    await insertVersion(ctx.db, {
      logical_path: input.to_logical_path,
      version_id: newVersion,
      source_type: cur.source_type,
      source_path: cur.source_path,
      source_mtime_ms: cur.source_mtime_ms,
      source_sha256: cur.source_sha256,
      blob_sha256: cur.blob_sha256,
      content_sha256: cur.content_sha256,
      content: cur.content,
      description: cur.description,
      mime_type: cur.mime_type,
      size_bytes: cur.size_bytes,
      fetcher: cur.fetcher,
      fetcher_server: cur.fetcher_server,
      fetcher_tool: cur.fetcher_tool,
      fetcher_args: cur.fetcher_args,
      refresh_frequency_sec: cur.refresh_frequency_sec,
      refreshed_at: cur.refreshed_at,
      last_refresh_status: cur.last_refresh_status,
      change_note: `move from ${input.from_logical_path}`,
    });

    // Step 2: carry the chunks over unchanged except for search_text, which
    // embeds the logical_path and must be rebuilt against the new name.
    // Embeddings are reused as-is — no re-embedding on move.
    const oldChunks = await listChunksForVersion(ctx.db, cur.logical_path, cur.version_id);
    const reKeyed = oldChunks.map((c) => ({
      chunk_index: c.chunk_index,
      chunk_content: c.chunk_content,
      search_text: buildSearchText(input.to_logical_path, cur.description, c.chunk_content),
      embedding: c.embedding,
    }));
    await insertChunksForVersion(ctx.db, input.to_logical_path, newVersion, reKeyed);
    // Step 3: retire the old path, then rebuild the FTS index so keyword
    // search reflects the rename immediately.
    // NOTE(review): these steps are not wrapped in a visible transaction —
    // a failure between insertVersion and tombstone would leave both paths
    // current. Confirm whether ctx.db provides outer transactional scope.
    await tombstone(ctx.db, input.from_logical_path, `moved to ${input.to_logical_path}`);
    await rebuildFts(ctx.db);

    return {
      from_logical_path: input.from_logical_path,
      to_logical_path: input.to_logical_path,
      new_version_id: newVersion,
    };
  },
});
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { gcOrphanBlobs } from "../db/blobs.ts";
|
|
3
|
+
import { pruneOldVersions } from "../db/files.ts";
|
|
4
|
+
import { HelpfulError } from "../errors.ts";
|
|
5
|
+
import { colors } from "../output/formatter.ts";
|
|
6
|
+
import { defineOperation } from "./types.ts";
|
|
7
|
+
|
|
8
|
+
export const pruneOperation = defineOperation({
|
|
9
|
+
name: "membot_prune",
|
|
10
|
+
cliName: "prune",
|
|
11
|
+
description: `Permanently drop non-current versions older than the cutoff and garbage-collect orphan blobs. Current versions and tombstones-with-no-newer-version are preserved. Use sparingly — pruned versions cannot be recovered.`,
|
|
12
|
+
inputSchema: z.object({
|
|
13
|
+
before: z
|
|
14
|
+
.string()
|
|
15
|
+
.describe("Duration (e.g. 30d, 7d) or absolute ISO timestamp — versions strictly older are dropped"),
|
|
16
|
+
dry_run: z.boolean().default(true).describe("Report what would be removed without changing the DB"),
|
|
17
|
+
}),
|
|
18
|
+
outputSchema: z.object({
|
|
19
|
+
cutoff: z.string(),
|
|
20
|
+
removed_versions: z.number(),
|
|
21
|
+
removed_orphan_blobs: z.number(),
|
|
22
|
+
dry_run: z.boolean(),
|
|
23
|
+
}),
|
|
24
|
+
cli: { positional: ["before"] },
|
|
25
|
+
console_formatter: (result) => {
|
|
26
|
+
const tag = result.dry_run ? colors.yellow("[dry-run]") : colors.green("[applied]");
|
|
27
|
+
const head = `${tag} cutoff ${colors.cyan(result.cutoff)}`;
|
|
28
|
+
const versions = `${colors.yellow(`${result.removed_versions} version${result.removed_versions === 1 ? "" : "s"}`)} would be dropped`;
|
|
29
|
+
const blobs = result.dry_run
|
|
30
|
+
? colors.dim("(orphan blob count not computed in dry-run)")
|
|
31
|
+
: `${colors.yellow(`${result.removed_orphan_blobs} orphan blob${result.removed_orphan_blobs === 1 ? "" : "s"}`)} reclaimed`;
|
|
32
|
+
return `${head}\n${versions}\n${blobs}`;
|
|
33
|
+
},
|
|
34
|
+
handler: async (input, ctx) => {
|
|
35
|
+
const cutoff = resolveCutoff(input.before);
|
|
36
|
+
if (input.dry_run) {
|
|
37
|
+
const cnt =
|
|
38
|
+
(
|
|
39
|
+
await ctx.db.queryGet<{ n: number }>(
|
|
40
|
+
`SELECT COUNT(*) AS n FROM files
|
|
41
|
+
WHERE version_id < CAST(?1 AS TIMESTAMP)
|
|
42
|
+
AND (logical_path, version_id) NOT IN (
|
|
43
|
+
SELECT logical_path, MAX(version_id) FROM files GROUP BY logical_path
|
|
44
|
+
)`,
|
|
45
|
+
cutoff,
|
|
46
|
+
)
|
|
47
|
+
)?.n ?? 0;
|
|
48
|
+
return { cutoff, removed_versions: Number(cnt), removed_orphan_blobs: 0, dry_run: true };
|
|
49
|
+
}
|
|
50
|
+
const removed = await pruneOldVersions(ctx.db, cutoff);
|
|
51
|
+
const orphans = await gcOrphanBlobs(ctx.db);
|
|
52
|
+
return {
|
|
53
|
+
cutoff,
|
|
54
|
+
removed_versions: removed.removed,
|
|
55
|
+
removed_orphan_blobs: orphans.removed,
|
|
56
|
+
dry_run: false,
|
|
57
|
+
};
|
|
58
|
+
},
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
/** Convert a duration string or ISO timestamp into an ISO cutoff. */
|
|
62
|
+
function resolveCutoff(input: string): string {
|
|
63
|
+
const trimmed = input.trim();
|
|
64
|
+
const m = trimmed.match(/^(\d+)([smhd])$/i);
|
|
65
|
+
if (m) {
|
|
66
|
+
const n = Number(m[1]);
|
|
67
|
+
const unit = m[2]?.toLowerCase() ?? "s";
|
|
68
|
+
const sec = unit === "s" ? 1 : unit === "m" ? 60 : unit === "h" ? 3600 : 86400;
|
|
69
|
+
return new Date(Date.now() - n * sec * 1000).toISOString();
|
|
70
|
+
}
|
|
71
|
+
const parsed = Date.parse(trimmed);
|
|
72
|
+
if (Number.isNaN(parsed)) {
|
|
73
|
+
throw new HelpfulError({
|
|
74
|
+
kind: "input_error",
|
|
75
|
+
message: `invalid --before: ${input}`,
|
|
76
|
+
hint: "Use a duration like 30d, or an ISO-8601 timestamp like 2024-01-01T00:00:00Z.",
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
return new Date(parsed).toISOString();
|
|
80
|
+
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { readBlob } from "../db/blobs.ts";
|
|
3
|
+
import { getCurrent, getVersion } from "../db/files.ts";
|
|
4
|
+
import { HelpfulError } from "../errors.ts";
|
|
5
|
+
import { colors } from "../output/formatter.ts";
|
|
6
|
+
import { defineOperation } from "./types.ts";
|
|
7
|
+
|
|
8
|
+
// Two-mode read: default returns the markdown surrogate stored on the file
// row; bytes=true returns the original ingested blob, base64-encoded.
export const readOperation = defineOperation({
  name: "membot_read",
  cliName: "read",
  bashEquivalent: "cat",
  description: `Read a stored file. By default returns the cleaned markdown surrogate the rest of the index sees — for a markdown source that's the original text, for a PDF/DOCX/HTML that's the converted markdown, and for an image that's its caption. Pass bytes=true to instead return the **original ingested bytes verbatim** (base64-encoded): for a textual source like .md or .txt that's the literal source you uploaded, NOT the surrogate; for binary sources it's the raw PDF / DOCX / image bytes. Defaults to the current version; pass \`version\` (timestamp) to read a historical snapshot — use membot_versions to enumerate available versions. For finding content across many files, use membot_search instead of repeated membot_read calls.`,
  inputSchema: z.object({
    logical_path: z.string().describe("Path of the file to read"),
    version: z.string().optional().describe("Specific version_id (ISO timestamp) — defaults to current"),
    bytes: z
      .boolean()
      .default(false)
      .describe(
        "Return original ingested bytes (base64) verbatim instead of the markdown surrogate. For textual sources this is the original text, NOT the surrogate.",
      ),
    offset: z.number().optional().describe("1-based start line (text mode only)"),
    limit: z.number().optional().describe("Number of lines to return (text mode only)"),
  }),
  outputSchema: z.object({
    logical_path: z.string(),
    version_id: z.string(),
    mime_type: z.string().nullable(),
    size_bytes: z.number().nullable(),
    version_is_current: z.boolean(),
    content: z.string().optional(),
    description: z.string().nullable().optional(),
    bytes_base64: z.string().optional(),
    blob_available: z.boolean(),
  }),
  cli: { positional: ["logical_path"] },
  console_formatter: (result) => {
    const tag = result.version_is_current ? colors.green("[current]") : colors.yellow("[historical]");
    const head = `${colors.cyan(result.logical_path)} ${colors.dim(`@ ${result.version_id}`)} ${tag}`;
    const meta = colors.dim(
      `mime=${result.mime_type ?? "-"} size=${result.size_bytes ?? "-"} blob=${result.blob_available ? "yes" : "no"}`,
    );
    // Bytes mode: don't dump base64 to the terminal — just report its length.
    if (result.bytes_base64 !== undefined) {
      return `${head}\n${meta}\n${colors.dim(`(${result.bytes_base64.length} base64 chars; pipe with --json for the full payload)`)}`;
    }
    const body = result.content ?? "";
    return `${head}\n${meta}\n\n${body}`;
  },
  handler: async (input, ctx) => {
    // Fetch current unconditionally: it also determines version_is_current
    // when a historical version is requested.
    const cur = await getCurrent(ctx.db, input.logical_path);
    const row = input.version ? await getVersion(ctx.db, input.logical_path, input.version) : cur;
    if (!row) {
      throw new HelpfulError({
        kind: "not_found",
        message: `no version of ${input.logical_path}${input.version ? ` at ${input.version}` : ""} found`,
        hint: `Run \`membot ls\` to see paths, or \`membot versions ${input.logical_path}\` to list versions.`,
      });
    }
    const isCurrent = !!cur && cur.version_id === row.version_id;

    if (input.bytes) {
      // Bytes mode: load the raw blob by hash. Rows without a blob_sha256
      // (inline writes) have no original bytes to return.
      const blob = row.blob_sha256 ? await readBlob(ctx.db, row.blob_sha256) : null;
      if (!blob) {
        throw new HelpfulError({
          kind: "not_found",
          message: `no blob bytes available for ${input.logical_path}@${row.version_id}`,
          hint: "Inline writes do not have an underlying blob. Use the markdown surrogate (default) instead.",
        });
      }
      // mime_type/size_bytes come from the blob row here (the original
      // bytes), not from the file row; offset/limit are ignored in this mode.
      return {
        logical_path: row.logical_path,
        version_id: row.version_id,
        mime_type: blob.mime_type,
        size_bytes: blob.size_bytes,
        version_is_current: isCurrent,
        bytes_base64: Buffer.from(blob.bytes).toString("base64"),
        blob_available: true,
      };
    }

    // Text mode: return the markdown surrogate, optionally windowed by
    // 1-based offset/limit line range.
    const content = sliceLines(row.content ?? "", input.offset, input.limit);
    return {
      logical_path: row.logical_path,
      version_id: row.version_id,
      mime_type: row.mime_type,
      size_bytes: row.size_bytes,
      version_is_current: isCurrent,
      content,
      description: row.description,
      blob_available: !!row.blob_sha256,
    };
  },
});
|
|
94
|
+
|
|
95
|
+
/** Return the requested 1-based line range (offset..offset+limit-1) or the full body. */
|
|
96
|
+
function sliceLines(text: string, offset?: number, limit?: number): string {
|
|
97
|
+
if (offset === undefined && limit === undefined) return text;
|
|
98
|
+
const lines = text.split("\n");
|
|
99
|
+
const start = Math.max(0, (offset ?? 1) - 1);
|
|
100
|
+
const end = limit !== undefined ? start + limit : lines.length;
|
|
101
|
+
return lines.slice(start, end).join("\n");
|
|
102
|
+
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { listDueRefreshes } from "../db/files.ts";
|
|
3
|
+
import { colors } from "../output/formatter.ts";
|
|
4
|
+
import { refreshOne } from "../refresh/runner.ts";
|
|
5
|
+
import { defineOperation } from "./types.ts";
|
|
6
|
+
|
|
7
|
+
export const refreshOperation = defineOperation({
|
|
8
|
+
name: "membot_refresh",
|
|
9
|
+
cliName: "refresh",
|
|
10
|
+
description: `Re-read a file's source and create a new version only if the source bytes changed. Pass \`logical_path\` to refresh one file, or omit it to refresh every file whose refresh_frequency_sec has elapsed. Local files are detected via mtime+sha; remote files are re-fetched via the same mcpx invocation that was originally used. On auth or network failure the prior version stays current — check \`last_refresh_status\`.`,
|
|
11
|
+
inputSchema: z.object({
|
|
12
|
+
logical_path: z.string().optional().describe("Single path to refresh; omit for all-due"),
|
|
13
|
+
force: z.boolean().default(false).describe("Re-embed even if source sha is unchanged"),
|
|
14
|
+
}),
|
|
15
|
+
outputSchema: z.object({
|
|
16
|
+
processed: z.array(
|
|
17
|
+
z.object({
|
|
18
|
+
logical_path: z.string(),
|
|
19
|
+
status: z.enum(["ok", "unchanged", "failed"]),
|
|
20
|
+
new_version_id: z.string().optional(),
|
|
21
|
+
error: z.string().optional(),
|
|
22
|
+
}),
|
|
23
|
+
),
|
|
24
|
+
count: z.number(),
|
|
25
|
+
}),
|
|
26
|
+
cli: { positional: ["logical_path"] },
|
|
27
|
+
console_formatter: (result) => {
|
|
28
|
+
if (result.processed.length === 0) return colors.dim("(nothing due to refresh)");
|
|
29
|
+
let updated = 0;
|
|
30
|
+
let unchanged = 0;
|
|
31
|
+
let failed = 0;
|
|
32
|
+
const lines = result.processed.map((p) => {
|
|
33
|
+
if (p.status === "ok") {
|
|
34
|
+
updated++;
|
|
35
|
+
const ver = p.new_version_id ? colors.dim(`→ ${p.new_version_id}`) : "";
|
|
36
|
+
return `${colors.green("✓")} ${colors.cyan(p.logical_path)} ${ver}`;
|
|
37
|
+
}
|
|
38
|
+
if (p.status === "unchanged") {
|
|
39
|
+
unchanged++;
|
|
40
|
+
return `${colors.dim("·")} ${colors.dim(p.logical_path)} ${colors.dim("(unchanged)")}`;
|
|
41
|
+
}
|
|
42
|
+
failed++;
|
|
43
|
+
return `${colors.red("✗")} ${p.logical_path} ${colors.dim(p.error ?? "")}`;
|
|
44
|
+
});
|
|
45
|
+
const parts = [colors.green(`updated ${updated}`), colors.dim(`unchanged ${unchanged}`)];
|
|
46
|
+
if (failed) parts.push(colors.red(`failed ${failed}`));
|
|
47
|
+
return `${lines.join("\n")}\n${parts.join(", ")}`;
|
|
48
|
+
},
|
|
49
|
+
handler: async (input, ctx) => {
|
|
50
|
+
const targets = input.logical_path
|
|
51
|
+
? [input.logical_path]
|
|
52
|
+
: (await listDueRefreshes(ctx.db)).map((r) => r.logical_path);
|
|
53
|
+
const out: Array<{
|
|
54
|
+
logical_path: string;
|
|
55
|
+
status: "ok" | "unchanged" | "failed";
|
|
56
|
+
new_version_id?: string;
|
|
57
|
+
error?: string;
|
|
58
|
+
}> = [];
|
|
59
|
+
ctx.progress.start(targets.length, "refresh");
|
|
60
|
+
for (const path of targets) {
|
|
61
|
+
ctx.progress.tick(path);
|
|
62
|
+
try {
|
|
63
|
+
const r = await refreshOne(ctx, path, input.force);
|
|
64
|
+
out.push(r);
|
|
65
|
+
} catch (err) {
|
|
66
|
+
out.push({ logical_path: path, status: "failed", error: err instanceof Error ? err.message : String(err) });
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
ctx.progress.done(`refresh: ${out.filter((r) => r.status === "ok").length}/${out.length} updated`);
|
|
70
|
+
return { processed: out, count: out.length };
|
|
71
|
+
},
|
|
72
|
+
});
|