membot 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/package.json +81 -24
  2. package/patches/@huggingface%2Ftransformers@4.2.0.patch +137 -0
  3. package/scripts/apply-transformers-patch.sh +35 -0
  4. package/src/cli.ts +70 -0
  5. package/src/commands/check-update.ts +69 -0
  6. package/src/commands/mcpx.ts +112 -0
  7. package/src/commands/reindex.ts +53 -0
  8. package/src/commands/serve.ts +58 -0
  9. package/src/commands/upgrade.ts +220 -0
  10. package/src/config/loader.ts +100 -0
  11. package/src/config/schemas.ts +39 -0
  12. package/src/constants.ts +42 -0
  13. package/src/context.ts +80 -0
  14. package/src/db/blobs.ts +53 -0
  15. package/src/db/chunks.ts +176 -0
  16. package/src/db/connection.ts +173 -0
  17. package/src/db/files.ts +325 -0
  18. package/src/db/migrations/001-init.ts +63 -0
  19. package/src/db/migrations/002-fts.ts +12 -0
  20. package/src/db/migrations.ts +45 -0
  21. package/src/errors.ts +87 -0
  22. package/src/ingest/chunker.ts +117 -0
  23. package/src/ingest/converter/docx.ts +15 -0
  24. package/src/ingest/converter/html.ts +20 -0
  25. package/src/ingest/converter/image.ts +71 -0
  26. package/src/ingest/converter/index.ts +119 -0
  27. package/src/ingest/converter/llm.ts +66 -0
  28. package/src/ingest/converter/ocr.ts +51 -0
  29. package/src/ingest/converter/pdf.ts +38 -0
  30. package/src/ingest/converter/text.ts +8 -0
  31. package/src/ingest/describer.ts +72 -0
  32. package/src/ingest/embedder.ts +83 -0
  33. package/src/ingest/fetcher.ts +280 -0
  34. package/src/ingest/ingest.ts +444 -0
  35. package/src/ingest/local-reader.ts +64 -0
  36. package/src/ingest/search-text.ts +18 -0
  37. package/src/ingest/source-resolver.ts +186 -0
  38. package/src/mcp/instructions.ts +34 -0
  39. package/src/mcp/server.ts +101 -0
  40. package/src/mount/commander.ts +174 -0
  41. package/src/mount/mcp.ts +111 -0
  42. package/src/mount/zod-to-cli.ts +158 -0
  43. package/src/operations/add.ts +69 -0
  44. package/src/operations/diff.ts +105 -0
  45. package/src/operations/index.ts +38 -0
  46. package/src/operations/info.ts +95 -0
  47. package/src/operations/list.ts +87 -0
  48. package/src/operations/move.ts +83 -0
  49. package/src/operations/prune.ts +80 -0
  50. package/src/operations/read.ts +102 -0
  51. package/src/operations/refresh.ts +72 -0
  52. package/src/operations/remove.ts +35 -0
  53. package/src/operations/search.ts +72 -0
  54. package/src/operations/tree.ts +103 -0
  55. package/src/operations/types.ts +81 -0
  56. package/src/operations/versions.ts +78 -0
  57. package/src/operations/write.ts +77 -0
  58. package/src/output/formatter.ts +68 -0
  59. package/src/output/logger.ts +114 -0
  60. package/src/output/progress.ts +78 -0
  61. package/src/output/tty.ts +91 -0
  62. package/src/refresh/runner.ts +296 -0
  63. package/src/refresh/scheduler.ts +54 -0
  64. package/src/sdk.ts +27 -0
  65. package/src/search/hybrid.ts +100 -0
  66. package/src/search/keyword.ts +62 -0
  67. package/src/search/semantic.ts +56 -0
  68. package/src/update/background.ts +73 -0
  69. package/src/update/cache.ts +40 -0
  70. package/src/update/checker.ts +117 -0
  71. package/.claude/settings.local.json +0 -7
  72. package/CLAUDE.md +0 -139
  73. package/docs/plan.md +0 -905
@@ -0,0 +1,220 @@
1
+ import { tmpdir } from "node:os";
2
+ import { join } from "node:path";
3
+ import { dim, green, red, yellow } from "ansis";
4
+ import { $ } from "bun";
5
+ import type { Command } from "commander";
6
+ import { createSpinner } from "nanospinner";
7
+ import pkg from "../../package.json" with { type: "json" };
8
+ import { clearUpdateCache, loadUpdateCache, saveUpdateCache } from "../update/cache.ts";
9
+ import {
10
+ checkForUpdate,
11
+ detectInstallMethod,
12
+ type InstallMethod,
13
+ needsCheck,
14
+ type UpdateCache,
15
+ } from "../update/checker.ts";
16
+
17
/**
 * `owner/repo` slug of the GitHub repository, derived from the `repository.url`
 * field of `package.json`. An optional `git+` scheme prefix and a trailing
 * `.git` are stripped so the slug can be spliced into release download URLs.
 */
const GITHUB_REPO = pkg.repository.url.replace(/^(?:git\+)?https:\/\/github\.com\//, "").replace(/\.git$/, "");
18
+
19
/**
 * Name of the release asset matching the current host, e.g. `membot-linux-x64`
 * or `membot-windows-arm64.exe`.
 *
 * Any platform other than `darwin` or `win32` is reported as `linux`, and any
 * architecture other than `arm64` is reported as `x64`.
 */
function platformArtifactName(): string {
  const isWindows = process.platform === "win32";
  const osName = isWindows ? "windows" : process.platform === "darwin" ? "darwin" : "linux";
  const cpu = process.arch === "arm64" ? "arm64" : "x64";
  const suffix = isWindows ? ".exe" : "";
  return `membot-${osName}-${cpu}${suffix}`;
}
38
+
39
/**
 * Run a global package-manager install (`bun install -g …` or `npm install -g …`)
 * through Bun's shell.
 *
 * @param command Executable to invoke.
 * @param args Arguments passed to the command.
 * @returns `true` when the process exits with code 0, `false` otherwise.
 */
async function upgradeWithPackageManager(command: string, args: string[]): Promise<boolean> {
  // `.nothrow()` lets a non-zero exit be inspected via `exitCode` below.
  const { exitCode } = await $`${command} ${args}`.nothrow();
  return exitCode === 0;
}
44
+
45
/**
 * Download the platform binary for `latestVersion` from GitHub releases and
 * move it over the running executable (`process.execPath`). If a plain `mv`
 * fails, the move is retried through `sudo`.
 *
 * The download is staged in a temp file that is always removed on the way
 * out, whether the install succeeded, failed, or threw.
 *
 * @param latestVersion Version to install, without the leading `v`.
 * @returns `true` when the new binary is in place, `false` on any failure
 *   (failures are reported on stderr, never thrown).
 */
async function upgradeFromBinary(latestVersion: string): Promise<boolean> {
  const artifact = platformArtifactName();
  const tag = `v${latestVersion}`;
  const url = `https://github.com/${GITHUB_REPO}/releases/download/${tag}/${artifact}`;

  const tmpPath = join(tmpdir(), `membot-upgrade-${Date.now()}`);
  const targetPath = process.execPath;

  try {
    const res = await fetch(url);
    if (!res.ok) {
      console.error(red(`Failed to download binary: HTTP ${res.status}`));
      return false;
    }

    await Bun.write(tmpPath, await res.arrayBuffer());

    // The executable bit is a POSIX concept. On Windows `chmod` may not be
    // available at all, and its failure would abort the upgrade for no benefit.
    if (process.platform !== "win32") {
      await $`chmod +x ${tmpPath}`.quiet();
    }

    const mv = await $`mv ${tmpPath} ${targetPath}`.quiet().nothrow();
    if (mv.exitCode === 0) {
      return true;
    }

    console.log(dim("Requires elevated permissions..."));
    const sudo = await $`sudo mv ${tmpPath} ${targetPath}`.nothrow();
    if (sudo.exitCode !== 0) {
      console.error(red("Failed to install binary. Try running with sudo."));
      return false;
    }

    return true;
  } catch (err) {
    console.error(red(`Failed to upgrade binary: ${err}`));
    return false;
  } finally {
    // After a successful move the temp path no longer exists; `rm -f` is then a no-op.
    await $`rm -f ${tmpPath}`.quiet().nothrow();
  }
}
87
+
88
/**
 * Register the `membot upgrade` subcommand on `program`.
 *
 * The action:
 *  1. Resolves the latest version from the update cache when `needsCheck`
 *     says it is still usable, otherwise calls `checkForUpdate` and saves
 *     the result back to the cache.
 *  2. Exits early when there is nothing to upgrade.
 *  3. Dispatches on the detected install method: `bun` / `npm` reinstall the
 *     package globally, `binary` replaces the executable, and `local-dev`
 *     only prints instructions.
 *  4. Clears the update cache after a successful upgrade; exits with code 1
 *     on failure.
 *
 * When the global `--json` option is set, every outcome — including an
 * unexpected thrown error — is reported as one JSON object on stdout.
 *
 * @param program Root commander program; its global options are read for `json`.
 */
export function registerUpgradeCommand(program: Command) {
  program
    .command("upgrade")
    .description("Upgrade membot to the latest version")
    .action(async () => {
      const opts = program.opts();
      const json = !!(opts.json as boolean | undefined);
      const isTTY = process.stderr.isTTY ?? false;

      // The spinner draws on stderr and is only shown for interactive, non-JSON runs.
      const spinner =
        !json && isTTY ? createSpinner("Checking for updates...", { stream: process.stderr }).start() : null;

      /** Print `payload` as JSON in `--json` mode, otherwise run the human-readable printer. */
      const report = (payload: Record<string, unknown>, human: () => void): void => {
        if (json) {
          console.log(JSON.stringify(payload));
        } else {
          human();
        }
      };

      try {
        const cache = await loadUpdateCache();
        let latestVersion: string;
        let hasUpdate: boolean;

        if (!needsCheck(cache) && cache) {
          latestVersion = cache.latestVersion;
          hasUpdate = cache.hasUpdate;
        } else {
          const info = await checkForUpdate(pkg.version);
          latestVersion = info.latestVersion;
          hasUpdate = info.hasUpdate;

          const newCache: UpdateCache = {
            lastCheckAt: new Date().toISOString(),
            latestVersion,
            hasUpdate,
            changelog: info.changelog,
          };
          await saveUpdateCache(newCache);
        }

        if (!hasUpdate) {
          spinner?.stop();
          report(
            {
              upgraded: false,
              currentVersion: pkg.version,
              message: "Already up to date",
            },
            () => console.log(green(`membot is already up to date (v${pkg.version})`)),
          );
          return;
        }

        const method: InstallMethod = detectInstallMethod();
        spinner?.update({
          text: `Upgrading from v${pkg.version} to v${latestVersion} (${method})...`,
        });

        let success = false;

        switch (method) {
          case "bun":
          case "npm":
            // Stop the spinner first so the package manager's own output is not garbled.
            spinner?.stop();
            success = await upgradeWithPackageManager(method, ["install", "-g", `${pkg.name}@${latestVersion}`]);
            break;

          case "binary":
            spinner?.stop();
            success = await upgradeFromBinary(latestVersion);
            break;

          case "local-dev":
            spinner?.stop();
            report(
              {
                upgraded: false,
                currentVersion: pkg.version,
                latestVersion,
                installMethod: "local-dev",
                message: "Running from source. Use `git pull && bun install` to update.",
              },
              () => console.log(yellow("Running from source. Use `git pull && bun install` to update.")),
            );
            return;
        }

        if (success) {
          // Drop the cached "update available" result now that it has been applied.
          await clearUpdateCache();
          report(
            {
              upgraded: true,
              previousVersion: pkg.version,
              newVersion: latestVersion,
              installMethod: method,
            },
            () => console.log(green(`Successfully upgraded membot: v${pkg.version} → v${latestVersion}`)),
          );
          return;
        }

        report(
          {
            upgraded: false,
            currentVersion: pkg.version,
            latestVersion,
            installMethod: method,
            message: "Upgrade failed",
          },
          () => console.error(red("Upgrade failed. See errors above.")),
        );
        process.exit(1);
      } catch (err) {
        spinner?.error({ text: "Upgrade failed" });
        // Keep the `--json` contract on the unexpected-error path as well.
        if (json) {
          console.log(
            JSON.stringify({
              upgraded: false,
              currentVersion: pkg.version,
              message: "Upgrade failed",
              error: String(err),
            }),
          );
        }
        console.error(String(err));
        process.exit(1);
      }
    });
}
@@ -0,0 +1,100 @@
1
+ import { mkdir } from "node:fs/promises";
2
+ import { resolve } from "node:path";
3
+ import { defaultMembotHome, ENV, FILES } from "../constants.ts";
4
+ import { asHelpful, HelpfulError } from "../errors.ts";
5
+ import { type MembotConfig, MembotConfigSchema } from "./schemas.ts";
6
+
7
/** Options accepted by `loadConfig`. */
export interface LoadConfigOptions {
  /** Explicit data-directory override (the `--config` flag); blank values are ignored. */
  configFlag?: string;
}
10
+
11
/**
 * Resolve the data directory, then read and validate its `config.json`.
 *
 * The data directory is created (recursively) if missing. A missing config
 * file is treated as `{}`, so every field takes its schema default.
 *
 * Two fields are overridden after validation:
 *  - `llm.anthropic_api_key` is replaced by the `ANTHROPIC_API_KEY` env var
 *    when that is set to a non-blank value (stored trimmed).
 *  - `data_dir` is always set to the directory that was actually resolved.
 *
 * @param options Optional overrides; `configFlag` selects the data directory.
 * @returns The effective config plus the resolved `dataDir` and `configPath`.
 * @throws The error produced by `asHelpful` (kind `input_error`) when the file
 *   is not valid JSON or fails schema validation.
 */
export async function loadConfig(options: LoadConfigOptions = {}): Promise<{
  config: MembotConfig;
  dataDir: string;
  configPath: string;
}> {
  const dataDir = resolveDataDir(options.configFlag);
  await mkdir(dataDir, { recursive: true });

  const configPath = resolve(dataDir, FILES.CONFIG_JSON);
  let raw: unknown = {};
  const file = Bun.file(configPath);
  if (await file.exists()) {
    try {
      raw = JSON.parse(await file.text());
    } catch (err) {
      throw asHelpful(
        err,
        `while parsing ${configPath}`,
        `Fix the JSON in ${configPath}, or delete it to regenerate defaults.`,
        "input_error",
      );
    }
  }

  let parsed: MembotConfig;
  try {
    parsed = MembotConfigSchema.parse(raw);
  } catch (err) {
    throw asHelpful(
      err,
      `while validating ${configPath}`,
      `Check ${configPath} against the documented schema, or delete it to regenerate defaults.`,
      "input_error",
    );
  }

  // Trim before storing: stray whitespace or a trailing newline around the
  // env value must not become part of the API key.
  const envKey = process.env[ENV.ANTHROPIC_API_KEY]?.trim();
  const llm = envKey ? { ...parsed.llm, anthropic_api_key: envKey } : parsed.llm;

  // The resolved directory wins over whatever `data_dir` the file recorded.
  const config: MembotConfig = { ...parsed, llm, data_dir: dataDir };

  return { config, dataDir, configPath };
}
63
+
64
/**
 * Choose the membot data directory. The first non-blank candidate wins, in
 * this order: the explicit `--config` flag, then the `MEMBOT_HOME` env var.
 * If neither is set, `defaultMembotHome()` is used. Flag and env values are
 * resolved to absolute paths.
 */
function resolveDataDir(flag?: string): string {
  const candidates = [flag, process.env[ENV.HOME]];
  for (const candidate of candidates) {
    if (candidate?.trim()) {
      return resolve(candidate);
    }
  }
  return defaultMembotHome();
}
75
+
76
/**
 * Write `config` to `configPath` as pretty-printed JSON (2-space indent,
 * trailing newline) with `llm.anthropic_api_key` replaced by an empty string.
 *
 * The key is deliberately never persisted: the `ANTHROPIC_API_KEY` env var is
 * the source of truth, and a key stored in the file could leak through
 * dotfile syncs or accidental commits.
 */
export async function saveConfig(configPath: string, config: MembotConfig): Promise<void> {
  const llm = { ...config.llm, anthropic_api_key: "" };
  const safe: MembotConfig = { ...config, llm };
  const serialized = JSON.stringify(safe, null, 2);
  await Bun.write(configPath, `${serialized}\n`);
}
89
+
90
/**
 * Tree-shaking guard: an exported function that always throws a
 * `HelpfulError` of kind `internal_error`. It is not meant to be called at
 * runtime; it exists so the module keeps a value-level export when consumers
 * import only types.
 */
export function _ensureExportedSentinel(): never {
  const sentinelError = new HelpfulError({
    kind: "internal_error",
    message: "sentinel called",
    hint: "This function exists only for tree-shaking sanity checks.",
  });
  throw sentinelError;
}
@@ -0,0 +1,39 @@
1
+ import { z } from "zod";
2
+ import { DEFAULTS, defaultMembotHome, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
3
+
4
/** Fresh schema for a strictly positive integer. */
const positiveInt = () => z.number().int().positive();

/** Fresh schema for a string that falls back to `fallback` when absent. */
const stringOr = (fallback: string) => z.string().default(fallback);

/** Chunking settings: strategy plus target and maximum chunk sizes in characters. */
export const ChunkerConfigSchema = z.object({
  mode: z.enum(["deterministic", "llm"]).default(DEFAULTS.CHUNKER_MODE),
  target_chars: positiveInt().default(DEFAULTS.CHUNKER_TARGET_CHARS),
  max_chars: positiveInt().default(DEFAULTS.CHUNKER_MAX_CHARS),
});

/** LLM settings: the Anthropic API key and the model used for each pipeline stage. */
export const LlmConfigSchema = z.object({
  anthropic_api_key: stringOr(""),
  converter_model: stringOr(DEFAULTS.CONVERTER_MODEL),
  chunker_model: stringOr(DEFAULTS.CHUNKER_MODEL),
  describer_model: stringOr(DEFAULTS.DESCRIBER_MODEL),
  vision_model: stringOr(DEFAULTS.VISION_MODEL),
});

/** mcpx settings; an empty `config_path` means "not configured here". */
export const McpxConfigSchema = z.object({
  config_path: stringOr(""),
});

/** Daemon settings: tick interval in seconds. */
export const DaemonConfigSchema = z.object({
  tick_interval_sec: positiveInt().default(DEFAULTS.DAEMON_TICK_SEC),
});

/**
 * Root config schema. Each nested section defaults to that section's schema
 * parsed from `{}`, so an empty config file yields a fully populated object.
 */
export const MembotConfigSchema = z.object({
  data_dir: stringOr(defaultMembotHome()),
  embedding_model: stringOr(EMBEDDING_MODEL),
  embedding_dimension: positiveInt().default(EMBEDDING_DIMENSION),
  chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
  llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
  mcpx: McpxConfigSchema.default(() => McpxConfigSchema.parse({})),
  daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
  default_refresh_frequency_sec: positiveInt().nullable().default(null),
});

/** Validated shape of the full membot config. */
export type MembotConfig = z.infer<typeof MembotConfigSchema>;
/** Validated shape of the `chunker` section. */
export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
/** Validated shape of the `llm` section. */
export type LlmConfig = z.infer<typeof LlmConfigSchema>;
@@ -0,0 +1,42 @@
1
+ import { homedir } from "node:os";
2
+ import { join } from "node:path";
3
+
4
/**
 * Default data directory. Returns `$MEMBOT_HOME` verbatim when it is set to a
 * non-blank value, otherwise `~/.membot`.
 */
export function defaultMembotHome(): string {
  const override = process.env.MEMBOT_HOME ?? "";
  return override.trim() === "" ? join(homedir(), ".membot") : override;
}
10
+
11
/** Names of the environment variables membot reads. */
export const ENV = {
  HOME: "MEMBOT_HOME",
  CONFIG: "MEMBOT_CONFIG",
  DEBUG: "MEMBOT_DEBUG",
  ANTHROPIC_API_KEY: "ANTHROPIC_API_KEY",
  MCPX_CONFIG_PATH: "MCP_CONFIG_PATH",
  NO_UPDATE_CHECK: "MEMBOT_NO_UPDATE_CHECK",
} as const;

/** Default embedding model identifier. */
export const EMBEDDING_MODEL = "Xenova/bge-small-en-v1.5";
/** Default embedding vector dimension. */
export const EMBEDDING_DIMENSION = 384;

/** Built-in defaults; the `_SEC` / `_MS` suffixes give each duration's unit. */
export const DEFAULTS = {
  CHUNKER_MODE: "deterministic",
  CHUNKER_TARGET_CHARS: 4_000,
  CHUNKER_MAX_CHARS: 15_000,
  DAEMON_TICK_SEC: 60,
  HTTP_TIMEOUT_MS: 30_000,
  CONVERTER_MODEL: "claude-haiku-4-5-20251001",
  CHUNKER_MODEL: "claude-haiku-4-5-20251001",
  DESCRIBER_MODEL: "claude-haiku-4-5-20251001",
  VISION_MODEL: "claude-haiku-4-5-20251001",
  // 24 hours, in milliseconds.
  UPDATE_CHECK_INTERVAL_MS: 24 * 60 * 60 * 1000,
  UPDATE_CHECK_TIMEOUT_MS: 5_000,
} as const;

/** File and directory names used under the data directory. */
export const FILES = {
  CONFIG_JSON: "config.json",
  INDEX_DUCKDB: "index.duckdb",
  MODELS_DIR: "models",
  LOGS_DIR: "logs",
} as const;
package/src/context.ts ADDED
@@ -0,0 +1,80 @@
1
+ import { join } from "node:path";
2
+ import { McpxClient } from "@evantahler/mcpx";
3
+ import { loadConfig } from "./config/loader.ts";
4
+ import type { MembotConfig } from "./config/schemas.ts";
5
+ import { ENV, FILES } from "./constants.ts";
6
+ import { type DbConnection, openDb } from "./db/connection.ts";
7
+ import { logger } from "./output/logger.ts";
8
+ import type { Progress } from "./output/progress.ts";
9
+ import { createProgress } from "./output/progress.ts";
10
+ import { detectMode, setMode } from "./output/tty.ts";
11
+
12
/** Shared state handed to operation handlers; built by `buildContext`. */
export interface AppContext {
  /** Effective configuration after validation and env overrides. */
  config: MembotConfig;
  /** Resolved data directory. */
  dataDir: string;
  /** Path of the config file inside the data directory. */
  configPath: string;
  /** Open database connection; released by `closeContext`. */
  db: DbConnection;
  /** The shared logger. */
  logger: typeof logger;
  /** Progress reporter created for this context. */
  progress: Progress;
  /** mcpx client, or `null` when it could not be constructed. */
  mcpx: McpxClient | null;
}
21
+
22
/** Global CLI options consumed when building an `AppContext`. */
export interface BuildContextOptions {
  /** Explicit data-directory override, forwarded to `loadConfig`. */
  configFlag?: string;
  /** Forwarded to output-mode detection. */
  json?: boolean;
  /** Forwarded to output-mode detection. */
  verbose?: boolean;
  /** Forwarded to output-mode detection. */
  noColor?: boolean;
  /** NOTE(review): not read by `buildContext` in this file — confirm where it is consumed. */
  noInteractive?: boolean;
}
29
+
30
/**
 * Assemble the `AppContext` used by operation handlers. In order:
 *  1. Set the process-wide output mode from the `json` / `verbose` / `noColor` options.
 *  2. Load and validate config (see `loadConfig`).
 *  3. Open the DuckDB index file inside the data directory.
 *  4. Construct the mcpx client (`null` if construction throws).
 *  5. Create a progress reporter.
 *
 * The caller owns the result and should release it with `closeContext`.
 */
export async function buildContext(options: BuildContextOptions = {}): Promise<AppContext> {
  const { json, verbose, noColor, configFlag } = options;
  setMode(detectMode({ json, verbose, noColor }));

  const loaded = await loadConfig({ configFlag });
  const db = await openDb(join(loaded.dataDir, FILES.INDEX_DUCKDB));
  const mcpx = await maybeMcpx(loaded.config);

  return {
    config: loaded.config,
    dataDir: loaded.dataDir,
    configPath: loaded.configPath,
    db,
    logger,
    progress: createProgress(),
    mcpx,
  };
}
56
+
57
/**
 * Construct an mcpx client. The config directory comes from
 * `config.mcpx.config_path`, falling back to the `MCP_CONFIG_PATH` env var;
 * when neither is set the client is built with no options.
 *
 * @returns The client, or `null` if its constructor throws.
 */
async function maybeMcpx(config: MembotConfig): Promise<McpxClient | null> {
  const configDir = config.mcpx.config_path || process.env[ENV.MCPX_CONFIG_PATH];
  const clientOptions = configDir ? { configDir } : {};
  try {
    return new McpxClient(clientOptions);
  } catch {
    return null;
  }
}
66
+
67
/**
 * Release the resources held by `ctx`: the database connection first, then
 * the mcpx client if there is one. Teardown is best effort: an error from
 * either `close()` is swallowed so the other resource is still released.
 */
export async function closeContext(ctx: AppContext): Promise<void> {
  const closeQuietly = async (close: () => unknown): Promise<void> => {
    try {
      await close();
    } catch {
      // best effort
    }
  };

  await closeQuietly(() => ctx.db.close());

  const client = ctx.mcpx;
  if (client) {
    await closeQuietly(() => client.close());
  }
}
@@ -0,0 +1,53 @@
1
+ import type { DbConnection } from "./connection.ts";
2
+
3
/** One row of the `blobs` table, keyed by `sha256`. */
export interface BlobRow {
  /** Primary key; presumably the hex SHA-256 digest of `bytes` — confirm against the writer. */
  sha256: string;
  /** MIME type recorded for the content. */
  mime_type: string;
  /** Size recorded for the content, in bytes. */
  size_bytes: number;
  /** Raw content. */
  bytes: Uint8Array;
}
9
+
10
/**
 * Store a content-addressed blob. If a row with the same `sha256` already
 * exists the statement does nothing (`ON CONFLICT … DO NOTHING`).
 *
 * The bytes parameter carries an explicit `::BLOB` cast because DuckDB can't
 * infer the column type from a JS `Uint8Array` on its own.
 */
export async function upsertBlob(db: DbConnection, blob: BlobRow): Promise<void> {
  const { sha256, mime_type, size_bytes, bytes } = blob;
  const sql = `INSERT INTO blobs (sha256, mime_type, size_bytes, bytes)
     VALUES (?1, ?2, ?3, ?4::BLOB)
     ON CONFLICT (sha256) DO NOTHING`;
  await db.queryRun(sql, sha256, mime_type, size_bytes, bytes);
}
26
+
27
/**
 * Look up a blob by its `sha256`.
 *
 * @returns The matching row, or `null` when no blob has that hash.
 */
export async function readBlob(db: DbConnection, sha256: string): Promise<BlobRow | null> {
  const found = await db.queryGet<BlobRow>(
    `SELECT sha256, mime_type, size_bytes, bytes FROM blobs WHERE sha256 = ?1`,
    sha256,
  );
  if (!found) {
    return null;
  }

  const { mime_type, bytes } = found;
  return {
    sha256: found.sha256,
    mime_type,
    // Coerce to a plain number; presumably the driver can hand back a bigint
    // for this column — confirm against the schema.
    size_bytes: Number(found.size_bytes),
    bytes,
  };
}
43
+
44
/**
 * Delete every blob whose `sha256` is not referenced by any `files` row with
 * a non-NULL `blob_sha256`.
 *
 * NOTE(review): the query does not filter tombstone rows explicitly;
 * presumably tombstones carry a NULL `blob_sha256` — confirm against the schema.
 *
 * @returns The number of rows deleted, as reported by the driver.
 */
export async function gcOrphanBlobs(db: DbConnection): Promise<{ removed: number }> {
  const sql = `DELETE FROM blobs
     WHERE sha256 NOT IN (
       SELECT DISTINCT blob_sha256 FROM files WHERE blob_sha256 IS NOT NULL
     )`;
  const { changes } = await db.queryRun(sql);
  return { removed: changes };
}