membot 0.1.1 → 0.1.2
- package/.claude/skills/membot.md +2 -2
- package/.cursor/rules/membot.mdc +2 -2
- package/README.md +4 -9
- package/package.json +2 -2
- package/patches/@evantahler%2Fmcpx@0.21.4.patch +44 -0
- package/scripts/apply-patches.sh +49 -0
- package/src/ingest/ingest.ts +34 -5
- package/src/ingest/source-resolver.ts +34 -7
- package/src/operations/add.ts +21 -8
- package/scripts/apply-transformers-patch.sh +0 -35
package/.claude/skills/membot.md
CHANGED

@@ -75,7 +75,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
 
 - Defaults always operate on the current, non-tombstoned version.
 - Pass an explicit `--version <timestamp>` (from `membot versions`) to read or diff history.
-- `membot_add
+- `membot_add` (when source bytes have changed), refresh-with-changes, `write`, and `mv` each create a new version. The previous version is preserved. Re-running `membot_add` against an unchanged source is a no-op (status `unchanged`, same `version_id`); pass `force=true` to force a new version.
 - Mutating an existing version is not possible — corrections are new versions.
 
 ## When to use this skill
@@ -99,7 +99,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
 
 | Command | Purpose |
 | ------------------------------------- | ------------------------------------------------------------------------------ |
-| `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text
+| `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>`. Skips unchanged sources; pass `--force` to re-ingest |
 | `membot ls [prefix]` | List current files (size, mime, refresh status) |
 | `membot tree [prefix]` | Render the synthesised logical-path tree |
 | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
package/.cursor/rules/membot.mdc
CHANGED

@@ -75,7 +75,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
 
 - Defaults always operate on the current, non-tombstoned version.
 - Pass an explicit `--version <timestamp>` (from `membot versions`) to read or diff history.
-- `membot_add
+- `membot_add` (when source bytes have changed), refresh-with-changes, `write`, and `mv` each create a new version. The previous version is preserved. Re-running `membot_add` against an unchanged source is a no-op (status `unchanged`, same `version_id`); pass `force=true` to force a new version.
 - Mutating an existing version is not possible — corrections are new versions.
 
 ## When to use this rule
@@ -99,7 +99,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
 
 | Command | Purpose |
 | ------------------------------------- | ------------------------------------------------------------------------------ |
-| `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text
+| `membot add <source>` | Ingest file, directory, glob, URL, or `inline:<text>`. Skips unchanged sources; pass `--force` to re-ingest |
 | `membot ls [prefix]` | List current files (size, mime, refresh status) |
 | `membot tree [prefix]` | Render the synthesised logical-path tree |
 | `membot read <path>` | Read current markdown surrogate (or `--bytes` for original) |
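Taken together, the doc changes above pin down a small contract: an add creates a new version only when the source bytes' hash differs from the current version's, unless forced. Here is a toy model of that contract, not membot's implementation (the real logic lands in `src/ingest/ingest.ts` below); `add`, the in-memory map, and the `v1`/`v2` ids are all illustrative:

```ts
import { createHash } from "node:crypto";
import { strict as assert } from "node:assert";

interface Version { version_id: string; source_sha256: string }

const current = new Map<string, Version>(); // logical_path → current version
let clock = 0;

function add(path: string, bytes: string, force = false) {
  const sha = createHash("sha256").update(bytes).digest("hex");
  const cur = current.get(path);
  if (!force && cur && cur.source_sha256 === sha) {
    // Same bytes as the current version: no-op, report the existing version.
    return { status: "unchanged" as const, version_id: cur.version_id };
  }
  const v = { version_id: `v${++clock}`, source_sha256: sha };
  current.set(path, v); // prior versions stay in history (elided here)
  return { status: "ok" as const, version_id: v.version_id };
}

const a = add("notes.md", "hello");
const b = add("notes.md", "hello"); // unchanged bytes → no new version
assert.equal(b.status, "unchanged");
assert.equal(b.version_id, a.version_id);

const c = add("notes.md", "hello", true); // force=true → new version anyway
assert.equal(c.status, "ok");
assert.notEqual(c.version_id, a.version_id);
```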
package/README.md
CHANGED

@@ -15,18 +15,13 @@
 ## Install
 
 ```bash
-
-curl -fsSL https://raw.githubusercontent.com/evantahler/membot/main/install.sh | bash
-
-# Windows — PowerShell
-iwr -useb https://raw.githubusercontent.com/evantahler/membot/main/install.ps1 | iex
-
-# From npm (requires Bun or Node)
-bun add -g membot
+bun install -g membot
 # or
 npm install -g membot
 ```
 
+This pulls in DuckDB's per-platform native bindings alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`), so a global npm/bun install is the supported path.
+
 ## Quick start
 
 ```bash
@@ -55,7 +50,7 @@ The skill files describe the discover → ingest → search → read → write w
 
 | Command | Description |
 | ------------------------------- | --------------------------------------------------------------------------------- |
-| `membot add <source>` | Ingest a file, directory, glob, URL, or `inline:<text>`.
+| `membot add <source>` | Ingest a file, directory, glob, URL, or `inline:<text>`. Skips on unchanged source bytes; pass `--force` to re-ingest |
 | `membot ls [prefix]` | List current files (size, mime, refresh status) |
 | `membot tree [prefix]` | Render the synthesised logical-path tree |
 | `membot read <path>` | Read the markdown surrogate (or `--bytes` for original bytes, base64) |
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "membot",
-  "version": "0.1.1",
+  "version": "0.1.2",
   "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
   "type": "module",
   "exports": {
@@ -26,7 +26,7 @@
     "test": "bun test",
     "lint": "biome ci . && tsc --noEmit",
     "format": "biome check --write .",
-    "prebuild": "bash scripts/apply-transformers-patch.sh",
+    "prebuild": "bash scripts/apply-patches.sh",
     "build": "bun build --compile --minify --sourcemap --external '@duckdb/*' ./src/cli.ts --outfile dist/membot"
   },
   "keywords": [
package/patches/@evantahler%2Fmcpx@0.21.4.patch
ADDED

@@ -0,0 +1,44 @@
+diff --git a/src/search/onnx-wasm-paths.ts b/src/search/onnx-wasm-paths.ts
+--- a/src/search/onnx-wasm-paths.ts
++++ b/src/search/onnx-wasm-paths.ts
+@@ -1,31 +1,9 @@
+-// Embed the onnxruntime-web WASM runtime files into the compiled binary
+-// (`bun build --compile`) so they survive in a single-binary distribution
+-// where the user has no node_modules.
+-//
+-// This file is loaded **dynamically** by semantic.ts. The relative paths
+-// only resolve in the local repo / compiled binary; for npm/bun-installed
+-// mcpx the parent directory layout is different (deps are hoisted), the
+-// dynamic import throws, and we fall back to letting transformers.js
+-// load WASM via its default mechanism — which works fine because in
+-// that environment node_modules exists and onnxruntime-web is reachable
+-// through normal module resolution.
+-
+-// The relative `../../node_modules/...` paths only resolve from the local repo
+-// layout (and inside `bun build --compile`). When this file is shipped via npm,
+-// deps are hoisted, so consumer `tsc` runs hit TS2307. The `ts-ignore` directive
+-// below silences that for consumers; we avoid the stricter `expect-error` form
+-// because in the local repo the path resolves fine and there would be no error
+-// to expect. At runtime the dynamic import in semantic.ts is wrapped in
+-// try/catch and falls back to transformers.js's default WASM loader (issue #85).
+-// biome-ignore lint/suspicious/noTsIgnore: must stay as ts-ignore per comment above
+-// @ts-ignore - dynamic-only import
+-import wasmMjsPath from "../../node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.asyncify.mjs" with {
+-  type: "file",
+-};
+-// biome-ignore lint/suspicious/noTsIgnore: must stay as ts-ignore per comment above
+-// @ts-ignore - dynamic-only import
+-import wasmBinPath from "../../node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.asyncify.wasm" with {
+-  type: "file",
+-};
+-
+-export { wasmBinPath, wasmMjsPath };
++// PATCHED (membot): upstream mcpx ships static `with { type: "file" }` imports
++// of onnxruntime-web WASM assets via `../../node_modules/...`, which only
++// resolves when mcpx is built standalone. When consumed as an npm dep those
++// paths are unreachable and `bun build --compile` fails at build time. membot
++// never invokes mcpx's semantic search (only `mcpx.exec()` for URL fetching),
++// so we stub the exports — semantic.ts wraps the dynamic import in try/catch
++// and falls back to transformers.js's default WASM loader.
++export const wasmMjsPath = "";
++export const wasmBinPath = "";
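The stub works because of how the consumer loads this module. Per the patch comments, mcpx's `semantic.ts` does a dynamic import wrapped in try/catch and treats a failure as "use transformers.js's default WASM loader"; empty-string stubs slot into the same fallback. A sketch of that pattern, where the specifier and the falsy-check are assumptions since `semantic.ts` itself isn't in this diff:

```ts
// A static `import ... with { type: "file" }` fails at *build* time when the
// relative path is unreachable; a dynamic import fails at *run* time, where
// it can be caught and ignored.
async function resolveWasmPaths(): Promise<{ mjs: string; bin: string } | null> {
  try {
    const mod = await import("./onnx-wasm-paths.js"); // hypothetical specifier
    if (mod.wasmMjsPath && mod.wasmBinPath) {
      return { mjs: mod.wasmMjsPath, bin: mod.wasmBinPath };
    }
    return null; // stubbed (empty-string) exports → default loader
  } catch {
    return null; // module unresolvable → default loader
  }
}
```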
package/scripts/apply-patches.sh
ADDED

@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Apply node_modules patches imperatively. We don't use package.json's
+# `patchedDependencies` field because that field, when present in a published
+# package, breaks `bun install` from a tarball.
+#
+# Each patch is gated by a marker file inside its target so reruns are no-ops.
+
+apply_patch() {
+  local patch="$1" target="$2" marker_name="$3"
+  local marker="$target/$marker_name"
+
+  if [ ! -d "$target" ]; then
+    echo "error: $target not found — run \`bun install\` first" >&2
+    exit 1
+  fi
+  if [ ! -f "$patch" ]; then
+    echo "error: $patch not found" >&2
+    exit 1
+  fi
+  if [ -f "$marker" ]; then
+    echo "patch $patch already applied — skipping"
+    return 0
+  fi
+
+  echo "Applying $patch to $target..."
+  git apply --directory="$target" "$patch"
+  touch "$marker"
+}
+
+# @huggingface/transformers — replace static `import 'onnxruntime-node'` with a
+# stub so `bun build --compile` produces a binary using the WASM backend
+# (onnxruntime-web) instead of onnxruntime-node, whose native bindings can't be
+# bundled into a single-binary distribution.
+apply_patch \
+  "patches/@huggingface%2Ftransformers@4.2.0.patch" \
+  "node_modules/@huggingface/transformers" \
+  ".membot-transformers-patch-applied"
+
+# @evantahler/mcpx — stub `src/search/onnx-wasm-paths.ts` whose static
+# `with { type: "file" }` imports use a relative path that only resolves in
+# mcpx's own repo layout. When mcpx is consumed as an npm dep those paths are
+# unreachable and `bun build --compile` fails at build time. membot never
+# invokes mcpx's semantic search, so the stubbed exports are safe.
+apply_patch \
+  "patches/@evantahler%2Fmcpx@0.21.4.patch" \
+  "node_modules/@evantahler/mcpx" \
+  ".membot-mcpx-patch-applied"
package/src/ingest/ingest.ts
CHANGED

@@ -21,13 +21,14 @@ export interface IngestInput {
   refresh_frequency?: string;
   fetcher_hint?: string;
   change_note?: string;
+  force?: boolean;
 }
 
 export interface IngestEntryResult {
   source_path: string;
   logical_path: string;
   version_id: string | null;
-  status: "ok" | "failed";
+  status: "ok" | "unchanged" | "failed";
   error?: string;
   mime_type: string | null;
   size_bytes: number;
@@ -39,6 +40,7 @@ export interface IngestResult {
   ingested: IngestEntryResult[];
   total: number;
   ok: number;
+  unchanged: number;
   failed: number;
 }
 
@@ -57,14 +59,15 @@ export async function ingest(input: IngestInput, ctx: AppContext): Promise<Inges
   });
 
   const refreshSec = parseDuration(input.refresh_frequency);
+  const force = input.force === true;
 
   if (resolved.kind === "inline") {
     return ingestInline(resolved.text, input, ctx, refreshSec);
   }
   if (resolved.kind === "url") {
-    return ingestUrl(resolved.url, input, ctx, refreshSec);
+    return ingestUrl(resolved.url, input, ctx, refreshSec, force);
   }
-  return ingestLocalFiles(resolved, input, ctx, refreshSec);
+  return ingestLocalFiles(resolved, input, ctx, refreshSec, force);
 }
 
 /** Ingest a single inline blob (source_type='inline'). */
@@ -119,6 +122,7 @@ async function ingestUrl(
   input: IngestInput,
   ctx: AppContext,
   refreshSec: number | null,
+  force: boolean,
 ): Promise<IngestResult> {
   const mcpxAdapter = ctx.mcpx
     ? {
@@ -151,6 +155,15 @@ async function ingestUrl(
   result.fetcher = fetched.fetcher;
   result.source_sha256 = fetched.sha256;
 
+  if (!force) {
+    const cur = await getCurrent(ctx.db, logicalPath);
+    if (cur && cur.source_sha256 === fetched.sha256) {
+      result.status = "unchanged";
+      result.version_id = cur.version_id;
+      return summarize([result]);
+    }
+  }
+
   const versionId = await pipelineForBytes(ctx, {
     logicalPath,
     bytes: fetched.bytes,
@@ -181,6 +194,7 @@ async function ingestLocalFiles(
   input: IngestInput,
   ctx: AppContext,
   refreshSec: number | null,
+  force: boolean,
 ): Promise<IngestResult> {
   if (resolved.entries.length === 0) {
     throw new HelpfulError({
@@ -213,6 +227,16 @@ async function ingestLocalFiles(
     result.size_bytes = local.sizeBytes;
     result.source_sha256 = local.sha256;
 
+    if (!force) {
+      const cur = await getCurrent(ctx.db, logicalPath);
+      if (cur && cur.source_sha256 === local.sha256) {
+        result.status = "unchanged";
+        result.version_id = cur.version_id;
+        results.push(result);
+        continue;
+      }
+    }
+
    const versionId = await pipelineForBytes(ctx, {
      logicalPath,
      bytes: local.bytes,
@@ -236,7 +260,10 @@ async function ingestLocalFiles(
     }
     results.push(result);
   }
-
+  const okCount = results.filter((r) => r.status === "ok").length;
+  const unchangedCount = results.filter((r) => r.status === "unchanged").length;
+  const suffix = unchangedCount > 0 ? ` (${unchangedCount} unchanged)` : "";
+  ctx.progress.done(`ingested ${okCount}/${results.length}${suffix}`);
 
   return summarize(results);
 }
@@ -428,12 +455,14 @@ export function parseDuration(input: string | null | undefined): number | null {
 /** Roll a list of per-entry results into the top-level summary shape. */
 function summarize(entries: IngestEntryResult[]): IngestResult {
   let ok = 0;
+  let unchanged = 0;
   let failed = 0;
   for (const e of entries) {
     if (e.status === "ok") ok += 1;
+    else if (e.status === "unchanged") unchanged += 1;
     else failed += 1;
   }
-  return { ingested: entries, total: entries.length, ok, failed };
+  return { ingested: entries, total: entries.length, ok, unchanged, failed };
 }
 
 function errorMessage(err: unknown): string {
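The net effect on a directory ingest is that each entry carries its own status and the batch keeps going; `summarize` then counts the three buckets independently. A trimmed, standalone rerun of that rollup (types reduced to the one field used):

```ts
import { strict as assert } from "node:assert";

type Status = "ok" | "unchanged" | "failed";

// Same counting logic as summarize() in the hunk above, minus the other
// IngestResult fields.
function rollup(entries: { status: Status }[]) {
  let ok = 0, unchanged = 0, failed = 0;
  for (const e of entries) {
    if (e.status === "ok") ok += 1;
    else if (e.status === "unchanged") unchanged += 1;
    else failed += 1;
  }
  return { total: entries.length, ok, unchanged, failed };
}

assert.deepEqual(
  rollup([{ status: "ok" }, { status: "unchanged" }, { status: "unchanged" }, { status: "failed" }]),
  { total: 4, ok: 1, unchanged: 2, failed: 1 }, // failures don't abort the rest
);
```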
package/src/ingest/source-resolver.ts
CHANGED

@@ -43,10 +43,12 @@ export async function resolveSource(source: string, options: ResolveOptions = {}
   }
 
   const followSymlinks = options.followSymlinks !== false;
-  const
-    .
-
-
+  const userIncludes = options.include
+    ? options.include
+        .split(",")
+        .map((g) => g.trim())
+        .filter(Boolean)
+    : [];
   const excludeMatchers = [
     ...DEFAULT_EXCLUDES,
     ...(options.exclude ?? "")
@@ -57,9 +59,14 @@ export async function resolveSource(source: string, options: ResolveOptions = {}
 
   if (isGlob(source)) {
     const base = globBase(source);
+    const remainder = globRemainder(source);
     try {
       const realBase = await realpath(base);
-
+      // Source glob acts as a hard filter; user includes (if any) further
+      // narrow the result via AND. Pass them as a separate matcher so the
+      // two sets aren't picomatch-OR'd together.
+      const extraIncludes = userIncludes.length > 0 ? [userIncludes] : [];
+      return walk(realBase, [remainder], excludeMatchers, followSymlinks, extraIncludes);
     } catch (err) {
       throw asHelpful(
         err,
@@ -93,7 +100,8 @@ export async function resolveSource(source: string, options: ResolveOptions = {}
 
   if (st.isDirectory()) {
     const realBase = await realpath(abs);
-
+    const dirIncludes = userIncludes.length > 0 ? userIncludes : ["**/*"];
+    return walk(realBase, dirIncludes, excludeMatchers, followSymlinks);
   }
 
   throw new HelpfulError({
@@ -120,22 +128,40 @@ export function globBase(glob: string): string {
   return base.length === 0 || !isAbsolute(base) ? resolve(base || ".") : base;
 }
 
+/**
+ * Take the wildcard portion of a glob — everything from the first segment
+ * containing a wildcard onward. We strip the static prefix so the matcher
+ * runs against entry paths relative to `globBase`. Without this, a glob like
+ * `docs/star-star/star.md` never matches anything under base=`docs/`, since
+ * walk() exposes `sub/file.md` to picomatch, not `docs/sub/file.md`.
+ */
+export function globRemainder(glob: string): string {
+  const parts = glob.split(sep);
+  const wildcardIdx = parts.findIndex((p) => /[*?[\]{}!]/.test(p));
+  if (wildcardIdx === -1) return glob;
+  return parts.slice(wildcardIdx).join(sep);
+}
+
 /**
  * Recursively walk `base`, returning files matched by `includes` and not
  * matched by `excludes`. Both globsets match against the entry's path
  * relative to `base`. Symlinks are followed when `followSymlinks` is true,
- * with cycles detected via a realpath cache.
+ * with cycles detected via a realpath cache. `extraIncludeSets` is a list
+ * of additional include groups, each ANDed onto the primary `includes` —
+ * use it when two filters must both match (e.g. source glob + --include).
  */
 async function walk(
   base: string,
   includes: string[],
   excludes: string[],
   followSymlinks: boolean,
+  extraIncludeSets: string[][] = [],
 ): Promise<ResolvedSource> {
   const seen = new Set<string>();
   const entries: ResolvedLocalEntry[] = [];
 
   const isInclude = picomatch(includes, { dot: false, nocase: false });
+  const extraMatchers = extraIncludeSets.map((set) => picomatch(set, { dot: false, nocase: false }));
   const isExclude = excludes.length ? picomatch(excludes, { dot: false }) : null;
 
   const queue: string[] = [base];
@@ -174,6 +200,7 @@ async function walk(
       const relForMatch = rel.length === 0 ? (cur.split(sep).pop() ?? cur) : rel;
       if (isExclude?.(relForMatch)) continue;
       if (!isInclude(relForMatch)) continue;
+      if (extraMatchers.some((m) => !m(relForMatch))) continue;
       entries.push({ absPath: real, relPath: relForMatch });
     }
 
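Two things in this file are easy to misread. First, `globRemainder` exists because `walk()` matches entries relative to the base directory, so the static prefix of the source glob must go. Second, the extra include set rides in as a separate matcher because picomatch ORs all patterns handed to one matcher, while source glob and `--include` must AND. A standalone check of both, assuming the `picomatch` package (`globRemainder` copied from the hunk above):

```ts
import picomatch from "picomatch";
import { sep } from "node:path";
import { strict as assert } from "node:assert";

function globRemainder(glob: string): string {
  const parts = glob.split(sep);
  const wildcardIdx = parts.findIndex((p) => /[*?[\]{}!]/.test(p));
  return wildcardIdx === -1 ? glob : parts.slice(wildcardIdx).join(sep);
}

// walk() hands picomatch paths relative to base ("sub/file.md"), which the
// full glob "docs/**/*.md" can never match; the remainder can:
assert.equal(picomatch("docs/**/*.md")("sub/file.md"), false);
assert.equal(picomatch(globRemainder("docs/**/*.md"))("sub/file.md"), true);

// One matcher over both pattern sets is an OR; two matchers give the AND:
const oneMatcher = picomatch(["**/*.md", "api/**"]);
assert.equal(oneMatcher("notes.md"), true); // passes on one set alone

const sourceGlob = picomatch(["**/*.md"]);
const includeFilter = picomatch(["api/**"]);
assert.equal(sourceGlob("notes.md") && includeFilter("notes.md"), false);
assert.equal(sourceGlob("api/spec.md") && includeFilter("api/spec.md"), true);
```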
package/src/operations/add.ts
CHANGED

@@ -14,11 +14,16 @@ export const addOperation = defineOperation({
 - a glob pattern (e.g. "docs/**/*.md")
 - a URL (fetched via mcpx if configured, otherwise plain HTTP)
 - "inline:<text>" literal
-PDF, DOCX, HTML, images, and other binaries are converted to markdown — native libraries first, vision/OCR for images, LLM fallback for messy or scanned input. Original bytes are kept in the blobs table; \`membot_read bytes=true\` returns them. Setting \`refresh_frequency\` enables automatic refresh from the daemon. Each ingested file becomes a
+PDF, DOCX, HTML, images, and other binaries are converted to markdown — native libraries first, vision/OCR for images, LLM fallback for messy or scanned input. Original bytes are kept in the blobs table; \`membot_read bytes=true\` returns them. Setting \`refresh_frequency\` enables automatic refresh from the daemon. By default, re-ingesting an unchanged source (same source_sha256 as the current version) is a no-op and reports \`status: "unchanged"\`; pass \`force=true\` to always create a new version. Each newly-ingested file becomes a new version under its own logical_path; existing versions stay queryable via membot_versions. Directory/glob ingests stream one file at a time — partial failures do not abort the rest; the response lists per-entry status.`,
   inputSchema: z.object({
     source: z.string().describe("Local path, directory, glob, URL, or `inline:<text>` literal"),
     logical_path: z.string().optional().describe("Destination logical_path (single source) or prefix (directory/glob)"),
-    include: z
+    include: z
+      .string()
+      .optional()
+      .describe(
+        "Glob include filter (comma-separated for multiple). Defaults to `**/*` for directory sources, or the source pattern itself when source is a glob.",
+      ),
     exclude: z.string().optional().describe("Glob exclude filter (comma-separated for multiple)"),
     follow_symlinks: z
       .boolean()
@@ -30,6 +35,10 @@ PDF, DOCX, HTML, images, and other binaries are converted to markdown — native
       .optional()
       .describe("Free-form hint passed to mcpx tool search (e.g. 'firecrawl', 'github', 'google docs', 'http')"),
     change_note: z.string().optional().describe("Free-text note attached to the new version"),
+    force: z
+      .boolean()
+      .optional()
+      .describe("Re-ingest even when source bytes are unchanged. Default skips and reports `unchanged`."),
   }),
   outputSchema: z.object({
     ingested: z.array(
@@ -37,7 +46,7 @@ PDF, DOCX, HTML, images, and other binaries are converted to markdown — native
         source_path: z.string(),
         logical_path: z.string(),
         version_id: z.string().nullable(),
-        status: z.enum(["ok", "failed"]),
+        status: z.enum(["ok", "unchanged", "failed"]),
         error: z.string().optional(),
         mime_type: z.string().nullable(),
         size_bytes: z.number(),
@@ -47,23 +56,27 @@ PDF, DOCX, HTML, images, and other binaries are converted to markdown — native
     ),
     total: z.number(),
     ok: z.number(),
+    unchanged: z.number(),
    failed: z.number(),
   }),
   cli: {
     positional: ["source"],
-    aliases: { logical_path: "-p", refresh_frequency: "-r", change_note: "-m" },
+    aliases: { logical_path: "-p", refresh_frequency: "-r", change_note: "-m", force: "-f" },
   },
   console_formatter: (result) => {
     const lines = result.ingested.map((e) => {
       if (e.status === "ok") {
         return `${colors.green("✓")} ${colors.cyan(e.logical_path)} ${colors.dim(`(${e.fetcher}, ${e.size_bytes}B)`)}`;
       }
+      if (e.status === "unchanged") {
+        return `${colors.dim("≡")} ${colors.cyan(e.logical_path)} ${colors.dim("(unchanged)")}`;
+      }
       return `${colors.red("✗")} ${e.source_path} ${colors.dim(e.error ?? "")}`;
     });
-    const
-
-
-    return `${lines.join("\n")}\n${
+    const parts: string[] = [colors.green(`added ${result.ok}`)];
+    if (result.unchanged > 0) parts.push(colors.dim(`unchanged ${result.unchanged}`));
+    if (result.failed > 0) parts.push(colors.red(`failed ${result.failed}`));
+    return `${lines.join("\n")}\n${parts.join(", ")}`;
   },
   handler: async (input, ctx) => ingest(input, ctx),
 });
package/scripts/apply-transformers-patch.sh
REMOVED

@@ -1,35 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-# Apply the @huggingface/transformers patch to node_modules so that
-# `bun build --compile` produces a binary using the WASM backend
-# (onnxruntime-web) instead of onnxruntime-node, whose native bindings
-# can't be bundled into a single-binary distribution.
-#
-# We apply the patch imperatively (rather than via package.json
-# `patchedDependencies`) because that field, when present in a
-# published package, breaks `bun install` from a tarball.
-
-PATCH="patches/@huggingface%2Ftransformers@4.2.0.patch"
-TARGET="node_modules/@huggingface/transformers"
-MARKER="$TARGET/.membot-transformers-patch-applied"
-
-if [ ! -d "$TARGET" ]; then
-  echo "error: $TARGET not found — run \`bun install\` first" >&2
-  exit 1
-fi
-
-if [ ! -f "$PATCH" ]; then
-  echo "error: $PATCH not found" >&2
-  exit 1
-fi
-
-if [ -f "$MARKER" ]; then
-  echo "transformers patch already applied — skipping"
-  exit 0
-fi
-
-echo "Applying transformers patch ($PATCH) to $TARGET..."
-git apply --directory="$TARGET" "$PATCH"
-touch "$MARKER"
-echo "Patch applied."