@swarmvaultai/engine 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -25
- package/dist/index.d.ts +73 -1
- package/dist/index.js +666 -65
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
`@swarmvaultai/engine` is the runtime library behind SwarmVault.
|
|
4
4
|
|
|
5
|
-
It
|
|
5
|
+
It exposes the primitives for initializing a workspace, ingesting sources, importing an inbox, compiling a wiki, querying the vault, running lint, serving the graph viewer, watching the inbox, and exposing the vault over MCP.
|
|
6
6
|
|
|
7
7
|
## Who This Is For
|
|
8
8
|
|
|
@@ -10,7 +10,8 @@ Use this package if you want to:
|
|
|
10
10
|
|
|
11
11
|
- build your own interface on top of the SwarmVault runtime
|
|
12
12
|
- integrate vault operations into another Node application
|
|
13
|
-
-
|
|
13
|
+
- embed watch or MCP behavior without shelling out to the CLI
|
|
14
|
+
- customize provider loading or orchestration in code
|
|
14
15
|
|
|
15
16
|
If you only want to use SwarmVault as a tool, install `@swarmvaultai/cli` instead.
|
|
16
17
|
|
|
@@ -19,48 +20,54 @@ If you only want to use SwarmVault as a tool, install `@swarmvaultai/cli` instea
|
|
|
19
20
|
```ts
|
|
20
21
|
import {
|
|
21
22
|
compileVault,
|
|
22
|
-
|
|
23
|
+
createMcpServer,
|
|
23
24
|
defaultVaultConfig,
|
|
25
|
+
importInbox,
|
|
24
26
|
ingestInput,
|
|
25
27
|
initVault,
|
|
26
28
|
installAgent,
|
|
27
29
|
lintVault,
|
|
28
30
|
loadVaultConfig,
|
|
29
31
|
queryVault,
|
|
30
|
-
|
|
32
|
+
searchVault,
|
|
33
|
+
startGraphServer,
|
|
34
|
+
startMcpServer,
|
|
35
|
+
watchVault,
|
|
31
36
|
} from "@swarmvaultai/engine";
|
|
32
37
|
```
|
|
33
38
|
|
|
34
|
-
The engine also exports the main runtime types for providers, graph artifacts, pages, manifests,
|
|
39
|
+
The engine also exports the main runtime types for providers, graph artifacts, pages, manifests, query results, lint findings, and watch records.
|
|
35
40
|
|
|
36
41
|
## Example
|
|
37
42
|
|
|
38
43
|
```ts
|
|
39
|
-
import { compileVault,
|
|
44
|
+
import { compileVault, importInbox, initVault, queryVault, watchVault } from "@swarmvaultai/engine";
|
|
40
45
|
|
|
41
46
|
const rootDir = process.cwd();
|
|
42
47
|
|
|
43
48
|
await initVault(rootDir);
|
|
44
|
-
await
|
|
49
|
+
await importInbox(rootDir);
|
|
45
50
|
await compileVault(rootDir);
|
|
46
51
|
|
|
47
52
|
const result = await queryVault(rootDir, "What changed most recently?", true);
|
|
48
53
|
console.log(result.answer);
|
|
54
|
+
|
|
55
|
+
const watcher = await watchVault(rootDir, { lint: true });
|
|
49
56
|
```
|
|
50
57
|
|
|
51
58
|
## Provider Model
|
|
52
59
|
|
|
53
60
|
The engine supports:
|
|
54
61
|
|
|
62
|
+
- `heuristic`
|
|
55
63
|
- `openai`
|
|
56
|
-
- `ollama`
|
|
57
64
|
- `anthropic`
|
|
58
65
|
- `gemini`
|
|
66
|
+
- `ollama`
|
|
59
67
|
- `openai-compatible`
|
|
60
68
|
- `custom`
|
|
61
|
-
- `heuristic`
|
|
62
69
|
|
|
63
|
-
Providers are
|
|
70
|
+
Providers are capability-driven. Each provider declares support for features such as:
|
|
64
71
|
|
|
65
72
|
- `chat`
|
|
66
73
|
- `structured`
|
|
@@ -70,29 +77,56 @@ Providers are validated through capabilities such as:
|
|
|
70
77
|
- `streaming`
|
|
71
78
|
- `local`
|
|
72
79
|
|
|
73
|
-
This matters because many "OpenAI-compatible"
|
|
80
|
+
This matters because many "OpenAI-compatible" backends only implement part of the OpenAI surface.
|
|
74
81
|
|
|
75
|
-
##
|
|
82
|
+
## Main Engine Surfaces
|
|
76
83
|
|
|
77
|
-
|
|
84
|
+
### Ingest
|
|
78
85
|
|
|
79
|
-
- `
|
|
80
|
-
- `
|
|
81
|
-
|
|
82
|
-
|
|
86
|
+
- `ingestInput(rootDir, input)` ingests a local path or URL
|
|
87
|
+
- `importInbox(rootDir, inputDir?)` recursively imports supported inbox files and browser-clipper style bundles
|
|
88
|
+
|
|
89
|
+
### Compile + Query
|
|
90
|
+
|
|
91
|
+
- `compileVault(rootDir)` writes wiki pages, graph data, and search state
|
|
92
|
+
- `queryVault(rootDir, question, save)` answers against the compiled vault
|
|
93
|
+
- `searchVault(rootDir, query, limit)` searches compiled pages directly
|
|
94
|
+
|
|
95
|
+
### Automation
|
|
83
96
|
|
|
84
|
-
|
|
97
|
+
- `watchVault(rootDir, options)` watches the inbox and appends run records to `state/jobs.ndjson`
|
|
98
|
+
- `lintVault(rootDir)` runs health and anti-drift checks
|
|
85
99
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
- `
|
|
89
|
-
- `
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
100
|
+
### MCP
|
|
101
|
+
|
|
102
|
+
- `createMcpServer(rootDir)` creates an MCP server instance
|
|
103
|
+
- `startMcpServer(rootDir)` runs the MCP server over stdio
|
|
104
|
+
|
|
105
|
+
The MCP surface includes tools for workspace info, page search, page reads, source listing, querying, ingestion, compile, and lint, along with resources for config, graph, manifests, and page content.
|
|
106
|
+
|
|
107
|
+
## Artifacts
|
|
108
|
+
|
|
109
|
+
Running the engine produces a local workspace with these main areas:
|
|
110
|
+
|
|
111
|
+
- `inbox/`: capture staging area for markdown bundles and imported files
|
|
112
|
+
- `raw/sources/`: immutable source copies
|
|
113
|
+
- `raw/assets/`: copied attachments referenced by ingested markdown bundles
|
|
114
|
+
- `wiki/`: generated markdown pages and saved outputs
|
|
115
|
+
- `state/manifests/`: source manifests
|
|
116
|
+
- `state/extracts/`: extracted text
|
|
117
|
+
- `state/analyses/`: model analysis output
|
|
118
|
+
- `state/graph.json`: compiled graph
|
|
119
|
+
- `state/search.sqlite`: full-text index
|
|
120
|
+
- `state/jobs.ndjson`: watch-mode automation logs
|
|
93
121
|
|
|
94
122
|
## Notes
|
|
95
123
|
|
|
96
124
|
- The engine expects Node `>=24`
|
|
97
125
|
- The local search layer currently uses the built-in `node:sqlite` module, which may emit an experimental warning in Node 24
|
|
98
126
|
- The viewer source lives in the companion `@swarmvaultai/viewer` package, and the built assets are bundled into the engine package for CLI installs
|
|
127
|
+
|
|
128
|
+
## Links
|
|
129
|
+
|
|
130
|
+
- Website: https://www.swarmvault.ai
|
|
131
|
+
- Docs: https://www.swarmvault.ai/docs
|
|
132
|
+
- GitHub: https://github.com/swarmclawai/swarmvault
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
+
import { Readable, Writable } from 'node:stream';
|
|
3
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
4
|
|
|
3
5
|
declare const providerCapabilitySchema: z.ZodEnum<{
|
|
4
6
|
responses: "responses";
|
|
@@ -66,6 +68,7 @@ interface VaultConfig {
|
|
|
66
68
|
wikiDir: string;
|
|
67
69
|
stateDir: string;
|
|
68
70
|
agentDir: string;
|
|
71
|
+
inboxDir: string;
|
|
69
72
|
};
|
|
70
73
|
providers: Record<string, ProviderConfig>;
|
|
71
74
|
tasks: {
|
|
@@ -82,9 +85,12 @@ interface VaultConfig {
|
|
|
82
85
|
interface ResolvedPaths {
|
|
83
86
|
rootDir: string;
|
|
84
87
|
rawDir: string;
|
|
88
|
+
rawSourcesDir: string;
|
|
89
|
+
rawAssetsDir: string;
|
|
85
90
|
wikiDir: string;
|
|
86
91
|
stateDir: string;
|
|
87
92
|
agentDir: string;
|
|
93
|
+
inboxDir: string;
|
|
88
94
|
manifestsDir: string;
|
|
89
95
|
extractsDir: string;
|
|
90
96
|
analysesDir: string;
|
|
@@ -92,8 +98,14 @@ interface ResolvedPaths {
|
|
|
92
98
|
graphPath: string;
|
|
93
99
|
searchDbPath: string;
|
|
94
100
|
compileStatePath: string;
|
|
101
|
+
jobsLogPath: string;
|
|
95
102
|
configPath: string;
|
|
96
103
|
}
|
|
104
|
+
interface SourceAttachment {
|
|
105
|
+
path: string;
|
|
106
|
+
mimeType: string;
|
|
107
|
+
originalPath?: string;
|
|
108
|
+
}
|
|
97
109
|
interface SourceManifest {
|
|
98
110
|
sourceId: string;
|
|
99
111
|
title: string;
|
|
@@ -107,6 +119,7 @@ interface SourceManifest {
|
|
|
107
119
|
contentHash: string;
|
|
108
120
|
createdAt: string;
|
|
109
121
|
updatedAt: string;
|
|
122
|
+
attachments?: SourceAttachment[];
|
|
110
123
|
}
|
|
111
124
|
interface AnalyzedTerm {
|
|
112
125
|
id: string;
|
|
@@ -193,6 +206,38 @@ interface LintFinding {
|
|
|
193
206
|
message: string;
|
|
194
207
|
pagePath?: string;
|
|
195
208
|
}
|
|
209
|
+
interface InboxImportSkip {
|
|
210
|
+
path: string;
|
|
211
|
+
reason: string;
|
|
212
|
+
}
|
|
213
|
+
interface InboxImportResult {
|
|
214
|
+
inputDir: string;
|
|
215
|
+
scannedCount: number;
|
|
216
|
+
attachmentCount: number;
|
|
217
|
+
imported: SourceManifest[];
|
|
218
|
+
skipped: InboxImportSkip[];
|
|
219
|
+
}
|
|
220
|
+
interface WatchOptions {
|
|
221
|
+
lint?: boolean;
|
|
222
|
+
debounceMs?: number;
|
|
223
|
+
}
|
|
224
|
+
interface WatchRunRecord {
|
|
225
|
+
startedAt: string;
|
|
226
|
+
finishedAt: string;
|
|
227
|
+
durationMs: number;
|
|
228
|
+
inputDir: string;
|
|
229
|
+
reasons: string[];
|
|
230
|
+
importedCount: number;
|
|
231
|
+
scannedCount: number;
|
|
232
|
+
attachmentCount: number;
|
|
233
|
+
changedPages: string[];
|
|
234
|
+
lintFindingCount?: number;
|
|
235
|
+
success: boolean;
|
|
236
|
+
error?: string;
|
|
237
|
+
}
|
|
238
|
+
interface WatchController {
|
|
239
|
+
close(): Promise<void>;
|
|
240
|
+
}
|
|
196
241
|
|
|
197
242
|
declare function defaultVaultConfig(): VaultConfig;
|
|
198
243
|
declare function resolvePaths(rootDir: string, config?: VaultConfig, configPath?: string): ResolvedPaths;
|
|
@@ -206,12 +251,32 @@ declare function initWorkspace(rootDir: string): Promise<{
|
|
|
206
251
|
}>;
|
|
207
252
|
|
|
208
253
|
declare function ingestInput(rootDir: string, input: string): Promise<SourceManifest>;
|
|
254
|
+
declare function importInbox(rootDir: string, inputDir?: string): Promise<InboxImportResult>;
|
|
209
255
|
declare function listManifests(rootDir: string): Promise<SourceManifest[]>;
|
|
210
256
|
declare function readExtractedText(rootDir: string, manifest: SourceManifest): Promise<string | undefined>;
|
|
211
257
|
|
|
212
258
|
declare function initVault(rootDir: string): Promise<void>;
|
|
213
259
|
declare function compileVault(rootDir: string): Promise<CompileResult>;
|
|
214
260
|
declare function queryVault(rootDir: string, question: string, save?: boolean): Promise<QueryResult>;
|
|
261
|
+
declare function searchVault(rootDir: string, query: string, limit?: number): Promise<SearchResult[]>;
|
|
262
|
+
declare function listPages(rootDir: string): Promise<GraphPage[]>;
|
|
263
|
+
declare function readPage(rootDir: string, relativePath: string): Promise<{
|
|
264
|
+
path: string;
|
|
265
|
+
title: string;
|
|
266
|
+
frontmatter: Record<string, unknown>;
|
|
267
|
+
content: string;
|
|
268
|
+
} | null>;
|
|
269
|
+
declare function getWorkspaceInfo(rootDir: string): Promise<{
|
|
270
|
+
rootDir: string;
|
|
271
|
+
configPath: string;
|
|
272
|
+
rawDir: string;
|
|
273
|
+
wikiDir: string;
|
|
274
|
+
stateDir: string;
|
|
275
|
+
agentDir: string;
|
|
276
|
+
inboxDir: string;
|
|
277
|
+
sourceCount: number;
|
|
278
|
+
pageCount: number;
|
|
279
|
+
}>;
|
|
215
280
|
declare function lintVault(rootDir: string): Promise<LintFinding[]>;
|
|
216
281
|
declare function bootstrapDemo(rootDir: string, input?: string): Promise<{
|
|
217
282
|
manifestId?: string;
|
|
@@ -226,8 +291,15 @@ declare function startGraphServer(rootDir: string, port?: number): Promise<{
|
|
|
226
291
|
close: () => Promise<void>;
|
|
227
292
|
}>;
|
|
228
293
|
|
|
294
|
+
declare function createMcpServer(rootDir: string): Promise<McpServer>;
|
|
295
|
+
declare function startMcpServer(rootDir: string, stdin?: Readable, stdout?: Writable): Promise<{
|
|
296
|
+
close: () => Promise<void>;
|
|
297
|
+
}>;
|
|
298
|
+
|
|
299
|
+
declare function watchVault(rootDir: string, options?: WatchOptions): Promise<WatchController>;
|
|
300
|
+
|
|
229
301
|
declare function createProvider(id: string, config: ProviderConfig, rootDir: string): Promise<ProviderAdapter>;
|
|
230
302
|
declare function getProviderForTask(rootDir: string, task: keyof Awaited<ReturnType<typeof loadVaultConfig>>["config"]["tasks"]): Promise<ProviderAdapter>;
|
|
231
303
|
declare function assertProviderCapability(provider: ProviderAdapter, capability: ProviderCapability): void;
|
|
232
304
|
|
|
233
|
-
export { type AnalyzedTerm, type ClaimStatus, type CompileResult, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GraphArtifact, type GraphEdge, type GraphNode, type GraphPage, type LintFinding, type PageKind, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderType, type QueryResult, type ResolvedPaths, type SearchResult, type SourceAnalysis, type SourceClaim, type SourceManifest, type VaultConfig, assertProviderCapability, bootstrapDemo, compileVault, createProvider, defaultVaultConfig, getProviderForTask, ingestInput, initVault, initWorkspace, installAgent, installConfiguredAgents, lintVault, listManifests, loadVaultConfig, providerCapabilitySchema, providerTypeSchema, queryVault, readExtractedText, resolvePaths, startGraphServer };
|
|
305
|
+
export { type AnalyzedTerm, type ClaimStatus, type CompileResult, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GraphArtifact, type GraphEdge, type GraphNode, type GraphPage, type InboxImportResult, type InboxImportSkip, type LintFinding, type PageKind, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderType, type QueryResult, type ResolvedPaths, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceClaim, type SourceManifest, type VaultConfig, type WatchController, type WatchOptions, type WatchRunRecord, assertProviderCapability, bootstrapDemo, compileVault, createMcpServer, createProvider, defaultVaultConfig, getProviderForTask, getWorkspaceInfo, importInbox, ingestInput, initVault, initWorkspace, installAgent, installConfiguredAgents, lintVault, listManifests, listPages, loadVaultConfig, providerCapabilitySchema, providerTypeSchema, queryVault, readExtractedText, readPage, resolvePaths, searchVault, startGraphServer, startMcpServer, watchVault };
|
package/dist/index.js
CHANGED
|
@@ -36,6 +36,11 @@ async function writeJsonFile(filePath, value) {
|
|
|
36
36
|
await fs.writeFile(filePath, `${JSON.stringify(value, null, 2)}
|
|
37
37
|
`, "utf8");
|
|
38
38
|
}
|
|
39
|
+
async function appendJsonLine(filePath, value) {
|
|
40
|
+
await ensureDir(path.dirname(filePath));
|
|
41
|
+
await fs.appendFile(filePath, `${JSON.stringify(value)}
|
|
42
|
+
`, "utf8");
|
|
43
|
+
}
|
|
39
44
|
async function writeFileIfChanged(filePath, content) {
|
|
40
45
|
await ensureDir(path.dirname(filePath));
|
|
41
46
|
if (await fileExists(filePath)) {
|
|
@@ -88,6 +93,21 @@ function truncate(value, maxLength) {
|
|
|
88
93
|
}
|
|
89
94
|
return `${value.slice(0, maxLength - 3)}...`;
|
|
90
95
|
}
|
|
96
|
+
async function listFilesRecursive(rootDir) {
|
|
97
|
+
const entries = await fs.readdir(rootDir, { withFileTypes: true }).catch(() => []);
|
|
98
|
+
const files = [];
|
|
99
|
+
for (const entry of entries) {
|
|
100
|
+
const absolutePath = path.join(rootDir, entry.name);
|
|
101
|
+
if (entry.isDirectory()) {
|
|
102
|
+
files.push(...await listFilesRecursive(absolutePath));
|
|
103
|
+
continue;
|
|
104
|
+
}
|
|
105
|
+
if (entry.isFile()) {
|
|
106
|
+
files.push(absolutePath);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
return files;
|
|
110
|
+
}
|
|
91
111
|
|
|
92
112
|
// src/types.ts
|
|
93
113
|
import { z } from "zod";
|
|
@@ -130,7 +150,8 @@ var vaultConfigSchema = z2.object({
|
|
|
130
150
|
rawDir: z2.string().min(1),
|
|
131
151
|
wikiDir: z2.string().min(1),
|
|
132
152
|
stateDir: z2.string().min(1),
|
|
133
|
-
agentDir: z2.string().min(1)
|
|
153
|
+
agentDir: z2.string().min(1),
|
|
154
|
+
inboxDir: z2.string().min(1)
|
|
134
155
|
}),
|
|
135
156
|
providers: z2.record(z2.string(), providerConfigSchema),
|
|
136
157
|
tasks: z2.object({
|
|
@@ -150,7 +171,8 @@ function defaultVaultConfig() {
|
|
|
150
171
|
rawDir: "raw",
|
|
151
172
|
wikiDir: "wiki",
|
|
152
173
|
stateDir: "state",
|
|
153
|
-
agentDir: "agent"
|
|
174
|
+
agentDir: "agent",
|
|
175
|
+
inboxDir: "inbox"
|
|
154
176
|
},
|
|
155
177
|
providers: {
|
|
156
178
|
local: {
|
|
@@ -185,15 +207,21 @@ async function findConfigPath(rootDir) {
|
|
|
185
207
|
function resolvePaths(rootDir, config, configPath = path2.join(rootDir, PRIMARY_CONFIG_FILENAME)) {
|
|
186
208
|
const effective = config ?? defaultVaultConfig();
|
|
187
209
|
const rawDir = path2.resolve(rootDir, effective.workspace.rawDir);
|
|
210
|
+
const rawSourcesDir = path2.join(rawDir, "sources");
|
|
211
|
+
const rawAssetsDir = path2.join(rawDir, "assets");
|
|
188
212
|
const wikiDir = path2.resolve(rootDir, effective.workspace.wikiDir);
|
|
189
213
|
const stateDir = path2.resolve(rootDir, effective.workspace.stateDir);
|
|
190
214
|
const agentDir = path2.resolve(rootDir, effective.workspace.agentDir);
|
|
215
|
+
const inboxDir = path2.resolve(rootDir, effective.workspace.inboxDir);
|
|
191
216
|
return {
|
|
192
217
|
rootDir,
|
|
193
218
|
rawDir,
|
|
219
|
+
rawSourcesDir,
|
|
220
|
+
rawAssetsDir,
|
|
194
221
|
wikiDir,
|
|
195
222
|
stateDir,
|
|
196
223
|
agentDir,
|
|
224
|
+
inboxDir,
|
|
197
225
|
manifestsDir: path2.join(stateDir, "manifests"),
|
|
198
226
|
extractsDir: path2.join(stateDir, "extracts"),
|
|
199
227
|
analysesDir: path2.join(stateDir, "analyses"),
|
|
@@ -201,6 +229,7 @@ function resolvePaths(rootDir, config, configPath = path2.join(rootDir, PRIMARY_
|
|
|
201
229
|
graphPath: path2.join(stateDir, "graph.json"),
|
|
202
230
|
searchDbPath: path2.join(stateDir, "search.sqlite"),
|
|
203
231
|
compileStatePath: path2.join(stateDir, "compile-state.json"),
|
|
232
|
+
jobsLogPath: path2.join(stateDir, "jobs.ndjson"),
|
|
204
233
|
configPath
|
|
205
234
|
};
|
|
206
235
|
}
|
|
@@ -222,9 +251,12 @@ async function initWorkspace(rootDir) {
|
|
|
222
251
|
ensureDir(paths.wikiDir),
|
|
223
252
|
ensureDir(paths.stateDir),
|
|
224
253
|
ensureDir(paths.agentDir),
|
|
254
|
+
ensureDir(paths.inboxDir),
|
|
225
255
|
ensureDir(paths.manifestsDir),
|
|
226
256
|
ensureDir(paths.extractsDir),
|
|
227
|
-
ensureDir(paths.analysesDir)
|
|
257
|
+
ensureDir(paths.analysesDir),
|
|
258
|
+
ensureDir(paths.rawSourcesDir),
|
|
259
|
+
ensureDir(paths.rawAssetsDir)
|
|
228
260
|
]);
|
|
229
261
|
if (!await fileExists(configPath)) {
|
|
230
262
|
await writeJsonFile(configPath, config);
|
|
@@ -253,6 +285,10 @@ async function appendLogEntry(rootDir, action, title, lines = []) {
|
|
|
253
285
|
await fs2.writeFile(logPath, `${existing}${entry}
|
|
254
286
|
`, "utf8");
|
|
255
287
|
}
|
|
288
|
+
async function appendWatchRun(rootDir, run) {
|
|
289
|
+
const { paths } = await initWorkspace(rootDir);
|
|
290
|
+
await appendJsonLine(paths.jobsLogPath, run);
|
|
291
|
+
}
|
|
256
292
|
|
|
257
293
|
// src/ingest.ts
|
|
258
294
|
function inferKind(mimeType, filePath) {
|
|
@@ -280,6 +316,50 @@ function titleFromText(fallback, content) {
|
|
|
280
316
|
function guessMimeType(target) {
|
|
281
317
|
return mime.lookup(target) || "application/octet-stream";
|
|
282
318
|
}
|
|
319
|
+
function buildCompositeHash(payloadBytes, attachments = []) {
|
|
320
|
+
if (!attachments.length) {
|
|
321
|
+
return sha256(payloadBytes);
|
|
322
|
+
}
|
|
323
|
+
const attachmentSignature = attachments.map((attachment) => `${attachment.relativePath}:${sha256(attachment.bytes)}`).sort().join("|");
|
|
324
|
+
return sha256(`${sha256(payloadBytes)}|${attachmentSignature}`);
|
|
325
|
+
}
|
|
326
|
+
function sanitizeAssetRelativePath(value) {
|
|
327
|
+
const normalized = path4.posix.normalize(value.replace(/\\/g, "/"));
|
|
328
|
+
const segments = normalized.split("/").filter(Boolean).map((segment) => {
|
|
329
|
+
if (segment === ".") {
|
|
330
|
+
return "";
|
|
331
|
+
}
|
|
332
|
+
if (segment === "..") {
|
|
333
|
+
return "_up";
|
|
334
|
+
}
|
|
335
|
+
return segment;
|
|
336
|
+
}).filter(Boolean);
|
|
337
|
+
return segments.join("/") || "asset";
|
|
338
|
+
}
|
|
339
|
+
function normalizeLocalReference(value) {
|
|
340
|
+
const trimmed = value.trim().replace(/^<|>$/g, "");
|
|
341
|
+
const [withoutTitle] = trimmed.split(/\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/, 1);
|
|
342
|
+
const candidate = withoutTitle.split("#")[0]?.split("?")[0]?.trim();
|
|
343
|
+
if (!candidate) {
|
|
344
|
+
return null;
|
|
345
|
+
}
|
|
346
|
+
const lowered = candidate.toLowerCase();
|
|
347
|
+
if (lowered.startsWith("http://") || lowered.startsWith("https://") || lowered.startsWith("data:") || lowered.startsWith("mailto:") || lowered.startsWith("#") || path4.isAbsolute(candidate)) {
|
|
348
|
+
return null;
|
|
349
|
+
}
|
|
350
|
+
return candidate.replace(/\\/g, "/");
|
|
351
|
+
}
|
|
352
|
+
function extractMarkdownReferences(content) {
|
|
353
|
+
const references = [];
|
|
354
|
+
const linkPattern = /!?\[[^\]]*]\(([^)]+)\)/g;
|
|
355
|
+
for (const match of content.matchAll(linkPattern)) {
|
|
356
|
+
const normalized = normalizeLocalReference(match[1] ?? "");
|
|
357
|
+
if (normalized) {
|
|
358
|
+
references.push(normalized);
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
return references;
|
|
362
|
+
}
|
|
283
363
|
async function convertHtmlToMarkdown(html, url) {
|
|
284
364
|
const dom = new JSDOM(html, { url });
|
|
285
365
|
const article = new Readability(dom.window.document).parse();
|
|
@@ -304,82 +384,268 @@ async function readManifestByHash(manifestsDir, contentHash) {
|
|
|
304
384
|
}
|
|
305
385
|
return null;
|
|
306
386
|
}
|
|
307
|
-
async function
|
|
308
|
-
|
|
309
|
-
await ensureDir(
|
|
387
|
+
async function persistPreparedInput(rootDir, prepared, paths) {
|
|
388
|
+
await ensureDir(paths.rawSourcesDir);
|
|
389
|
+
await ensureDir(paths.rawAssetsDir);
|
|
310
390
|
await ensureDir(paths.manifestsDir);
|
|
311
391
|
await ensureDir(paths.extractsDir);
|
|
312
|
-
const
|
|
313
|
-
const
|
|
314
|
-
let title = path4.basename(input);
|
|
315
|
-
let mimeType = "application/octet-stream";
|
|
316
|
-
let storedExtension = ".bin";
|
|
317
|
-
let payloadBytes;
|
|
318
|
-
let extractedTextPath;
|
|
319
|
-
let sourceKind = "binary";
|
|
320
|
-
if (isUrl) {
|
|
321
|
-
const response = await fetch(input);
|
|
322
|
-
if (!response.ok) {
|
|
323
|
-
throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
|
|
324
|
-
}
|
|
325
|
-
const arrayBuffer = await response.arrayBuffer();
|
|
326
|
-
payloadBytes = Buffer.from(arrayBuffer);
|
|
327
|
-
mimeType = response.headers.get("content-type")?.split(";")[0]?.trim() || guessMimeType(input);
|
|
328
|
-
sourceKind = inferKind(mimeType, input);
|
|
329
|
-
if (sourceKind === "html" || mimeType.startsWith("text/html")) {
|
|
330
|
-
const html = payloadBytes.toString("utf8");
|
|
331
|
-
const converted = await convertHtmlToMarkdown(html, input);
|
|
332
|
-
title = converted.title;
|
|
333
|
-
payloadBytes = Buffer.from(converted.markdown, "utf8");
|
|
334
|
-
mimeType = "text/markdown";
|
|
335
|
-
sourceKind = "markdown";
|
|
336
|
-
storedExtension = ".md";
|
|
337
|
-
} else {
|
|
338
|
-
title = new URL(input).hostname + new URL(input).pathname;
|
|
339
|
-
const extension = path4.extname(new URL(input).pathname) || (mime.extension(mimeType) ? `.${mime.extension(mimeType)}` : ".bin");
|
|
340
|
-
storedExtension = extension;
|
|
341
|
-
}
|
|
342
|
-
} else {
|
|
343
|
-
const absoluteInput = path4.resolve(rootDir, input);
|
|
344
|
-
payloadBytes = await fs3.readFile(absoluteInput);
|
|
345
|
-
mimeType = guessMimeType(absoluteInput);
|
|
346
|
-
sourceKind = inferKind(mimeType, absoluteInput);
|
|
347
|
-
storedExtension = path4.extname(absoluteInput) || `.${mime.extension(mimeType) || "bin"}`;
|
|
348
|
-
if (sourceKind === "markdown" || sourceKind === "text") {
|
|
349
|
-
title = titleFromText(path4.basename(absoluteInput, path4.extname(absoluteInput)), payloadBytes.toString("utf8"));
|
|
350
|
-
} else {
|
|
351
|
-
title = path4.basename(absoluteInput, path4.extname(absoluteInput));
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
|
-
const contentHash = sha256(payloadBytes);
|
|
392
|
+
const attachments = prepared.attachments ?? [];
|
|
393
|
+
const contentHash = prepared.contentHash ?? buildCompositeHash(prepared.payloadBytes, attachments);
|
|
355
394
|
const existing = await readManifestByHash(paths.manifestsDir, contentHash);
|
|
356
395
|
if (existing) {
|
|
357
|
-
return existing;
|
|
396
|
+
return { manifest: existing, isNew: false };
|
|
358
397
|
}
|
|
359
|
-
const
|
|
360
|
-
const
|
|
361
|
-
|
|
362
|
-
|
|
398
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
399
|
+
const sourceId = `${slugify(prepared.title)}-${contentHash.slice(0, 8)}`;
|
|
400
|
+
const storedPath = path4.join(paths.rawSourcesDir, `${sourceId}${prepared.storedExtension}`);
|
|
401
|
+
await fs3.writeFile(storedPath, prepared.payloadBytes);
|
|
402
|
+
let extractedTextPath;
|
|
403
|
+
if (prepared.extractedText) {
|
|
363
404
|
extractedTextPath = path4.join(paths.extractsDir, `${sourceId}.md`);
|
|
364
|
-
await fs3.writeFile(extractedTextPath,
|
|
405
|
+
await fs3.writeFile(extractedTextPath, prepared.extractedText, "utf8");
|
|
406
|
+
}
|
|
407
|
+
const manifestAttachments = [];
|
|
408
|
+
for (const attachment of attachments) {
|
|
409
|
+
const absoluteAttachmentPath = path4.join(paths.rawAssetsDir, sourceId, attachment.relativePath);
|
|
410
|
+
await ensureDir(path4.dirname(absoluteAttachmentPath));
|
|
411
|
+
await fs3.writeFile(absoluteAttachmentPath, attachment.bytes);
|
|
412
|
+
manifestAttachments.push({
|
|
413
|
+
path: toPosix(path4.relative(rootDir, absoluteAttachmentPath)),
|
|
414
|
+
mimeType: attachment.mimeType,
|
|
415
|
+
originalPath: attachment.originalPath
|
|
416
|
+
});
|
|
365
417
|
}
|
|
366
418
|
const manifest = {
|
|
367
419
|
sourceId,
|
|
368
|
-
title,
|
|
369
|
-
originType:
|
|
370
|
-
sourceKind,
|
|
371
|
-
originalPath:
|
|
372
|
-
url:
|
|
420
|
+
title: prepared.title,
|
|
421
|
+
originType: prepared.originType,
|
|
422
|
+
sourceKind: prepared.sourceKind,
|
|
423
|
+
originalPath: prepared.originalPath,
|
|
424
|
+
url: prepared.url,
|
|
373
425
|
storedPath: toPosix(path4.relative(rootDir, storedPath)),
|
|
374
426
|
extractedTextPath: extractedTextPath ? toPosix(path4.relative(rootDir, extractedTextPath)) : void 0,
|
|
375
|
-
mimeType,
|
|
427
|
+
mimeType: prepared.mimeType,
|
|
376
428
|
contentHash,
|
|
377
429
|
createdAt: now,
|
|
378
|
-
updatedAt: now
|
|
430
|
+
updatedAt: now,
|
|
431
|
+
attachments: manifestAttachments.length ? manifestAttachments : void 0
|
|
379
432
|
};
|
|
380
433
|
await writeJsonFile(path4.join(paths.manifestsDir, `${sourceId}.json`), manifest);
|
|
381
|
-
await appendLogEntry(rootDir, "ingest", title, [
|
|
382
|
-
|
|
434
|
+
await appendLogEntry(rootDir, "ingest", prepared.title, [
|
|
435
|
+
`source_id=${sourceId}`,
|
|
436
|
+
`kind=${prepared.sourceKind}`,
|
|
437
|
+
`attachments=${manifestAttachments.length}`
|
|
438
|
+
]);
|
|
439
|
+
return { manifest, isNew: true };
|
|
440
|
+
}
|
|
441
|
+
async function prepareFileInput(rootDir, absoluteInput) {
|
|
442
|
+
const payloadBytes = await fs3.readFile(absoluteInput);
|
|
443
|
+
const mimeType = guessMimeType(absoluteInput);
|
|
444
|
+
const sourceKind = inferKind(mimeType, absoluteInput);
|
|
445
|
+
const storedExtension = path4.extname(absoluteInput) || `.${mime.extension(mimeType) || "bin"}`;
|
|
446
|
+
let title;
|
|
447
|
+
let extractedText;
|
|
448
|
+
if (sourceKind === "markdown" || sourceKind === "text") {
|
|
449
|
+
extractedText = payloadBytes.toString("utf8");
|
|
450
|
+
title = titleFromText(path4.basename(absoluteInput, path4.extname(absoluteInput)), extractedText);
|
|
451
|
+
} else {
|
|
452
|
+
title = path4.basename(absoluteInput, path4.extname(absoluteInput));
|
|
453
|
+
}
|
|
454
|
+
return {
|
|
455
|
+
title,
|
|
456
|
+
originType: "file",
|
|
457
|
+
sourceKind,
|
|
458
|
+
originalPath: toPosix(absoluteInput),
|
|
459
|
+
mimeType,
|
|
460
|
+
storedExtension,
|
|
461
|
+
payloadBytes,
|
|
462
|
+
extractedText
|
|
463
|
+
};
|
|
464
|
+
}
|
|
465
|
+
async function prepareUrlInput(input) {
|
|
466
|
+
const response = await fetch(input);
|
|
467
|
+
if (!response.ok) {
|
|
468
|
+
throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
|
|
469
|
+
}
|
|
470
|
+
let payloadBytes = Buffer.from(await response.arrayBuffer());
|
|
471
|
+
let mimeType = response.headers.get("content-type")?.split(";")[0]?.trim() || guessMimeType(input);
|
|
472
|
+
let sourceKind = inferKind(mimeType, input);
|
|
473
|
+
let storedExtension = ".bin";
|
|
474
|
+
let title = new URL(input).hostname + new URL(input).pathname;
|
|
475
|
+
let extractedText;
|
|
476
|
+
if (sourceKind === "html" || mimeType.startsWith("text/html")) {
|
|
477
|
+
const html = payloadBytes.toString("utf8");
|
|
478
|
+
const converted = await convertHtmlToMarkdown(html, input);
|
|
479
|
+
title = converted.title;
|
|
480
|
+
extractedText = converted.markdown;
|
|
481
|
+
payloadBytes = Buffer.from(converted.markdown, "utf8");
|
|
482
|
+
mimeType = "text/markdown";
|
|
483
|
+
sourceKind = "markdown";
|
|
484
|
+
storedExtension = ".md";
|
|
485
|
+
} else {
|
|
486
|
+
const extension = path4.extname(new URL(input).pathname);
|
|
487
|
+
storedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
|
|
488
|
+
if (sourceKind === "markdown" || sourceKind === "text") {
|
|
489
|
+
extractedText = payloadBytes.toString("utf8");
|
|
490
|
+
title = titleFromText(title || new URL(input).hostname, extractedText);
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
return {
|
|
494
|
+
title,
|
|
495
|
+
originType: "url",
|
|
496
|
+
sourceKind,
|
|
497
|
+
url: input,
|
|
498
|
+
mimeType,
|
|
499
|
+
storedExtension,
|
|
500
|
+
payloadBytes,
|
|
501
|
+
extractedText
|
|
502
|
+
};
|
|
503
|
+
}
|
|
504
|
+
async function collectInboxAttachmentRefs(inputDir, files) {
|
|
505
|
+
const refsBySource = /* @__PURE__ */ new Map();
|
|
506
|
+
for (const absolutePath of files) {
|
|
507
|
+
const mimeType = guessMimeType(absolutePath);
|
|
508
|
+
const sourceKind = inferKind(mimeType, absolutePath);
|
|
509
|
+
if (sourceKind !== "markdown") {
|
|
510
|
+
continue;
|
|
511
|
+
}
|
|
512
|
+
const content = await fs3.readFile(absolutePath, "utf8");
|
|
513
|
+
const refs = extractMarkdownReferences(content);
|
|
514
|
+
if (!refs.length) {
|
|
515
|
+
continue;
|
|
516
|
+
}
|
|
517
|
+
const sourceRefs = [];
|
|
518
|
+
for (const ref of refs) {
|
|
519
|
+
const resolved = path4.resolve(path4.dirname(absolutePath), ref);
|
|
520
|
+
if (!resolved.startsWith(inputDir) || !await fileExists(resolved)) {
|
|
521
|
+
continue;
|
|
522
|
+
}
|
|
523
|
+
sourceRefs.push({
|
|
524
|
+
absolutePath: resolved,
|
|
525
|
+
relativeRef: ref
|
|
526
|
+
});
|
|
527
|
+
}
|
|
528
|
+
if (sourceRefs.length) {
|
|
529
|
+
refsBySource.set(
|
|
530
|
+
absolutePath,
|
|
531
|
+
sourceRefs.filter(
|
|
532
|
+
(ref, index, items) => index === items.findIndex((candidate) => candidate.absolutePath === ref.absolutePath && candidate.relativeRef === ref.relativeRef)
|
|
533
|
+
)
|
|
534
|
+
);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
return refsBySource;
|
|
538
|
+
}
|
|
539
|
+
/**
 * Rewrite markdown link/image targets according to a replacement map.
 *
 * @param {string} content - Markdown text to rewrite.
 * @param {Map<string, string>} replacements - Keys are normalized local refs, values are new targets.
 * @returns {string} Content with matching link targets swapped; untouched otherwise.
 */
function rewriteMarkdownReferences(content, replacements) {
  // Matches inline links and images: optional "!", bracketed label, "(target)".
  const linkPattern = /(!?\[[^\]]*]\()([^)]+)(\))/g;
  const rewriteOne = (fullMatch, prefix, target, suffix) => {
    const normalized = normalizeLocalReference(target);
    if (!normalized) {
      // Not a local reference (e.g. an absolute URL) — leave it alone.
      return fullMatch;
    }
    const replacement = replacements.get(normalized);
    if (replacement === undefined || replacement === "") {
      return fullMatch;
    }
    return `${prefix}${replacement}${suffix}`;
  };
  return content.replace(linkPattern, rewriteOne);
}
|
|
552
|
+
/**
 * Prepare a markdown inbox file (plus its referenced attachments) for ingestion.
 *
 * Reads the markdown and each attachment, derives a stable source id from a
 * composite hash of the bytes, and rewrites local references so they point at
 * the vault's per-source asset directory (`../assets/<sourceId>/…`).
 *
 * @param {string} absolutePath - Absolute path of the markdown file.
 * @param {Array<{absolutePath: string, relativeRef: string}>} attachmentRefs - Refs found by collectInboxAttachmentRefs.
 * @returns {Promise<object>} Prepared-input descriptor consumed by persistPreparedInput.
 */
async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
  const originalBytes = await fs3.readFile(absolutePath);
  const originalText = originalBytes.toString("utf8");
  // Fall back to the filename (sans extension) when the text yields no title.
  const title = titleFromText(path4.basename(absolutePath, path4.extname(absolutePath)), originalText);
  const attachments = [];
  for (const attachmentRef of attachmentRefs) {
    const bytes = await fs3.readFile(attachmentRef.absolutePath);
    attachments.push({
      relativePath: sanitizeAssetRelativePath(attachmentRef.relativeRef),
      mimeType: guessMimeType(attachmentRef.absolutePath),
      originalPath: toPosix(attachmentRef.absolutePath),
      bytes
    });
  }
  // Hash covers the ORIGINAL markdown bytes plus all attachment bytes, so the
  // sourceId is stable regardless of the link rewriting performed below.
  const contentHash = buildCompositeHash(originalBytes, attachments);
  const sourceId = `${slugify(title)}-${contentHash.slice(0, 8)}`;
  const replacements = new Map(
    attachmentRefs.map((attachmentRef) => [
      // Normalize Windows separators so keys match normalizeLocalReference output.
      attachmentRef.relativeRef.replace(/\\/g, "/"),
      `../assets/${sourceId}/${sanitizeAssetRelativePath(attachmentRef.relativeRef)}`
    ])
  );
  const rewrittenText = rewriteMarkdownReferences(originalText, replacements);
  return {
    title,
    originType: "file",
    sourceKind: "markdown",
    originalPath: toPosix(absolutePath),
    mimeType: "text/markdown",
    // Keep the file's own extension when present; default to ".md".
    storedExtension: path4.extname(absolutePath) || ".md",
    // The stored payload is the REWRITTEN text, not the original bytes.
    payloadBytes: Buffer.from(rewrittenText, "utf8"),
    extractedText: rewrittenText,
    attachments,
    contentHash
  };
}
|
|
588
|
+
/**
 * Whether the inbox importer knows how to handle a given source kind.
 *
 * @param {string} sourceKind - Kind inferred from mime type / extension.
 * @returns {boolean} True for kinds the importer can extract content from.
 */
function isSupportedInboxKind(sourceKind) {
  switch (sourceKind) {
    case "markdown":
    case "text":
    case "html":
    case "pdf":
    case "image":
      return true;
    default:
      return false;
  }
}
|
|
591
|
+
/**
 * Ingest a single input (URL or local file path) into the workspace.
 *
 * @param {string} rootDir - Workspace root directory.
 * @param {string} input - An http(s) URL, or a path resolved relative to rootDir.
 * @returns {Promise<object>} The persisted source manifest.
 */
async function ingestInput(rootDir, input) {
  // Make sure the workspace scaffolding exists before writing anything.
  const { paths } = await initWorkspace(rootDir);
  const isUrl = /^https?:\/\//i.test(input);
  let prepared;
  if (isUrl) {
    prepared = await prepareUrlInput(input);
  } else {
    prepared = await prepareFileInput(rootDir, path4.resolve(rootDir, input));
  }
  const { manifest } = await persistPreparedInput(rootDir, prepared, paths);
  return manifest;
}
|
|
597
|
+
/**
 * Import every supported file from an inbox directory into the workspace.
 *
 * Pipeline: list files → collect markdown attachment refs → import each file,
 * skipping hidden files, files claimed as attachments, unsupported kinds, and
 * duplicate content → append an audit log entry → return a summary.
 *
 * @param {string} rootDir - Workspace root directory.
 * @param {string} [inputDir] - Inbox directory; defaults to the workspace's configured inbox.
 * @returns {Promise<object>} Summary with imported manifests and skip reasons.
 * @throws {Error} If the effective inbox directory does not exist.
 */
async function importInbox(rootDir, inputDir) {
  const { paths } = await initWorkspace(rootDir);
  const effectiveInputDir = path4.resolve(rootDir, inputDir ?? paths.inboxDir);
  if (!await fileExists(effectiveInputDir)) {
    throw new Error(`Inbox directory not found: ${effectiveInputDir}`);
  }
  // Sort for deterministic import order across runs/platforms.
  const files = (await listFilesRecursive(effectiveInputDir)).sort();
  const refsBySource = await collectInboxAttachmentRefs(effectiveInputDir, files);
  // Files referenced by some markdown source are stored as that source's
  // attachments, not imported as standalone sources.
  const claimedAttachments = new Set(
    [...refsBySource.values()].flatMap((refs) => refs.map((ref) => ref.absolutePath))
  );
  const imported = [];
  const skipped = [];
  let attachmentCount = 0;
  for (const absolutePath of files) {
    const basename = path4.basename(absolutePath);
    // Skip dotfiles (e.g. .DS_Store, editor swap files).
    if (basename.startsWith(".")) {
      skipped.push({ path: toPosix(path4.relative(rootDir, absolutePath)), reason: "hidden_file" });
      continue;
    }
    if (claimedAttachments.has(absolutePath)) {
      skipped.push({ path: toPosix(path4.relative(rootDir, absolutePath)), reason: "referenced_attachment" });
      continue;
    }
    const mimeType = guessMimeType(absolutePath);
    const sourceKind = inferKind(mimeType, absolutePath);
    if (!isSupportedInboxKind(sourceKind)) {
      skipped.push({ path: toPosix(path4.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
      continue;
    }
    // Markdown with local refs goes through the attachment-aware path; everything
    // else (including plain markdown) is prepared as a regular file input.
    const prepared = sourceKind === "markdown" && refsBySource.has(absolutePath) ? await prepareInboxMarkdownInput(absolutePath, refsBySource.get(absolutePath) ?? []) : await prepareFileInput(rootDir, absolutePath);
    const result = await persistPreparedInput(rootDir, prepared, paths);
    // Content-hash dedupe: identical bytes already in the vault are skipped.
    if (!result.isNew) {
      skipped.push({ path: toPosix(path4.relative(rootDir, absolutePath)), reason: "duplicate_content" });
      continue;
    }
    attachmentCount += result.manifest.attachments?.length ?? 0;
    imported.push(result.manifest);
  }
  // Audit trail entry summarizing the whole run.
  await appendLogEntry(rootDir, "inbox_import", toPosix(path4.relative(rootDir, effectiveInputDir)) || ".", [
    `scanned=${files.length}`,
    `imported=${imported.length}`,
    `attachments=${attachmentCount}`,
    `skipped=${skipped.length}`
  ]);
  return {
    inputDir: effectiveInputDir,
    scannedCount: files.length,
    attachmentCount,
    imported,
    skipped
  };
}
|
|
384
650
|
async function listManifests(rootDir) {
|
|
385
651
|
const { paths } = await loadVaultConfig(rootDir);
|
|
@@ -1550,6 +1816,49 @@ ${excerpts.join("\n\n---\n\n")}`
|
|
|
1550
1816
|
await appendLogEntry(rootDir, "query", question, [`citations=${citations.join(",") || "none"}`, `saved=${Boolean(savedTo)}`]);
|
|
1551
1817
|
return { answer, savedTo, citations };
|
|
1552
1818
|
}
|
|
1819
|
+
/**
 * Run a full-text search over compiled wiki pages.
 *
 * @param {string} rootDir - Workspace root directory.
 * @param {string} query - Search query string.
 * @param {number} [limit=5] - Maximum number of results.
 * @returns {Promise<Array>} Search hits from the local index.
 */
async function searchVault(rootDir, query, limit = 5) {
  const { paths } = await loadVaultConfig(rootDir);
  // Build the index lazily: compile the vault on first search if it is missing.
  const indexPresent = await fileExists(paths.searchDbPath);
  if (!indexPresent) {
    await compileVault(rootDir);
  }
  return searchPages(paths.searchDbPath, query, limit);
}
|
|
1826
|
+
/**
 * List all compiled wiki pages recorded in the graph artifact.
 *
 * @param {string} rootDir - Workspace root directory.
 * @returns {Promise<Array>} Page descriptors, or [] when no graph exists yet.
 */
async function listPages(rootDir) {
  const { paths } = await loadVaultConfig(rootDir);
  // Missing graph artifact (never compiled) yields an empty page list.
  const graph = (await readJsonFile(paths.graphPath)) ?? {};
  return graph.pages ?? [];
}
|
|
1831
|
+
/**
 * Read a compiled wiki page by path relative to the wiki directory.
 *
 * @param {string} rootDir - Workspace root directory.
 * @param {string} relativePath - Page path relative to wiki/ (e.g. "sources/example.md").
 * @returns {Promise<{path: string, title: string, frontmatter: object, content: string} | null>}
 *   Parsed page, or null when the path escapes wiki/ or the file does not exist.
 */
async function readPage(rootDir, relativePath) {
  const { paths } = await loadVaultConfig(rootDir);
  const wikiRoot = path9.resolve(paths.wikiDir);
  const absolutePath = path9.resolve(wikiRoot, relativePath);
  // Containment via path.relative: rejects ".." traversal AND sibling
  // directories that merely share the prefix (e.g. "wiki-private" next to
  // "wiki"), which a bare startsWith(wikiDir) check would wrongly accept.
  const relToWiki = path9.relative(wikiRoot, absolutePath);
  const escapesWiki = relToWiki.startsWith("..") || path9.isAbsolute(relToWiki);
  if (escapesWiki || !await fileExists(absolutePath)) {
    return null;
  }
  const raw = await fs7.readFile(absolutePath, "utf8");
  const parsed = matter3(raw);
  return {
    path: relativePath,
    // Prefer the frontmatter title; fall back to the filename sans extension.
    title: typeof parsed.data.title === "string" ? parsed.data.title : path9.basename(relativePath, path9.extname(relativePath)),
    frontmatter: parsed.data,
    content: parsed.content
  };
}
|
|
1846
|
+
/**
 * Summarize the workspace: resolved paths plus source and page counts.
 *
 * @param {string} rootDir - Workspace root directory.
 * @returns {Promise<object>} Paths and high-level counts for the workspace.
 */
async function getWorkspaceInfo(rootDir) {
  const { paths } = await loadVaultConfig(rootDir);
  const manifests = await listManifests(rootDir);
  const pages = await listPages(rootDir);
  const { configPath, rawDir, wikiDir, stateDir, agentDir, inboxDir } = paths;
  return {
    rootDir,
    configPath,
    rawDir,
    wikiDir,
    stateDir,
    agentDir,
    inboxDir,
    sourceCount: manifests.length,
    pageCount: pages.length
  };
}
|
|
1553
1862
|
async function lintVault(rootDir) {
|
|
1554
1863
|
const { paths } = await loadVaultConfig(rootDir);
|
|
1555
1864
|
const manifests = await listManifests(rootDir);
|
|
@@ -1667,13 +1976,300 @@ async function startGraphServer(rootDir, port) {
|
|
|
1667
1976
|
}
|
|
1668
1977
|
};
|
|
1669
1978
|
}
|
|
1979
|
+
|
|
1980
|
+
// src/mcp.ts
|
|
1981
|
+
import fs9 from "fs/promises";
|
|
1982
|
+
import path11 from "path";
|
|
1983
|
+
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
1984
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
1985
|
+
import { z as z6 } from "zod";
|
|
1986
|
+
var SERVER_VERSION = "0.1.3";
|
|
1987
|
+
/**
 * Build an MCP server exposing the vault's tools and resources.
 *
 * Tools: workspace_info, search_pages, read_page, list_sources, query_vault,
 * ingest_input, compile_vault, lint_vault.
 * Resources: config, graph, manifests, plus a templated per-page resource.
 *
 * @param {string} rootDir - Workspace root directory all handlers operate on.
 * @returns {Promise<McpServer>} Configured server, not yet connected to a transport.
 */
async function createMcpServer(rootDir) {
  const server = new McpServer({
    name: "swarmvault",
    version: SERVER_VERSION,
    websiteUrl: "https://www.swarmvault.ai"
  });
  // Tool: workspace paths and counts.
  server.registerTool("workspace_info", {
    description: "Return the current SwarmVault workspace paths and high-level counts."
  }, async () => {
    const info = await getWorkspaceInfo(rootDir);
    return asToolText(info);
  });
  // Tool: full-text search (limit capped at 25, defaults to 5).
  server.registerTool("search_pages", {
    description: "Search compiled wiki pages using the local full-text index.",
    inputSchema: {
      query: z6.string().min(1).describe("Search query"),
      limit: z6.number().int().min(1).max(25).optional().describe("Maximum number of results")
    }
  }, async ({ query, limit }) => {
    const results = await searchVault(rootDir, query, limit ?? 5);
    return asToolText(results);
  });
  // Tool: read one wiki page; missing pages become an MCP tool error, not a throw.
  server.registerTool("read_page", {
    description: "Read a generated wiki page by its path relative to wiki/.",
    inputSchema: {
      path: z6.string().min(1).describe("Path relative to wiki/, for example sources/example.md")
    }
  }, async ({ path: relativePath }) => {
    const page = await readPage(rootDir, relativePath);
    if (!page) {
      return asToolError(`Page not found: ${relativePath}`);
    }
    return asToolText(page);
  });
  // Tool: list source manifests, optionally truncated.
  server.registerTool("list_sources", {
    description: "List source manifests in the current workspace.",
    inputSchema: {
      limit: z6.number().int().min(1).max(100).optional().describe("Maximum number of manifests to return")
    }
  }, async ({ limit }) => {
    const manifests = await listManifests(rootDir);
    return asToolText(limit ? manifests.slice(0, limit) : manifests);
  });
  // Tool: question answering over the compiled vault.
  server.registerTool("query_vault", {
    description: "Ask a question against the compiled vault and optionally save the answer.",
    inputSchema: {
      question: z6.string().min(1).describe("Question to ask the vault"),
      save: z6.boolean().optional().describe("Persist the answer to wiki/outputs")
    }
  }, async ({ question, save }) => {
    const result = await queryVault(rootDir, question, save ?? false);
    return asToolText(result);
  });
  // Tool: ingest a file path or URL.
  server.registerTool("ingest_input", {
    description: "Ingest a local file path or URL into the SwarmVault workspace.",
    inputSchema: {
      input: z6.string().min(1).describe("Local path or URL to ingest")
    }
  }, async ({ input }) => {
    const manifest = await ingestInput(rootDir, input);
    return asToolText(manifest);
  });
  // Tool: recompile wiki pages, graph, and search index.
  server.registerTool("compile_vault", {
    description: "Compile source manifests into wiki pages, graph data, and search index."
  }, async () => {
    const result = await compileVault(rootDir);
    return asToolText(result);
  });
  // Tool: vault health / anti-drift checks.
  server.registerTool("lint_vault", {
    description: "Run anti-drift and vault health checks."
  }, async () => {
    const findings = await lintVault(rootDir);
    return asToolText(findings);
  });
  // Resource: the resolved config as JSON.
  server.registerResource("swarmvault-config", "swarmvault://config", {
    title: "SwarmVault Config",
    description: "The resolved SwarmVault config file.",
    mimeType: "application/json"
  }, async () => {
    const { config } = await loadVaultConfig(rootDir);
    return asTextResource("swarmvault://config", JSON.stringify(config, null, 2));
  });
  // Resource: the compiled graph artifact; an error object is returned inline
  // (rather than throwing) when the vault has never been compiled.
  server.registerResource("swarmvault-graph", "swarmvault://graph", {
    title: "SwarmVault Graph",
    description: "The compiled graph artifact for the current workspace.",
    mimeType: "application/json"
  }, async () => {
    const { paths } = await loadVaultConfig(rootDir);
    const graph = await readJsonFile(paths.graphPath);
    return asTextResource(
      "swarmvault://graph",
      JSON.stringify(graph ?? { error: "Graph artifact not found. Run `swarmvault compile` first." }, null, 2)
    );
  });
  // Resource: all source manifests as JSON.
  server.registerResource("swarmvault-manifests", "swarmvault://manifests", {
    title: "SwarmVault Manifests",
    description: "All source manifests in the workspace.",
    mimeType: "application/json"
  }, async () => {
    const manifests = await listManifests(rootDir);
    return asTextResource("swarmvault://manifests", JSON.stringify(manifests, null, 2));
  });
  // Templated resource: one URI per wiki page, with a list callback for discovery.
  server.registerResource(
    "swarmvault-pages",
    new ResourceTemplate("swarmvault://pages/{path}", {
      list: async () => {
        const pages = await listPages(rootDir);
        return {
          resources: pages.map((page) => ({
            // Page paths are URI-encoded so slashes survive inside the template slot.
            uri: `swarmvault://pages/${encodeURIComponent(page.path)}`,
            name: page.title,
            title: page.title,
            description: `Generated ${page.kind} page`,
            mimeType: "text/markdown"
          }))
        };
      }
    }),
    {
      title: "SwarmVault Pages",
      description: "Generated wiki pages exposed as MCP resources.",
      mimeType: "text/markdown"
    },
    async (_uri, variables) => {
      const encodedPath = typeof variables.path === "string" ? variables.path : "";
      const relativePath = decodeURIComponent(encodedPath);
      // readPage doubles as existence + containment validation before the raw read.
      const page = await readPage(rootDir, relativePath);
      if (!page) {
        return asTextResource(`swarmvault://pages/${encodedPath}`, `Page not found: ${relativePath}`);
      }
      const { paths } = await loadVaultConfig(rootDir);
      const absolutePath = path11.resolve(paths.wikiDir, relativePath);
      // Return the raw file (frontmatter included), not the parsed page.
      return asTextResource(`swarmvault://pages/${encodedPath}`, await fs9.readFile(absolutePath, "utf8"));
    }
  );
  return server;
}
|
|
2124
|
+
/**
 * Create an MCP server for the workspace and connect it over stdio.
 *
 * @param {string} rootDir - Workspace root directory.
 * @param {*} stdin - Readable stream for the stdio transport.
 * @param {*} stdout - Writable stream for the stdio transport.
 * @returns {Promise<{close: () => Promise<void>}>} Handle used to shut the server down.
 */
async function startMcpServer(rootDir, stdin, stdout) {
  const server = await createMcpServer(rootDir);
  const transport = new StdioServerTransport(stdin, stdout);
  await server.connect(transport);
  const close = async () => {
    await server.close();
  };
  return { close };
}
|
|
2134
|
+
/**
 * Wrap any JSON-serializable value as a single-text-part MCP tool result.
 *
 * @param {*} value - Value to serialize (pretty-printed, 2-space indent).
 * @returns {{content: Array<{type: string, text: string}>}} MCP tool result payload.
 */
function asToolText(value) {
  const text = JSON.stringify(value, null, 2);
  return { content: [{ type: "text", text }] };
}
|
|
2144
|
+
/**
 * Build an MCP tool error result; failures are flagged with isError rather
 * than thrown, per the MCP tool-result convention.
 *
 * @param {string} message - Human-readable error text.
 * @returns {{isError: true, content: Array<{type: string, text: string}>}} Error payload.
 */
function asToolError(message) {
  return {
    isError: true,
    content: [{ type: "text", text: message }]
  };
}
|
|
2155
|
+
/**
 * Shape a single text payload as an MCP resource-read result.
 *
 * @param {string} uri - Resource URI being answered.
 * @param {string} text - Resource body.
 * @returns {{contents: Array<{uri: string, text: string}>}} Resource result payload.
 */
function asTextResource(uri, text) {
  const entry = { uri, text };
  return { contents: [entry] };
}
|
|
2165
|
+
|
|
2166
|
+
// src/watch.ts
|
|
2167
|
+
import path12 from "path";
|
|
2168
|
+
import chokidar from "chokidar";
|
|
2169
|
+
/**
 * Watch the inbox directory and run import → compile (→ lint) cycles on change.
 *
 * Filesystem events are debounced; at most one cycle runs at a time, and events
 * arriving mid-cycle queue exactly one follow-up run. Each cycle's outcome is
 * recorded via appendWatchRun, success or failure.
 *
 * @param {string} rootDir - Workspace root directory.
 * @param {{debounceMs?: number, lint?: boolean}} [options] - Debounce window (default 900ms) and optional lint pass.
 * @returns {Promise<{close: () => Promise<void>}>} Handle that stops the watcher.
 */
async function watchVault(rootDir, options = {}) {
  const { paths } = await initWorkspace(rootDir);
  const debounceMs = options.debounceMs ?? 900;
  let timer;
  let running = false;
  // True when events arrived that have not yet been consumed by a cycle.
  let pending = false;
  let closed = false;
  // Accumulated human-readable reasons for the next cycle.
  const reasons = /* @__PURE__ */ new Set();
  const watcher = chokidar.watch(paths.inboxDir, {
    ignoreInitial: true,
    // Wait for files to stop growing before emitting events (half the debounce,
    // floored at 250ms), so partially-written files are not imported.
    awaitWriteFinish: {
      stabilityThreshold: Math.max(250, Math.floor(debounceMs / 2)),
      pollInterval: 100
    }
  });
  // Record a reason and (re)arm the debounce timer.
  const schedule = (reason) => {
    if (closed) {
      return;
    }
    reasons.add(reason);
    pending = true;
    if (timer) {
      clearTimeout(timer);
    }
    timer = setTimeout(() => {
      void runCycle();
    }, debounceMs);
  };
  // One import/compile/lint pass; guarded so only one runs at a time.
  const runCycle = async () => {
    if (running || closed || !pending) {
      return;
    }
    pending = false;
    running = true;
    const startedAt = /* @__PURE__ */ new Date();
    // Snapshot and clear reasons so events during the run belong to the next cycle.
    const runReasons = [...reasons];
    reasons.clear();
    let importedCount = 0;
    let scannedCount = 0;
    let attachmentCount = 0;
    let changedPages = [];
    let lintFindingCount;
    let success = true;
    let error;
    try {
      const imported = await importInbox(rootDir, paths.inboxDir);
      importedCount = imported.imported.length;
      scannedCount = imported.scannedCount;
      attachmentCount = imported.attachmentCount;
      const compile = await compileVault(rootDir);
      changedPages = compile.changedPages;
      if (options.lint) {
        const findings = await lintVault(rootDir);
        lintFindingCount = findings.length;
      }
    } catch (caught) {
      // A failed cycle is recorded, not rethrown — the watcher keeps running.
      success = false;
      error = caught instanceof Error ? caught.message : String(caught);
    } finally {
      const finishedAt = /* @__PURE__ */ new Date();
      await appendWatchRun(rootDir, {
        startedAt: startedAt.toISOString(),
        finishedAt: finishedAt.toISOString(),
        durationMs: finishedAt.getTime() - startedAt.getTime(),
        inputDir: paths.inboxDir,
        reasons: runReasons,
        importedCount,
        scannedCount,
        attachmentCount,
        changedPages,
        lintFindingCount,
        success,
        error
      });
      running = false;
      // Events that arrived mid-run trigger one follow-up cycle.
      if (pending && !closed) {
        schedule("queued");
      }
    }
  };
  // All watcher events (including errors) funnel into schedule with a tagged reason.
  watcher.on("add", (filePath) => schedule(`add:${toWatchReason(paths.inboxDir, filePath)}`)).on("change", (filePath) => schedule(`change:${toWatchReason(paths.inboxDir, filePath)}`)).on("unlink", (filePath) => schedule(`unlink:${toWatchReason(paths.inboxDir, filePath)}`)).on("addDir", (dirPath) => schedule(`addDir:${toWatchReason(paths.inboxDir, dirPath)}`)).on("unlinkDir", (dirPath) => schedule(`unlinkDir:${toWatchReason(paths.inboxDir, dirPath)}`)).on("error", (caught) => schedule(`error:${caught instanceof Error ? caught.message : String(caught)}`));
  return {
    close: async () => {
      closed = true;
      if (timer) {
        clearTimeout(timer);
      }
      await watcher.close();
    }
  };
}
|
|
2260
|
+
/**
 * Express a watched path relative to the watch root for event reasons.
 *
 * @param {string} baseDir - The watched root directory.
 * @param {string} targetPath - The path an event fired for.
 * @returns {string} Relative path, or "." when the root itself changed.
 */
function toWatchReason(baseDir, targetPath) {
  const relative = path12.relative(baseDir, targetPath);
  return relative === "" ? "." : relative;
}
|
|
1670
2263
|
export {
|
|
1671
2264
|
assertProviderCapability,
|
|
1672
2265
|
bootstrapDemo,
|
|
1673
2266
|
compileVault,
|
|
2267
|
+
createMcpServer,
|
|
1674
2268
|
createProvider,
|
|
1675
2269
|
defaultVaultConfig,
|
|
1676
2270
|
getProviderForTask,
|
|
2271
|
+
getWorkspaceInfo,
|
|
2272
|
+
importInbox,
|
|
1677
2273
|
ingestInput,
|
|
1678
2274
|
initVault,
|
|
1679
2275
|
initWorkspace,
|
|
@@ -1681,9 +2277,14 @@ export {
|
|
|
1681
2277
|
installConfiguredAgents,
|
|
1682
2278
|
lintVault,
|
|
1683
2279
|
listManifests,
|
|
2280
|
+
listPages,
|
|
1684
2281
|
loadVaultConfig,
|
|
1685
2282
|
queryVault,
|
|
1686
2283
|
readExtractedText,
|
|
2284
|
+
readPage,
|
|
1687
2285
|
resolvePaths,
|
|
1688
|
-
|
|
2286
|
+
searchVault,
|
|
2287
|
+
startGraphServer,
|
|
2288
|
+
startMcpServer,
|
|
2289
|
+
watchVault
|
|
1689
2290
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@swarmvaultai/engine",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Core engine for SwarmVault: ingest, compile, query, lint, and provider abstractions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"type": "git",
|
|
19
19
|
"url": "git+https://github.com/swarmclawai/swarmvault.git"
|
|
20
20
|
},
|
|
21
|
-
"homepage": "https://swarmvault.ai",
|
|
21
|
+
"homepage": "https://www.swarmvault.ai",
|
|
22
22
|
"bugs": {
|
|
23
23
|
"url": "https://github.com/swarmclawai/swarmvault/issues"
|
|
24
24
|
},
|
|
@@ -40,7 +40,9 @@
|
|
|
40
40
|
"node": ">=24.0.0"
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
43
44
|
"@mozilla/readability": "^0.6.0",
|
|
45
|
+
"chokidar": "^4.0.3",
|
|
44
46
|
"gray-matter": "^4.0.3",
|
|
45
47
|
"jsdom": "^27.0.0",
|
|
46
48
|
"mime-types": "^3.0.1",
|