@swarmvaultai/engine 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -25
- package/dist/index.d.ts +87 -2
- package/dist/index.js +896 -141
- package/package.json +4 -2
package/dist/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// src/config.ts
|
|
2
2
|
import path2 from "path";
|
|
3
|
+
import fs2 from "fs/promises";
|
|
3
4
|
import { fileURLToPath } from "url";
|
|
4
5
|
import { z as z2 } from "zod";
|
|
5
6
|
|
|
@@ -36,6 +37,11 @@ async function writeJsonFile(filePath, value) {
|
|
|
36
37
|
await fs.writeFile(filePath, `${JSON.stringify(value, null, 2)}
|
|
37
38
|
`, "utf8");
|
|
38
39
|
}
|
|
40
|
+
async function appendJsonLine(filePath, value) {
|
|
41
|
+
await ensureDir(path.dirname(filePath));
|
|
42
|
+
await fs.appendFile(filePath, `${JSON.stringify(value)}
|
|
43
|
+
`, "utf8");
|
|
44
|
+
}
|
|
39
45
|
async function writeFileIfChanged(filePath, content) {
|
|
40
46
|
await ensureDir(path.dirname(filePath));
|
|
41
47
|
if (await fileExists(filePath)) {
|
|
@@ -88,6 +94,21 @@ function truncate(value, maxLength) {
|
|
|
88
94
|
}
|
|
89
95
|
return `${value.slice(0, maxLength - 3)}...`;
|
|
90
96
|
}
|
|
97
|
+
async function listFilesRecursive(rootDir) {
|
|
98
|
+
const entries = await fs.readdir(rootDir, { withFileTypes: true }).catch(() => []);
|
|
99
|
+
const files = [];
|
|
100
|
+
for (const entry of entries) {
|
|
101
|
+
const absolutePath = path.join(rootDir, entry.name);
|
|
102
|
+
if (entry.isDirectory()) {
|
|
103
|
+
files.push(...await listFilesRecursive(absolutePath));
|
|
104
|
+
continue;
|
|
105
|
+
}
|
|
106
|
+
if (entry.isFile()) {
|
|
107
|
+
files.push(absolutePath);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
return files;
|
|
111
|
+
}
|
|
91
112
|
|
|
92
113
|
// src/types.ts
|
|
93
114
|
import { z } from "zod";
|
|
@@ -114,6 +135,8 @@ var providerTypeSchema = z.enum([
|
|
|
114
135
|
// src/config.ts
|
|
115
136
|
var PRIMARY_CONFIG_FILENAME = "swarmvault.config.json";
|
|
116
137
|
var LEGACY_CONFIG_FILENAME = "vault.config.json";
|
|
138
|
+
var PRIMARY_SCHEMA_FILENAME = "swarmvault.schema.md";
|
|
139
|
+
var LEGACY_SCHEMA_FILENAME = "schema.md";
|
|
117
140
|
var moduleDir = path2.dirname(fileURLToPath(import.meta.url));
|
|
118
141
|
var providerConfigSchema = z2.object({
|
|
119
142
|
type: providerTypeSchema,
|
|
@@ -130,7 +153,8 @@ var vaultConfigSchema = z2.object({
|
|
|
130
153
|
rawDir: z2.string().min(1),
|
|
131
154
|
wikiDir: z2.string().min(1),
|
|
132
155
|
stateDir: z2.string().min(1),
|
|
133
|
-
agentDir: z2.string().min(1)
|
|
156
|
+
agentDir: z2.string().min(1),
|
|
157
|
+
inboxDir: z2.string().min(1)
|
|
134
158
|
}),
|
|
135
159
|
providers: z2.record(z2.string(), providerConfigSchema),
|
|
136
160
|
tasks: z2.object({
|
|
@@ -150,7 +174,8 @@ function defaultVaultConfig() {
|
|
|
150
174
|
rawDir: "raw",
|
|
151
175
|
wikiDir: "wiki",
|
|
152
176
|
stateDir: "state",
|
|
153
|
-
agentDir: "agent"
|
|
177
|
+
agentDir: "agent",
|
|
178
|
+
inboxDir: "inbox"
|
|
154
179
|
},
|
|
155
180
|
providers: {
|
|
156
181
|
local: {
|
|
@@ -171,6 +196,52 @@ function defaultVaultConfig() {
|
|
|
171
196
|
agents: ["codex", "claude", "cursor"]
|
|
172
197
|
};
|
|
173
198
|
}
|
|
199
|
+
function defaultVaultSchema() {
|
|
200
|
+
return [
|
|
201
|
+
"# SwarmVault Schema",
|
|
202
|
+
"",
|
|
203
|
+
"Edit this file to teach SwarmVault how this vault should be organized and maintained.",
|
|
204
|
+
"",
|
|
205
|
+
"## Vault Purpose",
|
|
206
|
+
"",
|
|
207
|
+
"- Describe the domain this vault covers.",
|
|
208
|
+
"- Note the intended audience and the kinds of questions the vault should answer well.",
|
|
209
|
+
"",
|
|
210
|
+
"## Naming Conventions",
|
|
211
|
+
"",
|
|
212
|
+
"- Prefer stable, descriptive page titles.",
|
|
213
|
+
"- Keep concept and entity names specific to the domain.",
|
|
214
|
+
"",
|
|
215
|
+
"## Page Structure Rules",
|
|
216
|
+
"",
|
|
217
|
+
"- Source pages should stay grounded in the original material.",
|
|
218
|
+
"- Concept and entity pages should aggregate source-backed claims instead of inventing new ones.",
|
|
219
|
+
"- Preserve contradictions instead of smoothing them away.",
|
|
220
|
+
"",
|
|
221
|
+
"## Categories",
|
|
222
|
+
"",
|
|
223
|
+
"- List domain-specific concept categories here.",
|
|
224
|
+
"- List important entity types here.",
|
|
225
|
+
"",
|
|
226
|
+
"## Relationship Types",
|
|
227
|
+
"",
|
|
228
|
+
"- Mentions",
|
|
229
|
+
"- Supports",
|
|
230
|
+
"- Contradicts",
|
|
231
|
+
"- Depends on",
|
|
232
|
+
"",
|
|
233
|
+
"## Grounding Rules",
|
|
234
|
+
"",
|
|
235
|
+
"- Prefer raw sources over summaries.",
|
|
236
|
+
"- Cite source ids whenever claims are stated.",
|
|
237
|
+
"- Do not treat the wiki as a source of truth when the raw material disagrees.",
|
|
238
|
+
"",
|
|
239
|
+
"## Exclusions",
|
|
240
|
+
"",
|
|
241
|
+
"- List topics, claims, or page types the compiler should avoid generating.",
|
|
242
|
+
""
|
|
243
|
+
].join("\n");
|
|
244
|
+
}
|
|
174
245
|
async function findConfigPath(rootDir) {
|
|
175
246
|
const primaryPath = path2.join(rootDir, PRIMARY_CONFIG_FILENAME);
|
|
176
247
|
if (await fileExists(primaryPath)) {
|
|
@@ -182,18 +253,36 @@ async function findConfigPath(rootDir) {
|
|
|
182
253
|
}
|
|
183
254
|
return primaryPath;
|
|
184
255
|
}
|
|
185
|
-
function
|
|
256
|
+
async function findSchemaPath(rootDir) {
|
|
257
|
+
const primaryPath = path2.join(rootDir, PRIMARY_SCHEMA_FILENAME);
|
|
258
|
+
if (await fileExists(primaryPath)) {
|
|
259
|
+
return primaryPath;
|
|
260
|
+
}
|
|
261
|
+
const legacyPath = path2.join(rootDir, LEGACY_SCHEMA_FILENAME);
|
|
262
|
+
if (await fileExists(legacyPath)) {
|
|
263
|
+
return legacyPath;
|
|
264
|
+
}
|
|
265
|
+
return primaryPath;
|
|
266
|
+
}
|
|
267
|
+
function resolvePaths(rootDir, config, configPath = path2.join(rootDir, PRIMARY_CONFIG_FILENAME), schemaPath = path2.join(rootDir, PRIMARY_SCHEMA_FILENAME)) {
|
|
186
268
|
const effective = config ?? defaultVaultConfig();
|
|
187
269
|
const rawDir = path2.resolve(rootDir, effective.workspace.rawDir);
|
|
270
|
+
const rawSourcesDir = path2.join(rawDir, "sources");
|
|
271
|
+
const rawAssetsDir = path2.join(rawDir, "assets");
|
|
188
272
|
const wikiDir = path2.resolve(rootDir, effective.workspace.wikiDir);
|
|
189
273
|
const stateDir = path2.resolve(rootDir, effective.workspace.stateDir);
|
|
190
274
|
const agentDir = path2.resolve(rootDir, effective.workspace.agentDir);
|
|
275
|
+
const inboxDir = path2.resolve(rootDir, effective.workspace.inboxDir);
|
|
191
276
|
return {
|
|
192
277
|
rootDir,
|
|
278
|
+
schemaPath,
|
|
193
279
|
rawDir,
|
|
280
|
+
rawSourcesDir,
|
|
281
|
+
rawAssetsDir,
|
|
194
282
|
wikiDir,
|
|
195
283
|
stateDir,
|
|
196
284
|
agentDir,
|
|
285
|
+
inboxDir,
|
|
197
286
|
manifestsDir: path2.join(stateDir, "manifests"),
|
|
198
287
|
extractsDir: path2.join(stateDir, "extracts"),
|
|
199
288
|
analysesDir: path2.join(stateDir, "analyses"),
|
|
@@ -201,39 +290,51 @@ function resolvePaths(rootDir, config, configPath = path2.join(rootDir, PRIMARY_
|
|
|
201
290
|
graphPath: path2.join(stateDir, "graph.json"),
|
|
202
291
|
searchDbPath: path2.join(stateDir, "search.sqlite"),
|
|
203
292
|
compileStatePath: path2.join(stateDir, "compile-state.json"),
|
|
293
|
+
jobsLogPath: path2.join(stateDir, "jobs.ndjson"),
|
|
204
294
|
configPath
|
|
205
295
|
};
|
|
206
296
|
}
|
|
207
297
|
async function loadVaultConfig(rootDir) {
|
|
208
298
|
const configPath = await findConfigPath(rootDir);
|
|
299
|
+
const schemaPath = await findSchemaPath(rootDir);
|
|
209
300
|
const raw = await readJsonFile(configPath);
|
|
210
301
|
const parsed = vaultConfigSchema.parse(raw ?? defaultVaultConfig());
|
|
211
302
|
return {
|
|
212
303
|
config: parsed,
|
|
213
|
-
paths: resolvePaths(rootDir, parsed, configPath)
|
|
304
|
+
paths: resolvePaths(rootDir, parsed, configPath, schemaPath)
|
|
214
305
|
};
|
|
215
306
|
}
|
|
216
307
|
async function initWorkspace(rootDir) {
|
|
217
308
|
const configPath = await findConfigPath(rootDir);
|
|
309
|
+
const schemaPath = await findSchemaPath(rootDir);
|
|
218
310
|
const config = await fileExists(configPath) ? (await loadVaultConfig(rootDir)).config : defaultVaultConfig();
|
|
219
|
-
const paths = resolvePaths(rootDir, config, configPath);
|
|
311
|
+
const paths = resolvePaths(rootDir, config, configPath, schemaPath);
|
|
312
|
+
const primarySchemaPath = path2.join(rootDir, PRIMARY_SCHEMA_FILENAME);
|
|
313
|
+
const legacySchemaPath = path2.join(rootDir, LEGACY_SCHEMA_FILENAME);
|
|
220
314
|
await Promise.all([
|
|
221
315
|
ensureDir(paths.rawDir),
|
|
222
316
|
ensureDir(paths.wikiDir),
|
|
223
317
|
ensureDir(paths.stateDir),
|
|
224
318
|
ensureDir(paths.agentDir),
|
|
319
|
+
ensureDir(paths.inboxDir),
|
|
225
320
|
ensureDir(paths.manifestsDir),
|
|
226
321
|
ensureDir(paths.extractsDir),
|
|
227
|
-
ensureDir(paths.analysesDir)
|
|
322
|
+
ensureDir(paths.analysesDir),
|
|
323
|
+
ensureDir(paths.rawSourcesDir),
|
|
324
|
+
ensureDir(paths.rawAssetsDir)
|
|
228
325
|
]);
|
|
229
326
|
if (!await fileExists(configPath)) {
|
|
230
327
|
await writeJsonFile(configPath, config);
|
|
231
328
|
}
|
|
329
|
+
if (!await fileExists(primarySchemaPath) && !await fileExists(legacySchemaPath)) {
|
|
330
|
+
await ensureDir(path2.dirname(primarySchemaPath));
|
|
331
|
+
await fs2.writeFile(primarySchemaPath, defaultVaultSchema(), "utf8");
|
|
332
|
+
}
|
|
232
333
|
return { config, paths };
|
|
233
334
|
}
|
|
234
335
|
|
|
235
336
|
// src/ingest.ts
|
|
236
|
-
import
|
|
337
|
+
import fs4 from "fs/promises";
|
|
237
338
|
import path4 from "path";
|
|
238
339
|
import { JSDOM } from "jsdom";
|
|
239
340
|
import TurndownService from "turndown";
|
|
@@ -241,7 +342,7 @@ import { Readability } from "@mozilla/readability";
|
|
|
241
342
|
import mime from "mime-types";
|
|
242
343
|
|
|
243
344
|
// src/logs.ts
|
|
244
|
-
import
|
|
345
|
+
import fs3 from "fs/promises";
|
|
245
346
|
import path3 from "path";
|
|
246
347
|
async function appendLogEntry(rootDir, action, title, lines = []) {
|
|
247
348
|
const { paths } = await initWorkspace(rootDir);
|
|
@@ -249,10 +350,14 @@ async function appendLogEntry(rootDir, action, title, lines = []) {
|
|
|
249
350
|
const logPath = path3.join(paths.wikiDir, "log.md");
|
|
250
351
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().slice(0, 19).replace("T", " ");
|
|
251
352
|
const entry = [`## [${timestamp}] ${action} | ${title}`, ...lines.map((line) => `- ${line}`), ""].join("\n");
|
|
252
|
-
const existing = await fileExists(logPath) ? await
|
|
253
|
-
await
|
|
353
|
+
const existing = await fileExists(logPath) ? await fs3.readFile(logPath, "utf8") : "# Log\n\n";
|
|
354
|
+
await fs3.writeFile(logPath, `${existing}${entry}
|
|
254
355
|
`, "utf8");
|
|
255
356
|
}
|
|
357
|
+
async function appendWatchRun(rootDir, run) {
|
|
358
|
+
const { paths } = await initWorkspace(rootDir);
|
|
359
|
+
await appendJsonLine(paths.jobsLogPath, run);
|
|
360
|
+
}
|
|
256
361
|
|
|
257
362
|
// src/ingest.ts
|
|
258
363
|
function inferKind(mimeType, filePath) {
|
|
@@ -280,6 +385,50 @@ function titleFromText(fallback, content) {
|
|
|
280
385
|
function guessMimeType(target) {
|
|
281
386
|
return mime.lookup(target) || "application/octet-stream";
|
|
282
387
|
}
|
|
388
|
+
function buildCompositeHash(payloadBytes, attachments = []) {
|
|
389
|
+
if (!attachments.length) {
|
|
390
|
+
return sha256(payloadBytes);
|
|
391
|
+
}
|
|
392
|
+
const attachmentSignature = attachments.map((attachment) => `${attachment.relativePath}:${sha256(attachment.bytes)}`).sort().join("|");
|
|
393
|
+
return sha256(`${sha256(payloadBytes)}|${attachmentSignature}`);
|
|
394
|
+
}
|
|
395
|
+
function sanitizeAssetRelativePath(value) {
|
|
396
|
+
const normalized = path4.posix.normalize(value.replace(/\\/g, "/"));
|
|
397
|
+
const segments = normalized.split("/").filter(Boolean).map((segment) => {
|
|
398
|
+
if (segment === ".") {
|
|
399
|
+
return "";
|
|
400
|
+
}
|
|
401
|
+
if (segment === "..") {
|
|
402
|
+
return "_up";
|
|
403
|
+
}
|
|
404
|
+
return segment;
|
|
405
|
+
}).filter(Boolean);
|
|
406
|
+
return segments.join("/") || "asset";
|
|
407
|
+
}
|
|
408
|
+
function normalizeLocalReference(value) {
|
|
409
|
+
const trimmed = value.trim().replace(/^<|>$/g, "");
|
|
410
|
+
const [withoutTitle] = trimmed.split(/\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/, 1);
|
|
411
|
+
const candidate = withoutTitle.split("#")[0]?.split("?")[0]?.trim();
|
|
412
|
+
if (!candidate) {
|
|
413
|
+
return null;
|
|
414
|
+
}
|
|
415
|
+
const lowered = candidate.toLowerCase();
|
|
416
|
+
if (lowered.startsWith("http://") || lowered.startsWith("https://") || lowered.startsWith("data:") || lowered.startsWith("mailto:") || lowered.startsWith("#") || path4.isAbsolute(candidate)) {
|
|
417
|
+
return null;
|
|
418
|
+
}
|
|
419
|
+
return candidate.replace(/\\/g, "/");
|
|
420
|
+
}
|
|
421
|
+
function extractMarkdownReferences(content) {
|
|
422
|
+
const references = [];
|
|
423
|
+
const linkPattern = /!?\[[^\]]*]\(([^)]+)\)/g;
|
|
424
|
+
for (const match of content.matchAll(linkPattern)) {
|
|
425
|
+
const normalized = normalizeLocalReference(match[1] ?? "");
|
|
426
|
+
if (normalized) {
|
|
427
|
+
references.push(normalized);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
return references;
|
|
431
|
+
}
|
|
283
432
|
async function convertHtmlToMarkdown(html, url) {
|
|
284
433
|
const dom = new JSDOM(html, { url });
|
|
285
434
|
const article = new Readability(dom.window.document).parse();
|
|
@@ -292,7 +441,7 @@ async function convertHtmlToMarkdown(html, url) {
|
|
|
292
441
|
};
|
|
293
442
|
}
|
|
294
443
|
async function readManifestByHash(manifestsDir, contentHash) {
|
|
295
|
-
const entries = await
|
|
444
|
+
const entries = await fs4.readdir(manifestsDir, { withFileTypes: true }).catch(() => []);
|
|
296
445
|
for (const entry of entries) {
|
|
297
446
|
if (!entry.isFile() || !entry.name.endsWith(".json")) {
|
|
298
447
|
continue;
|
|
@@ -304,89 +453,275 @@ async function readManifestByHash(manifestsDir, contentHash) {
|
|
|
304
453
|
}
|
|
305
454
|
return null;
|
|
306
455
|
}
|
|
307
|
-
async function
|
|
308
|
-
|
|
309
|
-
await ensureDir(
|
|
456
|
+
async function persistPreparedInput(rootDir, prepared, paths) {
|
|
457
|
+
await ensureDir(paths.rawSourcesDir);
|
|
458
|
+
await ensureDir(paths.rawAssetsDir);
|
|
310
459
|
await ensureDir(paths.manifestsDir);
|
|
311
460
|
await ensureDir(paths.extractsDir);
|
|
312
|
-
const
|
|
313
|
-
const
|
|
314
|
-
let title = path4.basename(input);
|
|
315
|
-
let mimeType = "application/octet-stream";
|
|
316
|
-
let storedExtension = ".bin";
|
|
317
|
-
let payloadBytes;
|
|
318
|
-
let extractedTextPath;
|
|
319
|
-
let sourceKind = "binary";
|
|
320
|
-
if (isUrl) {
|
|
321
|
-
const response = await fetch(input);
|
|
322
|
-
if (!response.ok) {
|
|
323
|
-
throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
|
|
324
|
-
}
|
|
325
|
-
const arrayBuffer = await response.arrayBuffer();
|
|
326
|
-
payloadBytes = Buffer.from(arrayBuffer);
|
|
327
|
-
mimeType = response.headers.get("content-type")?.split(";")[0]?.trim() || guessMimeType(input);
|
|
328
|
-
sourceKind = inferKind(mimeType, input);
|
|
329
|
-
if (sourceKind === "html" || mimeType.startsWith("text/html")) {
|
|
330
|
-
const html = payloadBytes.toString("utf8");
|
|
331
|
-
const converted = await convertHtmlToMarkdown(html, input);
|
|
332
|
-
title = converted.title;
|
|
333
|
-
payloadBytes = Buffer.from(converted.markdown, "utf8");
|
|
334
|
-
mimeType = "text/markdown";
|
|
335
|
-
sourceKind = "markdown";
|
|
336
|
-
storedExtension = ".md";
|
|
337
|
-
} else {
|
|
338
|
-
title = new URL(input).hostname + new URL(input).pathname;
|
|
339
|
-
const extension = path4.extname(new URL(input).pathname) || (mime.extension(mimeType) ? `.${mime.extension(mimeType)}` : ".bin");
|
|
340
|
-
storedExtension = extension;
|
|
341
|
-
}
|
|
342
|
-
} else {
|
|
343
|
-
const absoluteInput = path4.resolve(rootDir, input);
|
|
344
|
-
payloadBytes = await fs3.readFile(absoluteInput);
|
|
345
|
-
mimeType = guessMimeType(absoluteInput);
|
|
346
|
-
sourceKind = inferKind(mimeType, absoluteInput);
|
|
347
|
-
storedExtension = path4.extname(absoluteInput) || `.${mime.extension(mimeType) || "bin"}`;
|
|
348
|
-
if (sourceKind === "markdown" || sourceKind === "text") {
|
|
349
|
-
title = titleFromText(path4.basename(absoluteInput, path4.extname(absoluteInput)), payloadBytes.toString("utf8"));
|
|
350
|
-
} else {
|
|
351
|
-
title = path4.basename(absoluteInput, path4.extname(absoluteInput));
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
|
-
const contentHash = sha256(payloadBytes);
|
|
461
|
+
const attachments = prepared.attachments ?? [];
|
|
462
|
+
const contentHash = prepared.contentHash ?? buildCompositeHash(prepared.payloadBytes, attachments);
|
|
355
463
|
const existing = await readManifestByHash(paths.manifestsDir, contentHash);
|
|
356
464
|
if (existing) {
|
|
357
|
-
return existing;
|
|
465
|
+
return { manifest: existing, isNew: false };
|
|
358
466
|
}
|
|
359
|
-
const
|
|
360
|
-
const
|
|
361
|
-
|
|
362
|
-
|
|
467
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
468
|
+
const sourceId = `${slugify(prepared.title)}-${contentHash.slice(0, 8)}`;
|
|
469
|
+
const storedPath = path4.join(paths.rawSourcesDir, `${sourceId}${prepared.storedExtension}`);
|
|
470
|
+
await fs4.writeFile(storedPath, prepared.payloadBytes);
|
|
471
|
+
let extractedTextPath;
|
|
472
|
+
if (prepared.extractedText) {
|
|
363
473
|
extractedTextPath = path4.join(paths.extractsDir, `${sourceId}.md`);
|
|
364
|
-
await
|
|
474
|
+
await fs4.writeFile(extractedTextPath, prepared.extractedText, "utf8");
|
|
475
|
+
}
|
|
476
|
+
const manifestAttachments = [];
|
|
477
|
+
for (const attachment of attachments) {
|
|
478
|
+
const absoluteAttachmentPath = path4.join(paths.rawAssetsDir, sourceId, attachment.relativePath);
|
|
479
|
+
await ensureDir(path4.dirname(absoluteAttachmentPath));
|
|
480
|
+
await fs4.writeFile(absoluteAttachmentPath, attachment.bytes);
|
|
481
|
+
manifestAttachments.push({
|
|
482
|
+
path: toPosix(path4.relative(rootDir, absoluteAttachmentPath)),
|
|
483
|
+
mimeType: attachment.mimeType,
|
|
484
|
+
originalPath: attachment.originalPath
|
|
485
|
+
});
|
|
365
486
|
}
|
|
366
487
|
const manifest = {
|
|
367
488
|
sourceId,
|
|
368
|
-
title,
|
|
369
|
-
originType:
|
|
370
|
-
sourceKind,
|
|
371
|
-
originalPath:
|
|
372
|
-
url:
|
|
489
|
+
title: prepared.title,
|
|
490
|
+
originType: prepared.originType,
|
|
491
|
+
sourceKind: prepared.sourceKind,
|
|
492
|
+
originalPath: prepared.originalPath,
|
|
493
|
+
url: prepared.url,
|
|
373
494
|
storedPath: toPosix(path4.relative(rootDir, storedPath)),
|
|
374
495
|
extractedTextPath: extractedTextPath ? toPosix(path4.relative(rootDir, extractedTextPath)) : void 0,
|
|
375
|
-
mimeType,
|
|
496
|
+
mimeType: prepared.mimeType,
|
|
376
497
|
contentHash,
|
|
377
498
|
createdAt: now,
|
|
378
|
-
updatedAt: now
|
|
499
|
+
updatedAt: now,
|
|
500
|
+
attachments: manifestAttachments.length ? manifestAttachments : void 0
|
|
379
501
|
};
|
|
380
502
|
await writeJsonFile(path4.join(paths.manifestsDir, `${sourceId}.json`), manifest);
|
|
381
|
-
await appendLogEntry(rootDir, "ingest", title, [
|
|
382
|
-
|
|
503
|
+
await appendLogEntry(rootDir, "ingest", prepared.title, [
|
|
504
|
+
`source_id=${sourceId}`,
|
|
505
|
+
`kind=${prepared.sourceKind}`,
|
|
506
|
+
`attachments=${manifestAttachments.length}`
|
|
507
|
+
]);
|
|
508
|
+
return { manifest, isNew: true };
|
|
509
|
+
}
|
|
510
|
+
async function prepareFileInput(rootDir, absoluteInput) {
|
|
511
|
+
const payloadBytes = await fs4.readFile(absoluteInput);
|
|
512
|
+
const mimeType = guessMimeType(absoluteInput);
|
|
513
|
+
const sourceKind = inferKind(mimeType, absoluteInput);
|
|
514
|
+
const storedExtension = path4.extname(absoluteInput) || `.${mime.extension(mimeType) || "bin"}`;
|
|
515
|
+
let title;
|
|
516
|
+
let extractedText;
|
|
517
|
+
if (sourceKind === "markdown" || sourceKind === "text") {
|
|
518
|
+
extractedText = payloadBytes.toString("utf8");
|
|
519
|
+
title = titleFromText(path4.basename(absoluteInput, path4.extname(absoluteInput)), extractedText);
|
|
520
|
+
} else {
|
|
521
|
+
title = path4.basename(absoluteInput, path4.extname(absoluteInput));
|
|
522
|
+
}
|
|
523
|
+
return {
|
|
524
|
+
title,
|
|
525
|
+
originType: "file",
|
|
526
|
+
sourceKind,
|
|
527
|
+
originalPath: toPosix(absoluteInput),
|
|
528
|
+
mimeType,
|
|
529
|
+
storedExtension,
|
|
530
|
+
payloadBytes,
|
|
531
|
+
extractedText
|
|
532
|
+
};
|
|
533
|
+
}
|
|
534
|
+
async function prepareUrlInput(input) {
|
|
535
|
+
const response = await fetch(input);
|
|
536
|
+
if (!response.ok) {
|
|
537
|
+
throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
|
|
538
|
+
}
|
|
539
|
+
let payloadBytes = Buffer.from(await response.arrayBuffer());
|
|
540
|
+
let mimeType = response.headers.get("content-type")?.split(";")[0]?.trim() || guessMimeType(input);
|
|
541
|
+
let sourceKind = inferKind(mimeType, input);
|
|
542
|
+
let storedExtension = ".bin";
|
|
543
|
+
let title = new URL(input).hostname + new URL(input).pathname;
|
|
544
|
+
let extractedText;
|
|
545
|
+
if (sourceKind === "html" || mimeType.startsWith("text/html")) {
|
|
546
|
+
const html = payloadBytes.toString("utf8");
|
|
547
|
+
const converted = await convertHtmlToMarkdown(html, input);
|
|
548
|
+
title = converted.title;
|
|
549
|
+
extractedText = converted.markdown;
|
|
550
|
+
payloadBytes = Buffer.from(converted.markdown, "utf8");
|
|
551
|
+
mimeType = "text/markdown";
|
|
552
|
+
sourceKind = "markdown";
|
|
553
|
+
storedExtension = ".md";
|
|
554
|
+
} else {
|
|
555
|
+
const extension = path4.extname(new URL(input).pathname);
|
|
556
|
+
storedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
|
|
557
|
+
if (sourceKind === "markdown" || sourceKind === "text") {
|
|
558
|
+
extractedText = payloadBytes.toString("utf8");
|
|
559
|
+
title = titleFromText(title || new URL(input).hostname, extractedText);
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
return {
|
|
563
|
+
title,
|
|
564
|
+
originType: "url",
|
|
565
|
+
sourceKind,
|
|
566
|
+
url: input,
|
|
567
|
+
mimeType,
|
|
568
|
+
storedExtension,
|
|
569
|
+
payloadBytes,
|
|
570
|
+
extractedText
|
|
571
|
+
};
|
|
572
|
+
}
|
|
573
|
+
async function collectInboxAttachmentRefs(inputDir, files) {
|
|
574
|
+
const refsBySource = /* @__PURE__ */ new Map();
|
|
575
|
+
for (const absolutePath of files) {
|
|
576
|
+
const mimeType = guessMimeType(absolutePath);
|
|
577
|
+
const sourceKind = inferKind(mimeType, absolutePath);
|
|
578
|
+
if (sourceKind !== "markdown") {
|
|
579
|
+
continue;
|
|
580
|
+
}
|
|
581
|
+
const content = await fs4.readFile(absolutePath, "utf8");
|
|
582
|
+
const refs = extractMarkdownReferences(content);
|
|
583
|
+
if (!refs.length) {
|
|
584
|
+
continue;
|
|
585
|
+
}
|
|
586
|
+
const sourceRefs = [];
|
|
587
|
+
for (const ref of refs) {
|
|
588
|
+
const resolved = path4.resolve(path4.dirname(absolutePath), ref);
|
|
589
|
+
if (!resolved.startsWith(inputDir) || !await fileExists(resolved)) {
|
|
590
|
+
continue;
|
|
591
|
+
}
|
|
592
|
+
sourceRefs.push({
|
|
593
|
+
absolutePath: resolved,
|
|
594
|
+
relativeRef: ref
|
|
595
|
+
});
|
|
596
|
+
}
|
|
597
|
+
if (sourceRefs.length) {
|
|
598
|
+
refsBySource.set(
|
|
599
|
+
absolutePath,
|
|
600
|
+
sourceRefs.filter(
|
|
601
|
+
(ref, index, items) => index === items.findIndex((candidate) => candidate.absolutePath === ref.absolutePath && candidate.relativeRef === ref.relativeRef)
|
|
602
|
+
)
|
|
603
|
+
);
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
return refsBySource;
|
|
607
|
+
}
|
|
608
|
+
function rewriteMarkdownReferences(content, replacements) {
|
|
609
|
+
return content.replace(/(!?\[[^\]]*]\()([^)]+)(\))/g, (fullMatch, prefix, target, suffix) => {
|
|
610
|
+
const normalized = normalizeLocalReference(target);
|
|
611
|
+
if (!normalized) {
|
|
612
|
+
return fullMatch;
|
|
613
|
+
}
|
|
614
|
+
const replacement = replacements.get(normalized);
|
|
615
|
+
if (!replacement) {
|
|
616
|
+
return fullMatch;
|
|
617
|
+
}
|
|
618
|
+
return `${prefix}${replacement}${suffix}`;
|
|
619
|
+
});
|
|
620
|
+
}
|
|
621
|
+
async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
|
|
622
|
+
const originalBytes = await fs4.readFile(absolutePath);
|
|
623
|
+
const originalText = originalBytes.toString("utf8");
|
|
624
|
+
const title = titleFromText(path4.basename(absolutePath, path4.extname(absolutePath)), originalText);
|
|
625
|
+
const attachments = [];
|
|
626
|
+
for (const attachmentRef of attachmentRefs) {
|
|
627
|
+
const bytes = await fs4.readFile(attachmentRef.absolutePath);
|
|
628
|
+
attachments.push({
|
|
629
|
+
relativePath: sanitizeAssetRelativePath(attachmentRef.relativeRef),
|
|
630
|
+
mimeType: guessMimeType(attachmentRef.absolutePath),
|
|
631
|
+
originalPath: toPosix(attachmentRef.absolutePath),
|
|
632
|
+
bytes
|
|
633
|
+
});
|
|
634
|
+
}
|
|
635
|
+
const contentHash = buildCompositeHash(originalBytes, attachments);
|
|
636
|
+
const sourceId = `${slugify(title)}-${contentHash.slice(0, 8)}`;
|
|
637
|
+
const replacements = new Map(
|
|
638
|
+
attachmentRefs.map((attachmentRef) => [
|
|
639
|
+
attachmentRef.relativeRef.replace(/\\/g, "/"),
|
|
640
|
+
`../assets/${sourceId}/${sanitizeAssetRelativePath(attachmentRef.relativeRef)}`
|
|
641
|
+
])
|
|
642
|
+
);
|
|
643
|
+
const rewrittenText = rewriteMarkdownReferences(originalText, replacements);
|
|
644
|
+
return {
|
|
645
|
+
title,
|
|
646
|
+
originType: "file",
|
|
647
|
+
sourceKind: "markdown",
|
|
648
|
+
originalPath: toPosix(absolutePath),
|
|
649
|
+
mimeType: "text/markdown",
|
|
650
|
+
storedExtension: path4.extname(absolutePath) || ".md",
|
|
651
|
+
payloadBytes: Buffer.from(rewrittenText, "utf8"),
|
|
652
|
+
extractedText: rewrittenText,
|
|
653
|
+
attachments,
|
|
654
|
+
contentHash
|
|
655
|
+
};
|
|
656
|
+
}
|
|
657
|
+
function isSupportedInboxKind(sourceKind) {
|
|
658
|
+
return ["markdown", "text", "html", "pdf", "image"].includes(sourceKind);
|
|
659
|
+
}
|
|
660
|
+
async function ingestInput(rootDir, input) {
|
|
661
|
+
const { paths } = await initWorkspace(rootDir);
|
|
662
|
+
const prepared = /^https?:\/\//i.test(input) ? await prepareUrlInput(input) : await prepareFileInput(rootDir, path4.resolve(rootDir, input));
|
|
663
|
+
const result = await persistPreparedInput(rootDir, prepared, paths);
|
|
664
|
+
return result.manifest;
|
|
665
|
+
}
|
|
666
|
+
async function importInbox(rootDir, inputDir) {
|
|
667
|
+
const { paths } = await initWorkspace(rootDir);
|
|
668
|
+
const effectiveInputDir = path4.resolve(rootDir, inputDir ?? paths.inboxDir);
|
|
669
|
+
if (!await fileExists(effectiveInputDir)) {
|
|
670
|
+
throw new Error(`Inbox directory not found: ${effectiveInputDir}`);
|
|
671
|
+
}
|
|
672
|
+
const files = (await listFilesRecursive(effectiveInputDir)).sort();
|
|
673
|
+
const refsBySource = await collectInboxAttachmentRefs(effectiveInputDir, files);
|
|
674
|
+
const claimedAttachments = new Set(
|
|
675
|
+
[...refsBySource.values()].flatMap((refs) => refs.map((ref) => ref.absolutePath))
|
|
676
|
+
);
|
|
677
|
+
const imported = [];
|
|
678
|
+
const skipped = [];
|
|
679
|
+
let attachmentCount = 0;
|
|
680
|
+
for (const absolutePath of files) {
|
|
681
|
+
const basename = path4.basename(absolutePath);
|
|
682
|
+
if (basename.startsWith(".")) {
|
|
683
|
+
skipped.push({ path: toPosix(path4.relative(rootDir, absolutePath)), reason: "hidden_file" });
|
|
684
|
+
continue;
|
|
685
|
+
}
|
|
686
|
+
if (claimedAttachments.has(absolutePath)) {
|
|
687
|
+
skipped.push({ path: toPosix(path4.relative(rootDir, absolutePath)), reason: "referenced_attachment" });
|
|
688
|
+
continue;
|
|
689
|
+
}
|
|
690
|
+
const mimeType = guessMimeType(absolutePath);
|
|
691
|
+
const sourceKind = inferKind(mimeType, absolutePath);
|
|
692
|
+
if (!isSupportedInboxKind(sourceKind)) {
|
|
693
|
+
skipped.push({ path: toPosix(path4.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
|
|
694
|
+
continue;
|
|
695
|
+
}
|
|
696
|
+
const prepared = sourceKind === "markdown" && refsBySource.has(absolutePath) ? await prepareInboxMarkdownInput(absolutePath, refsBySource.get(absolutePath) ?? []) : await prepareFileInput(rootDir, absolutePath);
|
|
697
|
+
const result = await persistPreparedInput(rootDir, prepared, paths);
|
|
698
|
+
if (!result.isNew) {
|
|
699
|
+
skipped.push({ path: toPosix(path4.relative(rootDir, absolutePath)), reason: "duplicate_content" });
|
|
700
|
+
continue;
|
|
701
|
+
}
|
|
702
|
+
attachmentCount += result.manifest.attachments?.length ?? 0;
|
|
703
|
+
imported.push(result.manifest);
|
|
704
|
+
}
|
|
705
|
+
await appendLogEntry(rootDir, "inbox_import", toPosix(path4.relative(rootDir, effectiveInputDir)) || ".", [
|
|
706
|
+
`scanned=${files.length}`,
|
|
707
|
+
`imported=${imported.length}`,
|
|
708
|
+
`attachments=${attachmentCount}`,
|
|
709
|
+
`skipped=${skipped.length}`
|
|
710
|
+
]);
|
|
711
|
+
return {
|
|
712
|
+
inputDir: effectiveInputDir,
|
|
713
|
+
scannedCount: files.length,
|
|
714
|
+
attachmentCount,
|
|
715
|
+
imported,
|
|
716
|
+
skipped
|
|
717
|
+
};
|
|
383
718
|
}
|
|
384
719
|
async function listManifests(rootDir) {
|
|
385
720
|
const { paths } = await loadVaultConfig(rootDir);
|
|
386
721
|
if (!await fileExists(paths.manifestsDir)) {
|
|
387
722
|
return [];
|
|
388
723
|
}
|
|
389
|
-
const entries = await
|
|
724
|
+
const entries = await fs4.readdir(paths.manifestsDir);
|
|
390
725
|
const manifests = await Promise.all(
|
|
391
726
|
entries.filter((entry) => entry.endsWith(".json")).map((entry) => readJsonFile(path4.join(paths.manifestsDir, entry)))
|
|
392
727
|
);
|
|
@@ -400,12 +735,12 @@ async function readExtractedText(rootDir, manifest) {
|
|
|
400
735
|
if (!await fileExists(absolutePath)) {
|
|
401
736
|
return void 0;
|
|
402
737
|
}
|
|
403
|
-
return
|
|
738
|
+
return fs4.readFile(absolutePath, "utf8");
|
|
404
739
|
}
|
|
405
740
|
|
|
406
741
|
// src/vault.ts
|
|
407
|
-
import
|
|
408
|
-
import
|
|
742
|
+
import fs9 from "fs/promises";
|
|
743
|
+
import path10 from "path";
|
|
409
744
|
import matter3 from "gray-matter";
|
|
410
745
|
|
|
411
746
|
// src/analysis.ts
|
|
@@ -492,7 +827,7 @@ function deriveTitle(manifest, text) {
|
|
|
492
827
|
const heading = text.match(/^#\s+(.+)$/m)?.[1]?.trim();
|
|
493
828
|
return heading || manifest.title;
|
|
494
829
|
}
|
|
495
|
-
function heuristicAnalysis(manifest, text) {
|
|
830
|
+
function heuristicAnalysis(manifest, text, schemaHash) {
|
|
496
831
|
const normalized = normalizeWhitespace(text);
|
|
497
832
|
const concepts = extractTopTerms(normalized, 6).map((term) => ({
|
|
498
833
|
id: `concept:${slugify(term)}`,
|
|
@@ -508,6 +843,7 @@ function heuristicAnalysis(manifest, text) {
|
|
|
508
843
|
return {
|
|
509
844
|
sourceId: manifest.sourceId,
|
|
510
845
|
sourceHash: manifest.contentHash,
|
|
846
|
+
schemaHash,
|
|
511
847
|
title: deriveTitle(manifest, text),
|
|
512
848
|
summary: firstSentences(normalized, 3) || truncate(normalized, 280) || `Imported ${manifest.sourceKind} source.`,
|
|
513
849
|
concepts,
|
|
@@ -524,10 +860,19 @@ function heuristicAnalysis(manifest, text) {
|
|
|
524
860
|
producedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
525
861
|
};
|
|
526
862
|
}
|
|
527
|
-
async function providerAnalysis(manifest, text, provider) {
|
|
863
|
+
async function providerAnalysis(manifest, text, provider, schema) {
|
|
528
864
|
const parsed = await provider.generateStructured(
|
|
529
865
|
{
|
|
530
|
-
system:
|
|
866
|
+
system: [
|
|
867
|
+
"You are compiling a durable markdown wiki and graph. Prefer grounded synthesis over creativity.",
|
|
868
|
+
"",
|
|
869
|
+
"Follow the vault schema when choosing titles, categories, relationships, and summaries.",
|
|
870
|
+
"",
|
|
871
|
+
`Vault schema path: ${schema.path}`,
|
|
872
|
+
"",
|
|
873
|
+
"Vault schema instructions:",
|
|
874
|
+
truncate(schema.content, 6e3)
|
|
875
|
+
].join("\n"),
|
|
531
876
|
prompt: `Analyze the following source and return structured JSON.
|
|
532
877
|
|
|
533
878
|
Source title: ${manifest.title}
|
|
@@ -542,6 +887,7 @@ ${truncate(text, 18e3)}`
|
|
|
542
887
|
return {
|
|
543
888
|
sourceId: manifest.sourceId,
|
|
544
889
|
sourceHash: manifest.contentHash,
|
|
890
|
+
schemaHash: schema.hash,
|
|
545
891
|
title: parsed.title,
|
|
546
892
|
summary: parsed.summary,
|
|
547
893
|
concepts: parsed.concepts.map((term) => ({
|
|
@@ -566,10 +912,10 @@ ${truncate(text, 18e3)}`
|
|
|
566
912
|
producedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
567
913
|
};
|
|
568
914
|
}
|
|
569
|
-
async function analyzeSource(manifest, extractedText, provider, paths) {
|
|
915
|
+
async function analyzeSource(manifest, extractedText, provider, paths, schema) {
|
|
570
916
|
const cachePath = path5.join(paths.analysesDir, `${manifest.sourceId}.json`);
|
|
571
917
|
const cached = await readJsonFile(cachePath);
|
|
572
|
-
if (cached && cached.sourceHash === manifest.contentHash) {
|
|
918
|
+
if (cached && cached.sourceHash === manifest.contentHash && cached.schemaHash === schema.hash) {
|
|
573
919
|
return cached;
|
|
574
920
|
}
|
|
575
921
|
const content = normalizeWhitespace(extractedText ?? "");
|
|
@@ -578,6 +924,7 @@ async function analyzeSource(manifest, extractedText, provider, paths) {
|
|
|
578
924
|
analysis = {
|
|
579
925
|
sourceId: manifest.sourceId,
|
|
580
926
|
sourceHash: manifest.contentHash,
|
|
927
|
+
schemaHash: schema.hash,
|
|
581
928
|
title: manifest.title,
|
|
582
929
|
summary: `Imported ${manifest.sourceKind} source. Text extraction is not yet available for this source.`,
|
|
583
930
|
concepts: [],
|
|
@@ -587,12 +934,12 @@ async function analyzeSource(manifest, extractedText, provider, paths) {
|
|
|
587
934
|
producedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
588
935
|
};
|
|
589
936
|
} else if (provider.type === "heuristic") {
|
|
590
|
-
analysis = heuristicAnalysis(manifest, content);
|
|
937
|
+
analysis = heuristicAnalysis(manifest, content, schema.hash);
|
|
591
938
|
} else {
|
|
592
939
|
try {
|
|
593
|
-
analysis = await providerAnalysis(manifest, content, provider);
|
|
940
|
+
analysis = await providerAnalysis(manifest, content, provider, schema);
|
|
594
941
|
} catch {
|
|
595
|
-
analysis = heuristicAnalysis(manifest, content);
|
|
942
|
+
analysis = heuristicAnalysis(manifest, content, schema.hash);
|
|
596
943
|
}
|
|
597
944
|
}
|
|
598
945
|
await writeJsonFile(cachePath, analysis);
|
|
@@ -603,7 +950,7 @@ function analysisSignature(analysis) {
|
|
|
603
950
|
}
|
|
604
951
|
|
|
605
952
|
// src/agents.ts
|
|
606
|
-
import
|
|
953
|
+
import fs5 from "fs/promises";
|
|
607
954
|
import path6 from "path";
|
|
608
955
|
var managedStart = "<!-- swarmvault:managed:start -->";
|
|
609
956
|
var managedEnd = "<!-- swarmvault:managed:end -->";
|
|
@@ -614,6 +961,7 @@ function buildManagedBlock(agent) {
|
|
|
614
961
|
managedStart,
|
|
615
962
|
`# SwarmVault Rules (${agent})`,
|
|
616
963
|
"",
|
|
964
|
+
"- Read `swarmvault.schema.md` before compile or query style work. If only `schema.md` exists, treat it as the legacy schema path.",
|
|
617
965
|
"- Treat `raw/` as immutable source input.",
|
|
618
966
|
"- Treat `wiki/` as generated markdown owned by the agent and compiler workflow.",
|
|
619
967
|
"- Read `wiki/index.md` before broad file searching when answering SwarmVault questions.",
|
|
@@ -629,10 +977,10 @@ function buildManagedBlock(agent) {
|
|
|
629
977
|
return body;
|
|
630
978
|
}
|
|
631
979
|
async function upsertManagedBlock(filePath, block) {
|
|
632
|
-
const existing = await fileExists(filePath) ? await
|
|
980
|
+
const existing = await fileExists(filePath) ? await fs5.readFile(filePath, "utf8") : "";
|
|
633
981
|
if (!existing) {
|
|
634
982
|
await ensureDir(path6.dirname(filePath));
|
|
635
|
-
await
|
|
983
|
+
await fs5.writeFile(filePath, `${block}
|
|
636
984
|
`, "utf8");
|
|
637
985
|
return;
|
|
638
986
|
}
|
|
@@ -640,10 +988,10 @@ async function upsertManagedBlock(filePath, block) {
|
|
|
640
988
|
const endIndex = existing.includes(managedEnd) ? existing.indexOf(managedEnd) : existing.indexOf(legacyManagedEnd);
|
|
641
989
|
if (startIndex !== -1 && endIndex !== -1) {
|
|
642
990
|
const next = `${existing.slice(0, startIndex)}${block}${existing.slice(endIndex + managedEnd.length)}`;
|
|
643
|
-
await
|
|
991
|
+
await fs5.writeFile(filePath, next, "utf8");
|
|
644
992
|
return;
|
|
645
993
|
}
|
|
646
|
-
await
|
|
994
|
+
await fs5.writeFile(filePath, `${existing.trimEnd()}
|
|
647
995
|
|
|
648
996
|
${block}
|
|
649
997
|
`, "utf8");
|
|
@@ -666,7 +1014,7 @@ async function installAgent(rootDir, agent) {
|
|
|
666
1014
|
const rulesDir = path6.join(rootDir, ".cursor", "rules");
|
|
667
1015
|
await ensureDir(rulesDir);
|
|
668
1016
|
const target = path6.join(rulesDir, "swarmvault.mdc");
|
|
669
|
-
await
|
|
1017
|
+
await fs5.writeFile(target, `${block}
|
|
670
1018
|
`, "utf8");
|
|
671
1019
|
return target;
|
|
672
1020
|
}
|
|
@@ -695,7 +1043,7 @@ function pagePathFor(kind, slug) {
|
|
|
695
1043
|
return `${slug}.md`;
|
|
696
1044
|
}
|
|
697
1045
|
}
|
|
698
|
-
function buildSourcePage(manifest, analysis) {
|
|
1046
|
+
function buildSourcePage(manifest, analysis, schemaHash) {
|
|
699
1047
|
const relativePath = pagePathFor("source", manifest.sourceId);
|
|
700
1048
|
const pageId = `source:${manifest.sourceId}`;
|
|
701
1049
|
const nodeIds = [
|
|
@@ -718,6 +1066,7 @@ function buildSourcePage(manifest, analysis) {
|
|
|
718
1066
|
confidence: 0.8,
|
|
719
1067
|
updated_at: analysis.producedAt,
|
|
720
1068
|
backlinks,
|
|
1069
|
+
schema_hash: schemaHash,
|
|
721
1070
|
source_hashes: {
|
|
722
1071
|
[manifest.sourceId]: manifest.contentHash
|
|
723
1072
|
}
|
|
@@ -760,12 +1109,13 @@ function buildSourcePage(manifest, analysis) {
|
|
|
760
1109
|
freshness: "fresh",
|
|
761
1110
|
confidence: 0.8,
|
|
762
1111
|
backlinks,
|
|
1112
|
+
schemaHash,
|
|
763
1113
|
sourceHashes: { [manifest.sourceId]: manifest.contentHash }
|
|
764
1114
|
},
|
|
765
1115
|
content: matter.stringify(body, frontmatter)
|
|
766
1116
|
};
|
|
767
1117
|
}
|
|
768
|
-
function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes) {
|
|
1118
|
+
function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes, schemaHash) {
|
|
769
1119
|
const slug = slugify(name);
|
|
770
1120
|
const relativePath = pagePathFor(kind, slug);
|
|
771
1121
|
const pageId = `${kind}:${slug}`;
|
|
@@ -783,6 +1133,7 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
|
|
|
783
1133
|
confidence: 0.72,
|
|
784
1134
|
updated_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
785
1135
|
backlinks: otherPages,
|
|
1136
|
+
schema_hash: schemaHash,
|
|
786
1137
|
source_hashes: sourceHashes
|
|
787
1138
|
};
|
|
788
1139
|
const body = [
|
|
@@ -814,12 +1165,13 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
|
|
|
814
1165
|
freshness: "fresh",
|
|
815
1166
|
confidence: 0.72,
|
|
816
1167
|
backlinks: otherPages,
|
|
1168
|
+
schemaHash,
|
|
817
1169
|
sourceHashes
|
|
818
1170
|
},
|
|
819
1171
|
content: matter.stringify(body, frontmatter)
|
|
820
1172
|
};
|
|
821
1173
|
}
|
|
822
|
-
function buildIndexPage(pages) {
|
|
1174
|
+
function buildIndexPage(pages, schemaHash) {
|
|
823
1175
|
const sources = pages.filter((page) => page.kind === "source");
|
|
824
1176
|
const concepts = pages.filter((page) => page.kind === "concept");
|
|
825
1177
|
const entities = pages.filter((page) => page.kind === "entity");
|
|
@@ -836,6 +1188,7 @@ function buildIndexPage(pages) {
|
|
|
836
1188
|
"confidence: 1",
|
|
837
1189
|
`updated_at: ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
838
1190
|
"backlinks: []",
|
|
1191
|
+
`schema_hash: ${schemaHash}`,
|
|
839
1192
|
"source_hashes: {}",
|
|
840
1193
|
"---",
|
|
841
1194
|
"",
|
|
@@ -855,16 +1208,32 @@ function buildIndexPage(pages) {
|
|
|
855
1208
|
""
|
|
856
1209
|
].join("\n");
|
|
857
1210
|
}
|
|
858
|
-
function buildSectionIndex(kind, pages) {
|
|
1211
|
+
function buildSectionIndex(kind, pages, schemaHash) {
|
|
859
1212
|
const title = kind.charAt(0).toUpperCase() + kind.slice(1);
|
|
860
|
-
return
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
1213
|
+
return matter.stringify(
|
|
1214
|
+
[
|
|
1215
|
+
`# ${title}`,
|
|
1216
|
+
"",
|
|
1217
|
+
...pages.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`),
|
|
1218
|
+
""
|
|
1219
|
+
].join("\n"),
|
|
1220
|
+
{
|
|
1221
|
+
page_id: `${kind}:index`,
|
|
1222
|
+
kind: "index",
|
|
1223
|
+
title,
|
|
1224
|
+
tags: ["index", kind],
|
|
1225
|
+
source_ids: [],
|
|
1226
|
+
node_ids: [],
|
|
1227
|
+
freshness: "fresh",
|
|
1228
|
+
confidence: 1,
|
|
1229
|
+
updated_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1230
|
+
backlinks: [],
|
|
1231
|
+
schema_hash: schemaHash,
|
|
1232
|
+
source_hashes: {}
|
|
1233
|
+
}
|
|
1234
|
+
);
|
|
866
1235
|
}
|
|
867
|
-
function buildOutputPage(question, answer, citations) {
|
|
1236
|
+
function buildOutputPage(question, answer, citations, schemaHash) {
|
|
868
1237
|
const slug = slugify(question);
|
|
869
1238
|
const pageId = `output:${slug}`;
|
|
870
1239
|
const pathValue = pagePathFor("output", slug);
|
|
@@ -879,6 +1248,7 @@ function buildOutputPage(question, answer, citations) {
|
|
|
879
1248
|
confidence: 0.74,
|
|
880
1249
|
updated_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
881
1250
|
backlinks: citations.map((sourceId) => `source:${sourceId}`),
|
|
1251
|
+
schema_hash: schemaHash,
|
|
882
1252
|
source_hashes: {}
|
|
883
1253
|
};
|
|
884
1254
|
return {
|
|
@@ -892,6 +1262,7 @@ function buildOutputPage(question, answer, citations) {
|
|
|
892
1262
|
freshness: "fresh",
|
|
893
1263
|
confidence: 0.74,
|
|
894
1264
|
backlinks: citations.map((sourceId) => `source:${sourceId}`),
|
|
1265
|
+
schemaHash,
|
|
895
1266
|
sourceHashes: {}
|
|
896
1267
|
},
|
|
897
1268
|
content: matter.stringify(
|
|
@@ -916,7 +1287,7 @@ import { pathToFileURL } from "url";
|
|
|
916
1287
|
import { z as z5 } from "zod";
|
|
917
1288
|
|
|
918
1289
|
// src/providers/base.ts
|
|
919
|
-
import
|
|
1290
|
+
import fs6 from "fs/promises";
|
|
920
1291
|
import { z as z4 } from "zod";
|
|
921
1292
|
var BaseProviderAdapter = class {
|
|
922
1293
|
constructor(id, type, model, capabilities) {
|
|
@@ -945,7 +1316,7 @@ ${schemaDescription}`
|
|
|
945
1316
|
return Promise.all(
|
|
946
1317
|
attachments.map(async (attachment) => ({
|
|
947
1318
|
mimeType: attachment.mimeType,
|
|
948
|
-
base64: await
|
|
1319
|
+
base64: await fs6.readFile(attachment.filePath, "base64")
|
|
949
1320
|
}))
|
|
950
1321
|
);
|
|
951
1322
|
}
|
|
@@ -1259,7 +1630,7 @@ function assertProviderCapability(provider, capability) {
|
|
|
1259
1630
|
}
|
|
1260
1631
|
|
|
1261
1632
|
// src/search.ts
|
|
1262
|
-
import
|
|
1633
|
+
import fs7 from "fs/promises";
|
|
1263
1634
|
import path8 from "path";
|
|
1264
1635
|
import matter2 from "gray-matter";
|
|
1265
1636
|
function getDatabaseSync() {
|
|
@@ -1297,7 +1668,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
1297
1668
|
const insertPage = db.prepare("INSERT INTO pages (id, path, title, body) VALUES (?, ?, ?, ?)");
|
|
1298
1669
|
for (const page of pages) {
|
|
1299
1670
|
const absolutePath = path8.join(wikiDir, page.path);
|
|
1300
|
-
const content = await
|
|
1671
|
+
const content = await fs7.readFile(absolutePath, "utf8");
|
|
1301
1672
|
const parsed = matter2(content);
|
|
1302
1673
|
insertPage.run(page.id, page.path, page.title, parsed.content);
|
|
1303
1674
|
}
|
|
@@ -1335,6 +1706,32 @@ function searchPages(dbPath, query, limit = 5) {
|
|
|
1335
1706
|
}));
|
|
1336
1707
|
}
|
|
1337
1708
|
|
|
1709
|
+
// src/schema.ts
|
|
1710
|
+
import fs8 from "fs/promises";
|
|
1711
|
+
import path9 from "path";
|
|
1712
|
+
async function loadVaultSchema(rootDir) {
|
|
1713
|
+
const { paths } = await loadVaultConfig(rootDir);
|
|
1714
|
+
const schemaPath = paths.schemaPath;
|
|
1715
|
+
const content = await fileExists(schemaPath) ? await fs8.readFile(schemaPath, "utf8") : defaultVaultSchema();
|
|
1716
|
+
const normalized = content.trim() ? content.trim() : defaultVaultSchema().trim();
|
|
1717
|
+
return {
|
|
1718
|
+
path: schemaPath,
|
|
1719
|
+
content: normalized,
|
|
1720
|
+
hash: sha256(normalized),
|
|
1721
|
+
isLegacyPath: path9.basename(schemaPath) === LEGACY_SCHEMA_FILENAME && path9.basename(schemaPath) !== PRIMARY_SCHEMA_FILENAME
|
|
1722
|
+
};
|
|
1723
|
+
}
|
|
1724
|
+
function buildSchemaPrompt(schema, instruction) {
|
|
1725
|
+
return [
|
|
1726
|
+
instruction,
|
|
1727
|
+
"",
|
|
1728
|
+
`Vault schema path: ${schema.path}`,
|
|
1729
|
+
"",
|
|
1730
|
+
"Vault schema instructions:",
|
|
1731
|
+
schema.content
|
|
1732
|
+
].join("\n");
|
|
1733
|
+
}
|
|
1734
|
+
|
|
1338
1735
|
// src/vault.ts
|
|
1339
1736
|
function buildGraph(manifests, analyses, pages) {
|
|
1340
1737
|
const sourceNodes = manifests.map((manifest) => ({
|
|
@@ -1416,8 +1813,8 @@ function buildGraph(manifests, analyses, pages) {
|
|
|
1416
1813
|
pages
|
|
1417
1814
|
};
|
|
1418
1815
|
}
|
|
1419
|
-
async function writePage(
|
|
1420
|
-
const absolutePath =
|
|
1816
|
+
async function writePage(wikiDir, relativePath, content, changedPages) {
|
|
1817
|
+
const absolutePath = path10.resolve(wikiDir, relativePath);
|
|
1421
1818
|
const changed = await writeFileIfChanged(absolutePath, content);
|
|
1422
1819
|
if (changed) {
|
|
1423
1820
|
changedPages.push(relativePath);
|
|
@@ -1448,50 +1845,52 @@ async function initVault(rootDir) {
|
|
|
1448
1845
|
}
|
|
1449
1846
|
async function compileVault(rootDir) {
|
|
1450
1847
|
const { paths } = await initWorkspace(rootDir);
|
|
1848
|
+
const schema = await loadVaultSchema(rootDir);
|
|
1451
1849
|
const provider = await getProviderForTask(rootDir, "compileProvider");
|
|
1452
1850
|
const manifests = await listManifests(rootDir);
|
|
1453
1851
|
const analyses = await Promise.all(
|
|
1454
|
-
manifests.map(async (manifest) => analyzeSource(manifest, await readExtractedText(rootDir, manifest), provider, paths))
|
|
1852
|
+
manifests.map(async (manifest) => analyzeSource(manifest, await readExtractedText(rootDir, manifest), provider, paths, schema))
|
|
1455
1853
|
);
|
|
1456
1854
|
const changedPages = [];
|
|
1457
1855
|
const pages = [];
|
|
1458
1856
|
await Promise.all([
|
|
1459
|
-
ensureDir(
|
|
1460
|
-
ensureDir(
|
|
1461
|
-
ensureDir(
|
|
1462
|
-
ensureDir(
|
|
1857
|
+
ensureDir(path10.join(paths.wikiDir, "sources")),
|
|
1858
|
+
ensureDir(path10.join(paths.wikiDir, "concepts")),
|
|
1859
|
+
ensureDir(path10.join(paths.wikiDir, "entities")),
|
|
1860
|
+
ensureDir(path10.join(paths.wikiDir, "outputs"))
|
|
1463
1861
|
]);
|
|
1464
1862
|
for (const manifest of manifests) {
|
|
1465
1863
|
const analysis = analyses.find((item) => item.sourceId === manifest.sourceId);
|
|
1466
1864
|
if (!analysis) {
|
|
1467
1865
|
continue;
|
|
1468
1866
|
}
|
|
1469
|
-
const sourcePage = buildSourcePage(manifest, analysis);
|
|
1867
|
+
const sourcePage = buildSourcePage(manifest, analysis, schema.hash);
|
|
1470
1868
|
pages.push(sourcePage.page);
|
|
1471
|
-
await writePage(
|
|
1869
|
+
await writePage(paths.wikiDir, sourcePage.page.path, sourcePage.content, changedPages);
|
|
1472
1870
|
}
|
|
1473
1871
|
for (const aggregate of aggregateItems(analyses, "concepts")) {
|
|
1474
|
-
const page = buildAggregatePage("concept", aggregate.name, aggregate.descriptions, aggregate.sourceAnalyses, aggregate.sourceHashes);
|
|
1872
|
+
const page = buildAggregatePage("concept", aggregate.name, aggregate.descriptions, aggregate.sourceAnalyses, aggregate.sourceHashes, schema.hash);
|
|
1475
1873
|
pages.push(page.page);
|
|
1476
|
-
await writePage(
|
|
1874
|
+
await writePage(paths.wikiDir, page.page.path, page.content, changedPages);
|
|
1477
1875
|
}
|
|
1478
1876
|
for (const aggregate of aggregateItems(analyses, "entities")) {
|
|
1479
|
-
const page = buildAggregatePage("entity", aggregate.name, aggregate.descriptions, aggregate.sourceAnalyses, aggregate.sourceHashes);
|
|
1877
|
+
const page = buildAggregatePage("entity", aggregate.name, aggregate.descriptions, aggregate.sourceAnalyses, aggregate.sourceHashes, schema.hash);
|
|
1480
1878
|
pages.push(page.page);
|
|
1481
|
-
await writePage(
|
|
1879
|
+
await writePage(paths.wikiDir, page.page.path, page.content, changedPages);
|
|
1482
1880
|
}
|
|
1483
1881
|
const graph = buildGraph(manifests, analyses, pages);
|
|
1484
1882
|
await writeJsonFile(paths.graphPath, graph);
|
|
1485
1883
|
await writeJsonFile(paths.compileStatePath, {
|
|
1486
1884
|
generatedAt: graph.generatedAt,
|
|
1885
|
+
schemaHash: schema.hash,
|
|
1487
1886
|
analyses: Object.fromEntries(analyses.map((analysis) => [analysis.sourceId, analysisSignature(analysis)]))
|
|
1488
1887
|
});
|
|
1489
|
-
await writePage(
|
|
1490
|
-
await writePage(
|
|
1491
|
-
await writePage(
|
|
1492
|
-
await writePage(
|
|
1888
|
+
await writePage(paths.wikiDir, "index.md", buildIndexPage(pages, schema.hash), changedPages);
|
|
1889
|
+
await writePage(paths.wikiDir, "sources/index.md", buildSectionIndex("sources", pages.filter((page) => page.kind === "source"), schema.hash), changedPages);
|
|
1890
|
+
await writePage(paths.wikiDir, "concepts/index.md", buildSectionIndex("concepts", pages.filter((page) => page.kind === "concept"), schema.hash), changedPages);
|
|
1891
|
+
await writePage(paths.wikiDir, "entities/index.md", buildSectionIndex("entities", pages.filter((page) => page.kind === "entity"), schema.hash), changedPages);
|
|
1493
1892
|
await rebuildSearchIndex(paths.searchDbPath, pages, paths.wikiDir);
|
|
1494
|
-
await appendLogEntry(rootDir, "compile", `Compiled ${manifests.length} source(s)`, [`provider=${provider.id}`, `pages=${pages.length}`]);
|
|
1893
|
+
await appendLogEntry(rootDir, "compile", `Compiled ${manifests.length} source(s)`, [`provider=${provider.id}`, `pages=${pages.length}`, `schema=${schema.hash.slice(0, 12)}`]);
|
|
1495
1894
|
return {
|
|
1496
1895
|
graphPath: paths.graphPath,
|
|
1497
1896
|
pageCount: pages.length,
|
|
@@ -1501,6 +1900,7 @@ async function compileVault(rootDir) {
|
|
|
1501
1900
|
}
|
|
1502
1901
|
async function queryVault(rootDir, question, save = false) {
|
|
1503
1902
|
const { paths } = await loadVaultConfig(rootDir);
|
|
1903
|
+
const schema = await loadVaultSchema(rootDir);
|
|
1504
1904
|
const provider = await getProviderForTask(rootDir, "queryProvider");
|
|
1505
1905
|
if (!await fileExists(paths.searchDbPath)) {
|
|
1506
1906
|
await compileVault(rootDir);
|
|
@@ -1508,8 +1908,8 @@ async function queryVault(rootDir, question, save = false) {
|
|
|
1508
1908
|
const searchResults = searchPages(paths.searchDbPath, question, 5);
|
|
1509
1909
|
const excerpts = await Promise.all(
|
|
1510
1910
|
searchResults.map(async (result) => {
|
|
1511
|
-
const absolutePath =
|
|
1512
|
-
const content = await
|
|
1911
|
+
const absolutePath = path10.join(paths.wikiDir, result.path);
|
|
1912
|
+
const content = await fs9.readFile(absolutePath, "utf8");
|
|
1513
1913
|
const parsed = matter3(content);
|
|
1514
1914
|
return `# ${result.title}
|
|
1515
1915
|
${truncate(normalizeWhitespace(parsed.content), 1200)}`;
|
|
@@ -1527,7 +1927,7 @@ ${truncate(normalizeWhitespace(parsed.content), 1200)}`;
|
|
|
1527
1927
|
].join("\n");
|
|
1528
1928
|
} else {
|
|
1529
1929
|
const response = await provider.generateText({
|
|
1530
|
-
system: "Answer using the provided SwarmVault excerpts. Cite source ids or page titles when possible.",
|
|
1930
|
+
system: buildSchemaPrompt(schema, "Answer using the provided SwarmVault excerpts. Cite source ids or page titles when possible."),
|
|
1531
1931
|
prompt: `Question: ${question}
|
|
1532
1932
|
|
|
1533
1933
|
Context:
|
|
@@ -1541,17 +1941,62 @@ ${excerpts.join("\n\n---\n\n")}`
|
|
|
1541
1941
|
);
|
|
1542
1942
|
let savedTo;
|
|
1543
1943
|
if (save) {
|
|
1544
|
-
const output = buildOutputPage(question, answer, citations);
|
|
1545
|
-
const absolutePath =
|
|
1546
|
-
await ensureDir(
|
|
1547
|
-
await
|
|
1944
|
+
const output = buildOutputPage(question, answer, citations, schema.hash);
|
|
1945
|
+
const absolutePath = path10.join(paths.wikiDir, output.page.path);
|
|
1946
|
+
await ensureDir(path10.dirname(absolutePath));
|
|
1947
|
+
await fs9.writeFile(absolutePath, output.content, "utf8");
|
|
1548
1948
|
savedTo = absolutePath;
|
|
1549
1949
|
}
|
|
1550
1950
|
await appendLogEntry(rootDir, "query", question, [`citations=${citations.join(",") || "none"}`, `saved=${Boolean(savedTo)}`]);
|
|
1551
1951
|
return { answer, savedTo, citations };
|
|
1552
1952
|
}
|
|
1953
|
+
async function searchVault(rootDir, query, limit = 5) {
|
|
1954
|
+
const { paths } = await loadVaultConfig(rootDir);
|
|
1955
|
+
if (!await fileExists(paths.searchDbPath)) {
|
|
1956
|
+
await compileVault(rootDir);
|
|
1957
|
+
}
|
|
1958
|
+
return searchPages(paths.searchDbPath, query, limit);
|
|
1959
|
+
}
|
|
1960
|
+
async function listPages(rootDir) {
|
|
1961
|
+
const { paths } = await loadVaultConfig(rootDir);
|
|
1962
|
+
const graph = await readJsonFile(paths.graphPath);
|
|
1963
|
+
return graph?.pages ?? [];
|
|
1964
|
+
}
|
|
1965
|
+
async function readPage(rootDir, relativePath) {
|
|
1966
|
+
const { paths } = await loadVaultConfig(rootDir);
|
|
1967
|
+
const absolutePath = path10.resolve(paths.wikiDir, relativePath);
|
|
1968
|
+
if (!absolutePath.startsWith(paths.wikiDir) || !await fileExists(absolutePath)) {
|
|
1969
|
+
return null;
|
|
1970
|
+
}
|
|
1971
|
+
const raw = await fs9.readFile(absolutePath, "utf8");
|
|
1972
|
+
const parsed = matter3(raw);
|
|
1973
|
+
return {
|
|
1974
|
+
path: relativePath,
|
|
1975
|
+
title: typeof parsed.data.title === "string" ? parsed.data.title : path10.basename(relativePath, path10.extname(relativePath)),
|
|
1976
|
+
frontmatter: parsed.data,
|
|
1977
|
+
content: parsed.content
|
|
1978
|
+
};
|
|
1979
|
+
}
|
|
1980
|
+
async function getWorkspaceInfo(rootDir) {
|
|
1981
|
+
const { paths } = await loadVaultConfig(rootDir);
|
|
1982
|
+
const manifests = await listManifests(rootDir);
|
|
1983
|
+
const pages = await listPages(rootDir);
|
|
1984
|
+
return {
|
|
1985
|
+
rootDir,
|
|
1986
|
+
configPath: paths.configPath,
|
|
1987
|
+
schemaPath: paths.schemaPath,
|
|
1988
|
+
rawDir: paths.rawDir,
|
|
1989
|
+
wikiDir: paths.wikiDir,
|
|
1990
|
+
stateDir: paths.stateDir,
|
|
1991
|
+
agentDir: paths.agentDir,
|
|
1992
|
+
inboxDir: paths.inboxDir,
|
|
1993
|
+
sourceCount: manifests.length,
|
|
1994
|
+
pageCount: pages.length
|
|
1995
|
+
};
|
|
1996
|
+
}
|
|
1553
1997
|
async function lintVault(rootDir) {
|
|
1554
1998
|
const { paths } = await loadVaultConfig(rootDir);
|
|
1999
|
+
const schema = await loadVaultSchema(rootDir);
|
|
1555
2000
|
const manifests = await listManifests(rootDir);
|
|
1556
2001
|
const graph = await readJsonFile(paths.graphPath);
|
|
1557
2002
|
const findings = [];
|
|
@@ -1566,6 +2011,14 @@ async function lintVault(rootDir) {
|
|
|
1566
2011
|
}
|
|
1567
2012
|
const manifestMap = new Map(manifests.map((manifest) => [manifest.sourceId, manifest]));
|
|
1568
2013
|
for (const page of graph.pages) {
|
|
2014
|
+
if (page.schemaHash !== schema.hash) {
|
|
2015
|
+
findings.push({
|
|
2016
|
+
severity: "warning",
|
|
2017
|
+
code: "stale_page",
|
|
2018
|
+
message: `Page ${page.title} is stale because the vault schema changed.`,
|
|
2019
|
+
pagePath: path10.join(paths.wikiDir, page.path)
|
|
2020
|
+
});
|
|
2021
|
+
}
|
|
1569
2022
|
for (const [sourceId, knownHash] of Object.entries(page.sourceHashes)) {
|
|
1570
2023
|
const manifest = manifestMap.get(sourceId);
|
|
1571
2024
|
if (manifest && manifest.contentHash !== knownHash) {
|
|
@@ -1573,7 +2026,7 @@ async function lintVault(rootDir) {
|
|
|
1573
2026
|
severity: "warning",
|
|
1574
2027
|
code: "stale_page",
|
|
1575
2028
|
message: `Page ${page.title} is stale because source ${sourceId} changed.`,
|
|
1576
|
-
pagePath:
|
|
2029
|
+
pagePath: path10.join(paths.wikiDir, page.path)
|
|
1577
2030
|
});
|
|
1578
2031
|
}
|
|
1579
2032
|
}
|
|
@@ -1582,12 +2035,12 @@ async function lintVault(rootDir) {
|
|
|
1582
2035
|
severity: "info",
|
|
1583
2036
|
code: "orphan_page",
|
|
1584
2037
|
message: `Page ${page.title} has no backlinks.`,
|
|
1585
|
-
pagePath:
|
|
2038
|
+
pagePath: path10.join(paths.wikiDir, page.path)
|
|
1586
2039
|
});
|
|
1587
2040
|
}
|
|
1588
|
-
const absolutePath =
|
|
2041
|
+
const absolutePath = path10.join(paths.wikiDir, page.path);
|
|
1589
2042
|
if (await fileExists(absolutePath)) {
|
|
1590
|
-
const content = await
|
|
2043
|
+
const content = await fs9.readFile(absolutePath, "utf8");
|
|
1591
2044
|
if (content.includes("## Claims")) {
|
|
1592
2045
|
const uncited = content.split("\n").filter((line) => line.startsWith("- ") && !line.includes("[source:"));
|
|
1593
2046
|
if (uncited.length) {
|
|
@@ -1618,9 +2071,9 @@ async function bootstrapDemo(rootDir, input) {
|
|
|
1618
2071
|
}
|
|
1619
2072
|
|
|
1620
2073
|
// src/viewer.ts
|
|
1621
|
-
import
|
|
2074
|
+
import fs10 from "fs/promises";
|
|
1622
2075
|
import http from "http";
|
|
1623
|
-
import
|
|
2076
|
+
import path11 from "path";
|
|
1624
2077
|
import mime2 from "mime-types";
|
|
1625
2078
|
async function startGraphServer(rootDir, port) {
|
|
1626
2079
|
const { config, paths } = await loadVaultConfig(rootDir);
|
|
@@ -1634,12 +2087,12 @@ async function startGraphServer(rootDir, port) {
|
|
|
1634
2087
|
return;
|
|
1635
2088
|
}
|
|
1636
2089
|
response.writeHead(200, { "content-type": "application/json" });
|
|
1637
|
-
response.end(await
|
|
2090
|
+
response.end(await fs10.readFile(paths.graphPath, "utf8"));
|
|
1638
2091
|
return;
|
|
1639
2092
|
}
|
|
1640
2093
|
const relativePath = url.pathname === "/" ? "index.html" : url.pathname.slice(1);
|
|
1641
|
-
const target =
|
|
1642
|
-
const fallback =
|
|
2094
|
+
const target = path11.join(paths.viewerDistDir, relativePath);
|
|
2095
|
+
const fallback = path11.join(paths.viewerDistDir, "index.html");
|
|
1643
2096
|
const filePath = await fileExists(target) ? target : fallback;
|
|
1644
2097
|
if (!await fileExists(filePath)) {
|
|
1645
2098
|
response.writeHead(503, { "content-type": "text/plain" });
|
|
@@ -1647,7 +2100,7 @@ async function startGraphServer(rootDir, port) {
|
|
|
1647
2100
|
return;
|
|
1648
2101
|
}
|
|
1649
2102
|
response.writeHead(200, { "content-type": mime2.lookup(filePath) || "text/plain" });
|
|
1650
|
-
response.end(await
|
|
2103
|
+
response.end(await fs10.readFile(filePath));
|
|
1651
2104
|
});
|
|
1652
2105
|
await new Promise((resolve) => {
|
|
1653
2106
|
server.listen(effectivePort, resolve);
|
|
@@ -1667,13 +2120,309 @@ async function startGraphServer(rootDir, port) {
|
|
|
1667
2120
|
}
|
|
1668
2121
|
};
|
|
1669
2122
|
}
|
|
2123
|
+
|
|
2124
|
+
// src/mcp.ts
|
|
2125
|
+
import fs11 from "fs/promises";
|
|
2126
|
+
import path12 from "path";
|
|
2127
|
+
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2128
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
2129
|
+
import { z as z6 } from "zod";
|
|
2130
|
+
var SERVER_VERSION = "0.1.4";
|
|
2131
|
+
async function createMcpServer(rootDir) {
|
|
2132
|
+
const server = new McpServer({
|
|
2133
|
+
name: "swarmvault",
|
|
2134
|
+
version: SERVER_VERSION,
|
|
2135
|
+
websiteUrl: "https://www.swarmvault.ai"
|
|
2136
|
+
});
|
|
2137
|
+
server.registerTool("workspace_info", {
|
|
2138
|
+
description: "Return the current SwarmVault workspace paths and high-level counts."
|
|
2139
|
+
}, async () => {
|
|
2140
|
+
const info = await getWorkspaceInfo(rootDir);
|
|
2141
|
+
return asToolText(info);
|
|
2142
|
+
});
|
|
2143
|
+
server.registerTool("search_pages", {
|
|
2144
|
+
description: "Search compiled wiki pages using the local full-text index.",
|
|
2145
|
+
inputSchema: {
|
|
2146
|
+
query: z6.string().min(1).describe("Search query"),
|
|
2147
|
+
limit: z6.number().int().min(1).max(25).optional().describe("Maximum number of results")
|
|
2148
|
+
}
|
|
2149
|
+
}, async ({ query, limit }) => {
|
|
2150
|
+
const results = await searchVault(rootDir, query, limit ?? 5);
|
|
2151
|
+
return asToolText(results);
|
|
2152
|
+
});
|
|
2153
|
+
server.registerTool("read_page", {
|
|
2154
|
+
description: "Read a generated wiki page by its path relative to wiki/.",
|
|
2155
|
+
inputSchema: {
|
|
2156
|
+
path: z6.string().min(1).describe("Path relative to wiki/, for example sources/example.md")
|
|
2157
|
+
}
|
|
2158
|
+
}, async ({ path: relativePath }) => {
|
|
2159
|
+
const page = await readPage(rootDir, relativePath);
|
|
2160
|
+
if (!page) {
|
|
2161
|
+
return asToolError(`Page not found: ${relativePath}`);
|
|
2162
|
+
}
|
|
2163
|
+
return asToolText(page);
|
|
2164
|
+
});
|
|
2165
|
+
server.registerTool("list_sources", {
|
|
2166
|
+
description: "List source manifests in the current workspace.",
|
|
2167
|
+
inputSchema: {
|
|
2168
|
+
limit: z6.number().int().min(1).max(100).optional().describe("Maximum number of manifests to return")
|
|
2169
|
+
}
|
|
2170
|
+
}, async ({ limit }) => {
|
|
2171
|
+
const manifests = await listManifests(rootDir);
|
|
2172
|
+
return asToolText(limit ? manifests.slice(0, limit) : manifests);
|
|
2173
|
+
});
|
|
2174
|
+
server.registerTool("query_vault", {
|
|
2175
|
+
description: "Ask a question against the compiled vault and optionally save the answer.",
|
|
2176
|
+
inputSchema: {
|
|
2177
|
+
question: z6.string().min(1).describe("Question to ask the vault"),
|
|
2178
|
+
save: z6.boolean().optional().describe("Persist the answer to wiki/outputs")
|
|
2179
|
+
}
|
|
2180
|
+
}, async ({ question, save }) => {
|
|
2181
|
+
const result = await queryVault(rootDir, question, save ?? false);
|
|
2182
|
+
return asToolText(result);
|
|
2183
|
+
});
|
|
2184
|
+
server.registerTool("ingest_input", {
|
|
2185
|
+
description: "Ingest a local file path or URL into the SwarmVault workspace.",
|
|
2186
|
+
inputSchema: {
|
|
2187
|
+
input: z6.string().min(1).describe("Local path or URL to ingest")
|
|
2188
|
+
}
|
|
2189
|
+
}, async ({ input }) => {
|
|
2190
|
+
const manifest = await ingestInput(rootDir, input);
|
|
2191
|
+
return asToolText(manifest);
|
|
2192
|
+
});
|
|
2193
|
+
server.registerTool("compile_vault", {
|
|
2194
|
+
description: "Compile source manifests into wiki pages, graph data, and search index."
|
|
2195
|
+
}, async () => {
|
|
2196
|
+
const result = await compileVault(rootDir);
|
|
2197
|
+
return asToolText(result);
|
|
2198
|
+
});
|
|
2199
|
+
server.registerTool("lint_vault", {
|
|
2200
|
+
description: "Run anti-drift and vault health checks."
|
|
2201
|
+
}, async () => {
|
|
2202
|
+
const findings = await lintVault(rootDir);
|
|
2203
|
+
return asToolText(findings);
|
|
2204
|
+
});
|
|
2205
|
+
// MCP resource: the resolved workspace configuration as pretty-printed JSON.
server.registerResource(
  "swarmvault-config",
  "swarmvault://config",
  {
    title: "SwarmVault Config",
    description: "The resolved SwarmVault config file.",
    mimeType: "application/json"
  },
  async () => {
    const { config } = await loadVaultConfig(rootDir);
    const body = JSON.stringify(config, null, 2);
    return asTextResource("swarmvault://config", body);
  }
);
|
|
2213
|
+
// MCP resource: the compiled graph artifact. The file only exists after a
// compile, so a missing artifact is reported as a JSON hint rather than an error.
server.registerResource(
  "swarmvault-graph",
  "swarmvault://graph",
  {
    title: "SwarmVault Graph",
    description: "The compiled graph artifact for the current workspace.",
    mimeType: "application/json"
  },
  async () => {
    const { paths } = await loadVaultConfig(rootDir);
    const graph = await readJsonFile(paths.graphPath);
    const payload = graph ?? { error: "Graph artifact not found. Run `swarmvault compile` first." };
    return asTextResource("swarmvault://graph", JSON.stringify(payload, null, 2));
  }
);
|
|
2225
|
+
// MCP resource: every source manifest in the workspace, serialized as JSON.
server.registerResource(
  "swarmvault-manifests",
  "swarmvault://manifests",
  {
    title: "SwarmVault Manifests",
    description: "All source manifests in the workspace.",
    mimeType: "application/json"
  },
  async () => {
    const manifests = await listManifests(rootDir);
    return asTextResource("swarmvault://manifests", JSON.stringify(manifests, null, 2));
  }
);
|
|
2233
|
+
// MCP resource: the markdown schema document that steers compile/query behavior.
server.registerResource(
  "swarmvault-schema",
  "swarmvault://schema",
  {
    title: "SwarmVault Schema",
    description: "The vault schema file that guides compile and query behavior.",
    mimeType: "text/markdown"
  },
  async () => {
    const { content } = await loadVaultSchema(rootDir);
    return asTextResource("swarmvault://schema", content);
  }
);
|
|
2241
|
+
// MCP resource template: generated wiki pages, addressed by URL-encoded
// relative path (swarmvault://pages/{path}).
//
// Fix: the read handler resolves a client-supplied, URL-decoded path against
// the wiki directory and reads it from disk with no containment check, so a
// path containing ".." could escape the wiki root (path traversal on
// untrusted MCP input). `readPage` may already constrain paths — TODO confirm;
// the guard below is defense-in-depth and answers "Page not found" for any
// path that resolves outside the wiki directory, matching the existing
// missing-page behavior.
server.registerResource(
  "swarmvault-pages",
  new ResourceTemplate("swarmvault://pages/{path}", {
    // Enumerate pages so MCP clients can discover them without guessing URIs.
    list: async () => {
      const pages = await listPages(rootDir);
      return {
        resources: pages.map((page) => ({
          uri: `swarmvault://pages/${encodeURIComponent(page.path)}`,
          name: page.title,
          title: page.title,
          description: `Generated ${page.kind} page`,
          mimeType: "text/markdown"
        }))
      };
    }
  }),
  {
    title: "SwarmVault Pages",
    description: "Generated wiki pages exposed as MCP resources.",
    mimeType: "text/markdown"
  },
  async (_uri, variables) => {
    // Template variables may be string | string[]; only accept a single string.
    const encodedPath = typeof variables.path === "string" ? variables.path : "";
    const relativePath = decodeURIComponent(encodedPath);
    const page = await readPage(rootDir, relativePath);
    if (!page) {
      return asTextResource(`swarmvault://pages/${encodedPath}`, `Page not found: ${relativePath}`);
    }
    const { paths } = await loadVaultConfig(rootDir);
    const wikiRoot = path12.resolve(paths.wikiDir);
    const absolutePath = path12.resolve(wikiRoot, relativePath);
    // Path-traversal guard: the resolved file must stay inside the wiki root.
    if (absolutePath !== wikiRoot && !absolutePath.startsWith(wikiRoot + path12.sep)) {
      return asTextResource(`swarmvault://pages/${encodedPath}`, `Page not found: ${relativePath}`);
    }
    return asTextResource(`swarmvault://pages/${encodedPath}`, await fs11.readFile(absolutePath, "utf8"));
  }
);
|
|
2274
|
+
return server;
|
|
2275
|
+
}
|
|
2276
|
+
// Build a SwarmVault MCP server for `rootDir`, connect it over a stdio
// transport, and hand back a close() handle for teardown.
async function startMcpServer(rootDir, stdin, stdout) {
  const server = await createMcpServer(rootDir);
  const transport = new StdioServerTransport(stdin, stdout);
  await server.connect(transport);
  const close = async () => {
    await server.close();
  };
  return { close };
}
|
|
2286
|
+
// Wrap an arbitrary serializable value as an MCP tool result whose single
// content item is the value pretty-printed as JSON (2-space indent).
function asToolText(value) {
  const text = JSON.stringify(value, null, 2);
  return { content: [{ type: "text", text }] };
}
|
|
2296
|
+
// Build an MCP tool error result (isError: true) carrying a plain-text message.
function asToolError(message) {
  const item = { type: "text", text: message };
  return { isError: true, content: [item] };
}
|
|
2307
|
+
// Package a text payload as an MCP resource response for the given URI.
function asTextResource(uri, text) {
  const contents = [{ uri, text }];
  return { contents };
}
|
|
2317
|
+
|
|
2318
|
+
// src/watch.ts
|
|
2319
|
+
import path13 from "path";
|
|
2320
|
+
import chokidar from "chokidar";
|
|
2321
|
+
// Watch the workspace inbox and re-run import -> compile (-> optional lint)
// whenever files change. Events are debounced (default 900 ms) and cycles are
// serialized: a change arriving mid-cycle sets `pending` and is picked up as a
// "queued" run when the current cycle finishes. Every cycle — success or
// failure — is recorded via appendWatchRun. Returns { close } to stop watching.
//
// NOTE(review): the debounce/re-entrancy state (`timer`, `running`, `pending`,
// `closed`) is order-sensitive; keep statement order intact when editing.
async function watchVault(rootDir, options = {}) {
  const { paths } = await initWorkspace(rootDir);
  // Debounce window for coalescing bursts of filesystem events.
  const debounceMs = options.debounceMs ?? 900;
  let timer;
  let running = false;   // a cycle is currently executing
  let pending = false;   // at least one event arrived since the last cycle started
  let closed = false;    // close() was called; ignore all further events
  // Human-readable reasons accumulated for the next run's log entry.
  const reasons = /* @__PURE__ */ new Set();
  const watcher = chokidar.watch(paths.inboxDir, {
    ignoreInitial: true,
    // Wait for files to stop growing before firing, so partially-written
    // files are not imported; threshold scales with the debounce window.
    awaitWriteFinish: {
      stabilityThreshold: Math.max(250, Math.floor(debounceMs / 2)),
      pollInterval: 100
    }
  });
  // Record a reason and (re)arm the debounce timer.
  const schedule = (reason) => {
    if (closed) {
      return;
    }
    reasons.add(reason);
    pending = true;
    if (timer) {
      clearTimeout(timer);
    }
    timer = setTimeout(() => {
      // Fire-and-forget: runCycle handles its own errors and logging.
      void runCycle();
    }, debounceMs);
  };
  // One import -> compile (-> lint) pass; never runs concurrently with itself.
  const runCycle = async () => {
    if (running || closed || !pending) {
      return;
    }
    pending = false;
    running = true;
    const startedAt = /* @__PURE__ */ new Date();
    // Snapshot and reset reasons so events during this run start a fresh set.
    const runReasons = [...reasons];
    reasons.clear();
    let importedCount = 0;
    let scannedCount = 0;
    let attachmentCount = 0;
    let changedPages = [];
    let lintFindingCount;
    let success = true;
    let error;
    try {
      const imported = await importInbox(rootDir, paths.inboxDir);
      importedCount = imported.imported.length;
      scannedCount = imported.scannedCount;
      attachmentCount = imported.attachmentCount;
      const compile = await compileVault(rootDir);
      changedPages = compile.changedPages;
      // Lint is opt-in per watch session.
      if (options.lint) {
        const findings = await lintVault(rootDir);
        lintFindingCount = findings.length;
      }
    } catch (caught) {
      // Capture the failure for the run log; the watcher keeps running.
      success = false;
      error = caught instanceof Error ? caught.message : String(caught);
    } finally {
      // Always log the run, even on failure, before allowing the next cycle.
      const finishedAt = /* @__PURE__ */ new Date();
      await appendWatchRun(rootDir, {
        startedAt: startedAt.toISOString(),
        finishedAt: finishedAt.toISOString(),
        durationMs: finishedAt.getTime() - startedAt.getTime(),
        inputDir: paths.inboxDir,
        reasons: runReasons,
        importedCount,
        scannedCount,
        attachmentCount,
        changedPages,
        lintFindingCount,
        success,
        error
      });
      running = false;
      // Events that arrived mid-run trigger one follow-up cycle.
      if (pending && !closed) {
        schedule("queued");
      }
    }
  };
  // Every watcher event (including errors) just schedules a debounced cycle,
  // tagged with the event kind and the path relative to the inbox.
  watcher.on("add", (filePath) => schedule(`add:${toWatchReason(paths.inboxDir, filePath)}`)).on("change", (filePath) => schedule(`change:${toWatchReason(paths.inboxDir, filePath)}`)).on("unlink", (filePath) => schedule(`unlink:${toWatchReason(paths.inboxDir, filePath)}`)).on("addDir", (dirPath) => schedule(`addDir:${toWatchReason(paths.inboxDir, dirPath)}`)).on("unlinkDir", (dirPath) => schedule(`unlinkDir:${toWatchReason(paths.inboxDir, dirPath)}`)).on("error", (caught) => schedule(`error:${caught instanceof Error ? caught.message : String(caught)}`));
  return {
    // Stop watching: suppress future events, cancel any armed timer, close chokidar.
    close: async () => {
      closed = true;
      if (timer) {
        clearTimeout(timer);
      }
      await watcher.close();
    }
  };
}
|
|
2412
|
+
// Express a watched path relative to the watch root; the root itself maps to ".".
function toWatchReason(baseDir, targetPath) {
  const relativePath = path13.relative(baseDir, targetPath);
  return relativePath === "" ? "." : relativePath;
}
|
|
1670
2415
|
export {
|
|
1671
2416
|
assertProviderCapability,
|
|
1672
2417
|
bootstrapDemo,
|
|
1673
2418
|
compileVault,
|
|
2419
|
+
createMcpServer,
|
|
1674
2420
|
createProvider,
|
|
1675
2421
|
defaultVaultConfig,
|
|
2422
|
+
defaultVaultSchema,
|
|
1676
2423
|
getProviderForTask,
|
|
2424
|
+
getWorkspaceInfo,
|
|
2425
|
+
importInbox,
|
|
1677
2426
|
ingestInput,
|
|
1678
2427
|
initVault,
|
|
1679
2428
|
initWorkspace,
|
|
@@ -1681,9 +2430,15 @@ export {
|
|
|
1681
2430
|
installConfiguredAgents,
|
|
1682
2431
|
lintVault,
|
|
1683
2432
|
listManifests,
|
|
2433
|
+
listPages,
|
|
1684
2434
|
loadVaultConfig,
|
|
2435
|
+
loadVaultSchema,
|
|
1685
2436
|
queryVault,
|
|
1686
2437
|
readExtractedText,
|
|
2438
|
+
readPage,
|
|
1687
2439
|
resolvePaths,
|
|
1688
|
-
|
|
2440
|
+
searchVault,
|
|
2441
|
+
startGraphServer,
|
|
2442
|
+
startMcpServer,
|
|
2443
|
+
watchVault
|
|
1689
2444
|
};
|