@swarmvaultai/engine 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/dist/index.d.ts +233 -0
- package/dist/index.js +1689 -0
- package/package.json +44 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1689 @@
|
|
|
1
|
+
// src/config.ts
|
|
2
|
+
import path2 from "path";
|
|
3
|
+
import { fileURLToPath } from "url";
|
|
4
|
+
import { z as z2 } from "zod";
|
|
5
|
+
|
|
6
|
+
// src/utils.ts
|
|
7
|
+
import crypto from "crypto";
|
|
8
|
+
import fs from "fs/promises";
|
|
9
|
+
import path from "path";
|
|
10
|
+
/**
 * Converts arbitrary text into a lowercase, hyphen-separated slug.
 *
 * Every run of characters outside `a-z0-9` collapses into a single hyphen,
 * leading and trailing hyphens are removed, and the result is capped at
 * 80 characters.
 *
 * @param {string} value - Text to slugify.
 * @returns {string} The slug, or `"item"` when nothing usable remains.
 */
function slugify(value) {
  const lowered = value.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
  const trimmed = hyphenated.replace(/^-+|-+$/g, "");
  const capped = trimmed.slice(0, 80);
  return capped || "item";
}
|
|
13
|
+
/**
 * Computes the SHA-256 digest of the given data.
 *
 * @param {string | Buffer} value - Data fed to the hash.
 * @returns {string} Hex-encoded digest.
 */
function sha256(value) {
  const hasher = crypto.createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
|
|
16
|
+
/**
 * Creates a directory, including any missing parent directories.
 *
 * @param {string} dirPath - Directory to create.
 * @returns {Promise<void>} Resolves once `fs.mkdir` has completed.
 */
async function ensureDir(dirPath) {
  const options = { recursive: true };
  await fs.mkdir(dirPath, options);
}
|
|
19
|
+
/**
 * Reports whether a path is accessible.
 *
 * @param {string} filePath - Path to probe with `fs.access`.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when
 *   it rejects for any reason.
 */
async function fileExists(filePath) {
  return fs.access(filePath).then(
    () => true,
    () => false
  );
}
|
|
27
|
+
/**
 * Reads and parses a UTF-8 JSON file.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<any | null>} The parsed value, or `null` when
 *   `fileExists` reports the file as missing.
 * @throws {SyntaxError} When the file content is not valid JSON.
 */
async function readJsonFile(filePath) {
  const present = await fileExists(filePath);
  if (present) {
    const text = await fs.readFile(filePath, "utf8");
    return JSON.parse(text);
  }
  return null;
}
|
|
34
|
+
/**
 * Serializes a value as 2-space-indented JSON followed by a newline and
 * writes it to disk, creating the parent directory first.
 *
 * @param {string} filePath - Destination file.
 * @param {unknown} value - Value passed to `JSON.stringify`.
 * @returns {Promise<void>}
 */
async function writeJsonFile(filePath, value) {
  const parentDir = path.dirname(filePath);
  await ensureDir(parentDir);
  const serialized = JSON.stringify(value, null, 2);
  await fs.writeFile(filePath, `${serialized}\n`, "utf8");
}
|
|
39
|
+
/**
 * Writes text to a file unless the file already holds exactly that text.
 * The parent directory is created first.
 *
 * @param {string} filePath - Destination file.
 * @param {string} content - Desired UTF-8 content.
 * @returns {Promise<boolean>} `true` when the file was written, `false` when
 *   the existing content already matched.
 */
async function writeFileIfChanged(filePath, content) {
  await ensureDir(path.dirname(filePath));
  const alreadyPresent = await fileExists(filePath);
  const previous = alreadyPresent ? await fs.readFile(filePath, "utf8") : undefined;
  if (alreadyPresent && previous === content) {
    return false;
  }
  await fs.writeFile(filePath, content, "utf8");
  return true;
}
|
|
50
|
+
/**
 * Rewrites a path so that every platform separator becomes the POSIX `/`.
 *
 * @param {string} value - Path using `path.sep` separators.
 * @returns {string} The same path with `path.posix.sep` separators.
 */
function toPosix(value) {
  return value.replaceAll(path.sep, path.posix.sep);
}
|
|
53
|
+
/**
 * Returns the leading sentences of a text.
 *
 * Whitespace runs are collapsed to single spaces, then the text is split at
 * whitespace that follows `.`, `!` or `?`.
 *
 * @param {string} value - Source text.
 * @param {number} [count=3] - Number of sentences to keep.
 * @returns {string} The kept sentences joined by single spaces and trimmed.
 */
function firstSentences(value, count = 3) {
  const collapsed = value.replace(/\s+/g, " ");
  const pieces = collapsed.split(/(?<=[.!?])\s+/);
  const nonEmpty = pieces.filter((piece) => piece.length > 0);
  const head = nonEmpty.slice(0, count);
  return head.join(" ").trim();
}
|
|
57
|
+
/**
 * Removes duplicates from a sequence, keeping the first item seen for each
 * derived key and preserving input order.
 *
 * @template T
 * @param {Iterable<T>} items - Items to de-duplicate.
 * @param {(item: T) => unknown} key - Derives the identity used for comparison.
 * @returns {T[]} A new array holding the first occurrence of each key.
 */
function uniqueBy(items, key) {
  const seenKeys = new Set();
  const kept = [];
  for (const candidate of items) {
    const identity = key(candidate);
    if (!seenKeys.has(identity)) {
      seenKeys.add(identity);
      kept.push(candidate);
    }
  }
  return kept;
}
|
|
70
|
+
/**
 * Pulls the JSON payload out of free-form provider output.
 *
 * A fenced "```json" block wins when present. Otherwise the span from the
 * first `{` to the last `}` is returned.
 *
 * @param {string} text - Raw response text.
 * @returns {string} The unparsed JSON text.
 * @throws {Error} When neither a fenced block nor a brace-delimited span exists.
 */
function extractJson(text) {
  const fenced = /```json\s*([\s\S]*?)```/i.exec(text);
  if (fenced !== null) {
    return fenced[1].trim();
  }
  const open = text.indexOf("{");
  const close = text.lastIndexOf("}");
  const hasObject = open !== -1 && close > open;
  if (!hasObject) {
    throw new Error("Could not locate JSON object in provider response.");
  }
  return text.slice(open, close + 1);
}
|
|
82
|
+
/**
 * Collapses every whitespace run to a single space and strips whitespace
 * from both ends.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Single-spaced, trimmed text.
 */
function normalizeWhitespace(value) {
  const words = value.trim().split(/\s+/);
  return words.join(" ");
}
|
|
85
|
+
/**
 * Shortens a string to at most `maxLength` characters, ending it with `...`
 * when anything was cut.
 *
 * @param {string} value - Text to shorten.
 * @param {number} maxLength - Maximum length of the returned string.
 * @returns {string} `value` unchanged when it already fits, otherwise a
 *   prefix of `value` followed by `...`. When `maxLength` is too small to
 *   hold any text plus the ellipsis, as much of the ellipsis as fits.
 */
function truncate(value, maxLength) {
  if (value.length <= maxLength) {
    return value;
  }
  const ellipsis = "...";
  if (maxLength <= ellipsis.length) {
    // A negative slice end would count from the end of the string and yield a
    // result longer than maxLength, so tiny budgets are handled explicitly.
    return ellipsis.slice(0, Math.max(0, maxLength));
  }
  return `${value.slice(0, maxLength - ellipsis.length)}${ellipsis}`;
}
|
|
91
|
+
|
|
92
|
+
// src/types.ts
|
|
93
|
+
import { z } from "zod";
|
|
94
|
+
// Zod enum of the capability labels a provider may declare.
var providerCapabilitySchema = z.enum(
  ["responses", "chat", "structured", "tools", "vision", "embeddings", "streaming", "local"]
);
|
|
104
|
+
// Zod enum of the provider type identifiers accepted in configuration.
var providerTypeSchema = z.enum(
  ["heuristic", "openai", "ollama", "anthropic", "gemini", "openai-compatible", "custom"]
);
|
|
113
|
+
|
|
114
|
+
// src/config.ts
|
|
115
|
+
// Config file name that findConfigPath checks first and falls back to when
// neither file exists.
var PRIMARY_CONFIG_FILENAME = "swarmvault.config.json";
// Older config file name, checked only when the primary file is absent.
var LEGACY_CONFIG_FILENAME = "vault.config.json";
// Directory containing this module; resolvePaths locates the viewer dist
// directory relative to it.
var moduleDir = path2.dirname(fileURLToPath(import.meta.url));
|
|
118
|
+
// Zod schema for one entry of the `providers` map in the vault config.
// Only `type` and `model` are required; every other field is optional.
var providerConfigSchema = (() => {
  const nonEmptyString = () => z2.string().min(1);
  return z2.object({
    type: providerTypeSchema,
    model: nonEmptyString(),
    baseUrl: z2.string().url().optional(),
    apiKeyEnv: nonEmptyString().optional(),
    headers: z2.record(z2.string(), z2.string()).optional(),
    module: nonEmptyString().optional(),
    capabilities: z2.array(providerCapabilitySchema).optional(),
    apiStyle: z2.enum(["responses", "chat"]).optional()
  });
})();
|
|
128
|
+
// Zod schema for the whole vault configuration file: workspace directories,
// named providers, the provider chosen per task, the viewer port, and the
// agents to install (defaulting to all three supported agents).
var vaultConfigSchema = (() => {
  const nonEmptyString = () => z2.string().min(1);
  const workspace = z2.object({
    rawDir: nonEmptyString(),
    wikiDir: nonEmptyString(),
    stateDir: nonEmptyString(),
    agentDir: nonEmptyString()
  });
  const tasks = z2.object({
    compileProvider: nonEmptyString(),
    queryProvider: nonEmptyString(),
    lintProvider: nonEmptyString(),
    visionProvider: nonEmptyString()
  });
  const viewer = z2.object({
    port: z2.number().int().positive()
  });
  const agentName = z2.enum(["codex", "claude", "cursor"]);
  return z2.object({
    workspace,
    providers: z2.record(z2.string(), providerConfigSchema),
    tasks,
    viewer,
    agents: z2.array(agentName).default(["codex", "claude", "cursor"])
  });
})();
|
|
147
|
+
/**
 * Builds the configuration used when no config file exists: a single
 * heuristic provider named `local` handling every task, viewer port 4123,
 * and all three agents enabled.
 *
 * @returns {object} A fresh configuration object on every call.
 */
function defaultVaultConfig() {
  const workspace = {
    rawDir: "raw",
    wikiDir: "wiki",
    stateDir: "state",
    agentDir: "agent"
  };
  const local = {
    type: "heuristic",
    model: "heuristic-v1",
    capabilities: ["chat", "structured", "vision", "local"]
  };
  const tasks = {
    compileProvider: "local",
    queryProvider: "local",
    lintProvider: "local",
    visionProvider: "local"
  };
  return {
    workspace,
    providers: { local },
    tasks,
    viewer: { port: 4123 },
    agents: ["codex", "claude", "cursor"]
  };
}
|
|
174
|
+
/**
 * Locates the vault config file inside a root directory.
 *
 * The primary file name is checked first, then the legacy name.
 *
 * @param {string} rootDir - Vault root directory.
 * @returns {Promise<string>} Path of the first config file that exists, or
 *   the primary path when neither exists.
 */
async function findConfigPath(rootDir) {
  const primaryPath = path2.join(rootDir, PRIMARY_CONFIG_FILENAME);
  const candidates = [primaryPath, path2.join(rootDir, LEGACY_CONFIG_FILENAME)];
  for (const candidate of candidates) {
    if (await fileExists(candidate)) {
      return candidate;
    }
  }
  return primaryPath;
}
|
|
185
|
+
/**
 * Derives the absolute paths the engine works with.
 *
 * Workspace directories are resolved against `rootDir`; state artifacts live
 * under the state directory; the viewer dist directory is resolved relative
 * to this module's own directory.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {object} [config] - Vault config; the default config is used when
 *   this is null or undefined.
 * @param {string} [configPath] - Config file path to report; defaults to the
 *   primary config file name inside `rootDir`.
 * @returns {object} Paths keyed by purpose (rawDir, wikiDir, stateDir, ...).
 */
function resolvePaths(rootDir, config, configPath = path2.join(rootDir, PRIMARY_CONFIG_FILENAME)) {
  const { workspace } = config ?? defaultVaultConfig();
  const [rawDir, wikiDir, stateDir, agentDir] = ["rawDir", "wikiDir", "stateDir", "agentDir"].map(
    (key) => path2.resolve(rootDir, workspace[key])
  );
  const inState = (name) => path2.join(stateDir, name);
  return {
    rootDir,
    rawDir,
    wikiDir,
    stateDir,
    agentDir,
    manifestsDir: inState("manifests"),
    extractsDir: inState("extracts"),
    analysesDir: inState("analyses"),
    viewerDistDir: path2.resolve(moduleDir, "../../viewer/dist"),
    graphPath: inState("graph.json"),
    searchDbPath: inState("search.sqlite"),
    compileStatePath: inState("compile-state.json"),
    configPath
  };
}
|
|
207
|
+
/**
 * Loads and validates the vault configuration for a root directory.
 *
 * When the config file is missing, the default configuration is validated
 * and returned instead.
 *
 * @param {string} rootDir - Vault root directory.
 * @returns {Promise<{config: object, paths: object}>} The parsed config and
 *   the paths resolved from it.
 * @throws When the config content fails `vaultConfigSchema` validation.
 */
async function loadVaultConfig(rootDir) {
  const configPath = await findConfigPath(rootDir);
  const onDisk = await readJsonFile(configPath);
  const config = vaultConfigSchema.parse(onDisk ?? defaultVaultConfig());
  const paths = resolvePaths(rootDir, config, configPath);
  return { config, paths };
}
|
|
216
|
+
/**
 * Prepares a vault workspace: loads (or defaults) the config, creates the
 * workspace and state directories, and writes the config file when none
 * exists yet.
 *
 * @param {string} rootDir - Vault root directory.
 * @returns {Promise<{config: object, paths: object}>} The effective config
 *   and resolved paths.
 */
async function initWorkspace(rootDir) {
  const configPath = await findConfigPath(rootDir);
  let config;
  if (await fileExists(configPath)) {
    ({ config } = await loadVaultConfig(rootDir));
  } else {
    config = defaultVaultConfig();
  }
  const paths = resolvePaths(rootDir, config, configPath);
  const requiredDirs = [
    paths.rawDir,
    paths.wikiDir,
    paths.stateDir,
    paths.agentDir,
    paths.manifestsDir,
    paths.extractsDir,
    paths.analysesDir
  ];
  await Promise.all(requiredDirs.map((dir) => ensureDir(dir)));
  // Existence is re-checked after the directories have been created.
  const configMissing = !(await fileExists(configPath));
  if (configMissing) {
    await writeJsonFile(configPath, config);
  }
  return { config, paths };
}
|
|
234
|
+
|
|
235
|
+
// src/ingest.ts
|
|
236
|
+
import fs3 from "fs/promises";
|
|
237
|
+
import path4 from "path";
|
|
238
|
+
import { JSDOM } from "jsdom";
|
|
239
|
+
import TurndownService from "turndown";
|
|
240
|
+
import { Readability } from "@mozilla/readability";
|
|
241
|
+
import mime from "mime-types";
|
|
242
|
+
|
|
243
|
+
// src/logs.ts
|
|
244
|
+
import fs2 from "fs/promises";
|
|
245
|
+
import path3 from "path";
|
|
246
|
+
/**
 * Appends a timestamped entry to `log.md` in the wiki directory.
 *
 * The entry is a `## [YYYY-MM-DD HH:MM:SS] action | title` heading followed
 * by one bullet per line. A missing log file starts from a `# Log` header.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {string} action - Action label shown in the heading.
 * @param {string} title - Subject shown in the heading.
 * @param {string[]} [lines=[]] - Detail lines rendered as bullets.
 * @returns {Promise<void>}
 */
async function appendLogEntry(rootDir, action, title, lines = []) {
  const { paths } = await initWorkspace(rootDir);
  await ensureDir(paths.wikiDir);
  const logPath = path3.join(paths.wikiDir, "log.md");
  // First 19 characters of the ISO string, i.e. date and time down to seconds.
  const isoNow = new Date().toISOString();
  const timestamp = isoNow.slice(0, 19).replace("T", " ");
  const heading = `## [${timestamp}] ${action} | ${title}`;
  const bullets = lines.map((line) => `- ${line}`);
  const entry = [heading, ...bullets, ""].join("\n");
  let existing = "# Log\n\n";
  if (await fileExists(logPath)) {
    existing = await fs2.readFile(logPath, "utf8");
  }
  await fs2.writeFile(logPath, `${existing}${entry}\n`, "utf8");
}
|
|
256
|
+
|
|
257
|
+
// src/ingest.ts
|
|
258
|
+
/**
 * Classifies a source by MIME type, with a `.pdf` extension fallback.
 *
 * Rules are tried in order and the first match wins. Because the `text/`
 * rule precedes the HTML rule, `text/html` is classified as `"text"`.
 *
 * @param {string} mimeType - MIME type of the source.
 * @param {string} filePath - Path or URL, consulted only for the `.pdf` check.
 * @returns {"markdown" | "text" | "pdf" | "image" | "html" | "binary"}
 */
function inferKind(mimeType, filePath) {
  const rules = [
    ["markdown", () => mimeType.includes("markdown")],
    ["text", () => mimeType.startsWith("text/")],
    ["pdf", () => mimeType === "application/pdf" || filePath.toLowerCase().endsWith(".pdf")],
    ["image", () => mimeType.startsWith("image/")],
    ["html", () => mimeType.includes("html")]
  ];
  const matched = rules.find(([, applies]) => applies());
  return matched ? matched[0] : "binary";
}
|
|
276
|
+
/**
 * Picks a title from text content.
 *
 * @param {string} fallback - Title used when no heading is found.
 * @param {string} content - Text searched for a line starting with a single
 *   `#` followed by whitespace.
 * @returns {string} The trimmed heading text, or `fallback` when there is no
 *   heading or it is empty after trimming.
 */
function titleFromText(fallback, content) {
  const found = /^#\s+(.+)$/m.exec(content);
  const heading = found ? found[1].trim() : "";
  if (heading) {
    return heading;
  }
  return fallback;
}
|
|
280
|
+
/**
 * Looks up a MIME type for a path or URL via `mime.lookup`.
 *
 * @param {string} target - Path or URL handed to `mime.lookup`.
 * @returns {string} The looked-up type, or `"application/octet-stream"` when
 *   the lookup yields a falsy value.
 */
function guessMimeType(target) {
  const detected = mime.lookup(target);
  return detected ? detected : "application/octet-stream";
}
|
|
283
|
+
/**
 * Converts an HTML page to markdown.
 *
 * Readability's extracted article content is converted when available;
 * otherwise the full document body is converted.
 *
 * @param {string} html - HTML markup.
 * @param {string} url - URL of the page, given to JSDOM and used as the
 *   source of the fallback title.
 * @returns {Promise<{markdown: string, title: string}>} The markdown plus the
 *   trimmed article title, or the URL's hostname when no title was extracted.
 */
async function convertHtmlToMarkdown(html, url) {
  const { document } = new JSDOM(html, { url }).window;
  const article = new Readability(document).parse();
  const converter = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
  const sourceHtml = article?.content ?? document.body.innerHTML;
  const markdown = converter.turndown(sourceHtml);
  const articleTitle = article?.title?.trim();
  const title = articleTitle || new URL(url).hostname;
  return { markdown, title };
}
|
|
294
|
+
/**
 * Finds the stored manifest whose `contentHash` equals the given hash.
 *
 * Manifest files are read one at a time and the scan stops at the first
 * match. A directory that cannot be listed is treated as empty.
 *
 * @param {string} manifestsDir - Directory holding `*.json` manifests.
 * @param {string} contentHash - Hash to look for.
 * @returns {Promise<object | null>} The matching manifest, or `null`.
 */
async function readManifestByHash(manifestsDir, contentHash) {
  let entries;
  try {
    entries = await fs3.readdir(manifestsDir, { withFileTypes: true });
  } catch {
    entries = [];
  }
  const jsonFiles = entries.filter((entry) => entry.isFile() && entry.name.endsWith(".json"));
  for (const { name } of jsonFiles) {
    const manifest = await readJsonFile(path4.join(manifestsDir, name));
    if (manifest?.contentHash === contentHash) {
      return manifest;
    }
  }
  return null;
}
|
|
307
|
+
/**
 * Ingests a local file or an http(s) URL into the vault.
 *
 * The payload is loaded, hashed, and de-duplicated against existing
 * manifests by content hash. New content is stored under `raw/sources`,
 * markdown and text payloads are also copied into the extracts directory,
 * and a manifest plus a log entry are written.
 *
 * Fetched HTML is converted to markdown before hashing and storage, so the
 * stored file and the content hash describe the markdown, not the raw HTML.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {string} input - File path (resolved against `rootDir`) or URL.
 * @returns {Promise<object>} The new manifest, or the existing manifest when
 *   identical content was ingested before.
 * @throws {Error} When a URL fetch returns a non-OK response.
 */
async function ingestInput(rootDir, input) {
  const { paths } = await initWorkspace(rootDir);
  const sourcesDir = path4.join(paths.rawDir, "sources");
  for (const dir of [sourcesDir, paths.manifestsDir, paths.extractsDir]) {
    await ensureDir(dir);
  }

  const isUrl = /^https?:\/\//i.test(input);
  // Captured before any fetch or read, so it marks the start of ingestion.
  const now = new Date().toISOString();
  const isTextual = (kind) => kind === "markdown" || kind === "text";

  // Downloads the URL and describes the payload that should be stored.
  const loadFromUrl = async () => {
    const response = await fetch(input);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
    }
    const fetchedBytes = Buffer.from(await response.arrayBuffer());
    const headerType = response.headers.get("content-type")?.split(";")[0]?.trim();
    const fetchedMime = headerType || guessMimeType(input);
    const fetchedKind = inferKind(fetchedMime, input);
    // inferKind reports text/html as "text", hence the explicit MIME check.
    if (fetchedKind === "html" || fetchedMime.startsWith("text/html")) {
      const converted = await convertHtmlToMarkdown(fetchedBytes.toString("utf8"), input);
      return {
        title: converted.title,
        payloadBytes: Buffer.from(converted.markdown, "utf8"),
        mimeType: "text/markdown",
        sourceKind: "markdown",
        storedExtension: ".md"
      };
    }
    const urlExtension = path4.extname(new URL(input).pathname);
    return {
      title: new URL(input).hostname + new URL(input).pathname,
      payloadBytes: fetchedBytes,
      mimeType: fetchedMime,
      sourceKind: fetchedKind,
      storedExtension: urlExtension || (mime.extension(fetchedMime) ? `.${mime.extension(fetchedMime)}` : ".bin")
    };
  };

  // Reads the local file and describes the payload that should be stored.
  const loadFromFile = async () => {
    const absoluteInput = path4.resolve(rootDir, input);
    const fileBytes = await fs3.readFile(absoluteInput);
    const fileMime = guessMimeType(absoluteInput);
    const fileKind = inferKind(fileMime, absoluteInput);
    const fileExtension = path4.extname(absoluteInput);
    const baseName = path4.basename(absoluteInput, fileExtension);
    return {
      title: isTextual(fileKind) ? titleFromText(baseName, fileBytes.toString("utf8")) : baseName,
      payloadBytes: fileBytes,
      mimeType: fileMime,
      sourceKind: fileKind,
      storedExtension: fileExtension || `.${mime.extension(fileMime) || "bin"}`
    };
  };

  const loaded = isUrl ? await loadFromUrl() : await loadFromFile();
  const { title, payloadBytes, mimeType, sourceKind, storedExtension } = loaded;

  const contentHash = sha256(payloadBytes);
  const duplicate = await readManifestByHash(paths.manifestsDir, contentHash);
  if (duplicate) {
    return duplicate;
  }

  const sourceId = `${slugify(title)}-${contentHash.slice(0, 8)}`;
  const storedPath = path4.join(sourcesDir, `${sourceId}${storedExtension}`);
  await fs3.writeFile(storedPath, payloadBytes);

  let extractedTextPath;
  if (isTextual(sourceKind)) {
    extractedTextPath = path4.join(paths.extractsDir, `${sourceId}.md`);
    await fs3.writeFile(extractedTextPath, payloadBytes.toString("utf8"), "utf8");
  }

  const relativeToRoot = (target) => toPosix(path4.relative(rootDir, target));
  const manifest = {
    sourceId,
    title,
    originType: isUrl ? "url" : "file",
    sourceKind,
    originalPath: isUrl ? undefined : toPosix(path4.resolve(rootDir, input)),
    url: isUrl ? input : undefined,
    storedPath: relativeToRoot(storedPath),
    extractedTextPath: extractedTextPath ? relativeToRoot(extractedTextPath) : undefined,
    mimeType,
    contentHash,
    createdAt: now,
    updatedAt: now
  };
  await writeJsonFile(path4.join(paths.manifestsDir, `${sourceId}.json`), manifest);
  await appendLogEntry(rootDir, "ingest", title, [`source_id=${sourceId}`, `kind=${sourceKind}`]);
  return manifest;
}
|
|
384
|
+
/**
 * Loads every manifest stored for a vault.
 *
 * @param {string} rootDir - Vault root directory.
 * @returns {Promise<object[]>} All `*.json` manifests in the manifests
 *   directory that parsed to a truthy value; an empty array when the
 *   directory does not exist.
 */
async function listManifests(rootDir) {
  const { paths } = await loadVaultConfig(rootDir);
  const { manifestsDir } = paths;
  const hasManifestsDir = await fileExists(manifestsDir);
  if (!hasManifestsDir) {
    return [];
  }
  const names = await fs3.readdir(manifestsDir);
  const jsonNames = names.filter((name) => name.endsWith(".json"));
  const loaded = await Promise.all(jsonNames.map((name) => readJsonFile(path4.join(manifestsDir, name))));
  return loaded.filter(Boolean);
}
|
|
395
|
+
/**
 * Reads the extracted text that belongs to a manifest.
 *
 * @param {string} rootDir - Vault root; `manifest.extractedTextPath` is
 *   resolved against it.
 * @param {object} manifest - Source manifest.
 * @returns {Promise<string | undefined>} The UTF-8 text, or `undefined` when
 *   the manifest has no extract path or the file is missing.
 */
async function readExtractedText(rootDir, manifest) {
  const relativePath = manifest.extractedTextPath;
  if (relativePath) {
    const absolutePath = path4.resolve(rootDir, relativePath);
    if (await fileExists(absolutePath)) {
      return fs3.readFile(absolutePath, "utf8");
    }
  }
  return undefined;
}
|
|
405
|
+
|
|
406
|
+
// src/vault.ts
|
|
407
|
+
import fs7 from "fs/promises";
|
|
408
|
+
import path9 from "path";
|
|
409
|
+
import matter3 from "gray-matter";
|
|
410
|
+
|
|
411
|
+
// src/analysis.ts
|
|
412
|
+
import path5 from "path";
|
|
413
|
+
import { z as z3 } from "zod";
|
|
414
|
+
// Zod schema for the structured analysis requested from a provider.
// List sizes are capped (12 concepts, 12 entities, 8 claims, 6 questions)
// and every list defaults to empty.
var sourceAnalysisSchema = (() => {
  const nonEmptyString = () => z3.string().min(1);
  const namedItem = () => z3.object({ name: nonEmptyString(), description: z3.string().default("") });
  const claim = z3.object({
    text: nonEmptyString(),
    confidence: z3.number().min(0).max(1).default(0.6),
    status: z3.enum(["extracted", "inferred", "conflicted", "stale"]).default("extracted"),
    polarity: z3.enum(["positive", "negative", "neutral"]).default("neutral"),
    citation: nonEmptyString()
  });
  return z3.object({
    title: nonEmptyString(),
    summary: nonEmptyString(),
    concepts: z3.array(namedItem()).max(12).default([]),
    entities: z3.array(namedItem()).max(12).default([]),
    claims: z3.array(claim).max(8).default([]),
    questions: z3.array(z3.string()).max(6).default([])
  });
})();
|
|
428
|
+
// Common English words that extractTopTerms skips when counting terms.
var STOPWORDS = /* @__PURE__ */ new Set(
  (
    "about after also been being between both could does each " +
    "from have into just more much only other over same " +
    "some such than that their there these they this very " +
    "what when where which while with would your"
  ).split(" ")
);
|
|
468
|
+
/**
 * Returns the most frequent terms in a text.
 *
 * A term is a lowercase run of at least four characters that starts with a
 * letter and continues with letters, digits or hyphens. Stopwords are
 * ignored. Ties in frequency are broken by `localeCompare` on the term.
 *
 * @param {string} text - Text to scan.
 * @param {number} count - Maximum number of terms to return.
 * @returns {string[]} Terms ordered from most to least frequent.
 */
function extractTopTerms(text, count) {
  const tokens = text.toLowerCase().match(/[a-z][a-z0-9-]{3,}/g) ?? [];
  const frequency = new Map();
  for (const token of tokens) {
    if (!STOPWORDS.has(token)) {
      frequency.set(token, (frequency.get(token) ?? 0) + 1);
    }
  }
  const byCountThenName = ([leftTerm, leftCount], [rightTerm, rightCount]) =>
    rightCount - leftCount || leftTerm.localeCompare(rightTerm);
  const ranked = Array.from(frequency).sort(byCountThenName);
  return ranked.slice(0, count).map(([term]) => term);
}
|
|
478
|
+
/**
 * Collects capitalized phrases from a text as candidate entities.
 *
 * A phrase is one to three consecutive words that each start with an
 * uppercase ASCII letter. Phrases are whitespace-normalized and
 * de-duplicated case-insensitively, keeping the first spelling seen.
 *
 * @param {string} text - Text to scan.
 * @param {number} count - Maximum number of phrases to return.
 * @returns {string[]} Distinct phrases in order of first appearance.
 */
function extractEntities(text, count) {
  const capitalizedRuns = text.match(/\b[A-Z][A-Za-z0-9-]+(?:\s+[A-Z][A-Za-z0-9-]+){0,2}\b/g) ?? [];
  const cleaned = capitalizedRuns.map((run) => normalizeWhitespace(run));
  const distinct = uniqueBy(cleaned, (phrase) => phrase.toLowerCase());
  return distinct.slice(0, count);
}
|
|
482
|
+
/**
 * Classifies a sentence by keyword matching.
 *
 * Negation words are checked first, so a sentence containing both a negation
 * and an affirmation word is reported as negative.
 *
 * @param {string} text - Sentence to classify.
 * @returns {"negative" | "positive" | "neutral"}
 */
function detectPolarity(text) {
  const negation = /\b(no|not|never|cannot|can't|won't|without)\b/i;
  const affirmation = /\b(is|are|will|does|supports|enables|improves|includes)\b/i;
  if (negation.test(text)) {
    return "negative";
  }
  return affirmation.test(text) ? "positive" : "neutral";
}
|
|
491
|
+
/**
 * Chooses a title for an analysis.
 *
 * @param {{title: string}} manifest - Manifest supplying the fallback title.
 * @param {string} text - Text searched for a line starting with a single `#`
 *   followed by whitespace.
 * @returns {string} The trimmed heading text when one is found and non-empty,
 *   otherwise `manifest.title`.
 */
function deriveTitle(manifest, text) {
  const found = /^#\s+(.+)$/m.exec(text);
  const heading = found ? found[1].trim() : "";
  if (heading) {
    return heading;
  }
  return manifest.title;
}
|
|
495
|
+
/**
 * Builds a source analysis without calling a model provider.
 *
 * Concepts are the six most frequent terms, entities are up to six
 * capitalized phrases, claims are the first four sentences, and questions
 * are generated from the first three concepts.
 *
 * @param {object} manifest - Source manifest (`sourceId`, `contentHash`,
 *   `title` and `sourceKind` are read).
 * @param {string} text - Extracted source text.
 * @returns {object} Analysis record stamped with the current time.
 */
function heuristicAnalysis(manifest, text) {
  const normalized = normalizeWhitespace(text);
  const toNode = (prefix, describe) => (term) => ({
    id: `${prefix}:${slugify(term)}`,
    name: term,
    description: describe()
  });
  const concepts = extractTopTerms(normalized, 6).map(
    toNode("concept", () => `Frequently referenced concept in ${manifest.title}.`)
  );
  const entities = extractEntities(text, 6).map(
    toNode("entity", () => `Named entity mentioned in ${manifest.title}.`)
  );
  const sentences = normalized.split(/(?<=[.!?])\s+/).filter(Boolean);
  const claimSentences = sentences.slice(0, 4);
  const toClaim = (sentence, index) => ({
    id: `claim:${manifest.sourceId}:${index + 1}`,
    text: sentence,
    confidence: 0.55,
    status: "extracted",
    polarity: detectPolarity(sentence),
    citation: manifest.sourceId
  });
  const toQuestion = (concept) => `How does ${concept.name} relate to ${manifest.title}?`;
  return {
    sourceId: manifest.sourceId,
    sourceHash: manifest.contentHash,
    title: deriveTitle(manifest, text),
    summary: firstSentences(normalized, 3) || truncate(normalized, 280) || `Imported ${manifest.sourceKind} source.`,
    concepts,
    entities,
    claims: claimSentences.map(toClaim),
    questions: concepts.slice(0, 3).map(toQuestion),
    producedAt: new Date().toISOString()
  };
}
|
|
527
|
+
/**
 * Asks a provider for a structured analysis of a source.
 *
 * The prompt carries the manifest's title, kind and id plus the text
 * truncated to 18,000 characters. The response is requested against
 * `sourceAnalysisSchema` and then mapped to an analysis record with
 * generated concept, entity and claim ids.
 *
 * @param {object} manifest - Source manifest.
 * @param {string} text - Source text included in the prompt.
 * @param {object} provider - Provider exposing `generateStructured(request, schema)`.
 * @returns {Promise<object>} Analysis record stamped with the current time.
 */
async function providerAnalysis(manifest, text, provider) {
  const promptLines = [
    "Analyze the following source and return structured JSON.",
    "",
    `Source title: ${manifest.title}`,
    `Source kind: ${manifest.sourceKind}`,
    `Source id: ${manifest.sourceId}`,
    "",
    "Text:",
    `${truncate(text, 18e3)}`
  ];
  const request = {
    system: "You are compiling a durable markdown wiki and graph. Prefer grounded synthesis over creativity.",
    prompt: promptLines.join("\n")
  };
  const parsed = await provider.generateStructured(request, sourceAnalysisSchema);
  const toNode = (prefix) => ({ name, description }) => ({
    id: `${prefix}:${slugify(name)}`,
    name,
    description
  });
  const toClaim = (claim, index) => ({
    id: `claim:${manifest.sourceId}:${index + 1}`,
    text: claim.text,
    confidence: claim.confidence,
    status: claim.status,
    polarity: claim.polarity,
    citation: claim.citation
  });
  return {
    sourceId: manifest.sourceId,
    sourceHash: manifest.contentHash,
    title: parsed.title,
    summary: parsed.summary,
    concepts: parsed.concepts.map(toNode("concept")),
    entities: parsed.entities.map(toNode("entity")),
    claims: parsed.claims.map(toClaim),
    questions: parsed.questions,
    producedAt: new Date().toISOString()
  };
}
|
|
569
|
+
/**
 * Produces the analysis for a source, reusing the cached result when it was
 * computed for the same content hash.
 *
 * Sources without text get a placeholder analysis. Heuristic providers use
 * `heuristicAnalysis`; other providers use `providerAnalysis` and fall back
 * to the heuristic analysis when the provider call throws. The result is
 * written to the analyses directory before it is returned.
 *
 * @param {object} manifest - Source manifest.
 * @param {string | undefined | null} extractedText - Text extracted from the source.
 * @param {object} provider - Provider; `provider.type` selects the strategy.
 * @param {object} paths - Resolved vault paths (`analysesDir` is used).
 * @returns {Promise<object>} The cached or freshly computed analysis.
 */
async function analyzeSource(manifest, extractedText, provider, paths) {
  const cachePath = path5.join(paths.analysesDir, `${manifest.sourceId}.json`);
  const cached = await readJsonFile(cachePath);
  if (cached && cached.sourceHash === manifest.contentHash) {
    return cached;
  }
  const rawText = extractedText ?? "";
  const content = normalizeWhitespace(rawText);
  let analysis;
  if (!content) {
    analysis = {
      sourceId: manifest.sourceId,
      sourceHash: manifest.contentHash,
      title: manifest.title,
      summary: `Imported ${manifest.sourceKind} source. Text extraction is not yet available for this source.`,
      concepts: [],
      entities: [],
      claims: [],
      questions: [],
      producedAt: new Date().toISOString()
    };
  } else if (provider.type === "heuristic") {
    // The heuristic path gets the raw text: it collapses whitespace itself
    // where needed, but its heading detection is line-based. Collapsed text
    // made the "# Heading" match swallow the whole document as the title.
    analysis = heuristicAnalysis(manifest, rawText);
  } else {
    try {
      analysis = await providerAnalysis(manifest, content, provider);
    } catch {
      // Best-effort: any provider failure degrades to the heuristic analysis.
      analysis = heuristicAnalysis(manifest, rawText);
    }
  }
  await writeJsonFile(cachePath, analysis);
  return analysis;
}
|
|
601
|
+
/**
 * Fingerprints an analysis record.
 *
 * @param {object} analysis - Analysis to fingerprint.
 * @returns {string} SHA-256 hex digest of the record's `JSON.stringify`
 *   output, so property order affects the result.
 */
function analysisSignature(analysis) {
  const serialized = JSON.stringify(analysis);
  return sha256(serialized);
}
|
|
604
|
+
|
|
605
|
+
// src/agents.ts
|
|
606
|
+
import fs4 from "fs/promises";
|
|
607
|
+
import path6 from "path";
|
|
608
|
+
// HTML-comment markers that delimit the generated rules block inside agent
// instruction files.
var managedStart = "<!-- swarmvault:managed:start -->";
var managedEnd = "<!-- swarmvault:managed:end -->";
// Older marker spelling, still recognized when locating an existing block.
var legacyManagedStart = "<!-- vault:managed:start -->";
var legacyManagedEnd = "<!-- vault:managed:end -->";
|
|
612
|
+
/**
 * Renders the managed rules block for an agent instruction file.
 *
 * The block is wrapped in the managed start/end markers and ends with a
 * newline. Every agent gets the same rules; only the heading names the agent.
 *
 * @param {string} agent - Agent name shown in the heading.
 * @returns {string} The rules block.
 */
function buildManagedBlock(agent) {
  // The former `agent === "cursor"` branch returned the same text as the
  // default path, so it has been removed.
  return [
    managedStart,
    `# SwarmVault Rules (${agent})`,
    "",
    "- Treat `raw/` as immutable source input.",
    "- Treat `wiki/` as generated markdown owned by the agent and compiler workflow.",
    "- Read `wiki/index.md` before broad file searching when answering SwarmVault questions.",
    "- Preserve frontmatter fields including `page_id`, `source_ids`, `node_ids`, `freshness`, and `source_hashes`.",
    "- Save high-value answers back into `wiki/outputs/` instead of leaving them only in chat.",
    "- Prefer `swarmvault ingest`, `swarmvault compile`, `swarmvault query`, and `swarmvault lint` for SwarmVault maintenance tasks.",
    managedEnd,
    ""
  ].join("\n");
}
|
|
631
|
+
/**
 * Inserts or replaces the managed block inside a file.
 *
 * - Missing or empty file: the file is created holding just the block.
 * - File with a start marker followed by an end marker (current or legacy
 *   spelling): that region is replaced by the block.
 * - Otherwise: the block is appended after the existing content.
 *
 * @param {string} filePath - File to update.
 * @param {string} block - Managed block text, including its markers.
 * @returns {Promise<void>}
 */
async function upsertManagedBlock(filePath, block) {
  const existing = (await fileExists(filePath)) ? await fs4.readFile(filePath, "utf8") : "";
  if (!existing) {
    await ensureDir(path6.dirname(filePath));
    await fs4.writeFile(filePath, `${block}\n`, "utf8");
    return;
  }

  // Finds the current marker, else the legacy one, at or after `from`.
  // The matched marker's own length is returned because the two spellings
  // differ in length.
  const findMarker = (current, legacy, from) => {
    for (const marker of [current, legacy]) {
      const index = existing.indexOf(marker, from);
      if (index !== -1) {
        return { index, length: marker.length };
      }
    }
    return null;
  };

  const start = findMarker(managedStart, legacyManagedStart, 0);
  // Only an end marker after the start marker closes the block.
  const end = start ? findMarker(managedEnd, legacyManagedEnd, start.index + start.length) : null;
  if (start && end) {
    let tail = existing.slice(end.index + end.length);
    // The block supplies its own trailing newline; dropping the one that
    // followed the old end marker keeps repeated upserts from growing the file.
    if (block.endsWith("\n") && tail.startsWith("\n")) {
      tail = tail.slice(1);
    }
    const next = `${existing.slice(0, start.index)}${block}${tail}`;
    await fs4.writeFile(filePath, next, "utf8");
    return;
  }

  await fs4.writeFile(filePath, `${existing.trimEnd()}\n\n${block}\n`, "utf8");
}
|
|
651
|
+
/**
 * Installs the managed rules for one agent.
 *
 * `codex` and `claude` get the block upserted into `AGENTS.md` and
 * `CLAUDE.md` in the root directory. `cursor` gets
 * `.cursor/rules/swarmvault.mdc` overwritten with the block.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {string} agent - `"codex"`, `"claude"` or `"cursor"`.
 * @returns {Promise<string>} Path of the file that was written.
 * @throws {Error} When the agent is not one of the supported names.
 */
async function installAgent(rootDir, agent) {
  // Called for its workspace-initializing side effects; the result is unused.
  await initWorkspace(rootDir);
  const block = buildManagedBlock(agent);
  const markdownTargets = new Map([
    ["codex", "AGENTS.md"],
    ["claude", "CLAUDE.md"]
  ]);
  if (markdownTargets.has(agent)) {
    const target = path6.join(rootDir, markdownTargets.get(agent));
    await upsertManagedBlock(target, block);
    return target;
  }
  if (agent === "cursor") {
    const rulesDir = path6.join(rootDir, ".cursor", "rules");
    await ensureDir(rulesDir);
    const target = path6.join(rulesDir, "swarmvault.mdc");
    await fs4.writeFile(target, `${block}\n`, "utf8");
    return target;
  }
  throw new Error(`Unsupported agent ${String(agent)}`);
}
|
|
677
|
+
/**
 * Installs the managed rules for every agent listed in the vault config.
 * The installs are started together and awaited as a group.
 *
 * @param {string} rootDir - Vault root directory.
 * @returns {Promise<string[]>} Written file paths, in config order.
 */
async function installConfiguredAgents(rootDir) {
  const {
    config: { agents }
  } = await initWorkspace(rootDir);
  const installs = agents.map((agent) => installAgent(rootDir, agent));
  return Promise.all(installs);
}
|
|
681
|
+
|
|
682
|
+
// src/markdown.ts
|
|
683
|
+
import matter from "gray-matter";
|
|
684
|
+
/**
 * Maps a page kind and slug to the page's wiki-relative markdown path.
 *
 * @param {string} kind - Page kind; `source`, `concept`, `entity` and
 *   `output` each map to their own folder.
 * @param {string} slug - Page slug, used as the file name.
 * @returns {string} `<folder>/<slug>.md` for a known kind, otherwise
 *   `<slug>.md`.
 */
function pagePathFor(kind, slug) {
  const folders = new Map([
    ["source", "sources"],
    ["concept", "concepts"],
    ["entity", "entities"],
    ["output", "outputs"]
  ]);
  const folder = folders.get(kind);
  return folder === undefined ? `${slug}.md` : `${folder}/${slug}.md`;
}
|
|
698
|
+
/**
 * Renders the wiki page for a single source.
 *
 * The page has Summary, Concepts, Entities, Claims and Questions sections.
 * Each empty section shows a placeholder bullet. Concepts and entities link
 * to their aggregate pages.
 *
 * @param {object} manifest - Source manifest.
 * @param {object} analysis - Analysis of that source.
 * @returns {{page: object, content: string}} Page metadata plus the markdown
 *   document with frontmatter, as produced by `matter.stringify`.
 */
function buildSourcePage(manifest, analysis) {
  const { sourceId, contentHash } = manifest;
  const { concepts, entities, claims, questions } = analysis;
  const relativePath = pagePathFor("source", sourceId);
  const pageId = `source:${sourceId}`;
  const nodeIds = [`source:${sourceId}`, ...concepts.map((item) => item.id), ...entities.map((item) => item.id)];
  const backlinks = [
    ...concepts.map((item) => `concept:${slugify(item.name)}`),
    ...entities.map((item) => `entity:${slugify(item.name)}`)
  ];

  // Bullet linking to an item's aggregate page, e.g. "- [[concepts/x|X]]: ...".
  const linkBullet = (kind) => (item) => {
    const target = pagePathFor(kind, slugify(item.name)).replace(/\.md$/, "");
    return `- [[${target}|${item.name}]]: ${item.description}`;
  };
  // Section lines, or a single placeholder bullet when the section is empty.
  const section = (items, render, placeholder) => (items.length ? items.map(render) : [placeholder]);

  const originLine = manifest.url
    ? `Source URL: ${manifest.url}`
    : `Source Path: \`${manifest.originalPath ?? manifest.storedPath}\``;

  const frontmatter = {
    page_id: pageId,
    kind: "source",
    title: analysis.title,
    tags: ["source"],
    source_ids: [sourceId],
    node_ids: nodeIds,
    freshness: "fresh",
    confidence: 0.8,
    updated_at: analysis.producedAt,
    backlinks,
    source_hashes: { [sourceId]: contentHash }
  };

  const bodyLines = [
    `# ${analysis.title}`,
    "",
    `Source ID: \`${sourceId}\``,
    originLine,
    "",
    "## Summary",
    "",
    analysis.summary,
    "",
    "## Concepts",
    "",
    ...section(concepts, linkBullet("concept"), "- None detected."),
    "",
    "## Entities",
    "",
    ...section(entities, linkBullet("entity"), "- None detected."),
    "",
    "## Claims",
    "",
    ...section(claims, (claim) => `- ${claim.text} [source:${claim.citation}]`, "- No claims extracted."),
    "",
    "## Questions",
    "",
    ...section(questions, (question) => `- ${question}`, "- No follow-up questions yet."),
    ""
  ];
  const body = bodyLines.join("\n");

  const page = {
    id: pageId,
    path: relativePath,
    title: analysis.title,
    kind: "source",
    sourceIds: [sourceId],
    nodeIds,
    freshness: "fresh",
    confidence: 0.8,
    backlinks,
    sourceHashes: { [sourceId]: contentHash }
  };
  return { page, content: matter.stringify(body, frontmatter) };
}
|
|
768
|
+
/**
 * Builds the wiki page for one concept or entity aggregated across sources.
 *
 * @param {string} kind - Page kind; used as the page-id prefix, the single tag, and the `pagePathFor` kind.
 * @param {string} name - Display name; slugified to derive the page id and path.
 * @param {string[]} descriptions - Candidate descriptions; the first truthy one becomes the summary.
 * @param {object[]} sourceAnalyses - Analyses mentioning the item (reads `sourceId`, `title`, `claims`).
 * @param {object} sourceHashes - Stored unchanged in the frontmatter and the page record.
 * @returns {{page: object, content: string}} Page record plus the text produced by `matter.stringify`.
 */
function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes) {
  const slug = slugify(name);
  const relativePath = pagePathFor(kind, slug);
  const pageId = `${kind}:${slug}`;

  const sourceIds = [];
  const otherPages = [];
  for (const analysis of sourceAnalyses) {
    sourceIds.push(analysis.sourceId);
    otherPages.push(`source:${analysis.sourceId}`);
  }

  // Fall back to a generic sentence when no description is usable.
  const firstDescription = descriptions.find(Boolean);
  const summary = firstDescription ?? `${kind} aggregated from ${sourceIds.length} source(s).`;

  const frontmatter = {
    page_id: pageId,
    kind,
    title: name,
    tags: [kind],
    source_ids: sourceIds,
    node_ids: [pageId],
    freshness: "fresh",
    confidence: 0.72,
    updated_at: new Date().toISOString(),
    backlinks: otherPages,
    source_hashes: sourceHashes
  };

  const seenInLines = [];
  for (const analysis of sourceAnalyses) {
    const target = pagePathFor("source", analysis.sourceId).replace(/\.md$/, "");
    seenInLines.push(`- [[${target}|${analysis.title}]]`);
  }

  // Only claims whose text mentions the item name (case-insensitive) are listed.
  const needle = name.toLowerCase();
  const claimLines = [];
  for (const analysis of sourceAnalyses) {
    for (const claim of analysis.claims) {
      if (claim.text.toLowerCase().includes(needle)) {
        claimLines.push(`- ${claim.text} [source:${claim.citation}]`);
      }
    }
  }

  const body = [
    `# ${name}`,
    "",
    "## Summary",
    "",
    summary,
    "",
    "## Seen In",
    "",
    ...seenInLines,
    "",
    "## Source Claims",
    "",
    ...claimLines,
    ""
  ].join("\n");

  const page = {
    id: pageId,
    path: relativePath,
    title: name,
    kind,
    sourceIds,
    nodeIds: [pageId],
    freshness: "fresh",
    confidence: 0.72,
    backlinks: otherPages,
    sourceHashes
  };
  return { page, content: matter.stringify(body, frontmatter) };
}
|
|
822
|
+
/**
 * Renders the top-level vault index: hand-written YAML frontmatter followed by
 * wiki-link lists of source, concept and entity pages.
 *
 * @param {Array<{kind: string, path: string, title: string}>} pages - Page records to list.
 * @returns {string} Full markdown text of the index page.
 */
function buildIndexPage(pages) {
  // One section = heading, blank line, links (or a placeholder), blank line.
  const section = (heading, kind, placeholder) => {
    const links = [];
    for (const page of pages) {
      if (page.kind === kind) {
        links.push(`- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`);
      }
    }
    return [heading, "", ...(links.length ? links : [placeholder]), ""];
  };

  const frontmatter = [
    "---",
    "page_id: index",
    "kind: index",
    "title: SwarmVault Index",
    "tags:",
    " - index",
    "source_ids: []",
    "node_ids: []",
    "freshness: fresh",
    "confidence: 1",
    `updated_at: ${new Date().toISOString()}`,
    "backlinks: []",
    "source_hashes: {}",
    "---"
  ];

  const lines = [
    ...frontmatter,
    "",
    "# SwarmVault Index",
    "",
    ...section("## Sources", "source", "- No sources yet."),
    ...section("## Concepts", "concept", "- No concepts yet."),
    ...section("## Entities", "entity", "- No entities yet.")
  ];
  return lines.join("\n");
}
|
|
858
|
+
/**
 * Renders a per-section index page: a capitalised heading followed by one
 * wiki link per page.
 *
 * @param {string} kind - Section name; its first character is upper-cased for the heading.
 * @param {Array<{path: string, title: string}>} pages - Pages to link.
 * @returns {string} Markdown text of the section index.
 */
function buildSectionIndex(kind, pages) {
  const heading = `# ${kind.slice(0, 1).toUpperCase()}${kind.slice(1)}`;
  const lines = [heading, ""];
  for (const page of pages) {
    const target = page.path.replace(/\.md$/, "");
    lines.push(`- [[${target}|${page.title}]]`);
  }
  lines.push("");
  return lines.join("\n");
}
|
|
867
|
+
/**
 * Builds a saved-answer ("output") page for a question.
 *
 * @param {string} question - Used as the title and slugified for the page id and path.
 * @param {string} answer - Answer text placed under the heading.
 * @param {string[]} citations - Source ids; listed as citations and turned into `source:` backlinks.
 * @returns {{page: object, content: string}} Page record plus the text produced by `matter.stringify`.
 */
function buildOutputPage(question, answer, citations) {
  const slug = slugify(question);
  const pageId = `output:${slug}`;
  const pathValue = pagePathFor("output", slug);
  const toBacklinks = () => citations.map((sourceId) => `source:${sourceId}`);

  const frontmatter = {
    page_id: pageId,
    kind: "output",
    title: question,
    tags: ["output"],
    source_ids: citations,
    node_ids: [],
    freshness: "fresh",
    confidence: 0.74,
    updated_at: new Date().toISOString(),
    backlinks: toBacklinks(),
    source_hashes: {}
  };

  const page = {
    id: pageId,
    path: pathValue,
    title: question,
    kind: "output",
    sourceIds: citations,
    nodeIds: [],
    freshness: "fresh",
    confidence: 0.74,
    backlinks: toBacklinks(),
    sourceHashes: {}
  };

  const bodyLines = [`# ${question}`, "", answer, "", "## Citations", ""];
  for (const citation of citations) {
    bodyLines.push(`- [source:${citation}]`);
  }
  bodyLines.push("");

  return { page, content: matter.stringify(bodyLines.join("\n"), frontmatter) };
}
|
|
912
|
+
|
|
913
|
+
// src/providers/registry.ts
|
|
914
|
+
import path7 from "path";
|
|
915
|
+
import { pathToFileURL } from "url";
|
|
916
|
+
import { z as z5 } from "zod";
|
|
917
|
+
|
|
918
|
+
// src/providers/base.ts
|
|
919
|
+
import fs5 from "fs/promises";
|
|
920
|
+
import { z as z4 } from "zod";
|
|
921
|
+
/**
 * Shared base for provider adapters. Holds identity and capability data and
 * layers structured generation on top of a subclass-provided `generateText`.
 */
var BaseProviderAdapter = class {
  id;
  type;
  model;
  capabilities;

  /**
   * @param {string} id - Provider id.
   * @param {string} type - Provider type label.
   * @param {string} model - Model name.
   * @param {Iterable<string>} capabilities - Capability names; stored as a Set.
   */
  constructor(id, type, model, capabilities) {
    this.id = id;
    this.type = type;
    this.model = model;
    this.capabilities = new Set(capabilities);
  }

  /**
   * Asks the provider for JSON matching `schema` and validates the reply.
   * The JSON Schema of `schema` is appended to the prompt; the reply text is
   * run through `extractJson`, parsed, then passed to `schema.parse`.
   *
   * @param {object} request - Request forwarded to `generateText` with an extended `prompt`.
   * @param {object} schema - Zod schema describing the expected result.
   * @returns {Promise<*>} The value returned by `schema.parse`.
   */
  async generateStructured(request, schema) {
    const schemaDescription = JSON.stringify(z4.toJSONSchema(schema), null, 2);
    const prompt = [
      request.prompt,
      "",
      "Return JSON only. Follow this JSON Schema exactly:",
      schemaDescription
    ].join("\n");
    const { text } = await this.generateText({ ...request, prompt });
    const candidate = JSON.parse(extractJson(text));
    return schema.parse(candidate);
  }

  /**
   * Reads every attachment file and returns its base64 content with its MIME type.
   *
   * @param {Array<{mimeType: string, filePath: string}>} [attachments=[]]
   * @returns {Promise<Array<{mimeType: string, base64: string}>>}
   */
  async encodeAttachments(attachments = []) {
    const encodeOne = async ({ mimeType, filePath }) => ({
      mimeType,
      base64: await fs5.readFile(filePath, "base64")
    });
    return Promise.all(attachments.map((attachment) => encodeOne(attachment)));
  }
};
|
|
953
|
+
|
|
954
|
+
// src/providers/heuristic.ts
|
|
955
|
+
/**
 * Produces a short summary of a prompt for the heuristic provider.
 *
 * @param {string} prompt - Raw prompt text.
 * @returns {string} A fixed message when the whitespace-normalised prompt is empty;
 *   otherwise the result of `firstSentences(cleaned, 2)`, or the first 280
 *   characters when that result is falsy.
 */
function summarizePrompt(prompt) {
  const cleaned = normalizeWhitespace(prompt);
  if (cleaned) {
    const lead = firstSentences(cleaned, 2);
    return lead || cleaned.slice(0, 280);
  }
  return "No prompt content provided.";
}
|
|
962
|
+
/**
 * Provider that makes no network call: it answers with a canned prefix plus a
 * summary of the prompt.
 */
var HeuristicProviderAdapter = class extends BaseProviderAdapter {
  /**
   * @param {string} id - Provider id.
   * @param {string} model - Model name passed to the base adapter.
   */
  constructor(id, model) {
    const capabilities = ["chat", "structured", "vision", "local"];
    super(id, "heuristic", model, capabilities);
  }

  /**
   * @param {{prompt: string, attachments?: Array}} request
   * @returns {Promise<{text: string}>} Canned response mentioning the attachment count (when non-zero) and the prompt summary.
   */
  async generateText(request) {
    const attachmentCount = request.attachments?.length;
    const attachmentHint = attachmentCount ? ` Attachments: ${attachmentCount}.` : "";
    const summary = summarizePrompt(request.prompt);
    const text = `Heuristic provider response.${attachmentHint} ${summary}`.trim();
    return { text };
  }
};
|
|
973
|
+
|
|
974
|
+
// src/providers/openai-compatible.ts
|
|
975
|
+
/**
 * Builds the bearer-token header object for an API key.
 *
 * @param {string|undefined} apiKey - Key to send; falsy values produce no header.
 * @returns {object} `{ Authorization: "Bearer <key>" }` or an empty object.
 */
function buildAuthHeaders(apiKey) {
  if (!apiKey) {
    return {};
  }
  return { Authorization: `Bearer ${apiKey}` };
}
|
|
978
|
+
/**
 * Adapter for OpenAI-style HTTP APIs. Depending on `apiStyle` it posts to
 * `<baseUrl>/responses` (default) or `<baseUrl>/chat/completions`.
 */
var OpenAiCompatibleProviderAdapter = class extends BaseProviderAdapter {
  baseUrl;
  apiKey;
  headers;
  apiStyle;

  /**
   * @param {string} id - Provider id.
   * @param {string} type - Provider type label.
   * @param {string} model - Model name sent with every request.
   * @param {object} options - `baseUrl` (trailing slashes stripped), optional `apiKey`,
   *   optional extra `headers`, optional `apiStyle` (defaults to "responses"), `capabilities`.
   */
  constructor(id, type, model, options) {
    super(id, type, model, options.capabilities);
    this.baseUrl = options.baseUrl.replace(/\/+$/, "");
    this.apiKey = options.apiKey;
    this.headers = options.headers;
    this.apiStyle = options.apiStyle ?? "responses";
  }

  /**
   * Generates text through the configured API style.
   *
   * @param {object} request - Reads `prompt`, `system`, `attachments`, `maxOutputTokens`.
   * @returns {Promise<{text: string, usage: (object|undefined)}>}
   * @throws {Error} When the HTTP response is not ok.
   */
  async generateText(request) {
    if (this.apiStyle === "chat") {
      return this.generateViaChatCompletions(request);
    }
    return this.generateViaResponses(request);
  }

  /** Sends the request to the `/responses` endpoint. */
  async generateViaResponses(request) {
    const encodedAttachments = await this.encodeAttachments(request.attachments);
    // A plain string is enough without attachments; otherwise send a multi-part user message.
    const input = encodedAttachments.length ? [
      {
        role: "user",
        content: [
          { type: "input_text", text: request.prompt },
          ...encodedAttachments.map((item) => ({
            type: "input_image",
            image_url: `data:${item.mimeType};base64,${item.base64}`
          }))
        ]
      }
    ] : request.prompt;
    const payload = await this.postJson("/responses", {
      model: this.model,
      input,
      instructions: request.system,
      max_output_tokens: request.maxOutputTokens
    });
    return {
      text: this.extractResponsesText(payload),
      usage: payload.usage ? { inputTokens: payload.usage.input_tokens, outputTokens: payload.usage.output_tokens } : void 0
    };
  }

  /** Sends the request to the `/chat/completions` endpoint. */
  async generateViaChatCompletions(request) {
    const encodedAttachments = await this.encodeAttachments(request.attachments);
    const content = encodedAttachments.length ? [
      { type: "text", text: request.prompt },
      ...encodedAttachments.map((item) => ({
        type: "image_url",
        image_url: {
          url: `data:${item.mimeType};base64,${item.base64}`
        }
      }))
    ] : request.prompt;
    const messages = [
      ...request.system ? [{ role: "system", content: request.system }] : [],
      { role: "user", content }
    ];
    const payload = await this.postJson("/chat/completions", {
      model: this.model,
      messages,
      max_tokens: request.maxOutputTokens
    });
    const contentValue = payload.choices?.[0]?.message?.content;
    // Message content may be a string or an array of parts carrying `text`.
    const text = Array.isArray(contentValue) ? contentValue.map((item) => item.text ?? "").join("\n") : contentValue ?? "";
    return {
      text,
      usage: payload.usage ? { inputTokens: payload.usage.prompt_tokens, outputTokens: payload.usage.completion_tokens } : void 0
    };
  }

  /**
   * POSTs a JSON body to `<baseUrl><endpoint>` and returns the parsed JSON reply.
   * Custom headers are spread last so they can override the defaults.
   *
   * @param {string} endpoint - Path appended to `baseUrl`, starting with "/".
   * @param {object} body - JSON-serialisable request body.
   * @returns {Promise<object>} Parsed response payload.
   * @throws {Error} When the HTTP response is not ok.
   */
  async postJson(endpoint, body) {
    const response = await fetch(`${this.baseUrl}${endpoint}`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        ...buildAuthHeaders(this.apiKey),
        ...this.headers
      },
      body: JSON.stringify(body)
    });
    if (!response.ok) {
      throw new Error(`Provider ${this.id} failed: ${response.status} ${response.statusText}`);
    }
    return response.json();
  }

  /**
   * Extracts the generated text from a `/responses` payload.
   *
   * The raw HTTP payload carries text inside `output[].content[]` parts of type
   * "output_text"; a top-level `output_text` string is only added by the
   * official SDKs. It is honoured when present and rebuilt from the parts otherwise.
   *
   * @param {object} payload - Parsed response body.
   * @returns {string} Concatenated output text, or "" when none is present.
   */
  extractResponsesText(payload) {
    if (typeof payload.output_text === "string") {
      return payload.output_text;
    }
    const items = Array.isArray(payload.output) ? payload.output : [];
    const chunks = [];
    for (const item of items) {
      if (item?.type !== "message" || !Array.isArray(item.content)) {
        continue;
      }
      for (const part of item.content) {
        if (part?.type === "output_text" && typeof part.text === "string") {
          chunks.push(part.text);
        }
      }
    }
    return chunks.join("");
  }
};
|
|
1073
|
+
|
|
1074
|
+
// src/providers/anthropic.ts
|
|
1075
|
+
/**
 * Adapter that posts to the Anthropic `/v1/messages` endpoint.
 */
var AnthropicProviderAdapter = class extends BaseProviderAdapter {
  apiKey;
  headers;
  baseUrl;

  /**
   * @param {string} id - Provider id.
   * @param {string} model - Model name sent with every request.
   * @param {object} options - Optional `apiKey`, extra `headers`, and `baseUrl`
   *   (defaults to https://api.anthropic.com; trailing slashes stripped).
   */
  constructor(id, model, options) {
    super(id, "anthropic", model, ["chat", "structured", "tools", "vision", "streaming"]);
    const { apiKey, headers, baseUrl } = options;
    this.apiKey = apiKey;
    this.headers = headers;
    this.baseUrl = (baseUrl ?? "https://api.anthropic.com").replace(/\/+$/, "");
  }

  /**
   * @param {object} request - Reads `prompt`, `system`, `attachments`, `maxOutputTokens` (default 1200).
   * @returns {Promise<{text: string, usage: (object|undefined)}>} Text blocks joined with newlines.
   * @throws {Error} When the HTTP response is not ok.
   */
  async generateText(request) {
    const encodedAttachments = await this.encodeAttachments(request.attachments);
    const imageBlocks = encodedAttachments.map((item) => ({
      type: "image",
      source: { type: "base64", media_type: item.mimeType, data: item.base64 }
    }));
    const content = [{ type: "text", text: request.prompt }, ...imageBlocks];

    // Custom headers are spread last so they can override the defaults.
    const headers = {
      "content-type": "application/json",
      "anthropic-version": "2023-06-01",
      ...(this.apiKey ? { "x-api-key": this.apiKey } : {}),
      ...this.headers
    };
    const requestBody = {
      model: this.model,
      max_tokens: request.maxOutputTokens ?? 1200,
      system: request.system,
      messages: [{ role: "user", content }]
    };

    const response = await fetch(`${this.baseUrl}/v1/messages`, {
      method: "POST",
      headers,
      body: JSON.stringify(requestBody)
    });
    if (!response.ok) {
      throw new Error(`Provider ${this.id} failed: ${response.status} ${response.statusText}`);
    }

    const payload = await response.json();
    const blocks = payload.content;
    const text = blocks == null ? "" : blocks.filter((item) => item.type === "text").map((item) => item.text ?? "").join("\n");
    const usage = payload.usage ? { inputTokens: payload.usage.input_tokens, outputTokens: payload.usage.output_tokens } : void 0;
    return { text, usage };
  }
};
|
|
1128
|
+
|
|
1129
|
+
// src/providers/gemini.ts
|
|
1130
|
+
/**
 * Adapter that posts to the Gemini `models/<model>:generateContent` endpoint.
 */
var GeminiProviderAdapter = class extends BaseProviderAdapter {
  apiKey;
  baseUrl;

  /**
   * @param {string} id - Provider id.
   * @param {string} model - Model name used in the request URL.
   * @param {object} options - Optional `apiKey` and `baseUrl`
   *   (defaults to the v1beta generativelanguage endpoint; trailing slashes stripped).
   */
  constructor(id, model, options) {
    super(id, "gemini", model, ["chat", "structured", "vision", "tools", "streaming"]);
    const { apiKey, baseUrl } = options;
    this.apiKey = apiKey;
    this.baseUrl = (baseUrl ?? "https://generativelanguage.googleapis.com/v1beta").replace(/\/+$/, "");
  }

  /**
   * @param {object} request - Reads `prompt`, `system`, `attachments`, `maxOutputTokens` (default 1200).
   * @returns {Promise<{text: string, usage: (object|undefined)}>} Parts of the first candidate joined with newlines.
   * @throws {Error} When the HTTP response is not ok.
   */
  async generateText(request) {
    const encodedAttachments = await this.encodeAttachments(request.attachments);

    // The system prompt is sent as a leading text part of the user message.
    const parts = [];
    if (request.system) {
      parts.push({ text: `System instructions:\n${request.system}` });
    }
    parts.push({ text: request.prompt });
    for (const item of encodedAttachments) {
      parts.push({ inline_data: { mime_type: item.mimeType, data: item.base64 } });
    }

    const headers = {
      "content-type": "application/json",
      ...(this.apiKey ? { "x-goog-api-key": this.apiKey } : {})
    };
    const requestBody = {
      contents: [{ role: "user", parts }],
      generationConfig: { maxOutputTokens: request.maxOutputTokens ?? 1200 }
    };

    const response = await fetch(`${this.baseUrl}/models/${this.model}:generateContent`, {
      method: "POST",
      headers,
      body: JSON.stringify(requestBody)
    });
    if (!response.ok) {
      throw new Error(`Provider ${this.id} failed: ${response.status} ${response.statusText}`);
    }

    const payload = await response.json();
    const replyParts = payload.candidates?.[0]?.content?.parts;
    const text = replyParts == null ? "" : replyParts.map((part) => part.text ?? "").join("\n");
    const meta = payload.usageMetadata;
    const usage = meta ? { inputTokens: meta.promptTokenCount, outputTokens: meta.candidatesTokenCount } : void 0;
    return { text, usage };
  }
};
|
|
1180
|
+
|
|
1181
|
+
// src/providers/registry.ts
|
|
1182
|
+
// Shape a custom provider module must expose: a `createAdapter` function taking
// (string, custom value, string) and returning a promise.
var customModuleSchema = (() => {
  const input = [z5.string(), z5.custom(), z5.string()];
  const output = z5.promise(z5.custom());
  return z5.object({
    createAdapter: z5.function({ input, output })
  });
})();
|
|
1188
|
+
/**
 * Picks the capability list for a provider.
 *
 * @param {{capabilities?: string[]}} config - Provider config.
 * @param {string[]} fallback - Used when the config declares no capabilities (missing or empty).
 * @returns {string[]} The configured list when non-empty, otherwise `fallback`.
 */
function resolveCapabilities(config, fallback) {
  const declared = config.capabilities;
  if (declared?.length) {
    return declared;
  }
  return fallback;
}
|
|
1191
|
+
/**
 * Reads an environment variable by name.
 *
 * @param {string|undefined} name - Variable name; a falsy name yields `undefined`.
 * @returns {string|undefined} The value of `process.env[name]`, or `undefined`.
 */
function envOrUndefined(name) {
  if (!name) {
    return undefined;
  }
  return process.env[name];
}
|
|
1194
|
+
/**
 * Instantiates the provider adapter described by `config`.
 *
 * @param {string} id - Provider id handed to the adapter.
 * @param {object} config - Provider config; `type` selects the adapter. Also reads
 *   `model`, `baseUrl`, `apiKeyEnv`, `headers`, `apiStyle`, `capabilities`, `module`.
 * @param {string} rootDir - Base directory for resolving a relative custom `module` path.
 * @returns {Promise<object>} The created adapter.
 * @throws {Error} For a "custom" provider without `module`, or an unsupported type.
 */
async function createProvider(id, config, rootDir) {
  const { type } = config;

  // The three OpenAI-style providers differ only in defaults.
  const openAiStyle = (defaultBaseUrl, defaultCapabilities, fallbackApiKey) => new OpenAiCompatibleProviderAdapter(id, type, config.model, {
    baseUrl: config.baseUrl ?? defaultBaseUrl,
    apiKey: envOrUndefined(config.apiKeyEnv) ?? fallbackApiKey,
    headers: config.headers,
    apiStyle: config.apiStyle ?? "responses",
    capabilities: resolveCapabilities(config, defaultCapabilities)
  });

  if (type === "heuristic") {
    return new HeuristicProviderAdapter(id, config.model);
  }
  if (type === "openai") {
    return openAiStyle("https://api.openai.com/v1", ["responses", "chat", "structured", "tools", "vision", "streaming"]);
  }
  if (type === "ollama") {
    return openAiStyle("http://localhost:11434/v1", ["responses", "chat", "structured", "tools", "vision", "streaming", "local"], "ollama");
  }
  if (type === "openai-compatible") {
    return openAiStyle("http://localhost:8000/v1", ["chat", "structured"]);
  }
  if (type === "anthropic") {
    return new AnthropicProviderAdapter(id, config.model, {
      apiKey: envOrUndefined(config.apiKeyEnv),
      headers: config.headers,
      baseUrl: config.baseUrl
    });
  }
  if (type === "gemini") {
    return new GeminiProviderAdapter(id, config.model, {
      apiKey: envOrUndefined(config.apiKeyEnv),
      baseUrl: config.baseUrl
    });
  }
  if (type === "custom") {
    if (!config.module) {
      throw new Error(`Provider ${id} is type "custom" but no module path was configured.`);
    }
    // Relative module paths are resolved against the vault root.
    const resolvedModule = path7.isAbsolute(config.module) ? config.module : path7.resolve(rootDir, config.module);
    const loaded = await import(pathToFileURL(resolvedModule).href);
    const { createAdapter } = customModuleSchema.parse(loaded);
    return createAdapter(id, config, rootDir);
  }
  throw new Error(`Unsupported provider type ${String(config.type)}`);
}
|
|
1246
|
+
/**
 * Resolves and creates the provider assigned to a task in the vault config.
 *
 * @param {string} rootDir - Vault root passed to `loadVaultConfig` and `createProvider`.
 * @param {string} task - Key into `config.tasks`.
 * @returns {Promise<object>} The adapter returned by `createProvider`.
 * @throws {Error} When `config.providers` has no entry for the task's provider id.
 */
async function getProviderForTask(rootDir, task) {
  const loaded = await loadVaultConfig(rootDir);
  const providerId = loaded.config.tasks[task];
  const providerConfig = loaded.config.providers[providerId];
  if (providerConfig) {
    return createProvider(providerId, providerConfig, rootDir);
  }
  throw new Error(`No provider configured with id "${providerId}" for task "${task}".`);
}
|
|
1255
|
+
/**
 * Throws unless the provider advertises the given capability.
 *
 * @param {{id: string, capabilities: Set<string>}} provider - Provider to check.
 * @param {string} capability - Required capability name.
 * @throws {Error} When `provider.capabilities` does not contain `capability`.
 */
function assertProviderCapability(provider, capability) {
  const supported = provider.capabilities.has(capability);
  if (supported) {
    return;
  }
  throw new Error(`Provider ${provider.id} does not support required capability "${capability}".`);
}
|
|
1260
|
+
|
|
1261
|
+
// src/search.ts
|
|
1262
|
+
import fs6 from "fs/promises";
|
|
1263
|
+
import path8 from "path";
|
|
1264
|
+
import matter2 from "gray-matter";
|
|
1265
|
+
/**
 * Looks up the `DatabaseSync` class of the built-in `node:sqlite` module.
 *
 * @returns {Function} The `DatabaseSync` constructor.
 * @throws {Error} When `process.getBuiltinModule` is missing or the module
 *   exposes no `DatabaseSync`.
 */
function getDatabaseSync() {
  const DatabaseSync = process.getBuiltinModule?.("node:sqlite")?.DatabaseSync;
  if (DatabaseSync) {
    return DatabaseSync;
  }
  throw new Error("node:sqlite is unavailable in this Node runtime.");
}
|
|
1272
|
+
/**
 * Converts free text into an FTS `OR` query.
 *
 * @param {string} query - User query text.
 * @returns {string} Lower-cased runs of two or more `[a-z0-9]` characters joined
 *   with " OR ", or "" when there are none.
 */
function toFtsQuery(query) {
  const tokens = query.toLowerCase().match(/[a-z0-9]{2,}/g);
  if (tokens === null) {
    return "";
  }
  return tokens.join(" OR ");
}
|
|
1276
|
+
/**
 * Rebuilds the SQLite full-text index from the given wiki pages.
 *
 * Page files are read before the database is touched, and the wipe-and-refill
 * runs in a single transaction, so a failure leaves the previous index intact
 * instead of empty or half-filled. The connection is always closed.
 *
 * @param {string} dbPath - Database file path; its directory is created if needed.
 * @param {Array<{id: string, path: string, title: string}>} pages - Pages to index.
 * @param {string} wikiDir - Directory that `page.path` is relative to.
 * @returns {Promise<void>}
 * @throws {Error} When a page file cannot be read or a database statement fails.
 */
async function rebuildSearchIndex(dbPath, pages, wikiDir) {
  // Read sequentially, as before, to avoid opening many files at once.
  const rows = [];
  for (const page of pages) {
    const raw = await fs6.readFile(path8.join(wikiDir, page.path), "utf8");
    // Only the markdown body (frontmatter stripped) is indexed.
    rows.push([page.id, page.path, page.title, matter2(raw).content]);
  }

  await ensureDir(path8.dirname(dbPath));
  const DatabaseSync = getDatabaseSync();
  const db = new DatabaseSync(dbPath);
  try {
    // The journal mode cannot be changed inside a transaction, so set it first.
    db.exec("PRAGMA journal_mode = WAL;");
    db.exec(`
      CREATE TABLE IF NOT EXISTS pages (
        id TEXT PRIMARY KEY,
        path TEXT NOT NULL,
        title TEXT NOT NULL,
        body TEXT NOT NULL
      );
      CREATE VIRTUAL TABLE IF NOT EXISTS page_search USING fts5(
        title,
        body,
        content='pages',
        content_rowid='rowid'
      );
    `);
    db.exec("BEGIN;");
    try {
      // Clear the FTS table first: it reads the old rows from `pages` while deleting.
      db.exec("DELETE FROM page_search;");
      db.exec("DELETE FROM pages;");
      const insertPage = db.prepare("INSERT INTO pages (id, path, title, body) VALUES (?, ?, ?, ?)");
      for (const row of rows) {
        insertPage.run(...row);
      }
      db.exec("INSERT INTO page_search (rowid, title, body) SELECT rowid, title, body FROM pages;");
      db.exec("COMMIT;");
    } catch (error) {
      try {
        db.exec("ROLLBACK;");
      } catch {
        // The original failure is the one worth reporting.
      }
      throw error;
    }
  } finally {
    db.close();
  }
}
|
|
1307
|
+
/**
 * Runs a full-text search over the page index.
 *
 * @param {string} dbPath - Path of the SQLite search database (opened read-only).
 * @param {string} query - Free-text query; converted with `toFtsQuery`.
 * @param {number} [limit=5] - Maximum number of rows returned.
 * @returns {Array<{pageId: string, path: string, title: string, snippet: string, rank: number}>}
 *   Matches ordered by bm25 rank; empty when the query yields no search tokens.
 * @throws {Error} When the database cannot be opened or queried.
 */
function searchPages(dbPath, query, limit = 5) {
  const ftsQuery = toFtsQuery(query);
  if (!ftsQuery) {
    return [];
  }
  const DatabaseSync = getDatabaseSync();
  const db = new DatabaseSync(dbPath, { readOnly: true });
  let rows;
  try {
    const statement = db.prepare(`
      SELECT
        pages.id AS pageId,
        pages.path AS path,
        pages.title AS title,
        snippet(page_search, 1, '[', ']', '...', 16) AS snippet,
        bm25(page_search) AS rank
      FROM page_search
      JOIN pages ON pages.rowid = page_search.rowid
      WHERE page_search MATCH ?
      ORDER BY rank
      LIMIT ?
    `);
    rows = statement.all(ftsQuery, limit);
  } finally {
    // Release the handle even when preparing or running the statement throws.
    db.close();
  }
  return rows.map((row) => ({
    pageId: String(row.pageId ?? ""),
    path: String(row.path ?? ""),
    title: String(row.title ?? ""),
    snippet: String(row.snippet ?? ""),
    rank: Number(row.rank ?? 0)
  }));
}
|
|
1337
|
+
|
|
1338
|
+
// src/vault.ts
|
|
1339
|
+
/**
 * Builds the vault knowledge graph from manifests and analyses.
 *
 * Nodes: one per manifest (source) plus one per distinct concept/entity id.
 * Edges: a "mentions" edge from each source to every concept/entity it lists,
 * and a "conflicted_with" edge from a source with a negative claim to the
 * citation of any positive claim in another source that shares at least one
 * space-separated word with it.
 *
 * @param {object[]} manifests - Source manifests (reads `sourceId`, `title`).
 * @param {object[]} analyses - Analyses (reads `sourceId`, `concepts`, `entities`, `claims`).
 * @param {object[]} pages - Page records stored unchanged on the graph.
 * @returns {{generatedAt: string, nodes: object[], edges: object[], sources: object[], pages: object[]}}
 */
function buildGraph(manifests, analyses, pages) {
  const sourceNodes = manifests.map((manifest) => ({
    id: `source:${manifest.sourceId}`,
    type: "source",
    label: manifest.title,
    pageId: `source:${manifest.sourceId}`,
    freshness: "fresh",
    confidence: 1,
    sourceIds: [manifest.sourceId]
  }));
  const conceptMap = new Map();
  const entityMap = new Map();
  const edges = [];

  // Registers (or refreshes) the node for each item and links the source to it.
  const addMentions = (analysis, items, type, nodeMap) => {
    for (const item of items) {
      const existing = nodeMap.get(item.id);
      nodeMap.set(item.id, {
        id: item.id,
        type,
        label: item.name,
        pageId: `${type}:${slugify(item.name)}`,
        freshness: "fresh",
        confidence: 0.7,
        sourceIds: [...new Set([...existing?.sourceIds ?? [], analysis.sourceId])]
      });
      edges.push({
        id: `${analysis.sourceId}->${item.id}`,
        source: `source:${analysis.sourceId}`,
        target: item.id,
        relation: "mentions",
        status: "extracted",
        confidence: 0.72,
        provenance: [analysis.sourceId]
      });
    }
  };

  // An empty token (from repeated or edge spaces) is a substring of every
  // string, so it must not count as a shared word.
  const sharesWord = (positiveText, negativeText) => positiveText.split(" ").some((word) => word !== "" && negativeText.includes(word));

  for (const analysis of analyses) {
    addMentions(analysis, analysis.concepts, "concept", conceptMap);
    addMentions(analysis, analysis.entities, "entity", entityMap);
    for (const claim of analysis.claims) {
      if (claim.polarity !== "negative") {
        continue;
      }
      for (const otherAnalysis of analyses) {
        if (otherAnalysis.sourceId === analysis.sourceId) {
          continue;
        }
        for (const other of otherAnalysis.claims) {
          if (other.polarity !== "positive" || !sharesWord(other.text, claim.text)) {
            continue;
          }
          edges.push({
            id: `${claim.id}->${other.id}`,
            source: `source:${analysis.sourceId}`,
            target: `source:${other.citation}`,
            relation: "conflicted_with",
            status: "conflicted",
            confidence: 0.6,
            provenance: [analysis.sourceId, other.citation]
          });
        }
      }
    }
  }

  return {
    generatedAt: new Date().toISOString(),
    nodes: [...sourceNodes, ...conceptMap.values(), ...entityMap.values()],
    edges,
    sources: manifests,
    pages
  };
}
|
|
1419
|
+
/**
 * Writes a wiki page under `<rootDir>/wiki` and records it when the file changed.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {string} relativePath - Page path relative to the wiki directory.
 * @param {string} content - Full page text.
 * @param {string[]} changedPages - Receives `relativePath` when `writeFileIfChanged` reports a change.
 * @returns {Promise<void>}
 */
async function writePage(rootDir, relativePath, content, changedPages) {
  const target = path9.resolve(rootDir, "wiki", relativePath);
  if (await writeFileIfChanged(target, content)) {
    changedPages.push(relativePath);
  }
}
|
|
1426
|
+
/**
 * Groups concepts or entities from all analyses by the slug of their name.
 *
 * @param {object[]} analyses - Analyses (reads `analysis[kind]`, `sourceId`, `sourceHash`).
 * @param {string} kind - Property of each analysis holding the items, e.g. "concepts".
 * @returns {Array<{name: string, descriptions: string[], sourceAnalyses: object[], sourceHashes: object}>}
 *   One group per slug in first-seen order. `name` is the first spelling seen,
 *   `descriptions` holds every description, and each analysis appears at most
 *   once in `sourceAnalyses`.
 */
function aggregateItems(analyses, kind) {
  const grouped = new Map();
  for (const analysis of analyses) {
    for (const item of analysis[kind]) {
      const key = slugify(item.name);
      let group = grouped.get(key);
      if (!group) {
        group = {
          name: item.name,
          descriptions: [],
          sourceAnalyses: [],
          sourceHashes: {}
        };
        grouped.set(key, group);
      }
      group.descriptions.push(item.description);
      // One analysis can list several spellings that share a slug. Analyses are
      // visited one at a time, so comparing with the last entry is enough to
      // avoid recording the same analysis twice.
      if (group.sourceAnalyses.at(-1) !== analysis) {
        group.sourceAnalyses.push(analysis);
      }
      group.sourceHashes[analysis.sourceId] = analysis.sourceHash;
    }
  }
  return [...grouped.values()];
}
|
|
1445
|
+
/**
 * Initialises a vault: runs `initWorkspace`, then `installConfiguredAgents`,
 * one after the other.
 *
 * @param {string} rootDir - Vault root directory passed to both steps.
 * @returns {Promise<void>}
 */
async function initVault(rootDir) {
  const steps = [initWorkspace, installConfiguredAgents];
  for (const step of steps) {
    await step(rootDir);
  }
}
|
|
1449
|
+
/**
 * Compiles the vault: analyses every source, writes source/concept/entity
 * pages and index pages, writes the graph and compile-state JSON files,
 * rebuilds the search index and appends a log entry.
 *
 * @param {string} rootDir - Vault root directory.
 * @returns {Promise<{graphPath: string, pageCount: number, changedPages: string[], sourceCount: number}>}
 */
async function compileVault(rootDir) {
  const { paths } = await initWorkspace(rootDir);
  const provider = await getProviderForTask(rootDir, "compileProvider");
  const manifests = await listManifests(rootDir);
  const analyses = await Promise.all(
    manifests.map(async (manifest) => {
      const extractedText = await readExtractedText(rootDir, manifest);
      return analyzeSource(manifest, extractedText, provider, paths);
    })
  );

  const changedPages = [];
  const pages = [];
  // Records a built page and writes it to disk.
  const emit = async (built) => {
    pages.push(built.page);
    await writePage(rootDir, built.page.path, built.content, changedPages);
  };

  await Promise.all(
    ["sources", "concepts", "entities", "outputs"].map((section) => ensureDir(path9.join(paths.wikiDir, section)))
  );

  // First analysis per source id, matching a first-match lookup.
  const analysisBySourceId = new Map();
  for (const analysis of analyses) {
    if (!analysisBySourceId.has(analysis.sourceId)) {
      analysisBySourceId.set(analysis.sourceId, analysis);
    }
  }
  for (const manifest of manifests) {
    const analysis = analysisBySourceId.get(manifest.sourceId);
    if (analysis) {
      await emit(buildSourcePage(manifest, analysis));
    }
  }

  const aggregateKinds = [["concept", "concepts"], ["entity", "entities"]];
  for (const [pageKind, analysisKey] of aggregateKinds) {
    for (const aggregate of aggregateItems(analyses, analysisKey)) {
      const { name, descriptions, sourceAnalyses, sourceHashes } = aggregate;
      await emit(buildAggregatePage(pageKind, name, descriptions, sourceAnalyses, sourceHashes));
    }
  }

  const graph = buildGraph(manifests, analyses, pages);
  await writeJsonFile(paths.graphPath, graph);
  const signatures = Object.fromEntries(analyses.map((analysis) => [analysis.sourceId, analysisSignature(analysis)]));
  await writeJsonFile(paths.compileStatePath, {
    generatedAt: graph.generatedAt,
    analyses: signatures
  });

  const pagesOfKind = (kind) => pages.filter((page) => page.kind === kind);
  await writePage(rootDir, "index.md", buildIndexPage(pages), changedPages);
  await writePage(rootDir, "sources/index.md", buildSectionIndex("sources", pagesOfKind("source")), changedPages);
  await writePage(rootDir, "concepts/index.md", buildSectionIndex("concepts", pagesOfKind("concept")), changedPages);
  await writePage(rootDir, "entities/index.md", buildSectionIndex("entities", pagesOfKind("entity")), changedPages);

  await rebuildSearchIndex(paths.searchDbPath, pages, paths.wikiDir);
  await appendLogEntry(rootDir, "compile", `Compiled ${manifests.length} source(s)`, [`provider=${provider.id}`, `pages=${pages.length}`]);

  return {
    graphPath: paths.graphPath,
    pageCount: pages.length,
    changedPages,
    sourceCount: manifests.length
  };
}
|
|
1502
|
+
/**
 * Answers a question from the compiled wiki.
 *
 * Compiles the vault first when the search database file is missing, searches
 * for up to five pages, builds excerpts from them and either formats a plain
 * listing (heuristic provider) or asks the provider to answer from the
 * excerpts. Optionally saves the answer as an output page.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {string} question - Question text.
 * @param {boolean} [save=false] - When true, writes the answer under the wiki directory.
 * @returns {Promise<{answer: string, savedTo: (string|undefined), citations: string[]}>}
 */
async function queryVault(rootDir, question, save = false) {
  const { paths } = await loadVaultConfig(rootDir);
  const provider = await getProviderForTask(rootDir, "queryProvider");
  const hasIndex = await fileExists(paths.searchDbPath);
  if (!hasIndex) {
    await compileVault(rootDir);
  }

  const searchResults = searchPages(paths.searchDbPath, question, 5);

  // Excerpt = page title heading plus the truncated, whitespace-normalised body.
  const readExcerpt = async (result) => {
    const raw = await fs7.readFile(path9.join(paths.wikiDir, result.path), "utf8");
    const body = truncate(normalizeWhitespace(matter3(raw).content), 1200);
    return `# ${result.title}\n${body}`;
  };
  const excerpts = await Promise.all(searchResults.map((result) => readExcerpt(result)));

  let answer;
  if (provider.type === "heuristic") {
    const lines = [`Question: ${question}`, "", "Relevant pages:"];
    for (const result of searchResults) {
      lines.push(`- ${result.title} (${result.path})`);
    }
    lines.push("", excerpts.length ? excerpts.join("\n\n") : "No relevant pages found yet.");
    answer = lines.join("\n");
  } else {
    const context = excerpts.join("\n\n---\n\n");
    const response = await provider.generateText({
      system: "Answer using the provided SwarmVault excerpts. Cite source ids or page titles when possible.",
      prompt: `Question: ${question}\n\nContext:\n${context}`
    });
    answer = response.text;
  }

  // Citations are the ids of matched source pages, without the "source:" prefix.
  const prefix = "source:";
  const citedIds = [];
  for (const result of searchResults) {
    if (result.pageId.startsWith(prefix)) {
      citedIds.push(result.pageId.slice(prefix.length));
    }
  }
  const citations = uniqueBy(citedIds, (item) => item);

  let savedTo;
  if (save) {
    const output = buildOutputPage(question, answer, citations);
    const absolutePath = path9.join(paths.wikiDir, output.page.path);
    await ensureDir(path9.dirname(absolutePath));
    await fs7.writeFile(absolutePath, output.content, "utf8");
    savedTo = absolutePath;
  }

  const citationSummary = citations.join(",") || "none";
  await appendLogEntry(rootDir, "query", question, [`citations=${citationSummary}`, `saved=${Boolean(savedTo)}`]);
  return { answer, savedTo, citations };
}
|
|
1553
|
+
/**
 * Return the "- " bullet lines inside a page's Claims section that carry no
 * `[source:` citation marker.
 *
 * The Claims section starts at a heading of level two or deeper whose title
 * begins with "Claims" and ends at the next heading of the same or a shallower
 * level. Bullets anywhere else in the page are ignored, so list items in other
 * sections are not reported as uncited claims.
 *
 * @param {string} content - Full text of a wiki page.
 * @returns {string[]} The uncited claim bullet lines, in document order.
 */
function findUncitedClaimBullets(content) {
  const headingPattern = /^(#{1,6})\s+(.*)$/;
  const uncited = [];
  let claimsDepth = 0; // 0 means "currently outside the Claims section"
  for (const rawLine of content.split("\n")) {
    const line = rawLine.trimEnd(); // also drops a trailing "\r" from CRLF files
    const heading = headingPattern.exec(line);
    if (heading) {
      const depth = heading[1].length;
      if (claimsDepth !== 0 && depth <= claimsDepth) {
        claimsDepth = 0;
      }
      if (claimsDepth === 0 && depth >= 2 && heading[2].startsWith("Claims")) {
        claimsDepth = depth;
      }
      continue;
    }
    if (claimsDepth !== 0 && line.startsWith("- ") && !line.includes("[source:")) {
      uncited.push(line);
    }
  }
  return uncited;
}

/**
 * Lint the compiled vault and report findings.
 *
 * Checks performed per page of the graph artifact:
 * - `stale_page`: a source hash recorded on the page differs from the current
 *   manifest's `contentHash` for that source.
 * - `orphan_page`: a non-index page has no backlinks.
 * - `uncited_claims`: the page file exists and its Claims section contains
 *   bullets without a `[source:` marker.
 *
 * When no graph artifact can be read, a single `graph_missing` warning is
 * returned and nothing is logged.
 *
 * @param {string} rootDir - Vault root passed to the config, manifest and log helpers.
 * @returns {Promise<Array<{ severity: string, code: string, message: string, pagePath?: string }>>}
 */
async function lintVault(rootDir) {
  const { paths } = await loadVaultConfig(rootDir);
  const manifests = await listManifests(rootDir);
  const graph = await readJsonFile(paths.graphPath);
  if (!graph) {
    return [
      {
        severity: "warning",
        code: "graph_missing",
        message: "No graph artifact found. Run `swarmvault compile` first."
      }
    ];
  }

  const findings = [];
  const manifestMap = new Map(manifests.map((manifest) => [manifest.sourceId, manifest]));
  for (const page of graph.pages) {
    const absolutePath = path9.join(paths.wikiDir, page.path);

    // A page entry without recorded hashes simply has nothing to compare.
    for (const [sourceId, knownHash] of Object.entries(page.sourceHashes ?? {})) {
      const manifest = manifestMap.get(sourceId);
      if (manifest && manifest.contentHash !== knownHash) {
        findings.push({
          severity: "warning",
          code: "stale_page",
          message: `Page ${page.title} is stale because source ${sourceId} changed.`,
          pagePath: absolutePath
        });
      }
    }

    // A page entry without a backlinks list is treated as having none.
    if (page.kind !== "index" && (page.backlinks ?? []).length === 0) {
      findings.push({
        severity: "info",
        code: "orphan_page",
        message: `Page ${page.title} has no backlinks.`,
        pagePath: absolutePath
      });
    }

    if (await fileExists(absolutePath)) {
      const content = await fs7.readFile(absolutePath, "utf8");
      if (findUncitedClaimBullets(content).length > 0) {
        findings.push({
          severity: "warning",
          code: "uncited_claims",
          message: `Page ${page.title} contains uncited claim bullets.`,
          pagePath: absolutePath
        });
      }
    }
  }

  await appendLogEntry(rootDir, "lint", `Linted ${graph.pages.length} page(s)`, [`findings=${findings.length}`]);
  return findings;
}
|
|
1607
|
+
/**
 * Initialise a vault and, when an input is supplied, ingest it and compile.
 *
 * @param {string} rootDir - Vault root passed to the init, ingest and compile helpers.
 * @param {*} [input] - Input handed to `ingestInput`; when falsy only the vault is initialised.
 * @returns {Promise<{ manifestId?: string, compile?: object }>} An empty object when no
 *   input was given, otherwise the ingested manifest's source id and the compile result.
 */
async function bootstrapDemo(rootDir, input) {
  await initVault(rootDir);
  if (input) {
    const ingested = await ingestInput(rootDir, input);
    const compileResult = await compileVault(rootDir);
    return {
      manifestId: ingested.sourceId,
      compile: compileResult
    };
  }
  return {};
}
|
|
1619
|
+
|
|
1620
|
+
// src/viewer.ts
|
|
1621
|
+
import fs8 from "fs/promises";
|
|
1622
|
+
import http from "http";
|
|
1623
|
+
import path10 from "path";
|
|
1624
|
+
import mime2 from "mime-types";
|
|
1625
|
+
/**
 * Start a local HTTP server for the graph viewer.
 *
 * Routes:
 * - `/api/graph` responds with the graph artifact as JSON, or 404 when the
 *   artifact does not exist.
 * - Any other path is served as a static file from `paths.viewerDistDir`,
 *   falling back to its `index.html`; 503 when that is missing too.
 *
 * Failures while handling a request are answered with a 400/500 response
 * instead of escaping as unhandled promise rejections, and a failure to bind
 * the port rejects the returned promise.
 *
 * @param {string} rootDir - Vault root passed to `loadVaultConfig`.
 * @param {number} [port] - Port to listen on; `config.viewer.port` is used when
 *   this is `null` or `undefined`.
 * @returns {Promise<{ port: number, close: () => Promise<void> }>} The port the
 *   server is actually bound to and a function that stops the server.
 */
async function startGraphServer(rootDir, port) {
  const { config, paths } = await loadVaultConfig(rootDir);
  const effectivePort = port ?? config.viewer.port;
  const viewerRoot = path10.resolve(paths.viewerDistDir);
  const fallback = path10.join(viewerRoot, "index.html");

  // True only for regular files. Directories (which would make readFile fail
  // with EISDIR) and any path that cannot be stat'ed count as "not servable".
  const isRegularFile = async (candidate) => {
    try {
      return (await fs8.stat(candidate)).isFile();
    } catch {
      return false;
    }
  };

  const handleRequest = async (request, response) => {
    let url;
    try {
      url = new URL(request.url ?? "/", `http://${request.headers.host ?? `localhost:${effectivePort}`}`);
    } catch {
      // A malformed Host header or request target makes the URL constructor throw.
      response.writeHead(400, { "content-type": "text/plain" });
      response.end("Bad request");
      return;
    }

    if (url.pathname === "/api/graph") {
      if (!await fileExists(paths.graphPath)) {
        response.writeHead(404, { "content-type": "application/json" });
        response.end(JSON.stringify({ error: "Graph artifact not found. Run `swarmvault compile` first." }));
        return;
      }
      // Read before writing the status line so a read failure can still become a 500.
      const graphJson = await fs8.readFile(paths.graphPath, "utf8");
      response.writeHead(200, { "content-type": "application/json" });
      response.end(graphJson);
      return;
    }

    const relativePath = url.pathname === "/" ? "index.html" : url.pathname.slice(1);
    const target = path10.join(viewerRoot, relativePath);
    // Defence in depth: never serve anything that resolves outside the viewer build.
    const relativeToRoot = path10.relative(viewerRoot, target);
    const insideRoot = relativeToRoot !== "" && relativeToRoot.split(path10.sep)[0] !== ".." && !path10.isAbsolute(relativeToRoot);

    let filePath = null;
    if (insideRoot && await isRegularFile(target)) {
      filePath = target;
    } else if (await isRegularFile(fallback)) {
      filePath = fallback;
    }
    if (filePath === null) {
      response.writeHead(503, { "content-type": "text/plain" });
      response.end("Viewer build not found. Run `pnpm build` first.");
      return;
    }

    const body = await fs8.readFile(filePath);
    response.writeHead(200, { "content-type": mime2.lookup(filePath) || "text/plain" });
    response.end(body);
  };

  const server = http.createServer((request, response) => {
    handleRequest(request, response).catch((error) => {
      // Without this handler a rejected request would be an unhandled
      // rejection and the client would be left waiting forever.
      console.error(error);
      if (!response.headersSent) {
        response.writeHead(500, { "content-type": "text/plain" });
      }
      if (!response.writableEnded) {
        response.end("Internal server error");
      }
    });
  });

  await new Promise((resolve, reject) => {
    // Bind failures (e.g. EADDRINUSE) arrive as an 'error' event, not via the listen callback.
    const onListenError = (error) => reject(error);
    server.once("error", onListenError);
    server.listen(effectivePort, () => {
      server.off("error", onListenError);
      resolve();
    });
  });

  // Report the port actually bound, which differs from the request when the OS picks it (port 0).
  const address = server.address();
  const boundPort = address !== null && typeof address === "object" ? address.port : effectivePort;

  return {
    port: boundPort,
    close: async () => {
      await new Promise((resolve, reject) => {
        server.close((error) => {
          if (error) {
            reject(error);
            return;
          }
          resolve();
        });
      });
    }
  };
}
|
|
1670
|
+
export {
|
|
1671
|
+
assertProviderCapability,
|
|
1672
|
+
bootstrapDemo,
|
|
1673
|
+
compileVault,
|
|
1674
|
+
createProvider,
|
|
1675
|
+
defaultVaultConfig,
|
|
1676
|
+
getProviderForTask,
|
|
1677
|
+
ingestInput,
|
|
1678
|
+
initVault,
|
|
1679
|
+
initWorkspace,
|
|
1680
|
+
installAgent,
|
|
1681
|
+
installConfiguredAgents,
|
|
1682
|
+
lintVault,
|
|
1683
|
+
listManifests,
|
|
1684
|
+
loadVaultConfig,
|
|
1685
|
+
queryVault,
|
|
1686
|
+
readExtractedText,
|
|
1687
|
+
resolvePaths,
|
|
1688
|
+
startGraphServer
|
|
1689
|
+
};
|