chapterhouse 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/agents/korg.agent.md +65 -0
  2. package/dist/api/korg.js +34 -0
  3. package/dist/api/korg.test.js +42 -0
  4. package/dist/api/server.js +238 -2
  5. package/dist/api/server.test.js +199 -0
  6. package/dist/config.js +28 -0
  7. package/dist/config.test.js +20 -0
  8. package/dist/copilot/agents.js +3 -4
  9. package/dist/copilot/agents.test.js +12 -1
  10. package/dist/copilot/orchestrator.js +12 -1
  11. package/dist/copilot/orchestrator.test.js +3 -7
  12. package/dist/copilot/system-message.js +12 -10
  13. package/dist/copilot/system-message.test.js +6 -1
  14. package/dist/copilot/tools.js +193 -375
  15. package/dist/copilot/tools.memory.test.js +32 -0
  16. package/dist/copilot/tools.wiki.test.js +80 -59
  17. package/dist/copilot/turn-event-log-env.test.js +11 -15
  18. package/dist/daemon.js +19 -0
  19. package/dist/memory/decisions.js +6 -5
  20. package/dist/memory/entities.js +20 -9
  21. package/dist/memory/eot.js +30 -8
  22. package/dist/memory/eot.test.js +220 -6
  23. package/dist/memory/hooks.js +151 -0
  24. package/dist/memory/hooks.test.js +325 -0
  25. package/dist/memory/hot-tier.js +37 -0
  26. package/dist/memory/hot-tier.test.js +30 -0
  27. package/dist/memory/housekeeping-scheduler.js +35 -0
  28. package/dist/memory/housekeeping-scheduler.test.js +50 -0
  29. package/dist/memory/inbox.js +10 -0
  30. package/dist/memory/index.js +3 -1
  31. package/dist/memory/migration.js +244 -0
  32. package/dist/memory/migration.test.js +108 -0
  33. package/dist/memory/reflect.js +273 -0
  34. package/dist/memory/reflect.test.js +254 -0
  35. package/dist/paths.js +31 -11
  36. package/dist/store/db.js +187 -4
  37. package/dist/store/db.test.js +66 -2
  38. package/dist/test/helpers/reset-singletons.js +8 -0
  39. package/dist/test/helpers/reset-singletons.test.js +37 -0
  40. package/dist/test/setup-env.js +9 -1
  41. package/dist/wiki/consolidation.js +641 -0
  42. package/dist/wiki/consolidation.test.js +143 -0
  43. package/dist/wiki/frontmatter.js +48 -0
  44. package/dist/wiki/frontmatter.test.js +42 -0
  45. package/dist/wiki/fs.js +22 -13
  46. package/dist/wiki/index-manager.js +305 -330
  47. package/dist/wiki/index-manager.test.js +265 -144
  48. package/dist/wiki/ingest.js +347 -0
  49. package/dist/wiki/ingest.test.js +111 -0
  50. package/dist/wiki/links.js +151 -0
  51. package/dist/wiki/links.test.js +176 -0
  52. package/dist/wiki/log-manager.js +8 -5
  53. package/dist/wiki/log-manager.test.js +4 -0
  54. package/dist/wiki/migrate-topics.test.js +16 -6
  55. package/dist/wiki/scheduler.js +118 -0
  56. package/dist/wiki/scheduler.test.js +64 -0
  57. package/dist/wiki/timeline.js +51 -0
  58. package/dist/wiki/timeline.test.js +65 -0
  59. package/dist/wiki/topic-structure.js +1 -1
  60. package/package.json +1 -1
  61. package/skills/pkb-ideas/SKILL.md +78 -0
  62. package/skills/pkb-ideas/_meta.json +4 -0
  63. package/skills/pkb-org/SKILL.md +82 -0
  64. package/skills/pkb-org/_meta.json +4 -0
  65. package/skills/pkb-people/SKILL.md +74 -0
  66. package/skills/pkb-people/_meta.json +4 -0
  67. package/skills/pkb-research/SKILL.md +83 -0
  68. package/skills/pkb-research/_meta.json +4 -0
  69. package/skills/pkb-source/SKILL.md +38 -0
  70. package/skills/pkb-source/_meta.json +4 -0
  71. package/skills/wiki-conventions/SKILL.md +5 -5
  72. package/web/dist/assets/{index-DuKYxMIR.css → index-5kz9aRU9.css} +1 -1
  73. package/web/dist/assets/{index-DytB69KC.js → index-BbX9RKf3.js} +91 -89
  74. package/web/dist/assets/index-BbX9RKf3.js.map +1 -0
  75. package/web/dist/index.html +2 -2
  76. package/dist/wiki/context.js +0 -138
  77. package/dist/wiki/fix.js +0 -335
  78. package/dist/wiki/fix.test.js +0 -350
  79. package/dist/wiki/lint.js +0 -451
  80. package/dist/wiki/lint.test.js +0 -329
  81. package/web/dist/assets/index-DytB69KC.js.map +0 -1
@@ -0,0 +1,347 @@
1
+ // ---------------------------------------------------------------------------
2
+ // PKB ingestion pipeline — parse, extract entities, write wiki pages
3
+ // ---------------------------------------------------------------------------
4
+ import { createHash } from "node:crypto";
5
+ import { exec } from "node:child_process";
6
+ import { mkdirSync, readFileSync, existsSync, rmSync } from "node:fs";
7
+ import { join } from "node:path";
8
+ import { promisify } from "node:util";
9
+ import { getDb } from "../store/db.js";
10
+ import { ensureWikiStructure, writeRawSource, assertPagePath } from "./fs.js";
11
+ import { appendTimeline } from "./timeline.js";
12
+ import { validateAndBackfillFrontmatter } from "./frontmatter.js";
13
+ import { writePage, readPage } from "./fs.js";
14
+ import { childLogger } from "../util/logger.js";
15
+ const log = childLogger("ingest");
16
+ const execAsync = promisify(exec);
17
+ // ---------------------------------------------------------------------------
18
+ // Source ID
19
+ // ---------------------------------------------------------------------------
20
/**
 * Derive the stable identifier of an ingested source.
 *
 * The id is the hex-encoded SHA-256 digest of the source type directly
 * followed by the origin (no separator between the two).
 *
 * @param {string} sourceType - Source kind, e.g. "text", "url", "pdf", "repo".
 * @param {string} origin - Where the source came from (URL, path, or text prefix).
 * @returns {string} 64-character lowercase hex digest.
 */
export function computeSourceId(sourceType, origin) {
    const material = sourceType + origin;
    const hasher = createHash("sha256");
    hasher.update(material);
    return hasher.digest("hex");
}
23
+ // ---------------------------------------------------------------------------
24
+ // Content parsers
25
+ // ---------------------------------------------------------------------------
26
/**
 * Report whether four IPv4 octets fall in a range that must never be fetched:
 * "this network" (0/8), RFC 1918 private ranges, CGNAT shared space
 * (100.64/10), loopback (127/8) and link-local (169.254/16).
 */
function isBlockedIpv4Octets([a, b]) {
    return a === 0
        || a === 10
        || a === 127
        || (a === 100 && b >= 64 && b <= 127)
        || (a === 169 && b === 254)
        || (a === 172 && b >= 16 && b <= 31)
        || (a === 192 && b === 168);
}
/**
 * Report whether a bracket-less, lowercase IPv6 literal (as serialized by the
 * WHATWG URL parser) is unspecified, loopback, unique-local (fc00::/7),
 * link-local (fe80::/10), or an IPv4-mapped address of a blocked IPv4 range.
 */
function isBlockedIpv6Literal(host) {
    if (host === "::" || host === "::1")
        return true;
    if (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host))
        return true;
    // The URL parser serializes ::ffff:127.0.0.1 as ::ffff:7f00:1.
    const mapped = host.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
    if (mapped) {
        const high = Number.parseInt(mapped[1], 16);
        const low = Number.parseInt(mapped[2], 16);
        return isBlockedIpv4Octets([high >> 8, high & 0xff, low >> 8, low & 0xff]);
    }
    return false;
}
/**
 * Validate that a URL is safe to fetch from the server side.
 *
 * Only http/https URLs are accepted, and hosts that point at the local machine,
 * private networks, or cloud metadata endpoints are rejected. This check only
 * looks at the literal host; it does not resolve DNS, so a public name that
 * resolves to a private address is not detected here.
 *
 * @param {string} url - URL to validate.
 * @returns {URL} The parsed URL when it is allowed.
 * @throws {TypeError} If `url` cannot be parsed.
 * @throws {Error} If the protocol is not http/https or the host is internal/private.
 */
export function assertSafeRemoteUrl(url) {
    const parsedUrl = new URL(url);
    if (!["http:", "https:"].includes(parsedUrl.protocol)) {
        throw new Error(`Only http/https URLs supported, got: ${parsedUrl.protocol}`);
    }
    // URL.hostname keeps the [] around IPv6 literals and may keep a trailing
    // root dot ("localhost."); strip both so the comparisons below can match.
    const host = parsedUrl.hostname
        .toLowerCase()
        .replace(/^\[|\]$/g, "")
        .replace(/\.$/, "");
    const parts = host.split(".");
    const isIpv4 = parts.length === 4
        && parts.every((part) => /^\d{1,3}$/.test(part) && Number(part) <= 255);
    const isBlockedAddress = host.includes(":")
        ? isBlockedIpv6Literal(host)
        : isIpv4 && isBlockedIpv4Octets(parts.map(Number));
    const isBlockedName = host === "localhost"
        || host.endsWith(".localhost")
        || host === "metadata.google.internal";
    if (isBlockedAddress || isBlockedName) {
        throw new Error("Cannot fetch internal/private URLs.");
    }
    return parsedUrl;
}
44
/**
 * Build the initial markdown for a new entity page: a frontmatter block,
 * an H1 title, a Summary section and an empty Timeline section.
 *
 * Frontmatter values and the H1 are collapsed onto a single line so that a
 * title or summary containing line breaks (these values can originate from
 * LLM output) cannot break or prematurely terminate the frontmatter block.
 * The summary under "## Summary" is emitted as given.
 *
 * @param {object} fields
 * @param {string} fields.pageTitle - Page title (frontmatter + H1).
 * @param {string} fields.pageSummary - Short summary (frontmatter + Summary section).
 * @param {string} fields.entityType - Value written to metadata.entity_type.
 * @param {string} fields.updatedAt - Value written to the `updated` field.
 * @returns {string} Complete page content ending with the Timeline heading.
 */
export function createEntityPageContent({ pageTitle, pageSummary, entityType, updatedAt, }) {
    const toSingleLine = (value) => String(value).replace(/\s*[\r\n]+\s*/g, " ").trim();
    const title = toSingleLine(pageTitle);
    const frontmatter = [
        "---",
        `title: ${title}`,
        `summary: ${toSingleLine(pageSummary)}`,
        `updated: ${toSingleLine(updatedAt)}`,
        "tags: []",
        "metadata:",
        ` entity_type: ${toSingleLine(entityType)}`,
        "---",
    ].join("\n");
    return `${frontmatter}\n\n# ${title}\n\n## Summary\n\n${pageSummary}\n\n## Timeline\n`;
}
47
/**
 * Fetch a web page and reduce it to plain text plus a title.
 *
 * Redirects are followed manually so that every hop is re-checked with
 * assertSafeRemoteUrl; otherwise a public URL could redirect the fetch to an
 * internal address. Each request is bounded by a timeout.
 *
 * Text extraction prefers @mozilla/readability + jsdom when both can be
 * imported and yield text; otherwise it falls back to stripHtml and the
 * page's <title> tag. The title defaults to the requested URL's hostname.
 *
 * @param {string} url - http/https URL to ingest.
 * @returns {Promise<{text: string, title: string}>} Text capped at 50,000 characters.
 * @throws {Error} On unsafe URLs, too many redirects, timeouts, or non-2xx responses.
 */
async function parseUrl(url) {
    const MAX_REDIRECTS = 5;
    const FETCH_TIMEOUT_MS = 30_000;
    const MAX_TEXT_CHARS = 50_000;
    const requestedUrl = assertSafeRemoteUrl(url);
    let currentUrl = requestedUrl;
    let res;
    for (let hop = 0;; hop += 1) {
        res = await fetch(currentUrl, {
            headers: { "User-Agent": "Chapterhouse/1.0 PKB-Ingest" },
            redirect: "manual",
            signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
        });
        const isRedirect = res.status >= 300 && res.status < 400;
        const location = isRedirect ? res.headers.get("location") : null;
        if (!location)
            break;
        if (hop >= MAX_REDIRECTS) {
            throw new Error(`Too many redirects fetching ${url}`);
        }
        try {
            // Release the connection held by the unread redirect body.
            await res.body?.cancel();
        }
        catch { /* best-effort */ }
        currentUrl = assertSafeRemoteUrl(new URL(location, currentUrl).href);
    }
    if (!res.ok)
        throw new Error(`HTTP ${res.status} ${res.statusText} fetching ${url}`);
    const html = await res.text();
    let text;
    let title = requestedUrl.hostname;
    // Try @mozilla/readability if available
    try {
        const { Readability } = await import("@mozilla/readability");
        const { JSDOM } = await import("jsdom");
        const dom = new JSDOM(html, { url: currentUrl.href });
        const article = new Readability(dom.window.document).parse();
        // An article without text is treated like no article at all.
        if (article?.textContent) {
            text = article.textContent;
            title = article.title || title;
        }
    }
    catch (err) {
        log.debug({ err: err instanceof Error ? err.message : err }, "Readability extraction unavailable, falling back to tag stripping");
    }
    if (text === undefined) {
        text = stripHtml(html);
        // Try to extract title from <title> tag
        const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
        if (titleMatch)
            title = titleMatch[1].trim();
    }
    if (text.length > MAX_TEXT_CHARS)
        text = text.slice(0, MAX_TEXT_CHARS);
    return { text, title };
}
81
/**
 * Crude HTML-to-text fallback used when no real parser is available.
 *
 * Removes <script> and <style> elements with their contents, replaces every
 * remaining tag with a space, collapses whitespace runs, trims, and caps the
 * result at 10,000 characters.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Plain text, at most 10,000 characters long.
 */
function stripHtml(html) {
    const MAX_CHARS = 10_000;
    const rewrites = [
        [/<script[\s\S]*?<\/script>/gi, ""],
        [/<style[\s\S]*?<\/style>/gi, ""],
        [/<[^>]+>/g, " "],
        [/\s{2,}/g, " "],
    ];
    let text = html;
    for (const [pattern, replacement] of rewrites) {
        text = text.replace(pattern, replacement);
    }
    return text.trim().slice(0, MAX_CHARS);
}
90
/**
 * Extract text from a local PDF file using the optional `pdf-parse` package.
 *
 * A missing dependency and a failure to read or parse the file are reported
 * as two distinct errors, so an unreadable path is no longer reported as
 * "install pdf-parse". The original error is attached as `cause`.
 *
 * NOTE(review): `filePath` is read with readFileSync, so remote http(s) PDF
 * URLs are not supported here even though detectSourceType can label them
 * "pdf" — confirm the intended handling with the caller.
 *
 * @param {string} filePath - Path to a PDF on the local filesystem.
 * @returns {Promise<{text: string, title: string}>} Text capped at 50,000
 *   characters; the title is the file name without directory or `.pdf` suffix.
 * @throws {Error} If pdf-parse is not installed, or the file cannot be read or parsed.
 */
async function parsePdf(filePath) {
    let pdfParse;
    try {
        ({ default: pdfParse } = await import("pdf-parse"));
    }
    catch (err) {
        throw new Error("PDF ingestion requires pdf-parse: npm install pdf-parse", { cause: err });
    }
    let data;
    try {
        data = await pdfParse(readFileSync(filePath));
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to read or parse PDF ${filePath}: ${reason}`, { cause: err });
    }
    // Accept both "/" and "\" as directory separators when deriving the title.
    const title = filePath.replace(/.*[\\/]/, "").replace(/\.pdf$/i, "");
    return { text: (data.text ?? "").slice(0, 50_000), title };
}
101
/**
 * Shallow-clone a git repository and summarise it as markdown text:
 * the README (first match, capped at 10,000 chars), any of
 * package.json / Cargo.toml / go.mod (capped at 2,000 chars each), and an
 * `ls -la` listing of the repository root.
 *
 * Commands are run with execFile (argument vector, no shell) because the
 * repository URL is caller-supplied: with a shell, `$(...)` or backticks in
 * the URL would be executed even inside double quotes. `--` stops git from
 * interpreting a URL that starts with "-" as an option. http(s) URLs are
 * additionally checked with assertSafeRemoteUrl.
 *
 * The clone goes into a unique directory under the OS temp dir and is
 * removed afterwards on a best-effort basis.
 *
 * @param {string} repoUrl - Repository URL passed to `git clone`.
 * @returns {Promise<{text: string, title: string}>} Summary text and a title
 *   derived from the last path segment of the URL (without `.git`).
 * @throws {Error} If the URL is an internal http(s) address or a command fails/times out.
 */
async function parseRepo(repoUrl) {
    // Function-scoped imports keep this block's extra dependencies local.
    const [{ execFile }, { mkdtempSync }, { tmpdir }] = await Promise.all([
        import("node:child_process"),
        import("node:fs"),
        import("node:os"),
    ]);
    const execFileAsync = promisify(execFile);
    if (/^https?:\/\//i.test(repoUrl)) {
        assertSafeRemoteUrl(repoUrl);
    }
    // mkdtemp guarantees a unique directory even for concurrent ingests.
    const tmpDir = mkdtempSync(join(tmpdir(), "chapterhouse-repo-"));
    try {
        await execFileAsync("git", ["clone", "--depth", "1", "--", repoUrl, tmpDir], { timeout: 60_000 });
        const parts = [];
        // README
        for (const name of ["README.md", "README.rst", "README.txt", "README"]) {
            const p = join(tmpDir, name);
            if (existsSync(p)) {
                parts.push(`## README\n\n${readFileSync(p, "utf-8").slice(0, 10_000)}`);
                break;
            }
        }
        // package.json / Cargo.toml / go.mod
        for (const name of ["package.json", "Cargo.toml", "go.mod"]) {
            const p = join(tmpDir, name);
            if (existsSync(p)) {
                parts.push(`## ${name}\n\n\`\`\`\n${readFileSync(p, "utf-8").slice(0, 2_000)}\n\`\`\``);
            }
        }
        const { stdout } = await execFileAsync("ls", ["-la", tmpDir]);
        parts.push(`## Directory listing\n\n\`\`\`\n${stdout}\n\`\`\``);
        // Ignore trailing slashes so "https://host/user/repo/" still yields "repo".
        const title = repoUrl.replace(/\/+$/, "").replace(/.*\//, "").replace(/\.git$/, "") || repoUrl;
        return { text: parts.join("\n\n"), title };
    }
    finally {
        try {
            rmSync(tmpDir, { recursive: true, force: true });
        }
        catch { /* best-effort */ }
    }
}
134
+ // ---------------------------------------------------------------------------
135
+ // Entity extraction via LLM
136
+ // ---------------------------------------------------------------------------
137
/**
 * Ask an LLM (via the Copilot SDK) to extract entities and relationships
 * from ingested text.
 *
 * Extraction is best-effort: when no auth token is configured, or the LLM
 * call / JSON parsing fails, an empty result is returned and the failure is
 * logged instead of thrown. Only the first 8,000 characters of `text` are sent.
 *
 * @param {string} text - Parsed source content.
 * @param {string} [topic] - Optional topic used as a focus hint in the prompt.
 * @returns {Promise<{entities: Array<{name: string, type: string, description?: string}>, relationships: Array<object>}>}
 *   Entities are limited to objects with string `name` and `type`.
 */
async function extractEntities(text, topic) {
    const emptyResult = () => ({ entities: [], relationships: [] });
    // Skip entity extraction if no auth token is configured
    const { config } = await import("../config.js");
    const token = config.copilotAuthToken || process.env.COPILOT_TOKEN || process.env.GITHUB_TOKEN;
    if (!token) {
        log.debug("No Copilot auth token configured, skipping entity extraction");
        return emptyResult();
    }
    const topicHint = topic ? ` Focus especially on entities related to: ${topic}.` : "";
    const instructions = "Extract entities and relationships from this content. Return JSON only, no other text: " +
        `{ "entities": [{"name": string, "type": string, "description": string}], ` +
        `"relationships": [{"from": string, "to": string, "type": string}] }`;
    const userMessage = `${instructions}${topicHint}\n\n---\n\n${text.slice(0, 8_000)}`;
    try {
        const { CopilotClient, approveAll } = await import("@github/copilot-sdk");
        // Use a one-shot client (autoRestart: false) so it doesn't keep the process alive
        const client = new CopilotClient({
            autoStart: true,
            autoRestart: false,
            gitHubToken: token,
        });
        await client.start();
        try {
            const session = await client.createSession({
                model: "claude-haiku-4.5",
                tools: [],
                onPermissionRequest: approveAll,
            });
            try {
                const result = await session.sendAndWait({ prompt: userMessage }, 30_000);
                // NOTE(review): assumes the SDK resolves to an assistant-message event
                // whose text lives at `data.content` — confirm against @github/copilot-sdk.
                // Stringifying the whole event (the previous behavior) made the JSON
                // match below capture the event envelope, so `entities` was never found.
                let rawText;
                if (typeof result === "string") {
                    rawText = result;
                }
                else if (typeof result?.data?.content === "string") {
                    rawText = result.data.content;
                }
                else {
                    rawText = JSON.stringify(result ?? null);
                }
                // Extract JSON from the response (may be wrapped in markdown code blocks)
                const jsonMatch = rawText.match(/\{[\s\S]*\}/);
                if (!jsonMatch)
                    return emptyResult();
                const parsed = JSON.parse(jsonMatch[0]);
                // Drop malformed entries so callers can rely on string name/type.
                const isEntity = (entry) => entry !== null
                    && typeof entry === "object"
                    && typeof entry.name === "string"
                    && typeof entry.type === "string";
                return {
                    entities: Array.isArray(parsed.entities) ? parsed.entities.filter(isEntity) : [],
                    relationships: Array.isArray(parsed.relationships) ? parsed.relationships : [],
                };
            }
            finally {
                try {
                    // Awaited so that an asynchronous teardown failure is caught here
                    // rather than surfacing as an unhandled rejection.
                    await session.destroy();
                }
                catch { /* best-effort */ }
            }
        }
        finally {
            try {
                await client.stop();
            }
            catch { /* best-effort */ }
        }
    }
    catch (err) {
        log.warn({ err: err instanceof Error ? err.message : err }, "Entity extraction LLM call failed, skipping");
        return emptyResult();
    }
}
197
+ // ---------------------------------------------------------------------------
198
+ // Slug helper
199
+ // ---------------------------------------------------------------------------
200
/**
 * Turn an entity name into a path-safe slug.
 *
 * Lowercases the name, replaces every run of characters outside [a-z0-9]
 * with a single hyphen, strips leading/trailing hyphens, and keeps at most
 * 64 characters. Names that reduce to nothing become "unknown".
 *
 * @param {string} name - Human-readable entity name.
 * @returns {string} Slug of 1 to 64 characters.
 */
function slugify(name) {
    const MAX_SLUG_LENGTH = 64;
    const lowered = name.toLowerCase();
    const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
    const stripped = hyphenated.replace(/^-+|-+$/g, "");
    const slug = stripped.slice(0, MAX_SLUG_LENGTH);
    return slug === "" ? "unknown" : slug;
}
207
+ // ---------------------------------------------------------------------------
208
+ // Main ingest function
209
+ // ---------------------------------------------------------------------------
210
/**
 * Pick the wiki page path for an entity: the type-specific location when
 * assertPagePath accepts it, otherwise the `topics` fallback. Returns
 * undefined when neither candidate is an acceptable page path.
 */
function resolveEntityPagePath(typeSlug, entitySlug) {
    const candidates = [
        `pages/${typeSlug}/${entitySlug}/index.md`,
        // Use topics fallback for unknown entity types
        `pages/topics/${entitySlug}/index.md`,
    ];
    for (const candidate of candidates) {
        try {
            assertPagePath(candidate);
            return candidate;
        }
        catch { /* rejected by assertPagePath; try the next candidate */ }
    }
    return undefined;
}
/**
 * Ingest a source into the wiki: parse it, archive the raw text, record it in
 * `wiki_sources`, extract entities, and create or update one page per entity
 * (each gets a timeline entry).
 *
 * Ingestion is idempotent per source id (see computeSourceId): a source that
 * is already recorded is not re-processed.
 *
 * NOTE(review): for `text` sources the id is derived from the first 200
 * characters only, so two different texts sharing that prefix are treated as
 * the same source. Changing this would alter stored ids — confirm before fixing.
 *
 * @param {string} source - URL, file path, repository URL, or the text itself.
 * @param {string} type - "url", "pdf", "repo" or "text"; anything else is handled as text.
 * @param {string} [topic] - Optional topic hint; also the title of text sources.
 * @param {{sessionId?: string, sessionName?: string}} [session] - Optional
 *   research-session metadata stored with the source row.
 * @returns {Promise<{source_id: string, pages_created: string[], pages_updated: string[], entities: Array<{name: string, type: string, path: string}>, already_existed: boolean}>}
 */
export async function ingestSource(source, type, topic, session) {
    ensureWikiStructure();
    const db = getDb();
    const origin = type === "text" ? source.slice(0, 200) : source;
    const sourceId = computeSourceId(type, origin);
    // Idempotency check
    const existing = db.prepare(`SELECT id, pages_updated FROM wiki_sources WHERE id = ?`).get(sourceId);
    if (existing) {
        log.info({ sourceId, type, origin }, "Source already ingested, skipping");
        const pagesUpdated = JSON.parse(existing.pages_updated || "[]");
        return {
            source_id: sourceId,
            pages_created: [],
            pages_updated: pagesUpdated,
            entities: [],
            already_existed: true,
        };
    }
    // Parse content
    let parsedText;
    let title;
    switch (type) {
        case "url": {
            const r = await parseUrl(source);
            parsedText = r.text;
            title = r.title;
            break;
        }
        case "pdf": {
            const r = await parsePdf(source);
            parsedText = r.text;
            title = r.title;
            break;
        }
        case "repo": {
            const r = await parseRepo(source);
            parsedText = r.text;
            title = r.title;
            break;
        }
        case "text":
        default:
            parsedText = source;
            title = topic ?? `text-${sourceId.slice(0, 8)}`;
            break;
    }
    // Save raw source archive
    const rawFileName = `${sourceId.slice(0, 16)}.md`;
    writeRawSource(rawFileName, parsedText);
    // Persist to wiki_sources
    const ingestedAt = new Date().toISOString();
    db.prepare(`
        INSERT INTO wiki_sources (id, source_type, origin, title, ingested_at, raw_path, parsed_content, pages_updated, session_id, session_name)
        VALUES (?, ?, ?, ?, ?, ?, ?, '[]', ?, ?)
    `).run(sourceId, type, origin, title, ingestedAt, `sources/${rawFileName}`, parsedText.slice(0, 100_000), session?.sessionId ?? null, session?.sessionName ?? null);
    // Extract entities
    const extraction = await extractEntities(parsedText, topic);
    const pagesCreated = [];
    const pagesUpdated = [];
    const entitySummaries = [];
    for (const entity of extraction.entities) {
        // Entities come from LLM output: skip anything without usable string fields
        // instead of failing the whole ingest on a TypeError.
        if (typeof entity?.name !== "string" || typeof entity?.type !== "string")
            continue;
        if (!entity.name || !entity.type)
            continue;
        const entitySlug = slugify(entity.name);
        const typeLower = entity.type.toLowerCase().replace(/[^a-z0-9-]/g, "-");
        const safePagePath = resolveEntityPagePath(typeLower, entitySlug);
        if (safePagePath === undefined)
            continue;
        const description = typeof entity.description === "string" && entity.description
            ? entity.description
            : entity.name;
        const existed = readPage(safePagePath) !== undefined;
        const timelineEntry = `Source ingested: ${title}\n\n${description}`;
        if (!existed) {
            const pageContent = createEntityPageContent({
                pageTitle: entity.name,
                pageSummary: description.slice(0, 180).replace(/\n/g, " "),
                entityType: typeLower,
                updatedAt: ingestedAt.slice(0, 10),
            });
            const { content: backfilled } = validateAndBackfillFrontmatter(safePagePath, pageContent);
            writePage(safePagePath, backfilled);
            pagesCreated.push(safePagePath);
        }
        else {
            pagesUpdated.push(safePagePath);
        }
        appendTimeline(safePagePath, timelineEntry);
        entitySummaries.push({ name: entity.name, type: entity.type, path: safePagePath });
    }
    // Update wiki_sources with pages_updated
    const allPages = [...new Set([...pagesCreated, ...pagesUpdated])];
    db.prepare(`UPDATE wiki_sources SET pages_updated = ? WHERE id = ?`).run(JSON.stringify(allPages), sourceId);
    return {
        source_id: sourceId,
        pages_created: pagesCreated,
        pages_updated: pagesUpdated,
        entities: entitySummaries,
        already_existed: false,
    };
}
331
+ // ---------------------------------------------------------------------------
332
+ // Type auto-detection
333
+ // ---------------------------------------------------------------------------
334
/**
 * Guess the ingestion type of a source string.
 *
 * For http(s) URLs the decision is made on the parsed URL rather than on
 * substrings of the whole string: a `.pdf` path (any letter case, query and
 * fragment ignored) is "pdf"; a github.com / gitlab.com host (or subdomain)
 * or a path ending in `.git` is "repo"; everything else is "url". URLs that
 * cannot be parsed are classified from the raw string and default to "url".
 *
 * Non-URL input is "pdf" when it ends in `.pdf` and contains no spaces,
 * otherwise "text".
 *
 * @param {string} source - URL, file path, or free text.
 * @returns {"url" | "pdf" | "repo" | "text"} Detected source type.
 */
export function detectSourceType(source) {
    const REPO_HOSTS = ["github.com", "gitlab.com"];
    const trimmed = source.trim();
    if (/^https?:\/\//i.test(trimmed)) {
        let hostname = "";
        let pathname = trimmed;
        try {
            const parsed = new URL(trimmed);
            hostname = parsed.hostname.toLowerCase();
            pathname = parsed.pathname;
        }
        catch { /* unparsable URL: classify from the raw string below */ }
        if (pathname.toLowerCase().endsWith(".pdf"))
            return "pdf";
        // Match the host exactly (or as a subdomain) so that an unrelated URL
        // merely containing "github.com" in its path or as a host prefix is not a repo.
        const isRepoHost = REPO_HOSTS.some((repoHost) => hostname === repoHost || hostname.endsWith(`.${repoHost}`));
        if (isRepoHost || pathname.endsWith(".git"))
            return "repo";
        return "url";
    }
    if (/\.pdf$/i.test(trimmed) && !trimmed.includes(" "))
        return "pdf";
    return "text";
}
347
+ //# sourceMappingURL=ingest.js.map
@@ -0,0 +1,111 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Ingestion pipeline tests — ingestSource
3
+ // Sandbox: single CHAPTERHOUSE_HOME per file to avoid module-singleton confusion
4
+ // ---------------------------------------------------------------------------
5
+ import assert from "node:assert/strict";
6
+ import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
7
+ import { join } from "node:path";
8
+ import test from "node:test";
9
+ // Single sandbox shared across all tests in this file
10
let SANDBOX;
let mods;
// Matches the error assertSafeRemoteUrl raises for blocked hosts.
const PRIVATE_URL_ERROR = /Cannot fetch internal\/private URLs\./;
// Create the sandbox and load fresh module instances (cache-busted via ?c=nonce)
// so the modules under test pick up CHAPTERHOUSE_HOME from this file.
test.before(async () => {
    const workRoot = join(process.cwd(), ".test-work");
    mkdirSync(workRoot, { recursive: true });
    SANDBOX = mkdtempSync(join(workRoot, "ingest-"));
    process.env.CHAPTERHOUSE_HOME = SANDBOX;
    const nonce = `${Date.now()}-${Math.random()}`;
    const load = (relativePath) => import(new URL(`${relativePath}?c=${nonce}`, import.meta.url).href);
    const ingestMod = await load("./ingest.js");
    const wikiFs = await load("./fs.js");
    const dbMod = await load("../store/db.js");
    mods = { ingestMod, wikiFs, dbMod };
    mods.wikiFs.ensureWikiStructure();
});
// Remove the sandbox; failures here must not fail the suite.
test.after(() => {
    try {
        rmSync(SANDBOX, { recursive: true, force: true });
    }
    catch { /* best-effort */ }
});
test("ingestSource(text) creates wiki_sources record", async () => {
    const { ingestMod, dbMod } = mods;
    const text = `Source-A: Alice is a senior engineer at Acme Corp. timestamp=${Date.now()}`;
    const result = await ingestMod.ingestSource(text, "text", "people");
    assert.ok(result.source_id, "Should return a source_id");
    assert.equal(result.already_existed, false, "Should not already exist on first call");
    const row = dbMod.getDb().prepare(`SELECT * FROM wiki_sources WHERE id = ?`).get(result.source_id);
    assert.ok(row, "Should be persisted in wiki_sources");
    assert.equal(row.source_type, "text");
});
test("ingestSource(text) saves raw source file", async () => {
    const { ingestMod, wikiFs } = mods;
    const text = `Source-B: Bob leads the platform engineering team at TechCo. timestamp=${Date.now()}`;
    const result = await ingestMod.ingestSource(text, "text");
    const sources = wikiFs.listSources();
    assert.ok(sources.length > 0, "Should have saved a raw source file");
    const expectedPrefix = result.source_id.slice(0, 16);
    assert.ok(sources.some((s) => s.startsWith(expectedPrefix)), "Source file should be named with source_id prefix");
});
test("ingestSource duplicate ingestion returns already_existed=true", async () => {
    const { ingestMod, dbMod } = mods;
    const text = `Source-C: Carol is a product manager at StartupXYZ. timestamp=${Date.now()}`;
    const first = await ingestMod.ingestSource(text, "text");
    assert.equal(first.already_existed, false, "First call should not be a duplicate");
    const second = await ingestMod.ingestSource(text, "text");
    assert.equal(second.already_existed, true, "Second ingestion should be idempotent");
    assert.equal(second.source_id, first.source_id, "Should return same source_id");
    const countRow = dbMod.getDb().prepare(`SELECT COUNT(*) as c FROM wiki_sources WHERE id = ?`).get(first.source_id);
    assert.equal(countRow.c, 1, "Should only have one row in wiki_sources");
});
test("computeSourceId is deterministic and type-scoped", () => {
    const { computeSourceId } = mods.ingestMod;
    const id1 = computeSourceId("text", "hello world");
    const id2 = computeSourceId("text", "hello world");
    assert.equal(id1, id2, "Same input should give same id");
    const id3 = computeSourceId("url", "hello world");
    assert.notEqual(id1, id3, "Different types should give different ids");
});
test("detectSourceType identifies URLs, repos, and text", () => {
    const expectations = [
        ["https://tokio.rs", "url"],
        ["http://example.com", "url"],
        ["https://github.com/user/repo", "repo"],
        ["some plain text content", "text"],
        ["", "text"],
    ];
    for (const [input, expected] of expectations) {
        assert.equal(mods.ingestMod.detectSourceType(input), expected);
    }
});
test("assertSafeRemoteUrl blocks all RFC 1918 private ranges", () => {
    const privateUrls = [
        "http://10.1.2.3",
        "http://172.16.5.4",
        "http://172.31.255.255",
        "http://192.168.1.9",
    ];
    for (const privateUrl of privateUrls) {
        assert.throws(() => mods.ingestMod.assertSafeRemoteUrl(privateUrl), PRIVATE_URL_ERROR);
    }
});
test("createEntityPageContent uses the Summary and Timeline headings", () => {
    const content = mods.ingestMod.createEntityPageContent({
        pageTitle: "Alice Example",
        pageSummary: "Senior engineer at Acme.",
        entityType: "people",
        updatedAt: "2026-05-15",
    });
    assert.match(content, /## Summary/);
    assert.match(content, /## Timeline/);
    assert.doesNotMatch(content, /## Compiled Truth/);
});
test("ingestSource stores optional research session metadata in wiki_sources", async () => {
    const { ingestMod, dbMod } = mods;
    const text = `Source-D: Research session metadata should persist. timestamp=${Date.now()}`;
    const sessionInfo = {
        sessionId: "compiler-research",
        sessionName: "Compiler research",
    };
    const result = await ingestMod.ingestSource(text, "text", "topics", sessionInfo);
    const row = dbMod.getDb().prepare(`SELECT session_id, session_name FROM wiki_sources WHERE id = ?`).get(result.source_id);
    assert.deepEqual(row, {
        session_id: "compiler-research",
        session_name: "Compiler research",
    });
});
// URL/PDF/repo tests are skipped if connectivity or dependencies are unavailable
test.skip("ingestSource(url) fetches and parses content — requires network", async () => {
    // Integration test: run manually with network access
});
test.skip("ingestSource(pdf) parses PDF — requires pdf-parse", async () => {
    // Integration test: run manually with pdf-parse installed
});
test.skip("ingestSource(repo) clones and summarises repo — requires network + git", async () => {
    // Integration test: run manually
});
111
+ //# sourceMappingURL=ingest.test.js.map
@@ -0,0 +1,151 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Wiki entity graph — link extraction and graph traversal
3
+ // ---------------------------------------------------------------------------
4
+ import { getDb } from "../store/db.js";
5
+ import { readPage, pageExists } from "./fs.js";
6
+ import { parseWikiFrontmatter } from "./frontmatter.js";
7
+ import { normalizeWikiPath } from "./path-utils.js";
8
// Relationship phrases recognised in page bodies, paired with the link type
// they produce. Each phrase is compiled into a case-insensitive global regex
// that captures the text following the phrase: it must start with a letter or
// digit and runs for 2-61 characters up to a line break or one of . , ; : ! ?
const RELATIONSHIP_PATTERNS = [
    [String.raw`implements`, "implements"],
    [String.raw`supersedes`, "supersedes"],
    [String.raw`member\s+of`, "member_of"],
    [String.raw`works?\s+at`, "member_of"],
    [String.raw`works\s+on`, "works_on"],
    [String.raw`decided\s+by`, "decided_by"],
    [String.raw`depends\s+on`, "depends_on"],
].map(([phrase, linkType]) => ({
    regex: new RegExp(String.raw`\b${phrase}\s+([A-Za-z0-9][^\n.,;:!?]{1,60})`, "gi"),
    linkType,
}));
17
/**
 * Convert a page or tag name into a slug: lowercase, trimmed, whitespace runs
 * turned into single hyphens, and every character outside [a-z0-9-] removed.
 * May return an empty string.
 *
 * @param {string} name - Page or tag name.
 * @returns {string} Slug (possibly empty).
 */
function nameToSlug(name) {
    const normalized = name.toLowerCase().trim();
    const hyphenated = normalized.split(/\s+/).join("-");
    const DISALLOWED = /[^a-z0-9-]/g;
    return hyphenated.replace(DISALLOWED, "");
}
24
/**
 * Map the target of a [[wiki link]] to a page path of the form
 * `pages/<slug>/index.md`, where the slug comes from nameToSlug.
 *
 * @param {string} name - Link target text.
 * @returns {string} Page path.
 */
function wikiLinkToPath(name) {
    const slug = nameToSlug(name);
    return ["pages", slug, "index.md"].join("/");
}
27
/**
 * Map a frontmatter tag to its topic page path,
 * `pages/topics/<slug>/index.md`, where the slug comes from nameToSlug.
 *
 * @param {string} tag - Tag text.
 * @returns {string} Topic page path.
 */
function tagToTopicPath(tag) {
    const slug = nameToSlug(tag);
    return ["pages", "topics", slug, "index.md"].join("/");
}
30
/**
 * Extract typed links from a page. Returns a deduplicated WikiLink array.
 *
 * Links come from four places, in this order:
 *   1. `[[Page Name]]` wiki links in the body ("references");
 *   2. the frontmatter `related` list ("references");
 *   3. frontmatter `tags` whose topic page exists on disk ("references");
 *   4. relationship phrases in the body (see RELATIONSHIP_PATTERNS).
 * Self-links and repeated (target, type) pairs are dropped.
 *
 * `related` and `tags` may be a list or a single string; any other value is
 * ignored. (Iterating a bare string directly would yield one "link" per
 * character.)
 *
 * @param {string} pagePath - Wiki page path; normalized before use.
 * @returns {Array<{from_page: string, to_page: string, link_type: string, extracted_at: string}>}
 *   Empty when the page cannot be read or has no content.
 */
export function extractLinks(pagePath) {
    const normalizedPath = normalizeWikiPath(pagePath);
    const content = readPage(normalizedPath);
    if (!content)
        return [];
    const { parsed: fm, body } = parseWikiFrontmatter(content);
    const links = [];
    const seen = new Set();
    const extractedAt = new Date().toISOString();
    function addLink(toPage, linkType) {
        const normalized = normalizeWikiPath(toPage);
        if (!normalized || normalized === normalizedPath)
            return;
        const key = `${normalized}:${linkType}`;
        if (seen.has(key))
            return;
        seen.add(key);
        links.push({ from_page: normalizedPath, to_page: normalized, link_type: linkType, extracted_at: extractedAt });
    }
    // Accept a list or a single string and yield trimmed, non-empty strings.
    function toStringList(value) {
        const items = Array.isArray(value) ? value : typeof value === "string" ? [value] : [];
        return items
            .filter((item) => typeof item === "string")
            .map((item) => item.trim())
            .filter((item) => item !== "");
    }
    // 1. [[Page Name]] wiki links
    for (const match of body.matchAll(/\[\[([^\]]+)\]\]/g)) {
        const name = match[1].trim();
        // Skip targets that slug to nothing; they would point at "pages//index.md".
        if (nameToSlug(name)) {
            addLink(wikiLinkToPath(name), "references");
        }
    }
    // 2. Frontmatter `related` array
    for (const rel of toStringList(fm?.related)) {
        addLink(normalizeWikiPath(rel), "references");
    }
    // 3. Frontmatter `tags` → topic pages (only if target page exists on disk)
    for (const tag of toStringList(fm?.tags)) {
        const target = tagToTopicPath(tag);
        if (pageExists(target)) {
            addLink(target, "references");
        }
    }
    // 4. Relationship statements in body text
    // matchAll works on a copy of the regex, so the shared module-level
    // patterns keep no lastIndex state between calls.
    for (const { regex, linkType } of RELATIONSHIP_PATTERNS) {
        for (const match of body.matchAll(regex)) {
            const rawTarget = nameToSlug(match[1].trim());
            if (rawTarget) {
                addLink(`pages/${rawTarget}/index.md`, linkType);
            }
        }
    }
    return links;
}
84
/**
 * Re-extract links for a page and sync them to the wiki_links table.
 *
 * All existing rows for the page are deleted and the freshly extracted links
 * inserted inside one transaction. The returned counts are the row changes
 * reported by the database for those statements, so `removed` is taken
 * atomically with the delete and `added` excludes rows skipped by
 * `INSERT OR IGNORE`.
 *
 * NOTE(review): relies on `run()` returning `{ changes }` and on
 * `db.transaction(fn)` returning fn's result (better-sqlite3 semantics) —
 * confirm against the driver behind getDb().
 *
 * @param {string} pagePath - Wiki page path; normalized before use.
 * @returns {{added: number, removed: number}} Rows inserted and rows deleted.
 */
export function updateLinks(pagePath) {
    const normalizedPath = normalizeWikiPath(pagePath);
    const db = getDb();
    // Extract before opening the transaction: this reads the page from disk.
    const newLinks = extractLinks(normalizedPath);
    const removeExisting = db.prepare(`DELETE FROM wiki_links WHERE from_page = ?`);
    const insert = db.prepare(`
        INSERT OR IGNORE INTO wiki_links (from_page, to_page, link_type, extracted_at)
        VALUES (?, ?, ?, ?)
    `);
    const sync = db.transaction(() => {
        const removed = removeExisting.run(normalizedPath).changes;
        let added = 0;
        for (const link of newLinks) {
            added += insert.run(link.from_page, link.to_page, link.link_type, link.extracted_at).changes;
        }
        return { added, removed };
    });
    return sync();
}
103
/**
 * Walk the entity graph from a starting page, breadth-first, following links
 * in both directions.
 *
 * Default depth 1, max depth 3 (values below 1 are raised to 1; a depth that
 * is not a number falls back to 1). Each page is reported once, at the depth
 * and direction of its first discovery; outbound links of a page are examined
 * before its inbound links. The starting page itself is never reported.
 *
 * @param {string} pagePath - Starting page; normalized before use.
 * @param {string} [linkType] - When given, only links of this type are followed.
 * @param {number} [depth=1] - Number of hops to follow.
 * @returns {Array<{page: string, link_type: string, direction: "outbound" | "inbound", depth: number}>}
 *   Flat list sorted by depth, then by page.
 */
export function traverse(pagePath, linkType, depth = 1) {
    const MAX_DEPTH = 3;
    const requestedDepth = Number(depth);
    const effectiveDepth = Number.isNaN(requestedDepth)
        ? 1
        : Math.min(Math.max(requestedDepth, 1), MAX_DEPTH);
    const normalizedPath = normalizeWikiPath(pagePath);
    const db = getDb();
    // Prepare each statement once instead of once per visited page.
    const typeClause = linkType ? " AND link_type = ?" : "";
    const typeParams = linkType ? [linkType] : [];
    const directions = [
        {
            direction: "outbound",
            statement: db.prepare(`SELECT to_page AS page, link_type FROM wiki_links WHERE from_page = ?${typeClause}`),
        },
        {
            direction: "inbound",
            statement: db.prepare(`SELECT from_page AS page, link_type FROM wiki_links WHERE to_page = ?${typeClause}`),
        },
    ];
    const results = [];
    const visited = new Set([normalizedPath]);
    const queue = [{ page: normalizedPath, depth: 0 }];
    // Index-based walk: the queue only grows, so no O(n) shift() is needed.
    for (let head = 0; head < queue.length; head += 1) {
        const { page: currentPage, depth: currentDepth } = queue[head];
        if (currentDepth >= effectiveDepth)
            continue;
        const nextDepth = currentDepth + 1;
        for (const { direction, statement } of directions) {
            for (const row of statement.all(currentPage, ...typeParams)) {
                if (visited.has(row.page))
                    continue;
                visited.add(row.page);
                results.push({ page: row.page, link_type: row.link_type, direction, depth: nextDepth });
                // Only pages that can still be expanded are queued.
                if (nextDepth < effectiveDepth) {
                    queue.push({ page: row.page, depth: nextDepth });
                }
            }
        }
    }
    results.sort((a, b) => {
        if (a.depth !== b.depth)
            return a.depth - b.depth;
        return a.page.localeCompare(b.page);
    });
    return results;
}
151
+ //# sourceMappingURL=links.js.map