chapterhouse 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/agents/korg.agent.md +65 -0
  2. package/dist/api/korg.js +34 -0
  3. package/dist/api/korg.test.js +42 -0
  4. package/dist/api/server.js +238 -2
  5. package/dist/api/server.test.js +199 -0
  6. package/dist/config.js +28 -0
  7. package/dist/config.test.js +20 -0
  8. package/dist/copilot/agents.js +3 -4
  9. package/dist/copilot/agents.test.js +12 -1
  10. package/dist/copilot/orchestrator.js +12 -1
  11. package/dist/copilot/orchestrator.test.js +3 -7
  12. package/dist/copilot/system-message.js +11 -10
  13. package/dist/copilot/system-message.test.js +6 -1
  14. package/dist/copilot/tools.js +184 -376
  15. package/dist/copilot/tools.memory.test.js +32 -0
  16. package/dist/copilot/tools.wiki.test.js +53 -59
  17. package/dist/daemon.js +9 -0
  18. package/dist/memory/decisions.js +6 -5
  19. package/dist/memory/entities.js +20 -9
  20. package/dist/memory/hooks.js +151 -0
  21. package/dist/memory/hooks.test.js +325 -0
  22. package/dist/memory/hot-tier.js +37 -0
  23. package/dist/memory/hot-tier.test.js +30 -0
  24. package/dist/memory/housekeeping-scheduler.js +35 -0
  25. package/dist/memory/housekeeping-scheduler.test.js +50 -0
  26. package/dist/memory/inbox.js +10 -0
  27. package/dist/memory/index.js +3 -1
  28. package/dist/memory/migration.js +244 -0
  29. package/dist/memory/migration.test.js +100 -0
  30. package/dist/memory/reflect.js +273 -0
  31. package/dist/memory/reflect.test.js +254 -0
  32. package/dist/store/db.js +119 -4
  33. package/dist/store/db.test.js +19 -1
  34. package/dist/test/setup-env.js +1 -0
  35. package/dist/wiki/consolidation.js +641 -0
  36. package/dist/wiki/consolidation.test.js +140 -0
  37. package/dist/wiki/frontmatter.js +48 -0
  38. package/dist/wiki/frontmatter.test.js +42 -0
  39. package/dist/wiki/index-manager.js +246 -330
  40. package/dist/wiki/index-manager.test.js +138 -145
  41. package/dist/wiki/ingest.js +347 -0
  42. package/dist/wiki/ingest.test.js +111 -0
  43. package/dist/wiki/links.js +151 -0
  44. package/dist/wiki/links.test.js +176 -0
  45. package/dist/wiki/migrate-topics.test.js +16 -6
  46. package/dist/wiki/scheduler.js +118 -0
  47. package/dist/wiki/scheduler.test.js +64 -0
  48. package/dist/wiki/timeline.js +51 -0
  49. package/dist/wiki/timeline.test.js +65 -0
  50. package/dist/wiki/topic-structure.js +1 -1
  51. package/package.json +1 -1
  52. package/skills/pkb-ideas/SKILL.md +78 -0
  53. package/skills/pkb-ideas/_meta.json +4 -0
  54. package/skills/pkb-org/SKILL.md +82 -0
  55. package/skills/pkb-org/_meta.json +4 -0
  56. package/skills/pkb-people/SKILL.md +74 -0
  57. package/skills/pkb-people/_meta.json +4 -0
  58. package/skills/pkb-research/SKILL.md +83 -0
  59. package/skills/pkb-research/_meta.json +4 -0
  60. package/skills/pkb-source/SKILL.md +38 -0
  61. package/skills/pkb-source/_meta.json +4 -0
  62. package/skills/wiki-conventions/SKILL.md +5 -5
  63. package/web/dist/assets/{index-DuKYxMIR.css → index-5kz9aRU9.css} +1 -1
  64. package/web/dist/assets/{index-DytB69KC.js → index-BbX9RKf3.js} +91 -89
  65. package/web/dist/assets/index-BbX9RKf3.js.map +1 -0
  66. package/web/dist/index.html +2 -2
  67. package/dist/wiki/context.js +0 -138
  68. package/dist/wiki/fix.js +0 -335
  69. package/dist/wiki/fix.test.js +0 -350
  70. package/dist/wiki/lint.js +0 -451
  71. package/dist/wiki/lint.test.js +0 -329
  72. package/web/dist/assets/index-DytB69KC.js.map +0 -1
@@ -1,160 +1,153 @@
1
1
  import assert from "node:assert/strict";
2
- import { mkdirSync, rmSync } from "node:fs";
2
+ import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
3
3
  import { join } from "node:path";
4
4
  import test from "node:test";
5
- const repoRoot = process.cwd();
6
- const sandboxRoot = join(repoRoot, ".test-work", `wiki-index-${process.pid}`);
7
- process.env.CHAPTERHOUSE_HOME = sandboxRoot;
8
- async function loadModules() {
5
+ // Sandbox: every test gets a fresh CHAPTERHOUSE_HOME
6
/**
 * Create a fresh, uniquely named sandbox directory under `<cwd>/.test-work`
 * and point the CHAPTERHOUSE_HOME environment variable at it.
 *
 * The parent directory is created on demand so the helper does not depend on
 * a suite-level setup hook having run first.
 *
 * @returns {string} Path of the newly created sandbox directory.
 */
function makeSandbox() {
    const parent = join(process.cwd(), ".test-work");
    // mkdtempSync throws ENOENT when the parent directory does not exist.
    mkdirSync(parent, { recursive: true });
    const dir = mkdtempSync(join(parent, "wiki-idx-"));
    process.env.CHAPTERHOUSE_HOME = dir;
    return dir;
}
11
+ async function loadModules(sandbox) {
9
12
  const nonce = `${Date.now()}-${Math.random()}`;
10
- const indexManager = await import(new URL(`./index-manager.js?case=${nonce}`, import.meta.url).href);
11
- const wikiFs = await import(new URL(`./fs.js?case=${nonce}`, import.meta.url).href);
13
+ const indexManager = await import(new URL(`./index-manager.js?c=${nonce}`, import.meta.url).href);
14
+ const wikiFs = await import(new URL(`./fs.js?c=${nonce}`, import.meta.url).href);
12
15
  return { indexManager, wikiFs };
13
16
  }
14
- function resetSandbox() {
15
- mkdirSync(join(repoRoot, ".test-work"), { recursive: true });
16
- rmSync(sandboxRoot, { recursive: true, force: true });
17
- }
18
- test.beforeEach(() => {
19
- resetSandbox();
17
+ test.before(() => {
18
+ mkdirSync(join(process.cwd(), ".test-work"), { recursive: true });
19
+ });
20
+ test("wikiSearch returns FTS5 results for matching query", async () => {
21
+ const sandbox = makeSandbox();
22
+ try {
23
+ const { indexManager, wikiFs } = await loadModules(sandbox);
24
+ wikiFs.writePage("pages/topics/rust/index.md", "---\ntitle: Rust Programming\nsummary: Systems programming with async support\ntags: [rust, async]\nupdated: 2026-05-01\n---\n\n# Rust\n\nSystems language.\n");
25
+ wikiFs.writePage("pages/topics/typescript/index.md", "---\ntitle: TypeScript\nsummary: Typed JavaScript for large projects\ntags: [ts, web]\nupdated: 2026-05-02\n---\n\n# TypeScript\n\nJS with types.\n");
26
+ indexManager.rebuildWikiIndex();
27
+ const results = indexManager.wikiSearch("rust async");
28
+ assert.ok(results.length > 0, "Should return results for 'rust async'");
29
+ assert.ok(results.some((r) => r.path === "pages/topics/rust/index.md"), "Should include rust page");
30
+ }
31
+ finally {
32
+ rmSync(sandbox, { recursive: true, force: true });
33
+ }
20
34
  });
21
- test.after(() => {
22
- rmSync(sandboxRoot, { recursive: true, force: true });
35
+ test("wikiSearch empty query returns most recently updated pages", async () => {
36
+ const sandbox = makeSandbox();
37
+ try {
38
+ const { indexManager, wikiFs } = await loadModules(sandbox);
39
+ wikiFs.writePage("pages/topics/alpha/index.md", "---\ntitle: Alpha\nsummary: First topic\nupdated: 2026-01-01\n---\n\n# Alpha\n");
40
+ wikiFs.writePage("pages/topics/beta/index.md", "---\ntitle: Beta\nsummary: Second topic\nupdated: 2026-05-14\n---\n\n# Beta\n");
41
+ indexManager.rebuildWikiIndex();
42
+ const results = indexManager.wikiSearch("", 10);
43
+ assert.ok(results.length >= 2, "Should return pages for empty query");
44
+ // Most recent first
45
+ const betaIdx = results.findIndex((r) => r.path === "pages/topics/beta/index.md");
46
+ const alphaIdx = results.findIndex((r) => r.path === "pages/topics/alpha/index.md");
47
+ assert.ok(betaIdx < alphaIdx, "More recently updated page should come first");
48
+ }
49
+ finally {
50
+ rmSync(sandbox, { recursive: true, force: true });
51
+ }
23
52
  });
24
- test("parseIndex reads sections, summaries, tags, and updated dates", async () => {
25
- const { indexManager, wikiFs } = await loadModules();
26
- wikiFs.writeIndexFile(`# Wiki Index\n\n## People\n\n- [Ada Lovelace](pages/people/ada.md) — Platform owner | tags: engineer, compiler | updated: 2026-05-01\n\n## Projects\n\n- [Roadmap](pages/projects/roadmap.md) - Shared priorities\n`);
27
- assert.deepEqual(indexManager.parseIndex(), [
28
- {
29
- path: "pages/people/ada.md",
30
- title: "Ada Lovelace",
31
- summary: "Platform owner",
32
- section: "People",
33
- tags: ["engineer", "compiler"],
34
- updated: "2026-05-01",
35
- },
36
- {
37
- path: "pages/projects/roadmap.md",
38
- title: "Roadmap",
39
- summary: "Shared priorities",
40
- section: "Projects",
41
- tags: undefined,
42
- updated: undefined,
43
- },
44
- ]);
53
+ test("rebuildWikiIndex populates wiki_pages from filesystem", async () => {
54
+ const sandbox = makeSandbox();
55
+ try {
56
+ const { indexManager, wikiFs } = await loadModules(sandbox);
57
+ wikiFs.writePage("pages/projects/chapterhouse/index.md", "---\ntitle: Chapterhouse\nsummary: AI orchestrator\ntags: [ai, orchestration]\nupdated: 2026-05-10\n---\n\n# Chapterhouse\n");
58
+ wikiFs.writePage("pages/projects/chapterhouse/decisions.md", "---\ntitle: Decisions\nsummary: Architectural decisions\nupdated: 2026-05-09\n---\n\n# Decisions\n");
59
+ indexManager.rebuildWikiIndex();
60
+ const entries = indexManager.parseIndex();
61
+ const paths = entries.map((e) => e.path).sort();
62
+ assert.ok(paths.includes("pages/projects/chapterhouse/index.md"), "Should include index.md");
63
+ assert.ok(paths.includes("pages/projects/chapterhouse/decisions.md"), "Should include decisions.md");
64
+ }
65
+ finally {
66
+ rmSync(sandbox, { recursive: true, force: true });
67
+ }
45
68
  });
46
- test("buildIndexEntryForPage treats frontmatter summary as the canonical index summary", async () => {
47
- const { indexManager, wikiFs } = await loadModules();
48
- wikiFs.writePage("pages/shared/runbooks/deploy.md", `---\ntitle: Deploy Runbook\nsummary: Production deployment checklist\ntags: [ops, release]\nupdated: 2026-05-04\n---\n\n# Deploy\n\n${"Deploy carefully ".repeat(20)}\n`);
49
- const entry = indexManager.buildIndexEntryForPage("pages/shared/runbooks/deploy.md");
50
- assert.deepEqual(entry, {
51
- path: "pages/shared/runbooks/deploy.md",
52
- title: "Deploy Runbook",
53
- summary: "Production deployment checklist",
54
- section: "Knowledge",
55
- tags: ["ops", "release"],
56
- updated: "2026-05-04",
57
- });
69
+ test("upsertWikiPage inserts and updates correctly", async () => {
70
+ const sandbox = makeSandbox();
71
+ try {
72
+ const { indexManager } = await loadModules(sandbox);
73
+ indexManager.upsertWikiPage("pages/people/ada/index.md", { title: "Ada Lovelace", summary: "Mathematician", tags: ["math"], updated: "2026-05-01", metadata: {} }, "First programmer");
74
+ const results = indexManager.wikiSearch("Ada");
75
+ assert.ok(results.some((r) => r.title === "Ada Lovelace"), "Should find Ada");
76
+ // Update
77
+ indexManager.upsertWikiPage("pages/people/ada/index.md", { title: "Ada Lovelace", summary: "Mathematician and programmer", tags: ["math", "history"], updated: "2026-05-02", metadata: {} }, "First programmer and mathematician");
78
+ const updated = indexManager.wikiSearch("programmer");
79
+ assert.ok(updated.length > 0, "Should find updated page");
80
+ }
81
+ finally {
82
+ rmSync(sandbox, { recursive: true, force: true });
83
+ }
58
84
  });
59
- test("parseIndex self-heals an empty index from on-disk pages", async () => {
60
- const { indexManager, wikiFs } = await loadModules();
61
- const today = new Date().toISOString().slice(0, 10);
62
- wikiFs.writePage("pages/team/vision.md", "# Vision\n\nShared direction for the team.\n");
63
- wikiFs.writeIndexFile("# Wiki Index\n\n");
64
- const entries = indexManager.parseIndex();
65
- assert.deepEqual(entries, [
66
- {
67
- path: "pages/index.md",
68
- title: "Wiki",
69
- summary: "Index of all wiki pages.",
70
- section: "Knowledge",
71
- tags: undefined,
72
- updated: today,
73
- },
74
- {
75
- path: "pages/team/vision.md",
76
- title: "Vision",
77
- summary: "Shared direction for the team.",
78
- section: "Knowledge",
79
- tags: undefined,
80
- updated: undefined,
81
- },
82
- ]);
83
- assert.match(wikiFs.readIndexFile(), /\[Vision\]\(pages\/team\/vision\.md\)/);
85
+ test("FTS search returns results under 50ms", async () => {
86
+ const sandbox = makeSandbox();
87
+ try {
88
+ const { indexManager, wikiFs } = await loadModules(sandbox);
89
+ // Populate with 20 pages
90
+ for (let i = 0; i < 20; i++) {
91
+ wikiFs.writePage(`pages/topics/topic-${i}/index.md`, `---\ntitle: Topic ${i}\nsummary: Description for topic ${i} covering various subjects\ntags: [topic${i}]\nupdated: 2026-05-01\n---\n\n# Topic ${i}\n\nContent.\n`);
92
+ }
93
+ indexManager.rebuildWikiIndex();
94
+ const start = Date.now();
95
+ const results = indexManager.wikiSearch("topic description");
96
+ const elapsed = Date.now() - start;
97
+ assert.ok(results.length > 0, "Should return results");
98
+ assert.ok(elapsed < 50, `FTS search should complete in <50ms, took ${elapsed}ms`);
99
+ }
100
+ finally {
101
+ rmSync(sandbox, { recursive: true, force: true });
102
+ }
84
103
  });
85
- test("the index renders entity categories as topic groups with nested facet pages", async () => {
86
- const { indexManager, wikiFs } = await loadModules();
87
- wikiFs.writePage("pages/projects/chapterhouse/index.md", "---\ntitle: Chapterhouse\nupdated: 2026-05-09\n---\n\n# Chapterhouse\n\nThe per-session orchestrator.\n");
88
- wikiFs.writePage("pages/projects/chapterhouse/decisions.md", "---\ntitle: Chapterhouse Decisions\nupdated: 2026-05-09\n---\n\n# Decisions\n\nUse SSE for streaming.\n");
89
- wikiFs.writePage("pages/preferences.md", "---\ntitle: Preferences\n---\n\n# Preferences\n\nDark mode.\n");
90
- indexManager.rebuildIndexFromPages();
91
- const index = wikiFs.readIndexFile();
92
- assert.match(index, /## Projects/);
93
- assert.match(index, /^- \[Chapterhouse\]\(pages\/projects\/chapterhouse\/index\.md\) — /m);
94
- assert.match(index, /^ {2}- \[Chapterhouse Decisions\]\(pages\/projects\/chapterhouse\/decisions\.md\) — /m);
95
- assert.match(index, /## Preferences/);
96
- // Indented facet bullets must still round-trip through parseIndex.
97
- const paths = indexManager.parseIndex().map((entry) => entry.path).sort();
98
- assert.deepEqual(paths, [
99
- "pages/preferences.md",
100
- "pages/projects/chapterhouse/decisions.md",
101
- "pages/projects/chapterhouse/index.md",
102
- ]);
104
+ test("removeFromIndex removes from wiki_pages", async () => {
105
+ const sandbox = makeSandbox();
106
+ try {
107
+ const { indexManager } = await loadModules(sandbox);
108
+ indexManager.upsertWikiPage("pages/people/test/index.md", { title: "Test Person", summary: "A test", tags: [], updated: "2026-05-01", metadata: {} }, "A test");
109
+ const before = indexManager.wikiSearch("Test Person");
110
+ assert.ok(before.length > 0, "Should exist before removal");
111
+ const removed = indexManager.removeFromIndex("pages/people/test/index.md");
112
+ assert.equal(removed, true);
113
+ const after = indexManager.wikiSearch("Test Person");
114
+ assert.equal(after.length, 0, "Should not exist after removal");
115
+ }
116
+ finally {
117
+ rmSync(sandbox, { recursive: true, force: true });
118
+ }
103
119
  });
104
- test("searchIndex ranks strong metadata matches and falls back to page bodies", async () => {
105
- const { indexManager, wikiFs } = await loadModules();
106
- wikiFs.writePage("pages/team/api.md", "# API\n\nObservability budget and telemetry plans.\n");
107
- wikiFs.writePage("pages/team/ops.md", "# Ops\n\nDaily operational notes.\n");
108
- indexManager.writeIndex([
109
- {
110
- path: "pages/team/api.md",
111
- title: "API",
112
- summary: "Status of the platform",
113
- section: "Team",
114
- tags: ["api"],
115
- updated: new Date().toISOString().slice(0, 10),
116
- },
117
- {
118
- path: "pages/team/ops.md",
119
- title: "Operations",
120
- summary: "Runbooks and incident work",
121
- section: "Team",
122
- },
123
- ]);
124
- const metadataHit = indexManager.searchIndex("api", 1);
125
- const bodyFallback = indexManager.searchIndex("telemetry", 1);
126
- assert.deepEqual(metadataHit.map((entry) => entry.path), ["pages/team/api.md"]);
127
- assert.deepEqual(bodyFallback.map((entry) => entry.path), ["pages/team/api.md"]);
120
+ test("searchIndex delegates to wikiSearch and returns IndexEntry shape", async () => {
121
+ const sandbox = makeSandbox();
122
+ try {
123
+ const { indexManager, wikiFs } = await loadModules(sandbox);
124
+ wikiFs.writePage("pages/team/api.md", "---\ntitle: API Docs\nsummary: API documentation\ntags: [api]\nupdated: 2026-05-01\n---\n\n# API\n");
125
+ indexManager.rebuildWikiIndex();
126
+ const results = indexManager.searchIndex("api");
127
+ assert.ok(results.length > 0);
128
+ assert.ok("section" in results[0], "Should have section field");
129
+ assert.ok("title" in results[0], "Should have title field");
130
+ }
131
+ finally {
132
+ rmSync(sandbox, { recursive: true, force: true });
133
+ }
128
134
  });
129
- test("addToIndex, removeFromIndex, and getIndexSummary keep the catalog in sync", async () => {
130
- const { indexManager } = await loadModules();
131
- const today = new Date().toISOString().slice(0, 10);
132
- indexManager.addToIndex({
133
- path: "pages/people/ada.md",
134
- title: "Ada Lovelace",
135
- summary: "Owns release quality",
136
- section: "People",
137
- tags: ["qa"],
138
- updated: "2026-05-05",
139
- });
140
- indexManager.addToIndex({
141
- path: "pages/projects/launch.md",
142
- title: "Launch",
143
- summary: "Tracks release milestones",
144
- section: "Projects",
145
- });
146
- indexManager.addToIndex({
147
- path: "pages/people/ada.md",
148
- title: "Ada Lovelace",
149
- summary: "Owns regression coverage",
150
- section: "People",
151
- tags: ["qa", "testing"],
152
- updated: "2026-05-06",
153
- });
154
- assert.equal(indexManager.removeFromIndex("pages/projects/launch.md"), true);
155
- assert.equal(indexManager.removeFromIndex("pages/projects/missing.md"), false);
156
- const summary = indexManager.getIndexSummary();
157
- assert.match(summary, /\*\*People\*\*: Ada Lovelace: Owns regression coverage \[qa, testing\] \(2026-05-06\)/);
158
- assert.match(summary, new RegExp(`\\*\\*Index\\*\\*: Wiki: Index of all wiki pages\\. \\(${today}\\)`));
135
+ test("rebuildWikiIndex removes stale entries not on disk", async () => {
136
+ const sandbox = makeSandbox();
137
+ try {
138
+ const { indexManager, wikiFs } = await loadModules(sandbox);
139
+ wikiFs.writePage("pages/topics/keep/index.md", "---\ntitle: Keep\nsummary: Keep this\nupdated: 2026-05-01\n---\n\n# Keep\n");
140
+ // Insert stale entry directly
141
+ indexManager.upsertWikiPage("pages/topics/stale/index.md", { title: "Stale", summary: "Should be removed", tags: [], updated: "2026-01-01", metadata: {} }, "Stale");
142
+ // Rebuild syncs disk → DB
143
+ indexManager.rebuildWikiIndex();
144
+ const entries = indexManager.parseIndex();
145
+ const paths = entries.map((e) => e.path);
146
+ assert.ok(paths.includes("pages/topics/keep/index.md"), "Should keep on-disk page");
147
+ assert.ok(!paths.includes("pages/topics/stale/index.md"), "Should remove stale entry");
148
+ }
149
+ finally {
150
+ rmSync(sandbox, { recursive: true, force: true });
151
+ }
159
152
  });
160
153
  //# sourceMappingURL=index-manager.test.js.map
@@ -0,0 +1,347 @@
1
+ // ---------------------------------------------------------------------------
2
+ // PKB ingestion pipeline — parse, extract entities, write wiki pages
3
+ // ---------------------------------------------------------------------------
4
+ import { createHash } from "node:crypto";
5
+ import { exec } from "node:child_process";
6
+ import { mkdirSync, readFileSync, existsSync, rmSync } from "node:fs";
7
+ import { join } from "node:path";
8
+ import { promisify } from "node:util";
9
+ import { getDb } from "../store/db.js";
10
+ import { ensureWikiStructure, writeRawSource, assertPagePath } from "./fs.js";
11
+ import { appendTimeline } from "./timeline.js";
12
+ import { validateAndBackfillFrontmatter } from "./frontmatter.js";
13
+ import { writePage, readPage } from "./fs.js";
14
+ import { childLogger } from "../util/logger.js";
15
+ const log = childLogger("ingest");
16
+ const execAsync = promisify(exec);
17
+ // ---------------------------------------------------------------------------
18
+ // Source ID
19
+ // ---------------------------------------------------------------------------
20
/**
 * Derive the identifier under which an ingested source is stored.
 *
 * @param {string} sourceType - Kind of source (the callers in this file pass "url", "pdf", "repo" or "text").
 * @param {string} origin - Where the source came from.
 * @returns {string} Hex-encoded SHA-256 of `sourceType` immediately followed by `origin`.
 */
export function computeSourceId(sourceType, origin) {
    const hasher = createHash("sha256");
    hasher.update(sourceType + origin);
    return hasher.digest("hex");
}
23
+ // ---------------------------------------------------------------------------
24
+ // Content parsers
25
+ // ---------------------------------------------------------------------------
26
/**
 * True when an IPv4 address (given as octets) is loopback, private,
 * link-local, carrier-grade NAT or in the 0.0.0.0/8 "this network" block.
 */
function isInternalIpv4([first, second]) {
    return first === 0
        || first === 10
        || first === 127
        || (first === 100 && second >= 64 && second <= 127)
        || (first === 169 && second === 254)
        || (first === 172 && second >= 16 && second <= 31)
        || (first === 192 && second === 168);
}
/**
 * True when an IPv6 address (without brackets, as serialised by URL) is
 * unspecified, loopback, unique-local, link-local, or an IPv4-mapped address
 * whose embedded IPv4 address is internal.
 */
function isInternalIpv6(address) {
    if (address === "::" || address === "::1")
        return true;
    if (/^f[cd][0-9a-f]{2}:/.test(address))
        return true; // fc00::/7 unique-local
    if (/^fe[89ab][0-9a-f]:/.test(address))
        return true; // fe80::/10 link-local
    // IPv4-mapped addresses; handle both the hex and the dotted spelling.
    const mappedHex = address.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
    if (mappedHex) {
        const high = Number.parseInt(mappedHex[1], 16);
        const low = Number.parseInt(mappedHex[2], 16);
        return isInternalIpv4([high >> 8, high & 0xff, low >> 8, low & 0xff]);
    }
    const mappedDotted = address.match(/^::ffff:(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
    if (mappedDotted)
        return isInternalIpv4(mappedDotted.slice(1).map(Number));
    return false;
}
/**
 * Validate that `url` is an http(s) URL whose host is not an obviously
 * internal address, and return it parsed.
 *
 * Only the literal host is inspected; a public DNS name that resolves to a
 * private address is not detected here.
 *
 * @param {string} url - URL to validate.
 * @returns {URL} The parsed URL.
 * @throws {TypeError} If `url` cannot be parsed.
 * @throws {Error} If the scheme is not http/https or the host is internal.
 */
export function assertSafeRemoteUrl(url) {
    const parsedUrl = new URL(url);
    if (!["http:", "https:"].includes(parsedUrl.protocol)) {
        throw new Error(`Only http/https URLs supported, got: ${parsedUrl.protocol}`);
    }
    // A trailing dot ("localhost.") names the same host, so drop it before comparing.
    const host = parsedUrl.hostname.toLowerCase().replace(/\.$/, "");
    let isInternal;
    if (host.startsWith("[") && host.endsWith("]")) {
        // URL.hostname keeps the brackets around IPv6 literals.
        isInternal = isInternalIpv6(host.slice(1, -1));
    }
    else {
        const octets = host.split(".").map((part) => Number(part));
        const isIpv4 = octets.length === 4
            && octets.every((part) => Number.isInteger(part) && part >= 0 && part <= 255);
        isInternal = (isIpv4 && isInternalIpv4(octets))
            || host === "localhost"
            || host.endsWith(".localhost")
            || host.startsWith("169.254.")
            || host === "metadata.google.internal";
    }
    if (isInternal) {
        throw new Error("Cannot fetch internal/private URLs.");
    }
    return parsedUrl;
}
44
/**
 * Render the initial markdown for an entity page: front matter followed by
 * a heading, a "Summary" section and an empty "Timeline" section.
 *
 * Values placed on single-line positions (front-matter fields and the heading)
 * have line breaks collapsed to a space, so a multi-line title or summary
 * cannot terminate the front matter early or introduce extra keys.
 *
 * @param {object} params
 * @param {string} params.pageTitle - Page title (front matter and heading).
 * @param {string} params.pageSummary - Summary (front matter and Summary section).
 * @param {string} params.entityType - Written to `metadata.entity_type`.
 * @param {string} params.updatedAt - Written to the `updated` field.
 * @returns {string} Page content.
 */
export function createEntityPageContent({ pageTitle, pageSummary, entityType, updatedAt, }) {
    const toSingleLine = (value) => String(value).replace(/\s*[\r\n]+\s*/g, " ");
    const title = toSingleLine(pageTitle);
    const frontMatter = [
        "---",
        `title: ${title}`,
        `summary: ${toSingleLine(pageSummary)}`,
        `updated: ${toSingleLine(updatedAt)}`,
        "tags: []",
        "metadata:",
        ` entity_type: ${toSingleLine(entityType)}`,
        "---",
    ].join("\n");
    // The Summary section is ordinary markdown, so the summary is kept verbatim there.
    return `${frontMatter}\n\n# ${title}\n\n## Summary\n\n${pageSummary}\n\n## Timeline\n`;
}
47
/**
 * Fetch a web page and reduce it to plain text plus a title.
 *
 * Redirects are followed manually (at most five hops) so that every hop is
 * re-validated with assertSafeRemoteUrl; with automatic redirects a public
 * URL could bounce the request to an internal address after the initial
 * check. Each request is aborted after 30 seconds.
 *
 * Text extraction prefers @mozilla/readability + jsdom when both can be
 * imported and falls back to tag stripping otherwise.
 *
 * @param {string} url - http(s) URL to fetch.
 * @returns {Promise<{ text: string, title: string }>} Text capped at 50,000
 *   characters and a title (article title, <title> tag, or the host name).
 * @throws {Error} On an unsafe URL, too many redirects, a timeout, or a non-2xx response.
 */
async function parseUrl(url) {
    const maxRedirects = 5;
    let parsedUrl = assertSafeRemoteUrl(url);
    let currentUrl = url;
    let res;
    for (let hop = 0;; hop++) {
        res = await fetch(currentUrl, {
            headers: { "User-Agent": "Chapterhouse/1.0 PKB-Ingest" },
            redirect: "manual",
            signal: AbortSignal.timeout(30_000),
        });
        const isRedirect = res.status >= 300 && res.status < 400;
        const location = isRedirect ? res.headers?.get("location") : null;
        if (!location)
            break;
        if (hop >= maxRedirects)
            throw new Error(`Too many redirects fetching ${url}`);
        // Location may be relative; resolve it against the URL that produced it.
        parsedUrl = assertSafeRemoteUrl(new URL(location, parsedUrl).href);
        currentUrl = parsedUrl.href;
    }
    if (!res.ok)
        throw new Error(`HTTP ${res.status} ${res.statusText} fetching ${url}`);
    const html = await res.text();
    // Try @mozilla/readability if available
    let text;
    let title = parsedUrl.hostname;
    try {
        const { Readability } = await import("@mozilla/readability");
        const { JSDOM } = await import("jsdom");
        const dom = new JSDOM(html, { url: currentUrl });
        const reader = new Readability(dom.window.document);
        const article = reader.parse();
        if (article) {
            text = article.textContent;
            title = article.title || title;
        }
        else {
            text = stripHtml(html);
        }
    }
    catch {
        // Optional dependencies missing or the DOM could not be built: fall back to tag stripping.
        text = stripHtml(html);
        // Try to extract title from <title> tag
        const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
        if (titleMatch)
            title = titleMatch[1].trim();
    }
    if (text.length > 50_000)
        text = text.slice(0, 50_000);
    return { text, title };
}
81
/**
 * Reduce an HTML document to plain text.
 *
 * Removes script/style elements (closing tags may contain whitespace, e.g.
 * `</script >`), HTML comments and all remaining tags, decodes the most
 * common character references, collapses whitespace runs and caps the result
 * at 10,000 characters.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Plain text, at most 10,000 characters.
 */
function stripHtml(html) {
    const namedEntities = new Map([
        ["amp", "&"],
        ["lt", "<"],
        ["gt", ">"],
        ["quot", "\""],
        ["apos", "'"],
        ["nbsp", " "],
    ]);
    const withoutMarkup = html
        .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, "")
        .replace(/<style\b[\s\S]*?<\/style\b[^>]*>/gi, "")
        // Comments go before generic tags: a ">" inside a comment would otherwise end the match early.
        .replace(/<!--[\s\S]*?-->/g, " ")
        .replace(/<[^>]+>/g, " ");
    // One pass over all references, so "&amp;lt;" becomes "&lt;" and is not decoded twice.
    const decoded = withoutMarkup.replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (reference, body) => {
        if (body[0] !== "#") {
            return namedEntities.get(body.toLowerCase()) ?? reference;
        }
        const isHex = body[1] === "x" || body[1] === "X";
        const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // String.fromCodePoint throws outside the Unicode range; leave such references untouched.
        return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
    });
    return decoded
        .replace(/\s{2,}/g, " ")
        .trim()
        .slice(0, 10_000);
}
90
/**
 * Extract text from a PDF file on disk using the optional `pdf-parse` package.
 *
 * A missing `pdf-parse` installation and a failure to read or parse the file
 * are reported separately, so an unreadable or corrupt PDF is not
 * misdiagnosed as a missing dependency. The underlying error is attached as `cause`.
 *
 * @param {string} filePath - Path of the PDF file.
 * @returns {Promise<{ text: string, title: string }>} Text capped at 50,000
 *   characters; title is the file name without directory and `.pdf` suffix.
 * @throws {Error} If `pdf-parse` cannot be imported, or the file cannot be read or parsed.
 */
async function parsePdf(filePath) {
    let pdfParse;
    try {
        pdfParse = await import("pdf-parse");
    }
    catch (err) {
        throw new Error("PDF ingestion requires pdf-parse: npm install pdf-parse", { cause: err });
    }
    try {
        const buf = readFileSync(filePath);
        const data = await pdfParse.default(buf);
        // Strip the directory part for both "/" and "\" separated paths.
        const title = filePath.replace(/.*[\\/]/, "").replace(/\.pdf$/i, "");
        return { text: data.text.slice(0, 50_000), title };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to read or parse PDF ${filePath}: ${reason}`, { cause: err });
    }
}
101
/**
 * Shallow-clone a git repository into a temporary directory and summarise it
 * as text: README (first match, up to 10,000 characters), any of
 * package.json / Cargo.toml / go.mod (up to 2,000 characters each) and an
 * `ls -la` listing of the repository root.
 *
 * Commands are run with execFile and an argument array rather than through a
 * shell. JSON.stringify is not shell quoting: inside double quotes the shell
 * still expands `$(...)` and backticks, so a crafted URL could execute
 * arbitrary commands. `--` stops git from treating a URL that starts with
 * "-" as an option.
 *
 * NOTE(review): the URL scheme and host are not validated here; confirm
 * whether callers should restrict which remotes may be cloned.
 *
 * @param {string} repoUrl - Repository URL or path accepted by `git clone`.
 * @returns {Promise<{ text: string, title: string }>} Summary text and a title
 *   taken from the last path segment of `repoUrl` without a `.git` suffix.
 * @throws {Error} If cloning or listing fails or exceeds the 60 second clone timeout.
 */
async function parseRepo(repoUrl) {
    const { execFile } = await import("node:child_process");
    const execFileAsync = promisify(execFile);
    // pid + random suffix keep concurrent ingests started in the same millisecond apart.
    const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 8)}`;
    const tmpDir = join(process.cwd(), ".test-work", `repo-${uniqueSuffix}`);
    mkdirSync(tmpDir, { recursive: true });
    try {
        await execFileAsync("git", ["clone", "--depth", "1", "--", repoUrl, tmpDir], { timeout: 60_000 });
        const parts = [];
        // README
        const readmeName = ["README.md", "README.rst", "README.txt", "README"]
            .find((name) => existsSync(join(tmpDir, name)));
        if (readmeName !== undefined) {
            parts.push(`## README\n\n${readFileSync(join(tmpDir, readmeName), "utf-8").slice(0, 10_000)}`);
        }
        // package.json / Cargo.toml / go.mod
        for (const name of ["package.json", "Cargo.toml", "go.mod"]) {
            const manifestPath = join(tmpDir, name);
            if (existsSync(manifestPath)) {
                parts.push(`## ${name}\n\n\`\`\`\n${readFileSync(manifestPath, "utf-8").slice(0, 2_000)}\n\`\`\``);
            }
        }
        const { stdout } = await execFileAsync("ls", ["-la", tmpDir]);
        parts.push(`## Directory listing\n\n\`\`\`\n${stdout}\n\`\`\``);
        const title = repoUrl.replace(/.*\//, "").replace(/\.git$/, "");
        return { text: parts.join("\n\n"), title };
    }
    finally {
        try {
            rmSync(tmpDir, { recursive: true, force: true });
        }
        catch { /* best-effort */ }
    }
}
134
+ // ---------------------------------------------------------------------------
135
+ // Entity extraction via LLM
136
+ // ---------------------------------------------------------------------------
137
/**
 * Pull the `{ entities, relationships }` object out of a model reply.
 *
 * Accepts a plain string, or an object carrying the reply text in
 * `data.content`; anything else is serialised with JSON.stringify as a last resort.
 * NOTE(review): `data.content` is assumed to be where @github/copilot-sdk's
 * sendAndWait puts the assistant text; confirm against the installed SDK version.
 *
 * @throws {SyntaxError} If the captured text is not valid JSON.
 */
function parseExtractionResponse(result) {
    let rawText;
    if (typeof result === "string") {
        rawText = result;
    }
    else if (typeof result?.data?.content === "string") {
        // Read the reply text directly. Re-serialising the whole object would make the
        // regex below capture the envelope, which has no top-level `entities` key.
        rawText = result.data.content;
    }
    else {
        rawText = JSON.stringify(result) ?? "";
    }
    // Extract JSON from the response (may be wrapped in markdown code blocks)
    const jsonMatch = rawText.match(/\{[\s\S]*\}/);
    if (!jsonMatch)
        return { entities: [], relationships: [] };
    const parsed = JSON.parse(jsonMatch[0]);
    return {
        entities: Array.isArray(parsed?.entities) ? parsed.entities : [],
        relationships: Array.isArray(parsed?.relationships) ? parsed.relationships : [],
    };
}
/**
 * Ask an LLM (through the Copilot SDK) to extract entities and relationships
 * from `text`.
 *
 * Returns empty lists, without throwing, when no auth token is configured or
 * when the SDK call or response parsing fails. Only the first 8,000
 * characters of `text` are sent.
 *
 * @param {string} text - Content to analyse.
 * @param {string} [topic] - Optional topic the model is asked to focus on.
 * @returns {Promise<{ entities: Array, relationships: Array }>}
 */
async function extractEntities(text, topic) {
    // Skip entity extraction if no auth token is configured
    const { config } = await import("../config.js");
    const token = config.copilotAuthToken || process.env.COPILOT_TOKEN || process.env.GITHUB_TOKEN;
    if (!token) {
        log.debug("No Copilot auth token configured, skipping entity extraction");
        return { entities: [], relationships: [] };
    }
    const topicHint = topic ? ` Focus especially on entities related to: ${topic}.` : "";
    const systemPrompt = "Extract entities and relationships from this content. Return JSON only, no other text: " +
        `{ "entities": [{"name": string, "type": string, "description": string}], ` +
        `"relationships": [{"from": string, "to": string, "type": string}] }`;
    const userMessage = `${systemPrompt}${topicHint}\n\n---\n\n${text.slice(0, 8_000)}`;
    try {
        const { CopilotClient, approveAll } = await import("@github/copilot-sdk");
        // Use a one-shot client (autoRestart: false) so it doesn't keep the process alive
        const client = new CopilotClient({
            autoStart: true,
            autoRestart: false,
            gitHubToken: token,
        });
        await client.start();
        try {
            const session = await client.createSession({
                model: "claude-haiku-4.5",
                tools: [],
                onPermissionRequest: approveAll,
            });
            try {
                const result = await session.sendAndWait({ prompt: userMessage }, 30_000);
                return parseExtractionResponse(result);
            }
            finally {
                try {
                    // Awaited so that an asynchronous failure is caught here instead of
                    // surfacing as an unhandled rejection.
                    await session.destroy();
                }
                catch { /* best-effort */ }
            }
        }
        finally {
            try {
                await client.stop();
            }
            catch { /* best-effort */ }
        }
    }
    catch (err) {
        log.warn({ err: err instanceof Error ? err.message : err }, "Entity extraction LLM call failed, skipping");
        return { entities: [], relationships: [] };
    }
}
197
+ // ---------------------------------------------------------------------------
198
+ // Slug helper
199
+ // ---------------------------------------------------------------------------
200
/**
 * Turn a display name into a path-safe slug: lower-case, runs of characters
 * outside `a-z0-9` replaced by a single hyphen, no leading or trailing
 * hyphens, at most 64 characters.
 *
 * Hyphens are trimmed again after truncation, because cutting at 64
 * characters can leave a hyphen at the end.
 *
 * @param {string} name - Name to convert.
 * @returns {string} The slug, or "unknown" when nothing usable remains.
 */
function slugify(name) {
    const slug = name
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-+|-+$/g, "")
        .slice(0, 64)
        .replace(/-+$/, "");
    return slug || "unknown";
}
207
+ // ---------------------------------------------------------------------------
208
+ // Main ingest function
209
+ // ---------------------------------------------------------------------------
210
/**
 * Pick the page path for an entity: `pages/<type>/<slug>/index.md` when
 * assertPagePath accepts it, otherwise `pages/topics/<slug>/index.md`.
 * Returns undefined when neither path is accepted.
 */
function resolveEntityPagePath(typeSegment, entitySlug) {
    const candidates = [
        `pages/${typeSegment}/${entitySlug}/index.md`,
        // Use topics fallback for unknown entity types
        `pages/topics/${entitySlug}/index.md`,
    ];
    for (const candidate of candidates) {
        try {
            assertPagePath(candidate);
            return candidate;
        }
        catch { /* rejected, try the next candidate */ }
    }
    return undefined;
}
/**
 * Ingest a source into the wiki: parse it, archive the parsed text, record it
 * in `wiki_sources`, extract entities and create or update one page per entity.
 *
 * Ingestion is idempotent per source id. For "text" sources the id is derived
 * from the full text, so two different documents that share their first 200
 * characters are not mistaken for each other; the stored `origin` is still
 * the 200-character prefix.
 *
 * @param {string} source - URL, file path, repository URL or raw text.
 * @param {string} type - "url", "pdf", "repo" or "text"; any other value is treated as text.
 * @param {string} [topic] - Optional topic hint; also the title of text sources.
 * @param {{ sessionId?: string, sessionName?: string }} [session] - Session recorded with the source.
 * @returns {Promise<{ source_id: string, pages_created: string[], pages_updated: string[],
 *   entities: Array<{ name: string, type: string, path: string }>, already_existed: boolean }>}
 */
export async function ingestSource(source, type, topic, session) {
    ensureWikiStructure();
    const db = getDb();
    const isText = type === "text";
    const origin = isText ? source.slice(0, 200) : source;
    const sourceId = computeSourceId(type, isText ? source : origin);
    // Idempotency check
    const existing = db.prepare(`SELECT id, pages_updated FROM wiki_sources WHERE id = ?`).get(sourceId);
    if (existing) {
        log.info({ sourceId, type, origin }, "Source already ingested, skipping");
        const pagesUpdated = JSON.parse(existing.pages_updated || "[]");
        return {
            source_id: sourceId,
            pages_created: [],
            pages_updated: pagesUpdated,
            entities: [],
            already_existed: true,
        };
    }
    // Parse content
    let parsedText;
    let title;
    switch (type) {
        case "url":
            ({ text: parsedText, title } = await parseUrl(source));
            break;
        case "pdf":
            ({ text: parsedText, title } = await parsePdf(source));
            break;
        case "repo":
            ({ text: parsedText, title } = await parseRepo(source));
            break;
        case "text":
        default:
            parsedText = source;
            title = topic ?? `text-${sourceId.slice(0, 8)}`;
            break;
    }
    // Save raw source archive
    const rawFileName = `${sourceId.slice(0, 16)}.md`;
    writeRawSource(rawFileName, parsedText);
    // Persist to wiki_sources
    const ingestedAt = new Date().toISOString();
    db.prepare(`
 INSERT INTO wiki_sources (id, source_type, origin, title, ingested_at, raw_path, parsed_content, pages_updated, session_id, session_name)
 VALUES (?, ?, ?, ?, ?, ?, ?, '[]', ?, ?)
 `).run(sourceId, type, origin, title, ingestedAt, `sources/${rawFileName}`, parsedText.slice(0, 100_000), session?.sessionId ?? null, session?.sessionName ?? null);
    // Extract entities
    const extraction = await extractEntities(parsedText, topic);
    const pagesCreated = [];
    const pagesUpdated = [];
    const entitySummaries = [];
    for (const entity of extraction.entities) {
        // Entities come from model output; skip anything without a usable string name and type.
        if (typeof entity?.name !== "string" || typeof entity.type !== "string" || !entity.name || !entity.type)
            continue;
        const entitySlug = slugify(entity.name);
        const typeLower = entity.type.toLowerCase().replace(/[^a-z0-9-]/g, "-");
        // Validate path is safe
        const safePagePath = resolveEntityPagePath(typeLower, entitySlug);
        if (safePagePath === undefined)
            continue;
        const description = typeof entity.description === "string" && entity.description
            ? entity.description
            : entity.name;
        const existed = readPage(safePagePath) !== undefined;
        const timelineEntry = `Source ingested: ${title}\n\n${description}`;
        if (existed) {
            pagesUpdated.push(safePagePath);
        }
        else {
            const pageContent = createEntityPageContent({
                pageTitle: entity.name,
                pageSummary: description.slice(0, 180).replace(/\n/g, " "),
                entityType: typeLower,
                updatedAt: ingestedAt.slice(0, 10),
            });
            const { content: backfilled } = validateAndBackfillFrontmatter(safePagePath, pageContent);
            writePage(safePagePath, backfilled);
            pagesCreated.push(safePagePath);
        }
        appendTimeline(safePagePath, timelineEntry);
        entitySummaries.push({ name: entity.name, type: entity.type, path: safePagePath });
    }
    // Update wiki_sources with pages_updated
    const allPages = [...new Set([...pagesCreated, ...pagesUpdated])];
    db.prepare(`UPDATE wiki_sources SET pages_updated = ? WHERE id = ?`).run(JSON.stringify(allPages), sourceId);
    return {
        source_id: sourceId,
        pages_created: pagesCreated,
        pages_updated: pagesUpdated,
        entities: entitySummaries,
        already_existed: false,
    };
}
331
+ // ---------------------------------------------------------------------------
332
+ // Type auto-detection
333
+ // ---------------------------------------------------------------------------
334
/**
 * Guess the source type of a user-supplied string.
 *
 * - http(s) URL ending in ".pdf" → "pdf"
 * - http(s) URL whose host is github.com / gitlab.com (or a subdomain), or
 *   which ends in ".git" → "repo"
 * - any other http(s) URL → "url"
 * - a whitespace-free string ending in ".pdf" → "pdf"
 * - everything else → "text"
 *
 * The forge check compares the parsed host name, not the whole string, so
 * URLs that merely mention "github.com" in their path or query are classified
 * as "url". Scheme and ".pdf" matching are case-insensitive.
 *
 * @param {string} source - URL, file path or free text.
 * @returns {"pdf" | "repo" | "url" | "text"}
 */
export function detectSourceType(source) {
    const trimmed = source.trim();
    const endsWithPdf = /\.pdf$/i.test(trimmed);
    if (/^https?:\/\//i.test(trimmed)) {
        if (endsWithPdf)
            return "pdf";
        let host = "";
        try {
            host = new URL(trimmed).hostname.toLowerCase();
        }
        catch { /* unparsable URL: no host-based detection possible */ }
        const isForgeHost = ["github.com", "gitlab.com"]
            .some((domain) => host === domain || host.endsWith(`.${domain}`));
        if (isForgeHost || trimmed.endsWith(".git"))
            return "repo";
        return "url";
    }
    // A bare file name or path; free text containing whitespace is never treated as a path.
    if (endsWithPdf && !/\s/.test(trimmed))
        return "pdf";
    return "text";
}
347
+ //# sourceMappingURL=ingest.js.map