@kibhq/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/package.json +40 -0
  2. package/src/compile/backlinks.test.ts +112 -0
  3. package/src/compile/backlinks.ts +80 -0
  4. package/src/compile/cache.test.ts +126 -0
  5. package/src/compile/cache.ts +125 -0
  6. package/src/compile/compiler.test.ts +278 -0
  7. package/src/compile/compiler.ts +305 -0
  8. package/src/compile/diff.test.ts +164 -0
  9. package/src/compile/diff.ts +121 -0
  10. package/src/compile/index-manager.test.ts +227 -0
  11. package/src/compile/index-manager.ts +148 -0
  12. package/src/compile/prompts.ts +124 -0
  13. package/src/constants.ts +40 -0
  14. package/src/errors.ts +66 -0
  15. package/src/hash.test.ts +21 -0
  16. package/src/hash.ts +24 -0
  17. package/src/index.ts +22 -0
  18. package/src/ingest/extractors/file.test.ts +129 -0
  19. package/src/ingest/extractors/file.ts +136 -0
  20. package/src/ingest/extractors/github.test.ts +47 -0
  21. package/src/ingest/extractors/github.ts +135 -0
  22. package/src/ingest/extractors/interface.ts +26 -0
  23. package/src/ingest/extractors/pdf.ts +130 -0
  24. package/src/ingest/extractors/web.test.ts +242 -0
  25. package/src/ingest/extractors/web.ts +163 -0
  26. package/src/ingest/extractors/youtube.test.ts +44 -0
  27. package/src/ingest/extractors/youtube.ts +166 -0
  28. package/src/ingest/ingest.test.ts +187 -0
  29. package/src/ingest/ingest.ts +179 -0
  30. package/src/ingest/normalize.test.ts +120 -0
  31. package/src/ingest/normalize.ts +83 -0
  32. package/src/ingest/router.test.ts +154 -0
  33. package/src/ingest/router.ts +119 -0
  34. package/src/lint/lint.test.ts +253 -0
  35. package/src/lint/lint.ts +43 -0
  36. package/src/lint/rules.ts +178 -0
  37. package/src/providers/anthropic.ts +107 -0
  38. package/src/providers/index.ts +4 -0
  39. package/src/providers/ollama.ts +101 -0
  40. package/src/providers/openai.ts +67 -0
  41. package/src/providers/router.ts +62 -0
  42. package/src/query/query.test.ts +165 -0
  43. package/src/query/query.ts +136 -0
  44. package/src/schemas.ts +193 -0
  45. package/src/search/engine.test.ts +230 -0
  46. package/src/search/engine.ts +390 -0
  47. package/src/skills/loader.ts +163 -0
  48. package/src/skills/runner.ts +139 -0
  49. package/src/skills/schema.ts +28 -0
  50. package/src/skills/skills.test.ts +134 -0
  51. package/src/types.ts +136 -0
  52. package/src/vault.test.ts +141 -0
  53. package/src/vault.ts +251 -0
@@ -0,0 +1,278 @@
1
+ import { afterEach, describe, expect, test } from "bun:test";
2
+ import { existsSync } from "node:fs";
3
+ import { mkdtemp, rm, writeFile } from "node:fs/promises";
4
+ import { tmpdir } from "node:os";
5
+ import { join } from "node:path";
6
+ import { ingestSource } from "../ingest/ingest.js";
7
+ import type { CompletionParams, CompletionResult, LLMProvider, StreamChunk } from "../types.js";
8
+ import { initVault, listWiki, loadManifest, readWiki } from "../vault.js";
9
+ import { compileVault } from "./compiler.js";
10
+
11
// Path of the temp vault used by the currently running test.
// Assigned by makeTempVault(); read by the afterEach cleanup below.
let tempDir: string;

// Remove the temp vault after every test so runs don't leak directories
// on disk; force+recursive makes cleanup tolerant of partial setups.
afterEach(async () => {
  if (tempDir) await rm(tempDir, { recursive: true, force: true });
});
16
+
17
+ async function makeTempVault() {
18
+ tempDir = await mkdtemp(join(tmpdir(), "kib-compile-test-"));
19
+ await initVault(tempDir, { name: "test" });
20
+ return tempDir;
21
+ }
22
+
23
+ /**
24
+ * Create a mock LLM provider that returns canned responses.
25
+ */
26
+ function createMockProvider(responses: string[]): LLMProvider {
27
+ let callIndex = 0;
28
+ return {
29
+ name: "mock",
30
+ async complete(params: CompletionParams): Promise<CompletionResult> {
31
+ const content = responses[callIndex] ?? "[]";
32
+ callIndex++;
33
+ return {
34
+ content,
35
+ usage: { inputTokens: 100, outputTokens: 200 },
36
+ stopReason: "end_turn",
37
+ };
38
+ },
39
+ async *stream(): AsyncIterable<StreamChunk> {
40
+ yield { type: "text", text: "stream not used in tests" };
41
+ },
42
+ };
43
+ }
44
+
45
+ describe("compileVault", () => {
46
+ test("compiles a single source into articles", async () => {
47
+ const root = await makeTempVault();
48
+
49
+ // Ingest a test file
50
+ const testFile = join(root, "source.md");
51
+ await writeFile(
52
+ testFile,
53
+ "# Transformer Architecture\n\nThe transformer is a neural network architecture.\n\nIt uses self-attention mechanisms.",
54
+ );
55
+ await ingestSource(root, testFile);
56
+
57
+ // Mock provider returns a compile result
58
+ const mockResponse = JSON.stringify([
59
+ {
60
+ op: "create",
61
+ path: "wiki/concepts/transformer-architecture.md",
62
+ content: `---
63
+ title: Transformer Architecture
64
+ slug: transformer-architecture
65
+ category: concept
66
+ tags: [deep-learning, nlp]
67
+ sources:
68
+ - raw/articles/transformer-architecture.md
69
+ created: 2026-04-05
70
+ updated: 2026-04-05
71
+ summary: The transformer replaces recurrence with self-attention.
72
+ ---
73
+
74
+ # Transformer Architecture
75
+
76
+ The transformer is a neural network architecture that replaces recurrence with self-attention mechanisms, enabling parallel training and superior sequence modeling.
77
+
78
+ ## Key Features
79
+
80
+ - **Self-Attention**: Allows the model to weigh the importance of different positions
81
+ - **Parallelizable**: Unlike RNNs, transformers can process all positions simultaneously
82
+ - **Scalable**: Performance improves predictably with more compute
83
+
84
+ See also: [[attention-mechanisms]], [[positional-encoding]]`,
85
+ },
86
+ ]);
87
+
88
+ const provider = createMockProvider([mockResponse]);
89
+ const config = (await import("../vault.js")).loadConfig;
90
+ const vaultConfig = await config(root);
91
+
92
+ const result = await compileVault(root, provider, vaultConfig);
93
+
94
+ expect(result.sourcesCompiled).toBe(1);
95
+ expect(result.articlesCreated).toBe(1);
96
+ expect(result.articlesUpdated).toBe(0);
97
+
98
+ // Verify the article was written
99
+ const article = await readWiki(root, "concepts/transformer-architecture.md");
100
+ expect(article).toContain("Transformer Architecture");
101
+ expect(article).toContain("self-attention");
102
+
103
+ // Verify manifest was updated
104
+ const manifest = await loadManifest(root);
105
+ expect(manifest.vault.lastCompiled).not.toBeNull();
106
+ expect(manifest.stats.totalArticles).toBeGreaterThan(0);
107
+ expect(manifest.articles["transformer-architecture"]).toBeDefined();
108
+ expect(manifest.articles["transformer-architecture"]!.category).toBe("concept");
109
+ expect(manifest.articles["transformer-architecture"]!.forwardLinks).toContain(
110
+ "attention-mechanisms",
111
+ );
112
+
113
+ // Verify INDEX.md was generated
114
+ expect(existsSync(join(root, "wiki", "INDEX.md"))).toBe(true);
115
+ const index = await readWiki(root, "INDEX.md");
116
+ expect(index).toContain("Transformer Architecture");
117
+ expect(index).toContain("## Concepts");
118
+
119
+ // Verify GRAPH.md was generated
120
+ expect(existsSync(join(root, "wiki", "GRAPH.md"))).toBe(true);
121
+ const graph = await readWiki(root, "GRAPH.md");
122
+ expect(graph).toContain("transformer-architecture");
123
+ });
124
+
125
+ test("skips already-compiled sources", async () => {
126
+ const root = await makeTempVault();
127
+
128
+ const testFile = join(root, "source.md");
129
+ await writeFile(testFile, "# Test\n\nContent.");
130
+ await ingestSource(root, testFile);
131
+
132
+ // First compile
133
+ const mockResponse = JSON.stringify([
134
+ {
135
+ op: "create",
136
+ path: "wiki/concepts/test.md",
137
+ content:
138
+ "---\ntitle: Test\nslug: test\ncategory: concept\ntags: []\nsummary: A test.\n---\n\n# Test\n\nContent.",
139
+ },
140
+ ]);
141
+ const provider1 = createMockProvider([mockResponse]);
142
+ const vaultConfig = await (await import("../vault.js")).loadConfig(root);
143
+ await compileVault(root, provider1, vaultConfig);
144
+
145
+ // Second compile — should skip (no new sources)
146
+ const provider2 = createMockProvider([]);
147
+ const result = await compileVault(root, provider2, vaultConfig);
148
+ expect(result.sourcesCompiled).toBe(0);
149
+ });
150
+
151
+ test("recompiles when force is true", async () => {
152
+ const root = await makeTempVault();
153
+
154
+ const testFile = join(root, "source.md");
155
+ await writeFile(testFile, "# Test\n\nContent.");
156
+ await ingestSource(root, testFile);
157
+
158
+ // First compile
159
+ const mockResponse = JSON.stringify([
160
+ {
161
+ op: "create",
162
+ path: "wiki/concepts/test.md",
163
+ content:
164
+ "---\ntitle: Test\nslug: test\ncategory: concept\ntags: []\nsummary: A test.\n---\n\n# Test\n\nContent.",
165
+ },
166
+ ]);
167
+ const provider1 = createMockProvider([mockResponse]);
168
+ const vaultConfig = await (await import("../vault.js")).loadConfig(root);
169
+ await compileVault(root, provider1, vaultConfig);
170
+
171
+ // Force recompile
172
+ const provider2 = createMockProvider([
173
+ JSON.stringify([
174
+ {
175
+ op: "update",
176
+ path: "wiki/concepts/test.md",
177
+ content:
178
+ "---\ntitle: Test\nslug: test\ncategory: concept\ntags: []\nsummary: Updated.\n---\n\n# Test\n\nUpdated content.",
179
+ },
180
+ ]),
181
+ ]);
182
+ const result = await compileVault(root, provider2, vaultConfig, {
183
+ force: true,
184
+ });
185
+ expect(result.sourcesCompiled).toBe(1);
186
+ expect(result.articlesUpdated).toBe(1);
187
+ });
188
+
189
+ test("dry run does not write files", async () => {
190
+ const root = await makeTempVault();
191
+
192
+ const testFile = join(root, "source.md");
193
+ await writeFile(testFile, "# Test\n\nContent.");
194
+ await ingestSource(root, testFile);
195
+
196
+ const mockResponse = JSON.stringify([
197
+ {
198
+ op: "create",
199
+ path: "wiki/concepts/test.md",
200
+ content: "---\ntitle: Test\nslug: test\ncategory: concept\n---\n\n# Test",
201
+ },
202
+ ]);
203
+ const provider = createMockProvider([mockResponse]);
204
+ const vaultConfig = await (await import("../vault.js")).loadConfig(root);
205
+
206
+ const result = await compileVault(root, provider, vaultConfig, {
207
+ dryRun: true,
208
+ });
209
+
210
+ expect(result.sourcesCompiled).toBe(1);
211
+ expect(result.articlesCreated).toBe(1);
212
+
213
+ // File should NOT exist
214
+ expect(existsSync(join(root, "wiki", "concepts", "test.md"))).toBe(false);
215
+ });
216
+
217
+ test("handles empty response from LLM", async () => {
218
+ const root = await makeTempVault();
219
+
220
+ const testFile = join(root, "source.md");
221
+ await writeFile(testFile, "# Test\n\nContent.");
222
+ await ingestSource(root, testFile);
223
+
224
+ const provider = createMockProvider(["[]"]);
225
+ const vaultConfig = await (await import("../vault.js")).loadConfig(root);
226
+
227
+ const result = await compileVault(root, provider, vaultConfig);
228
+ expect(result.sourcesCompiled).toBe(1);
229
+ expect(result.articlesCreated).toBe(0);
230
+ });
231
+
232
+ test("handles no pending sources", async () => {
233
+ const root = await makeTempVault();
234
+ const provider = createMockProvider([]);
235
+ const vaultConfig = await (await import("../vault.js")).loadConfig(root);
236
+
237
+ const result = await compileVault(root, provider, vaultConfig);
238
+ expect(result.sourcesCompiled).toBe(0);
239
+ });
240
+
241
+ test("compiles multiple sources", async () => {
242
+ const root = await makeTempVault();
243
+
244
+ const file1 = join(root, "article1.md");
245
+ const file2 = join(root, "article2.md");
246
+ await writeFile(file1, "# Article One\n\nFirst article content.");
247
+ await writeFile(file2, "# Article Two\n\nSecond article content.");
248
+ await ingestSource(root, file1);
249
+ await ingestSource(root, file2);
250
+
251
+ const provider = createMockProvider([
252
+ JSON.stringify([
253
+ {
254
+ op: "create",
255
+ path: "wiki/concepts/article-one.md",
256
+ content:
257
+ "---\ntitle: Article One\nslug: article-one\ncategory: concept\ntags: []\nsummary: First.\n---\n\n# Article One\n\nCompiled first.",
258
+ },
259
+ ]),
260
+ JSON.stringify([
261
+ {
262
+ op: "create",
263
+ path: "wiki/concepts/article-two.md",
264
+ content:
265
+ "---\ntitle: Article Two\nslug: article-two\ncategory: concept\ntags: []\nsummary: Second.\n---\n\n# Article Two\n\nCompiled second.",
266
+ },
267
+ ]),
268
+ ]);
269
+ const vaultConfig = await (await import("../vault.js")).loadConfig(root);
270
+
271
+ const result = await compileVault(root, provider, vaultConfig);
272
+ expect(result.sourcesCompiled).toBe(2);
273
+ expect(result.articlesCreated).toBe(2);
274
+
275
+ const manifest = await loadManifest(root);
276
+ expect(manifest.stats.totalArticles).toBe(2);
277
+ });
278
+ });
@@ -0,0 +1,305 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { join, relative } from "node:path";
3
+ import { GRAPH_FILE, INDEX_FILE, WIKI_DIR } from "../constants.js";
4
+ import { hash } from "../hash.js";
5
+ import { countWords } from "../ingest/normalize.js";
6
+ import type { CompileResult, LLMProvider, Manifest, VaultConfig } from "../types.js";
7
+ import {
8
+ deleteFile,
9
+ listWiki,
10
+ loadManifest,
11
+ readIndex,
12
+ readRaw,
13
+ readWiki,
14
+ saveManifest,
15
+ writeWiki,
16
+ } from "../vault.js";
17
+ import { buildLinkGraph, generateGraphMd } from "./backlinks.js";
18
+ import { extractWikilinks, parseCompileOutput, parseFrontmatter } from "./diff.js";
19
+ import { computeStats, generateIndexMd } from "./index-manager.js";
20
+ import { compileSystemPrompt, compileUserPrompt } from "./prompts.js";
21
+
22
/** Options controlling a single compile pass over the vault's raw sources. */
export interface CompileOptions {
  /** Recompile all sources regardless of state */
  force?: boolean;
  /** Only compile this specific source path */
  sourceFilter?: string;
  /** Max sources to compile in this pass */
  maxSources?: number;
  /** Don't actually write files, just return what would happen */
  dryRun?: boolean;
  /** Callback for progress updates */
  onProgress?: (msg: string) => void;
}
34
+
35
/**
 * Compile pending raw sources into wiki articles.
 *
 * For each pending source: read its raw content, prompt the LLM with the
 * current INDEX.md plus any articles the source previously produced, parse
 * the response into create/update/delete operations, apply them, and update
 * the manifest. After all sources, backlinks, INDEX.md, GRAPH.md, and vault
 * stats are rebuilt (skipped entirely in dry-run mode).
 *
 * @param root    Vault root directory.
 * @param provider LLM provider used for each source's compile call.
 * @param config  Vault config (categories, per-pass limits, auto_index/auto_graph).
 * @param options See CompileOptions.
 * @returns Aggregate counts and the full list of file operations.
 */
export async function compileVault(
  root: string,
  provider: LLMProvider,
  config: VaultConfig,
  options: CompileOptions = {},
): Promise<CompileResult> {
  const manifest = await loadManifest(root);
  const categories = config.compile.categories;

  // Find sources that need compilation
  const pendingSources = findPendingSources(manifest, options);

  if (pendingSources.length === 0) {
    return {
      sourcesCompiled: 0,
      articlesCreated: 0,
      articlesUpdated: 0,
      articlesDeleted: 0,
      operations: [],
    };
  }

  // Limit sources per pass
  const maxSources = options.maxSources ?? config.compile.max_sources_per_pass;
  const sourcesToCompile = pendingSources.slice(0, maxSources);

  // Read current INDEX.md for context
  const indexContent = await readIndex(root);

  let totalCreated = 0;
  let totalUpdated = 0;
  let totalDeleted = 0;
  const allOperations: CompileResult["operations"] = [];

  for (const [sourceId, sourcePath] of sourcesToCompile) {
    options.onProgress?.(`Compiling ${sourcePath}...`);

    try {
      // Read the raw source content
      const sourceContent = await readRaw(root, sourcePath);

      // Read existing articles this source produced (for context)
      const existingArticles = await loadExistingArticles(root, manifest, sourceId);

      // Build the compile prompt
      const today = new Date().toISOString().split("T")[0]!;
      const userPrompt = compileUserPrompt({
        indexContent,
        sourceContent,
        sourcePath: `raw/${sourcePath}`,
        existingArticles,
        today,
      });

      // Call the LLM (temperature 0 — presumably for reproducible output)
      const result = await provider.complete({
        system: compileSystemPrompt(categories),
        messages: [{ role: "user", content: userPrompt }],
        temperature: 0,
        maxTokens: 8192,
      });

      // Parse the response into file operations
      const operations = parseCompileOutput(result.content);

      // Dry run: tally what would happen, but write nothing.
      if (options.dryRun) {
        allOperations.push(...operations);
        for (const op of operations) {
          if (op.op === "create") totalCreated++;
          else if (op.op === "update") totalUpdated++;
          else if (op.op === "delete") totalDeleted++;
        }
        continue;
      }

      // Execute file operations
      const producedArticles: string[] = [];

      for (const op of operations) {
        // Operation paths are vault-relative ("wiki/…"); wiki helpers want
        // the path relative to the wiki dir.
        const wikiRelPath = op.path.replace(/^wiki\//, "");

        if (op.op === "create" || op.op === "update") {
          // Skip malformed operations that carry no content.
          if (!op.content) continue;
          await writeWiki(root, wikiRelPath, op.content);
          producedArticles.push(op.path);

          // Update article entry in manifest, keyed by frontmatter slug
          // (falling back to the extension-less wiki-relative path).
          const { frontmatter, body } = parseFrontmatter(op.content);
          const articleSlug = (frontmatter.slug as string) ?? wikiRelPath.replace(/\.md$/, "");
          const contentHash = await hash(op.content);
          const wikilinks = extractWikilinks(op.content);
          const now = new Date().toISOString();

          manifest.articles[articleSlug] = {
            hash: contentHash,
            // Preserve the original creation time across updates.
            createdAt:
              op.op === "create" ? now : (manifest.articles[articleSlug]?.createdAt ?? now),
            lastUpdated: now,
            derivedFrom: [`raw/${sourcePath}`],
            backlinks: [], // will be computed after all sources are compiled
            forwardLinks: wikilinks,
            tags: Array.isArray(frontmatter.tags) ? (frontmatter.tags as string[]) : [],
            summary: (frontmatter.summary as string) ?? "",
            wordCount: countWords(body),
            category: (frontmatter.category as string) ?? "topic",
          };

          if (op.op === "create") totalCreated++;
          else totalUpdated++;
        } else if (op.op === "delete") {
          const fullPath = join(root, op.path);
          await deleteFile(fullPath);
          totalDeleted++;

          // Remove from manifest
          // NOTE(review): create/update key manifest.articles by the
          // frontmatter slug, but this derives the key from the path
          // (e.g. "concepts/foo" vs "foo"); when those differ the stale
          // manifest entry survives a delete — confirm and align.
          const slug = wikiRelPath.replace(/\.md$/, "");
          delete manifest.articles[slug];
        }

        allOperations.push(op);
      }

      // Update source entry
      manifest.sources[sourceId]!.lastCompiled = new Date().toISOString();
      manifest.sources[sourceId]!.producedArticles = producedArticles;
    } catch (err) {
      // Best-effort per source: report via onProgress and keep going.
      options.onProgress?.(`Failed to compile ${sourcePath}: ${(err as Error).message}`);
      // Continue with other sources
    }
  }

  if (!options.dryRun) {
    // Rebuild link graph (backlinks)
    options.onProgress?.("Updating backlinks...");
    const graph = await buildLinkGraph(root);

    // Update backlinks in manifest
    for (const [slug, backlinksSet] of graph.backlinks) {
      if (manifest.articles[slug]) {
        manifest.articles[slug]!.backlinks = [...backlinksSet];
      }
    }
    for (const [slug, forwardSet] of graph.forwardLinks) {
      if (manifest.articles[slug]) {
        manifest.articles[slug]!.forwardLinks = [...forwardSet];
      }
    }

    // Regenerate INDEX.md
    if (config.compile.auto_index) {
      options.onProgress?.("Updating INDEX.md...");
      const indexMd = await generateIndexMd(root);
      await writeWiki(root, INDEX_FILE, indexMd);
    }

    // Regenerate GRAPH.md
    if (config.compile.auto_graph) {
      options.onProgress?.("Updating GRAPH.md...");
      const graphMd = generateGraphMd(graph);
      await writeWiki(root, GRAPH_FILE, graphMd);
    }

    // Update manifest stats
    const stats = await computeStats(root);
    manifest.stats.totalArticles = stats.totalArticles;
    manifest.stats.totalWords = stats.totalWords;
    manifest.stats.totalSources = Object.keys(manifest.sources).length;
    manifest.vault.lastCompiled = new Date().toISOString();

    await saveManifest(root, manifest);
  }

  return {
    // NOTE(review): this counts every attempted source, including ones
    // whose compile threw and was swallowed above — confirm intended.
    sourcesCompiled: sourcesToCompile.length,
    articlesCreated: totalCreated,
    articlesUpdated: totalUpdated,
    articlesDeleted: totalDeleted,
    operations: allOperations,
  };
}
218
+
219
+ /**
220
+ * Find sources that need compilation.
221
+ */
222
+ function findPendingSources(manifest: Manifest, options: CompileOptions): [string, string][] {
223
+ const pending: [string, string][] = [];
224
+
225
+ for (const [sourceId, source] of Object.entries(manifest.sources)) {
226
+ // If filtering to a specific source
227
+ if (options.sourceFilter) {
228
+ const matchesId = sourceId === options.sourceFilter;
229
+ const matchesPath = source.producedArticles.some((p) => p.includes(options.sourceFilter!));
230
+ if (!matchesId && !matchesPath) continue;
231
+ }
232
+
233
+ // Determine the raw file path from sourceId
234
+ // Source was written to raw/{category}/{slug}.md
235
+ const rawPath = findRawPath(manifest, sourceId);
236
+ if (!rawPath) continue;
237
+
238
+ if (options.force || !source.lastCompiled || source.lastCompiled < source.ingestedAt) {
239
+ pending.push([sourceId, rawPath]);
240
+ }
241
+ }
242
+
243
+ return pending;
244
+ }
245
+
246
+ /**
247
+ * Find the raw file path for a source.
248
+ * Sources are tracked by ID; we need to find which raw/ file they correspond to.
249
+ */
250
+ function findRawPath(manifest: Manifest, sourceId: string): string | null {
251
+ const source = manifest.sources[sourceId];
252
+ if (!source) return null;
253
+
254
+ // The raw path is derived from the source metadata
255
+ const title = source.metadata.title ?? sourceId;
256
+ const slug = title
257
+ .toLowerCase()
258
+ .replace(/[^a-z0-9\s-]/g, "")
259
+ .replace(/\s+/g, "-")
260
+ .replace(/-+/g, "-")
261
+ .replace(/^-|-$/g, "")
262
+ .slice(0, 80);
263
+
264
+ const category = categoryForSourceType(source.sourceType);
265
+ return `${category}/${slug}.md`;
266
+ }
267
+
268
+ function categoryForSourceType(sourceType: string): string {
269
+ switch (sourceType) {
270
+ case "pdf":
271
+ return "papers";
272
+ case "youtube":
273
+ return "transcripts";
274
+ case "github":
275
+ return "repos";
276
+ case "image":
277
+ return "images";
278
+ default:
279
+ return "articles";
280
+ }
281
+ }
282
+
283
+ /**
284
+ * Load existing wiki articles that a source previously produced.
285
+ */
286
+ async function loadExistingArticles(
287
+ root: string,
288
+ manifest: Manifest,
289
+ sourceId: string,
290
+ ): Promise<{ path: string; content: string }[]> {
291
+ const source = manifest.sources[sourceId];
292
+ if (!source?.producedArticles.length) return [];
293
+
294
+ const articles: { path: string; content: string }[] = [];
295
+ for (const articlePath of source.producedArticles) {
296
+ try {
297
+ const relPath = articlePath.replace(/^wiki\//, "");
298
+ const content = await readWiki(root, relPath);
299
+ articles.push({ path: articlePath, content });
300
+ } catch {
301
+ // Article might have been deleted
302
+ }
303
+ }
304
+ return articles;
305
+ }