@kibhq/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +40 -0
- package/src/compile/backlinks.test.ts +112 -0
- package/src/compile/backlinks.ts +80 -0
- package/src/compile/cache.test.ts +126 -0
- package/src/compile/cache.ts +125 -0
- package/src/compile/compiler.test.ts +278 -0
- package/src/compile/compiler.ts +305 -0
- package/src/compile/diff.test.ts +164 -0
- package/src/compile/diff.ts +121 -0
- package/src/compile/index-manager.test.ts +227 -0
- package/src/compile/index-manager.ts +148 -0
- package/src/compile/prompts.ts +124 -0
- package/src/constants.ts +40 -0
- package/src/errors.ts +66 -0
- package/src/hash.test.ts +21 -0
- package/src/hash.ts +24 -0
- package/src/index.ts +22 -0
- package/src/ingest/extractors/file.test.ts +129 -0
- package/src/ingest/extractors/file.ts +136 -0
- package/src/ingest/extractors/github.test.ts +47 -0
- package/src/ingest/extractors/github.ts +135 -0
- package/src/ingest/extractors/interface.ts +26 -0
- package/src/ingest/extractors/pdf.ts +130 -0
- package/src/ingest/extractors/web.test.ts +242 -0
- package/src/ingest/extractors/web.ts +163 -0
- package/src/ingest/extractors/youtube.test.ts +44 -0
- package/src/ingest/extractors/youtube.ts +166 -0
- package/src/ingest/ingest.test.ts +187 -0
- package/src/ingest/ingest.ts +179 -0
- package/src/ingest/normalize.test.ts +120 -0
- package/src/ingest/normalize.ts +83 -0
- package/src/ingest/router.test.ts +154 -0
- package/src/ingest/router.ts +119 -0
- package/src/lint/lint.test.ts +253 -0
- package/src/lint/lint.ts +43 -0
- package/src/lint/rules.ts +178 -0
- package/src/providers/anthropic.ts +107 -0
- package/src/providers/index.ts +4 -0
- package/src/providers/ollama.ts +101 -0
- package/src/providers/openai.ts +67 -0
- package/src/providers/router.ts +62 -0
- package/src/query/query.test.ts +165 -0
- package/src/query/query.ts +136 -0
- package/src/schemas.ts +193 -0
- package/src/search/engine.test.ts +230 -0
- package/src/search/engine.ts +390 -0
- package/src/skills/loader.ts +163 -0
- package/src/skills/runner.ts +139 -0
- package/src/skills/schema.ts +28 -0
- package/src/skills/skills.test.ts +134 -0
- package/src/types.ts +136 -0
- package/src/vault.test.ts +141 -0
- package/src/vault.ts +251 -0
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
import { afterEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { ingestSource } from "../ingest/ingest.js";
|
|
7
|
+
import type { CompletionParams, CompletionResult, LLMProvider, StreamChunk } from "../types.js";
|
|
8
|
+
import { initVault, listWiki, loadManifest, readWiki } from "../vault.js";
|
|
9
|
+
import { compileVault } from "./compiler.js";
|
|
10
|
+
|
|
11
|
+
// Vault directory created by the currently running test; assigned in
// makeTempVault() and torn down after every test.
let tempDir: string;

afterEach(async () => {
  // Best-effort cleanup: `force: true` makes rm a no-op if the directory
  // was never created (e.g. the test failed before makeTempVault ran).
  if (tempDir) await rm(tempDir, { recursive: true, force: true });
});
|
|
16
|
+
|
|
17
|
+
async function makeTempVault() {
|
|
18
|
+
tempDir = await mkdtemp(join(tmpdir(), "kib-compile-test-"));
|
|
19
|
+
await initVault(tempDir, { name: "test" });
|
|
20
|
+
return tempDir;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Create a mock LLM provider that returns canned responses.
|
|
25
|
+
*/
|
|
26
|
+
function createMockProvider(responses: string[]): LLMProvider {
|
|
27
|
+
let callIndex = 0;
|
|
28
|
+
return {
|
|
29
|
+
name: "mock",
|
|
30
|
+
async complete(params: CompletionParams): Promise<CompletionResult> {
|
|
31
|
+
const content = responses[callIndex] ?? "[]";
|
|
32
|
+
callIndex++;
|
|
33
|
+
return {
|
|
34
|
+
content,
|
|
35
|
+
usage: { inputTokens: 100, outputTokens: 200 },
|
|
36
|
+
stopReason: "end_turn",
|
|
37
|
+
};
|
|
38
|
+
},
|
|
39
|
+
async *stream(): AsyncIterable<StreamChunk> {
|
|
40
|
+
yield { type: "text", text: "stream not used in tests" };
|
|
41
|
+
},
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// End-to-end tests for compileVault: each test builds a throwaway vault,
// ingests one or more markdown sources, and drives the compiler with a mock
// provider whose canned JSON responses describe file operations.
describe("compileVault", () => {
  test("compiles a single source into articles", async () => {
    const root = await makeTempVault();

    // Ingest a test file
    const testFile = join(root, "source.md");
    await writeFile(
      testFile,
      "# Transformer Architecture\n\nThe transformer is a neural network architecture.\n\nIt uses self-attention mechanisms.",
    );
    await ingestSource(root, testFile);

    // Mock provider returns a compile result: one "create" op whose content
    // carries full frontmatter plus [[wikilinks]] for the link graph.
    const mockResponse = JSON.stringify([
      {
        op: "create",
        path: "wiki/concepts/transformer-architecture.md",
        content: `---
title: Transformer Architecture
slug: transformer-architecture
category: concept
tags: [deep-learning, nlp]
sources:
- raw/articles/transformer-architecture.md
created: 2026-04-05
updated: 2026-04-05
summary: The transformer replaces recurrence with self-attention.
---

# Transformer Architecture

The transformer is a neural network architecture that replaces recurrence with self-attention mechanisms, enabling parallel training and superior sequence modeling.

## Key Features

- **Self-Attention**: Allows the model to weigh the importance of different positions
- **Parallelizable**: Unlike RNNs, transformers can process all positions simultaneously
- **Scalable**: Performance improves predictably with more compute

See also: [[attention-mechanisms]], [[positional-encoding]]`,
      },
    ]);

    const provider = createMockProvider([mockResponse]);
    // NOTE(review): dynamic import of a module that is already statically
    // imported at the top of this file; later tests inline the same pattern —
    // consider importing loadConfig statically instead.
    const config = (await import("../vault.js")).loadConfig;
    const vaultConfig = await config(root);

    const result = await compileVault(root, provider, vaultConfig);

    expect(result.sourcesCompiled).toBe(1);
    expect(result.articlesCreated).toBe(1);
    expect(result.articlesUpdated).toBe(0);

    // Verify the article was written
    const article = await readWiki(root, "concepts/transformer-architecture.md");
    expect(article).toContain("Transformer Architecture");
    expect(article).toContain("self-attention");

    // Verify manifest was updated
    const manifest = await loadManifest(root);
    expect(manifest.vault.lastCompiled).not.toBeNull();
    expect(manifest.stats.totalArticles).toBeGreaterThan(0);
    expect(manifest.articles["transformer-architecture"]).toBeDefined();
    expect(manifest.articles["transformer-architecture"]!.category).toBe("concept");
    expect(manifest.articles["transformer-architecture"]!.forwardLinks).toContain(
      "attention-mechanisms",
    );

    // Verify INDEX.md was generated
    expect(existsSync(join(root, "wiki", "INDEX.md"))).toBe(true);
    const index = await readWiki(root, "INDEX.md");
    expect(index).toContain("Transformer Architecture");
    expect(index).toContain("## Concepts");

    // Verify GRAPH.md was generated
    expect(existsSync(join(root, "wiki", "GRAPH.md"))).toBe(true);
    const graph = await readWiki(root, "GRAPH.md");
    expect(graph).toContain("transformer-architecture");
  });

  test("skips already-compiled sources", async () => {
    const root = await makeTempVault();

    const testFile = join(root, "source.md");
    await writeFile(testFile, "# Test\n\nContent.");
    await ingestSource(root, testFile);

    // First compile
    const mockResponse = JSON.stringify([
      {
        op: "create",
        path: "wiki/concepts/test.md",
        content:
          "---\ntitle: Test\nslug: test\ncategory: concept\ntags: []\nsummary: A test.\n---\n\n# Test\n\nContent.",
      },
    ]);
    const provider1 = createMockProvider([mockResponse]);
    const vaultConfig = await (await import("../vault.js")).loadConfig(root);
    await compileVault(root, provider1, vaultConfig);

    // Second compile — should skip (no new sources)
    const provider2 = createMockProvider([]);
    const result = await compileVault(root, provider2, vaultConfig);
    expect(result.sourcesCompiled).toBe(0);
  });

  test("recompiles when force is true", async () => {
    const root = await makeTempVault();

    const testFile = join(root, "source.md");
    await writeFile(testFile, "# Test\n\nContent.");
    await ingestSource(root, testFile);

    // First compile
    const mockResponse = JSON.stringify([
      {
        op: "create",
        path: "wiki/concepts/test.md",
        content:
          "---\ntitle: Test\nslug: test\ncategory: concept\ntags: []\nsummary: A test.\n---\n\n# Test\n\nContent.",
      },
    ]);
    const provider1 = createMockProvider([mockResponse]);
    const vaultConfig = await (await import("../vault.js")).loadConfig(root);
    await compileVault(root, provider1, vaultConfig);

    // Force recompile — same source, provider now emits an "update" op.
    const provider2 = createMockProvider([
      JSON.stringify([
        {
          op: "update",
          path: "wiki/concepts/test.md",
          content:
            "---\ntitle: Test\nslug: test\ncategory: concept\ntags: []\nsummary: Updated.\n---\n\n# Test\n\nUpdated content.",
        },
      ]),
    ]);
    const result = await compileVault(root, provider2, vaultConfig, {
      force: true,
    });
    expect(result.sourcesCompiled).toBe(1);
    expect(result.articlesUpdated).toBe(1);
  });

  test("dry run does not write files", async () => {
    const root = await makeTempVault();

    const testFile = join(root, "source.md");
    await writeFile(testFile, "# Test\n\nContent.");
    await ingestSource(root, testFile);

    const mockResponse = JSON.stringify([
      {
        op: "create",
        path: "wiki/concepts/test.md",
        content: "---\ntitle: Test\nslug: test\ncategory: concept\n---\n\n# Test",
      },
    ]);
    const provider = createMockProvider([mockResponse]);
    const vaultConfig = await (await import("../vault.js")).loadConfig(root);

    const result = await compileVault(root, provider, vaultConfig, {
      dryRun: true,
    });

    // Counters still reflect what WOULD have happened...
    expect(result.sourcesCompiled).toBe(1);
    expect(result.articlesCreated).toBe(1);

    // File should NOT exist
    expect(existsSync(join(root, "wiki", "concepts", "test.md"))).toBe(false);
  });

  test("handles empty response from LLM", async () => {
    const root = await makeTempVault();

    const testFile = join(root, "source.md");
    await writeFile(testFile, "# Test\n\nContent.");
    await ingestSource(root, testFile);

    // "[]" parses to zero operations: source counts as compiled, no articles.
    const provider = createMockProvider(["[]"]);
    const vaultConfig = await (await import("../vault.js")).loadConfig(root);

    const result = await compileVault(root, provider, vaultConfig);
    expect(result.sourcesCompiled).toBe(1);
    expect(result.articlesCreated).toBe(0);
  });

  test("handles no pending sources", async () => {
    const root = await makeTempVault();
    const provider = createMockProvider([]);
    const vaultConfig = await (await import("../vault.js")).loadConfig(root);

    const result = await compileVault(root, provider, vaultConfig);
    expect(result.sourcesCompiled).toBe(0);
  });

  test("compiles multiple sources", async () => {
    const root = await makeTempVault();

    const file1 = join(root, "article1.md");
    const file2 = join(root, "article2.md");
    await writeFile(file1, "# Article One\n\nFirst article content.");
    await writeFile(file2, "# Article Two\n\nSecond article content.");
    await ingestSource(root, file1);
    await ingestSource(root, file2);

    // One canned response per source, consumed in compile order.
    const provider = createMockProvider([
      JSON.stringify([
        {
          op: "create",
          path: "wiki/concepts/article-one.md",
          content:
            "---\ntitle: Article One\nslug: article-one\ncategory: concept\ntags: []\nsummary: First.\n---\n\n# Article One\n\nCompiled first.",
        },
      ]),
      JSON.stringify([
        {
          op: "create",
          path: "wiki/concepts/article-two.md",
          content:
            "---\ntitle: Article Two\nslug: article-two\ncategory: concept\ntags: []\nsummary: Second.\n---\n\n# Article Two\n\nCompiled second.",
        },
      ]),
    ]);
    const vaultConfig = await (await import("../vault.js")).loadConfig(root);

    const result = await compileVault(root, provider, vaultConfig);
    expect(result.sourcesCompiled).toBe(2);
    expect(result.articlesCreated).toBe(2);

    const manifest = await loadManifest(root);
    expect(manifest.stats.totalArticles).toBe(2);
  });
});
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { join, relative } from "node:path";
|
|
3
|
+
import { GRAPH_FILE, INDEX_FILE, WIKI_DIR } from "../constants.js";
|
|
4
|
+
import { hash } from "../hash.js";
|
|
5
|
+
import { countWords } from "../ingest/normalize.js";
|
|
6
|
+
import type { CompileResult, LLMProvider, Manifest, VaultConfig } from "../types.js";
|
|
7
|
+
import {
|
|
8
|
+
deleteFile,
|
|
9
|
+
listWiki,
|
|
10
|
+
loadManifest,
|
|
11
|
+
readIndex,
|
|
12
|
+
readRaw,
|
|
13
|
+
readWiki,
|
|
14
|
+
saveManifest,
|
|
15
|
+
writeWiki,
|
|
16
|
+
} from "../vault.js";
|
|
17
|
+
import { buildLinkGraph, generateGraphMd } from "./backlinks.js";
|
|
18
|
+
import { extractWikilinks, parseCompileOutput, parseFrontmatter } from "./diff.js";
|
|
19
|
+
import { computeStats, generateIndexMd } from "./index-manager.js";
|
|
20
|
+
import { compileSystemPrompt, compileUserPrompt } from "./prompts.js";
|
|
21
|
+
|
|
22
|
+
/** Options controlling a single compile pass. All fields are optional. */
export interface CompileOptions {
  /** Recompile all sources regardless of state */
  force?: boolean;
  /** Only compile this specific source path */
  sourceFilter?: string;
  /** Max sources to compile in this pass */
  maxSources?: number;
  /** Don't actually write files, just return what would happen */
  dryRun?: boolean;
  /** Callback for progress updates */
  onProgress?: (msg: string) => void;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Compile pending raw sources into wiki articles.
 *
 * For each pending source (up to the per-pass limit) the raw content is sent
 * to the LLM with the current INDEX.md and any articles the source previously
 * produced as context; the model's JSON response is parsed into create /
 * update / delete file operations that are applied under wiki/. Afterwards
 * (unless dryRun) backlinks, INDEX.md, GRAPH.md, and manifest stats are
 * regenerated and the manifest is saved.
 *
 * @param root     Vault root directory.
 * @param provider LLM provider used for the compile completions.
 * @param config   Vault configuration (categories, per-pass limits, toggles).
 * @param options  Pass options; see CompileOptions.
 * @returns Summary of what was (or, for dryRun, would be) done.
 */
export async function compileVault(
  root: string,
  provider: LLMProvider,
  config: VaultConfig,
  options: CompileOptions = {},
): Promise<CompileResult> {
  const manifest = await loadManifest(root);
  const categories = config.compile.categories;

  // Find sources that need compilation
  const pendingSources = findPendingSources(manifest, options);

  if (pendingSources.length === 0) {
    return {
      sourcesCompiled: 0,
      articlesCreated: 0,
      articlesUpdated: 0,
      articlesDeleted: 0,
      operations: [],
    };
  }

  // Limit sources per pass
  const maxSources = options.maxSources ?? config.compile.max_sources_per_pass;
  const sourcesToCompile = pendingSources.slice(0, maxSources);

  // Read current INDEX.md for context
  const indexContent = await readIndex(root);

  let totalCreated = 0;
  let totalUpdated = 0;
  let totalDeleted = 0;
  const allOperations: CompileResult["operations"] = [];

  for (const [sourceId, sourcePath] of sourcesToCompile) {
    options.onProgress?.(`Compiling ${sourcePath}...`);

    try {
      // Read the raw source content
      const sourceContent = await readRaw(root, sourcePath);

      // Read existing articles this source produced (for context)
      const existingArticles = await loadExistingArticles(root, manifest, sourceId);

      // Build the compile prompt
      const today = new Date().toISOString().split("T")[0]!;
      const userPrompt = compileUserPrompt({
        indexContent,
        sourceContent,
        sourcePath: `raw/${sourcePath}`,
        existingArticles,
        today,
      });

      // Call the LLM (temperature 0 for deterministic structured output)
      const result = await provider.complete({
        system: compileSystemPrompt(categories),
        messages: [{ role: "user", content: userPrompt }],
        temperature: 0,
        maxTokens: 8192,
      });

      // Parse the response into file operations
      const operations = parseCompileOutput(result.content);

      if (options.dryRun) {
        // Dry run: tally what WOULD happen, touch nothing on disk or in the
        // manifest, and move on to the next source.
        allOperations.push(...operations);
        for (const op of operations) {
          if (op.op === "create") totalCreated++;
          else if (op.op === "update") totalUpdated++;
          else if (op.op === "delete") totalDeleted++;
        }
        continue;
      }

      // Execute file operations
      const producedArticles: string[] = [];

      for (const op of operations) {
        // Operation paths are vault-relative ("wiki/..."); writeWiki/readWiki
        // expect paths relative to the wiki/ directory.
        const wikiRelPath = op.path.replace(/^wiki\//, "");

        if (op.op === "create" || op.op === "update") {
          // Skip malformed ops that carry no content rather than failing.
          if (!op.content) continue;
          await writeWiki(root, wikiRelPath, op.content);
          producedArticles.push(op.path);

          // Update article entry in manifest, keyed by the frontmatter slug
          // (falling back to the wiki-relative path minus ".md").
          const { frontmatter, body } = parseFrontmatter(op.content);
          const articleSlug = (frontmatter.slug as string) ?? wikiRelPath.replace(/\.md$/, "");
          const contentHash = await hash(op.content);
          const wikilinks = extractWikilinks(op.content);
          const now = new Date().toISOString();

          manifest.articles[articleSlug] = {
            hash: contentHash,
            // Preserve the original createdAt on updates.
            createdAt:
              op.op === "create" ? now : (manifest.articles[articleSlug]?.createdAt ?? now),
            lastUpdated: now,
            derivedFrom: [`raw/${sourcePath}`],
            backlinks: [], // will be computed after all sources are compiled
            forwardLinks: wikilinks,
            tags: Array.isArray(frontmatter.tags) ? (frontmatter.tags as string[]) : [],
            summary: (frontmatter.summary as string) ?? "",
            wordCount: countWords(body),
            category: (frontmatter.category as string) ?? "topic",
          };

          if (op.op === "create") totalCreated++;
          else totalUpdated++;
        } else if (op.op === "delete") {
          const fullPath = join(root, op.path);
          await deleteFile(fullPath);
          totalDeleted++;

          // Remove from manifest.
          // NOTE(review): this key is derived from the path ("concepts/test"),
          // but create/update above prefers frontmatter.slug ("test"). When
          // the slug differs from the full relative path, this delete misses
          // the manifest entry and leaves it stale — confirm and unify.
          const slug = wikiRelPath.replace(/\.md$/, "");
          delete manifest.articles[slug];
        }

        allOperations.push(op);
      }

      // Update source entry (producedArticles is replaced wholesale with this
      // pass's output).
      manifest.sources[sourceId]!.lastCompiled = new Date().toISOString();
      manifest.sources[sourceId]!.producedArticles = producedArticles;
    } catch (err) {
      // NOTE(review): a failed source is still counted in sourcesCompiled
      // below (the return uses sourcesToCompile.length) — verify intended.
      options.onProgress?.(`Failed to compile ${sourcePath}: ${(err as Error).message}`);
      // Continue with other sources
    }
  }

  if (!options.dryRun) {
    // Rebuild link graph (backlinks)
    options.onProgress?.("Updating backlinks...");
    const graph = await buildLinkGraph(root);

    // Update backlinks in manifest
    for (const [slug, backlinksSet] of graph.backlinks) {
      if (manifest.articles[slug]) {
        manifest.articles[slug]!.backlinks = [...backlinksSet];
      }
    }
    for (const [slug, forwardSet] of graph.forwardLinks) {
      if (manifest.articles[slug]) {
        manifest.articles[slug]!.forwardLinks = [...forwardSet];
      }
    }

    // Regenerate INDEX.md
    if (config.compile.auto_index) {
      options.onProgress?.("Updating INDEX.md...");
      const indexMd = await generateIndexMd(root);
      await writeWiki(root, INDEX_FILE, indexMd);
    }

    // Regenerate GRAPH.md
    if (config.compile.auto_graph) {
      options.onProgress?.("Updating GRAPH.md...");
      const graphMd = generateGraphMd(graph);
      await writeWiki(root, GRAPH_FILE, graphMd);
    }

    // Update manifest stats (recomputed from disk, not from the counters)
    const stats = await computeStats(root);
    manifest.stats.totalArticles = stats.totalArticles;
    manifest.stats.totalWords = stats.totalWords;
    manifest.stats.totalSources = Object.keys(manifest.sources).length;
    manifest.vault.lastCompiled = new Date().toISOString();

    await saveManifest(root, manifest);
  }

  return {
    sourcesCompiled: sourcesToCompile.length,
    articlesCreated: totalCreated,
    articlesUpdated: totalUpdated,
    articlesDeleted: totalDeleted,
    operations: allOperations,
  };
}
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* Find sources that need compilation.
|
|
221
|
+
*/
|
|
222
|
+
function findPendingSources(manifest: Manifest, options: CompileOptions): [string, string][] {
|
|
223
|
+
const pending: [string, string][] = [];
|
|
224
|
+
|
|
225
|
+
for (const [sourceId, source] of Object.entries(manifest.sources)) {
|
|
226
|
+
// If filtering to a specific source
|
|
227
|
+
if (options.sourceFilter) {
|
|
228
|
+
const matchesId = sourceId === options.sourceFilter;
|
|
229
|
+
const matchesPath = source.producedArticles.some((p) => p.includes(options.sourceFilter!));
|
|
230
|
+
if (!matchesId && !matchesPath) continue;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// Determine the raw file path from sourceId
|
|
234
|
+
// Source was written to raw/{category}/{slug}.md
|
|
235
|
+
const rawPath = findRawPath(manifest, sourceId);
|
|
236
|
+
if (!rawPath) continue;
|
|
237
|
+
|
|
238
|
+
if (options.force || !source.lastCompiled || source.lastCompiled < source.ingestedAt) {
|
|
239
|
+
pending.push([sourceId, rawPath]);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
return pending;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/**
|
|
247
|
+
* Find the raw file path for a source.
|
|
248
|
+
* Sources are tracked by ID; we need to find which raw/ file they correspond to.
|
|
249
|
+
*/
|
|
250
|
+
function findRawPath(manifest: Manifest, sourceId: string): string | null {
|
|
251
|
+
const source = manifest.sources[sourceId];
|
|
252
|
+
if (!source) return null;
|
|
253
|
+
|
|
254
|
+
// The raw path is derived from the source metadata
|
|
255
|
+
const title = source.metadata.title ?? sourceId;
|
|
256
|
+
const slug = title
|
|
257
|
+
.toLowerCase()
|
|
258
|
+
.replace(/[^a-z0-9\s-]/g, "")
|
|
259
|
+
.replace(/\s+/g, "-")
|
|
260
|
+
.replace(/-+/g, "-")
|
|
261
|
+
.replace(/^-|-$/g, "")
|
|
262
|
+
.slice(0, 80);
|
|
263
|
+
|
|
264
|
+
const category = categoryForSourceType(source.sourceType);
|
|
265
|
+
return `${category}/${slug}.md`;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
function categoryForSourceType(sourceType: string): string {
|
|
269
|
+
switch (sourceType) {
|
|
270
|
+
case "pdf":
|
|
271
|
+
return "papers";
|
|
272
|
+
case "youtube":
|
|
273
|
+
return "transcripts";
|
|
274
|
+
case "github":
|
|
275
|
+
return "repos";
|
|
276
|
+
case "image":
|
|
277
|
+
return "images";
|
|
278
|
+
default:
|
|
279
|
+
return "articles";
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
/**
|
|
284
|
+
* Load existing wiki articles that a source previously produced.
|
|
285
|
+
*/
|
|
286
|
+
async function loadExistingArticles(
|
|
287
|
+
root: string,
|
|
288
|
+
manifest: Manifest,
|
|
289
|
+
sourceId: string,
|
|
290
|
+
): Promise<{ path: string; content: string }[]> {
|
|
291
|
+
const source = manifest.sources[sourceId];
|
|
292
|
+
if (!source?.producedArticles.length) return [];
|
|
293
|
+
|
|
294
|
+
const articles: { path: string; content: string }[] = [];
|
|
295
|
+
for (const articlePath of source.producedArticles) {
|
|
296
|
+
try {
|
|
297
|
+
const relPath = articlePath.replace(/^wiki\//, "");
|
|
298
|
+
const content = await readWiki(root, relPath);
|
|
299
|
+
articles.push({ path: articlePath, content });
|
|
300
|
+
} catch {
|
|
301
|
+
// Article might have been deleted
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
return articles;
|
|
305
|
+
}
|