@kibhq/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/package.json +40 -0
  2. package/src/compile/backlinks.test.ts +112 -0
  3. package/src/compile/backlinks.ts +80 -0
  4. package/src/compile/cache.test.ts +126 -0
  5. package/src/compile/cache.ts +125 -0
  6. package/src/compile/compiler.test.ts +278 -0
  7. package/src/compile/compiler.ts +305 -0
  8. package/src/compile/diff.test.ts +164 -0
  9. package/src/compile/diff.ts +121 -0
  10. package/src/compile/index-manager.test.ts +227 -0
  11. package/src/compile/index-manager.ts +148 -0
  12. package/src/compile/prompts.ts +124 -0
  13. package/src/constants.ts +40 -0
  14. package/src/errors.ts +66 -0
  15. package/src/hash.test.ts +21 -0
  16. package/src/hash.ts +24 -0
  17. package/src/index.ts +22 -0
  18. package/src/ingest/extractors/file.test.ts +129 -0
  19. package/src/ingest/extractors/file.ts +136 -0
  20. package/src/ingest/extractors/github.test.ts +47 -0
  21. package/src/ingest/extractors/github.ts +135 -0
  22. package/src/ingest/extractors/interface.ts +26 -0
  23. package/src/ingest/extractors/pdf.ts +130 -0
  24. package/src/ingest/extractors/web.test.ts +242 -0
  25. package/src/ingest/extractors/web.ts +163 -0
  26. package/src/ingest/extractors/youtube.test.ts +44 -0
  27. package/src/ingest/extractors/youtube.ts +166 -0
  28. package/src/ingest/ingest.test.ts +187 -0
  29. package/src/ingest/ingest.ts +179 -0
  30. package/src/ingest/normalize.test.ts +120 -0
  31. package/src/ingest/normalize.ts +83 -0
  32. package/src/ingest/router.test.ts +154 -0
  33. package/src/ingest/router.ts +119 -0
  34. package/src/lint/lint.test.ts +253 -0
  35. package/src/lint/lint.ts +43 -0
  36. package/src/lint/rules.ts +178 -0
  37. package/src/providers/anthropic.ts +107 -0
  38. package/src/providers/index.ts +4 -0
  39. package/src/providers/ollama.ts +101 -0
  40. package/src/providers/openai.ts +67 -0
  41. package/src/providers/router.ts +62 -0
  42. package/src/query/query.test.ts +165 -0
  43. package/src/query/query.ts +136 -0
  44. package/src/schemas.ts +193 -0
  45. package/src/search/engine.test.ts +230 -0
  46. package/src/search/engine.ts +390 -0
  47. package/src/skills/loader.ts +163 -0
  48. package/src/skills/runner.ts +139 -0
  49. package/src/skills/schema.ts +28 -0
  50. package/src/skills/skills.test.ts +134 -0
  51. package/src/types.ts +136 -0
  52. package/src/vault.test.ts +141 -0
  53. package/src/vault.ts +251 -0
package/src/query/query.ts ADDED
@@ -0,0 +1,136 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { parseFrontmatter } from "../compile/diff.js";
3
+ import { SearchIndex } from "../search/engine.js";
4
+ import type { CompletionResult, LLMProvider, Message } from "../types.js";
5
+ import { listWiki, readIndex } from "../vault.js";
6
+
7
/** Optional settings accepted by `queryVault`. */
export interface QueryOptions {
  /** Upper bound on the number of search hits loaded as context; `queryVault` uses 5 when omitted. */
  maxArticles?: number;
  /** Earlier conversation turns, sent ahead of the new user message (chat mode). */
  history?: Array<Message>;
  /** When set, the answer is streamed and every text chunk is handed to this callback as it arrives. */
  onChunk?: (text: string) => void;
}
15
+
16
/** Value resolved by `queryVault`. */
export interface QueryResult {
  /** Text produced by the LLM. */
  answer: string;
  /** Paths of the articles that were supplied to the LLM as context. */
  sourcePaths: Array<string>;
  /** Token counts reported by the provider for this call. */
  usage: { inputTokens: number; outputTokens: number };
}
21
+
22
/**
 * System prompt sent with every query: restricts the model to the supplied
 * articles and asks for [Article Title] citations.
 */
const QUERY_SYSTEM_PROMPT = [
  "You are a knowledge assistant for a personal wiki. Answer questions using ONLY the information provided in the articles below.",
  "",
  "RULES:",
  "- Base your answer strictly on the provided articles",
  "- Cite sources using [Article Title] notation when referencing specific information",
  "- If the answer is not in the provided articles, say so clearly",
  "- Be concise and direct",
  "- Use markdown formatting for readability",
].join("\n");
30
+
31
/** One wiki article (or the INDEX.md fallback) that is handed to the LLM as context. */
interface ContextArticle {
  title: string;
  path: string;
  content: string;
}

/**
 * Read and parse the articles behind the given search hits.
 *
 * Files are read concurrently; the returned list keeps the order of `hits`.
 * A hit whose file cannot be read or parsed (e.g. it was deleted after the
 * index was written) is skipped rather than failing the whole query.
 */
async function loadContextArticles(
  hits: ReadonlyArray<{ path: string; title?: string }>,
): Promise<ContextArticle[]> {
  const loaded = await Promise.all(
    hits.map(async (hit): Promise<ContextArticle | undefined> => {
      try {
        const raw = await readFile(hit.path, "utf-8");
        const { frontmatter, body } = parseFrontmatter(raw);

        // Frontmatter is untyped data: narrow instead of asserting `as string`.
        // Non-string scalars are stringified, which is what the context
        // template would have done with them anyway.
        const rawTitle: unknown = frontmatter.title;
        const title =
          typeof rawTitle === "string"
            ? rawTitle
            : rawTitle != null
              ? String(rawTitle)
              : (hit.title ?? hit.path);

        return { title, path: hit.path, content: body };
      } catch {
        // Best effort: the file might have been deleted.
        return undefined;
      }
    }),
  );

  return loaded.filter((article): article is ContextArticle => article !== undefined);
}

/** Compose the user message: the article context (when there is any) followed by the question. */
function buildUserMessage(question: string, articles: readonly ContextArticle[]): string {
  if (articles.length === 0) {
    return `No relevant articles found in the knowledge base.\n\nQUESTION: ${question}`;
  }

  const articleContext = articles
    .map((a) => `--- ${a.title} (${a.path}) ---\n${a.content}`)
    .join("\n\n");

  return `RELEVANT ARTICLES:\n\n${articleContext}\n\n---\n\nQUESTION: ${question}`;
}

/**
 * Run the request through `provider.stream`, forwarding every text chunk to
 * `onChunk` and accumulating the full answer.
 *
 * Usage stays at zero unless the stream yields a usage chunk. The stop reason
 * is always reported as "end_turn" in streaming mode.
 */
async function streamCompletion(
  provider: LLMProvider,
  messages: Message[],
  onChunk: (text: string) => void,
): Promise<CompletionResult> {
  let content = "";
  let usage = { inputTokens: 0, outputTokens: 0 };

  for await (const chunk of provider.stream({ system: QUERY_SYSTEM_PROMPT, messages })) {
    if (chunk.type === "text" && chunk.text) {
      content += chunk.text;
      onChunk(chunk.text);
    } else if (chunk.type === "usage" && chunk.usage) {
      usage = chunk.usage;
    }
  }

  return { content, usage, stopReason: "end_turn" };
}

/**
 * Query the knowledge base using RAG:
 * 1. Load the saved search index, or build one over "wiki" if none can be loaded
 * 2. Search it and load the top articles (at most `options.maxArticles`, default 5)
 * 3. If no article could be loaded, fall back to INDEX.md as the only context
 * 4. Send context + question (after any `options.history`) to the LLM
 *
 * @param root - Vault root passed to the search index and to `readIndex`.
 * @param question - The user's question; also used as the search query.
 * @param provider - LLM provider; `stream` is used when `options.onChunk` is set, `complete` otherwise.
 * @param options - See {@link QueryOptions}.
 * @returns The answer, the paths of the articles used as context
 *   ("wiki/INDEX.md" for the fallback), and the token usage.
 */
export async function queryVault(
  root: string,
  question: string,
  provider: LLMProvider,
  options: QueryOptions = {},
): Promise<QueryResult> {
  const maxArticles = options.maxArticles ?? 5;

  // Build or load search index
  const index = new SearchIndex();
  const loaded = await index.load(root);
  if (!loaded) {
    await index.build(root, "wiki");
  }

  // Search for relevant articles and load them in full
  const searchResults = index.search(question, { limit: maxArticles });
  const articles = await loadContextArticles(searchResults);

  // If no articles found, try using INDEX.md as fallback context
  if (articles.length === 0) {
    const indexContent = await readIndex(root);
    if (indexContent) {
      articles.push({
        title: "Knowledge Base Index",
        path: "wiki/INDEX.md",
        content: indexContent,
      });
    }
  }

  // History first, then the new question with its context
  const messages: Message[] = [
    ...(options.history ?? []),
    { role: "user", content: buildUserMessage(question, articles) },
  ];

  const result: CompletionResult = options.onChunk
    ? await streamCompletion(provider, messages, options.onChunk)
    : await provider.complete({ system: QUERY_SYSTEM_PROMPT, messages });

  return {
    answer: result.content,
    sourcePaths: articles.map((a) => a.path),
    usage: result.usage,
  };
}
package/src/schemas.ts ADDED
@@ -0,0 +1,193 @@
1
+ import { z } from "zod";
2
+ import { DEFAULT_CATEGORIES, DEFAULTS, MANIFEST_VERSION, RAW_CATEGORIES } from "./constants.js";
3
+
4
// ─── Source Types ────────────────────────────────────────────────

const SOURCE_TYPES = ["web", "pdf", "youtube", "github", "image", "file"] as const;

/** Allowed values for the `sourceType` field of source entries and ingest results. */
export const SourceTypeSchema = z.enum(SOURCE_TYPES);
7
+
8
// ─── Article Categories ──────────────────────────────────────────

const ARTICLE_CATEGORIES = ["concept", "topic", "reference", "output"] as const;

/** Allowed values for the `category` field of article entries and article frontmatter. */
export const ArticleCategorySchema = z.enum(ARTICLE_CATEGORIES);
11
+
12
// ─── Source Entry (in manifest) ──────────────────────────────────

/** Descriptive metadata of a source; only the word count is mandatory. */
const sourceMetadataSchema = z.object({
  title: z.string().optional(),
  author: z.string().optional(),
  date: z.string().optional(),
  wordCount: z.number().int().nonnegative(),
});

/**
 * One entry of the manifest's `sources` record.
 * `lastCompiled` is required but may be null; timestamps must be ISO datetime strings.
 */
export const SourceEntrySchema = z.object({
  hash: z.string(),
  ingestedAt: z.string().datetime(),
  lastCompiled: z.string().datetime().nullable(),
  sourceType: SourceTypeSchema,
  originalUrl: z.string().optional(),
  producedArticles: z.array(z.string()),
  metadata: sourceMetadataSchema,
});
28
+
29
// ─── Article Entry (in manifest) ─────────────────────────────────

/** Field validators for one entry of the manifest's `articles` record. */
const articleEntryShape = {
  hash: z.string(),
  // Both timestamps must be ISO datetime strings.
  createdAt: z.string().datetime(),
  lastUpdated: z.string().datetime(),
  // Link and provenance lists are plain string arrays.
  derivedFrom: z.array(z.string()),
  backlinks: z.array(z.string()),
  forwardLinks: z.array(z.string()),
  tags: z.array(z.string()),
  summary: z.string(),
  wordCount: z.number().int().nonnegative(),
  category: ArticleCategorySchema,
};

/** One entry of the manifest's `articles` record; every field is required. */
export const ArticleEntrySchema = z.object(articleEntryShape);
43
+
44
// ─── Manifest ────────────────────────────────────────────────────

/** The manifest's `vault` section: identity and provider/model settings. */
const manifestVaultSchema = z.object({
  name: z.string(),
  created: z.string().datetime(),
  lastCompiled: z.string().datetime().nullable(),
  provider: z.string(),
  model: z.string(),
});

/** The manifest's `stats` section: non-negative integer totals plus a nullable lint timestamp. */
const manifestStatsSchema = z.object({
  totalSources: z.number().int().nonnegative(),
  totalArticles: z.number().int().nonnegative(),
  totalWords: z.number().int().nonnegative(),
  lastLintAt: z.string().datetime().nullable(),
});

/**
 * The vault manifest. `version` must equal MANIFEST_VERSION exactly;
 * `sources` and `articles` are string-keyed records of their entry schemas.
 */
export const ManifestSchema = z.object({
  version: z.literal(MANIFEST_VERSION),
  vault: manifestVaultSchema,
  sources: z.record(z.string(), SourceEntrySchema),
  articles: z.record(z.string(), ArticleEntrySchema),
  stats: manifestStatsSchema,
});
64
+
65
// ─── Vault Config ────────────────────────────────────────────────

/** `provider` section: provider name and model names, defaulting to DEFAULTS. */
const providerConfigSchema = z.object({
  default: z.string().default(DEFAULTS.provider),
  model: z.string().default(DEFAULTS.model),
  fast_model: z.string().default(DEFAULTS.fastModel),
});

/** `compile` section. */
const compileConfigSchema = z.object({
  auto_index: z.boolean().default(true),
  auto_graph: z.boolean().default(true),
  max_sources_per_pass: z.number().int().positive().default(DEFAULTS.maxSourcesPerPass),
  // Copy the constant so the default is a fresh mutable array.
  categories: z.array(z.string()).default([...DEFAULT_CATEGORIES]),
});

/** `ingest` section. */
const ingestConfigSchema = z.object({
  download_images: z.boolean().default(true),
  max_file_size_mb: z.number().positive().default(DEFAULTS.maxFileSizeMb),
  default_category: z.string().default("articles"),
});

/** `watch` section. */
const watchConfigSchema = z.object({
  enabled: z.boolean().default(false),
  inbox_path: z.string().default("inbox"),
  auto_compile: z.boolean().default(true),
  poll_interval_ms: z.number().int().positive().default(DEFAULTS.watchPollIntervalMs),
});

/** `search` section. */
const searchConfigSchema = z.object({
  engine: z.string().default("builtin"),
  max_results: z.number().int().positive().default(DEFAULTS.searchMaxResults),
});

/** `query` section. */
const queryConfigSchema = z.object({
  file_output: z.boolean().default(true),
  auto_file: z.boolean().default(false),
});

/** `cache` section. */
const cacheConfigSchema = z.object({
  enabled: z.boolean().default(true),
  ttl_hours: z.number().int().positive().default(DEFAULTS.cacheTtlHours),
  max_size_mb: z.number().positive().default(DEFAULTS.cacheMaxSizeMb),
});

/**
 * Vault configuration. Every leaf field has a default, but the seven
 * sections themselves carry no default, so each section key must be present
 * in the input (an empty object is enough).
 */
export const VaultConfigSchema = z.object({
  provider: providerConfigSchema,
  compile: compileConfigSchema,
  ingest: ingestConfigSchema,
  watch: watchConfigSchema,
  search: searchConfigSchema,
  query: queryConfigSchema,
  cache: cacheConfigSchema,
});
104
+
105
// ─── Article Frontmatter ─────────────────────────────────────────

/** Field validators for an article's frontmatter block. */
const articleFrontmatterShape = {
  title: z.string(),
  slug: z.string(),
  category: ArticleCategorySchema,
  tags: z.array(z.string()),
  sources: z.array(z.string()),
  // Unlike the manifest timestamps, these are accepted as arbitrary strings.
  created: z.string(),
  updated: z.string(),
  summary: z.string(),
};

/** Frontmatter of a wiki article; every field is required. */
export const ArticleFrontmatterSchema = z.object(articleFrontmatterShape);
117
+
118
// ─── LLM Provider Types ─────────────────────────────────────────

/** Roles a conversation message may carry. */
export const MessageRoleSchema = z.enum(["user", "assistant"]);

/** A single conversation message: a role plus plain-text content. */
export const MessageSchema = z.object({
  role: MessageRoleSchema,
  content: z.string(),
});

/**
 * Input of a completion request. `maxTokens` (positive integer) and
 * `temperature` (0 to 2 inclusive) are optional.
 */
export const CompletionParamsSchema = z.object({
  system: z.string(),
  messages: z.array(MessageSchema),
  maxTokens: z.number().int().positive().optional(),
  temperature: z.number().min(0).max(2).optional(),
});

/** Token counts attached to a completion; both are non-negative integers. */
const completionUsageSchema = z.object({
  inputTokens: z.number().int().nonnegative(),
  outputTokens: z.number().int().nonnegative(),
});

/** Output of a completion request. */
export const CompletionResultSchema = z.object({
  content: z.string(),
  usage: completionUsageSchema,
  stopReason: z.enum(["end_turn", "max_tokens", "tool_use"]),
});
142
+
143
// ─── Compile File Operation ──────────────────────────────────────

const FILE_OPS = ["create", "update", "delete"] as const;

/** A single file operation: what to do (`op`), where (`path`), and optional `content`. */
export const FileOperationSchema = z.object({
  op: z.enum(FILE_OPS),
  path: z.string(),
  content: z.string().optional(),
});
150
+
151
// ─── Search Result ───────────────────────────────────────────────

/** Field validators for one search hit; only `title` may be absent. */
const searchResultShape = {
  path: z.string(),
  score: z.number(),
  snippet: z.string(),
  title: z.string().optional(),
};

/** One search hit. */
export const SearchResultSchema = z.object(searchResultShape);
159
+
160
// ─── Ingest Result ───────────────────────────────────────────────

/** Field validators for the result of an ingest; `skipReason` is the only optional field. */
const ingestResultShape = {
  sourceId: z.string(),
  path: z.string(),
  sourceType: SourceTypeSchema,
  title: z.string(),
  wordCount: z.number().int().nonnegative(),
  skipped: z.boolean(),
  skipReason: z.string().optional(),
};

/** Result of an ingest. */
export const IngestResultSchema = z.object(ingestResultShape);
171
+
172
// ─── Compile Result ──────────────────────────────────────────────

/** Field validators for a compile result: non-negative integer counters plus the operation list. */
const compileResultShape = {
  sourcesCompiled: z.number().int().nonnegative(),
  articlesCreated: z.number().int().nonnegative(),
  articlesUpdated: z.number().int().nonnegative(),
  articlesDeleted: z.number().int().nonnegative(),
  operations: z.array(FileOperationSchema),
};

/** Result of a compile. */
export const CompileResultSchema = z.object(compileResultShape);
181
+
182
// ─── Lint Diagnostic ─────────────────────────────────────────────

const LINT_SEVERITIES = ["error", "warning", "info"] as const;
const LINT_RULES = ["orphan", "stale", "missing", "broken-link", "frontmatter"] as const;

/** Severity levels a diagnostic may carry. */
export const LintSeveritySchema = z.enum(LINT_SEVERITIES);

/** Identifiers of the lint rules a diagnostic may come from. */
export const LintRuleSchema = z.enum(LINT_RULES);

/** A single lint finding; `path` is optional, everything else is required. */
export const LintDiagnosticSchema = z.object({
  rule: LintRuleSchema,
  severity: LintSeveritySchema,
  message: z.string(),
  path: z.string().optional(),
  fixable: z.boolean(),
});
package/src/search/engine.test.ts ADDED
@@ -0,0 +1,230 @@
1
+ import { afterEach, describe, expect, test } from "bun:test";
2
+ import { mkdtemp, rm } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { initVault, writeWiki } from "../vault.js";
6
+ import { SearchIndex } from "./engine.js";
7
+
8
// Path of the vault created by the most recent makeTempVault() call.
let tempDir: string;

// Remove that vault after every test; nothing to do if none has been created yet.
afterEach(async () => {
  if (!tempDir) return;
  await rm(tempDir, { recursive: true, force: true });
});
13
+
14
/**
 * Create a fresh temp directory, initialise a vault named "test" in it,
 * remember it in `tempDir` for cleanup, and return its path.
 */
async function makeTempVault() {
  const prefix = join(tmpdir(), "kib-search-test-");
  tempDir = await mkdtemp(prefix);
  await initVault(tempDir, { name: "test" });
  return tempDir;
}
19
+
20
/**
 * Render a minimal wiki article: frontmatter (`title`, `slug`), an H1 with
 * the title, then the body.
 *
 * The slug is the lower-cased title with every run of characters that are
 * not letters or digits collapsed to a single "-", and with leading/trailing
 * hyphens removed. For titles made of words, digits, hyphens and single
 * spaces this equals plain whitespace-to-hyphen replacement.
 *
 * The title itself is written verbatim (no YAML quoting), so titles
 * containing ":" may not survive a strict YAML parser.
 */
function articleMd(title: string, content: string): string {
  const slug = title
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]+/gu, "-")
    .replace(/^-+|-+$/g, "");

  const frontmatter = ["---", `title: ${title}`, `slug: ${slug}`, "---"].join("\n");
  return `${frontmatter}\n\n# ${title}\n\n${content}`;
}
23
+
24
describe("SearchIndex", () => {
  /** Build a fresh index over the vault's "wiki" directory. */
  const buildWikiIndex = async (root: string): Promise<SearchIndex> => {
    const fresh = new SearchIndex();
    await fresh.build(root, "wiki");
    return fresh;
  };

  /** Write one generated article (frontmatter + heading + body) into the wiki. */
  const seedArticle = (root: string, relPath: string, title: string, body: string) =>
    writeWiki(root, relPath, articleMd(title, body));

  test("builds index from wiki files", async () => {
    const root = await makeTempVault();
    await seedArticle(
      root,
      "concepts/transformers.md",
      "Transformer Architecture",
      "The transformer is a neural network architecture based on self-attention mechanisms used in deep learning.",
    );
    await seedArticle(
      root,
      "topics/scaling.md",
      "Scaling Laws",
      "Scaling laws describe power-law relationships between compute, data, and model performance in neural networks.",
    );

    const index = await buildWikiIndex(root);

    expect(index.documentCount).toBe(2);
  });

  test("returns relevant results for a query", async () => {
    const root = await makeTempVault();
    await seedArticle(
      root,
      "concepts/transformers.md",
      "Transformer Architecture",
      "The transformer is a neural network architecture based on self-attention mechanisms. It was introduced in 2017 by Vaswani et al.",
    );
    await seedArticle(
      root,
      "concepts/attention.md",
      "Attention Mechanisms",
      "Attention mechanisms compute weighted sums over value vectors using query-key compatibility scores. Self-attention is a special case.",
    );
    await seedArticle(
      root,
      "topics/cnn.md",
      "Convolutional Neural Networks",
      "CNNs use convolutional layers to detect spatial patterns in images and other grid-structured data. They are unrelated to attention.",
    );

    const index = await buildWikiIndex(root);
    const hits = index.search("self-attention transformer");

    expect(hits.length).toBeGreaterThan(0);

    // The transformer article matches both query terms, so it must come first.
    const [best] = hits;
    expect(best!.title).toBe("Transformer Architecture");

    // The attention article must be somewhere in the list as well.
    const titles = hits.map((hit) => hit.title);
    expect(titles.some((title) => title === "Attention Mechanisms")).toBe(true);
  });

  test("returns empty results for unmatched query", async () => {
    const root = await makeTempVault();
    await seedArticle(root, "concepts/test.md", "Test", "Some content about testing.");

    const index = await buildWikiIndex(root);

    expect(index.search("quantum computing blockchain")).toHaveLength(0);
  });

  test("respects limit parameter", async () => {
    const root = await makeTempVault();

    // Ten articles that all contain "neural", written one after another.
    const articleNumbers = Array.from({ length: 10 }, (_, n) => n);
    for (const i of articleNumbers) {
      await seedArticle(
        root,
        `concepts/article-${i}.md`,
        `Neural Network ${i}`,
        `Article ${i} about neural networks and deep learning.`,
      );
    }

    const index = await buildWikiIndex(root);

    expect(index.search("neural", { limit: 3 })).toHaveLength(3);
  });

  test("returns results with scores and snippets", async () => {
    const root = await makeTempVault();
    await seedArticle(
      root,
      "concepts/test.md",
      "Test Article",
      "This is a test article about knowledge compilation.",
    );

    const index = await buildWikiIndex(root);
    const hits = index.search("knowledge compilation");

    expect(hits.length).toBeGreaterThan(0);
    const [best] = hits;
    expect(best!.score).toBeGreaterThan(0);
    expect(best!.snippet).toBeTruthy();
    expect(best!.path).toContain("test.md");
  });

  test("handles empty index gracefully", async () => {
    const root = await makeTempVault();
    const index = await buildWikiIndex(root);

    expect(index.search("anything")).toHaveLength(0);
  });

  test("handles empty query gracefully", async () => {
    const root = await makeTempVault();
    await seedArticle(root, "concepts/test.md", "Test", "Content.");

    const index = await buildWikiIndex(root);

    expect(index.search("")).toHaveLength(0);
  });

  test("save and load round-trip preserves search ability", async () => {
    const root = await makeTempVault();
    await seedArticle(
      root,
      "concepts/ai.md",
      "Artificial Intelligence",
      "AI is the simulation of human intelligence by machines.",
    );

    // Persist an index built from the wiki...
    const written = await buildWikiIndex(root);
    await written.save(root);

    // ...and read it back into a brand-new instance.
    const restored = new SearchIndex();
    const loaded = await restored.load(root);
    expect(loaded).toBe(true);
    expect(restored.documentCount).toBe(1);

    const hits = restored.search("artificial intelligence");
    expect(hits.length).toBeGreaterThan(0);
    expect(hits[0]!.title).toBe("Artificial Intelligence");
  });

  test("load returns false for missing index", async () => {
    const root = await makeTempVault();
    const index = new SearchIndex();

    expect(await index.load(root)).toBe(false);
  });

  test("skips INDEX.md and GRAPH.md", async () => {
    const root = await makeTempVault();
    await writeWiki(root, "INDEX.md", "# Index\nindex content");
    await writeWiki(root, "GRAPH.md", "# Graph\ngraph content");
    await seedArticle(root, "concepts/real.md", "Real Article", "Actual content.");

    const index = await buildWikiIndex(root);

    // Only the real article counts as a document.
    expect(index.documentCount).toBe(1);
  });

  test("title gets boosted in ranking", async () => {
    const root = await makeTempVault();

    // "transformer" appears in this article's title.
    await seedArticle(
      root,
      "concepts/transformer.md",
      "Transformer",
      "A neural network architecture.",
    );

    // Here "transformer" appears in the body only.
    await seedArticle(
      root,
      "concepts/overview.md",
      "Deep Learning Overview",
      "Various architectures include the transformer and others.",
    );

    const index = await buildWikiIndex(root);
    const hits = index.search("transformer");

    expect(hits.length).toBe(2);
    // The title match must outrank the body-only match.
    expect(hits[0]!.title).toBe("Transformer");
  });
});