@kibhq/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +40 -0
- package/src/compile/backlinks.test.ts +112 -0
- package/src/compile/backlinks.ts +80 -0
- package/src/compile/cache.test.ts +126 -0
- package/src/compile/cache.ts +125 -0
- package/src/compile/compiler.test.ts +278 -0
- package/src/compile/compiler.ts +305 -0
- package/src/compile/diff.test.ts +164 -0
- package/src/compile/diff.ts +121 -0
- package/src/compile/index-manager.test.ts +227 -0
- package/src/compile/index-manager.ts +148 -0
- package/src/compile/prompts.ts +124 -0
- package/src/constants.ts +40 -0
- package/src/errors.ts +66 -0
- package/src/hash.test.ts +21 -0
- package/src/hash.ts +24 -0
- package/src/index.ts +22 -0
- package/src/ingest/extractors/file.test.ts +129 -0
- package/src/ingest/extractors/file.ts +136 -0
- package/src/ingest/extractors/github.test.ts +47 -0
- package/src/ingest/extractors/github.ts +135 -0
- package/src/ingest/extractors/interface.ts +26 -0
- package/src/ingest/extractors/pdf.ts +130 -0
- package/src/ingest/extractors/web.test.ts +242 -0
- package/src/ingest/extractors/web.ts +163 -0
- package/src/ingest/extractors/youtube.test.ts +44 -0
- package/src/ingest/extractors/youtube.ts +166 -0
- package/src/ingest/ingest.test.ts +187 -0
- package/src/ingest/ingest.ts +179 -0
- package/src/ingest/normalize.test.ts +120 -0
- package/src/ingest/normalize.ts +83 -0
- package/src/ingest/router.test.ts +154 -0
- package/src/ingest/router.ts +119 -0
- package/src/lint/lint.test.ts +253 -0
- package/src/lint/lint.ts +43 -0
- package/src/lint/rules.ts +178 -0
- package/src/providers/anthropic.ts +107 -0
- package/src/providers/index.ts +4 -0
- package/src/providers/ollama.ts +101 -0
- package/src/providers/openai.ts +67 -0
- package/src/providers/router.ts +62 -0
- package/src/query/query.test.ts +165 -0
- package/src/query/query.ts +136 -0
- package/src/schemas.ts +193 -0
- package/src/search/engine.test.ts +230 -0
- package/src/search/engine.ts +390 -0
- package/src/skills/loader.ts +163 -0
- package/src/skills/runner.ts +139 -0
- package/src/skills/schema.ts +28 -0
- package/src/skills/skills.test.ts +134 -0
- package/src/types.ts +136 -0
- package/src/vault.test.ts +141 -0
- package/src/vault.ts +251 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { parseFrontmatter } from "../compile/diff.js";
|
|
3
|
+
import { SearchIndex } from "../search/engine.js";
|
|
4
|
+
import type { CompletionResult, LLMProvider, Message } from "../types.js";
|
|
5
|
+
import { listWiki, readIndex } from "../vault.js";
|
|
6
|
+
|
|
7
|
+
/** Optional settings accepted by `queryVault`. */
export interface QueryOptions {
  /**
   * Upper bound on the number of search hits loaded as context.
   * `queryVault` uses 5 when this is not provided.
   */
  maxArticles?: number;
  /**
   * Prior conversation turns (chat mode). They are placed before the
   * newly built user message when the provider is called.
   */
  history?: Message[];
  /**
   * Streaming callback. When supplied, `queryVault` streams the answer and
   * invokes this with each non-empty text chunk as it arrives.
   */
  onChunk?: (text: string) => void;
}
|
|
15
|
+
|
|
16
|
+
/** Outcome of a `queryVault` call. */
export interface QueryResult {
  /** Text produced by the LLM provider. */
  answer: string;
  /** Paths of the articles that were supplied to the model as context. */
  sourcePaths: string[];
  /** Token counts reported by the provider for this call. */
  usage: { inputTokens: number; outputTokens: number };
}
|
|
21
|
+
|
|
22
|
+
/**
 * System prompt sent with every query: restricts the model to the supplied
 * articles and sets the citation and formatting rules.
 */
const QUERY_SYSTEM_PROMPT = [
  "You are a knowledge assistant for a personal wiki. Answer questions using ONLY the information provided in the articles below.",
  "",
  "RULES:",
  "- Base your answer strictly on the provided articles",
  "- Cite sources using [Article Title] notation when referencing specific information",
  "- If the answer is not in the provided articles, say so clearly",
  "- Be concise and direct",
  "- Use markdown formatting for readability",
].join("\n");
|
|
30
|
+
|
|
31
|
+
/**
 * Answer a question from the vault using retrieval-augmented generation.
 *
 * Flow:
 * 1. Load the persisted search index; build one over "wiki" if loading fails.
 * 2. Search with the question and read each hit's file as context.
 * 3. If no article could be read, fall back to the content from `readIndex`.
 * 4. Call the provider (streaming when `options.onChunk` is set) and return
 *    the answer together with the paths used as context.
 *
 * @param root - Vault root handed to the search index and to `readIndex`.
 * @param question - The user's question; also used verbatim as the search query.
 * @param provider - LLM provider used for `complete` or `stream`.
 * @param options - Article limit, prior history, and streaming callback.
 * @returns Answer text, context source paths, and token usage.
 */
export async function queryVault(
  root: string,
  question: string,
  provider: LLMProvider,
  options: QueryOptions = {},
): Promise<QueryResult> {
  const articleLimit = options.maxArticles ?? 5;

  // Prefer the persisted index; rebuild from the wiki directory otherwise.
  const searchIndex = new SearchIndex();
  if (!(await searchIndex.load(root))) {
    await searchIndex.build(root, "wiki");
  }

  const hits = searchIndex.search(question, { limit: articleLimit });

  // Read the full text of every hit, in ranking order.
  const contextArticles: { title: string; path: string; content: string }[] = [];
  for (const hit of hits) {
    try {
      const raw = await readFile(hit.path, "utf-8");
      const { frontmatter, body } = parseFrontmatter(raw);
      const title = (frontmatter.title as string) ?? hit.title ?? hit.path;
      contextArticles.push({ title, path: hit.path, content: body });
    } catch {
      // Best effort: the indexed file may no longer exist, so skip this hit.
    }
  }

  // Nothing usable from search: offer the index page as the only context.
  if (contextArticles.length === 0) {
    const vaultIndex = await readIndex(root);
    if (vaultIndex) {
      contextArticles.push({
        title: "Knowledge Base Index",
        path: "wiki/INDEX.md",
        content: vaultIndex,
      });
    }
  }

  let userMessage: string;
  if (contextArticles.length > 0) {
    const sections = contextArticles.map(
      (article) => `--- ${article.title} (${article.path}) ---\n${article.content}`,
    );
    userMessage = `RELEVANT ARTICLES:\n\n${sections.join("\n\n")}\n\n---\n\nQUESTION: ${question}`;
  } else {
    userMessage = `No relevant articles found in the knowledge base.\n\nQUESTION: ${question}`;
  }

  // Earlier turns first, then the freshly built user message.
  const messages: Message[] = [...(options.history ?? [])];
  messages.push({ role: "user", content: userMessage });

  let result: CompletionResult;
  if (!options.onChunk) {
    // One-shot completion.
    result = await provider.complete({
      system: QUERY_SYSTEM_PROMPT,
      messages,
    });
  } else {
    // Streaming: forward text chunks to the caller while accumulating them.
    let streamedText = "";
    let streamedUsage = { inputTokens: 0, outputTokens: 0 };

    const stream = provider.stream({
      system: QUERY_SYSTEM_PROMPT,
      messages,
    });
    for await (const chunk of stream) {
      if (chunk.type === "text" && chunk.text) {
        streamedText += chunk.text;
        options.onChunk(chunk.text);
      }
      if (chunk.type === "usage" && chunk.usage) {
        streamedUsage = chunk.usage;
      }
    }

    // The stop reason is fixed here; it is not read from the stream.
    result = {
      content: streamedText,
      usage: streamedUsage,
      stopReason: "end_turn",
    };
  }

  const sourcePaths = contextArticles.map((article) => article.path);
  return {
    answer: result.content,
    sourcePaths,
    usage: result.usage,
  };
}
|
package/src/schemas.ts
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { DEFAULT_CATEGORIES, DEFAULTS, MANIFEST_VERSION, RAW_CATEGORIES } from "./constants.js";
|
|
3
|
+
|
|
4
|
+
// ─── Source Types ────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
const SOURCE_TYPE_VALUES = ["web", "pdf", "youtube", "github", "image", "file"] as const;

/** Allowed values for a source's `sourceType`. */
export const SourceTypeSchema = z.enum(SOURCE_TYPE_VALUES);

// ─── Article Categories ──────────────────────────────────────────

const ARTICLE_CATEGORY_VALUES = ["concept", "topic", "reference", "output"] as const;

/** Allowed values for an article's `category`. */
export const ArticleCategorySchema = z.enum(ARTICLE_CATEGORY_VALUES);
|
|
11
|
+
|
|
12
|
+
// ─── Source Entry (in manifest) ──────────────────────────────────
|
|
13
|
+
|
|
14
|
+
/** Descriptive fields kept for a source; only `wordCount` is required. */
const sourceEntryMetadataSchema = z.object({
  title: z.string().optional(),
  author: z.string().optional(),
  date: z.string().optional(),
  wordCount: z.number().int().nonnegative(),
});

/**
 * Manifest record for a single source.
 *
 * `ingestedAt` and `lastCompiled` are validated as datetime strings;
 * `lastCompiled` may be `null`.
 */
export const SourceEntrySchema = z.object({
  hash: z.string(),
  ingestedAt: z.string().datetime(),
  lastCompiled: z.string().datetime().nullable(),
  sourceType: SourceTypeSchema,
  originalUrl: z.string().optional(),
  producedArticles: z.string().array(),
  metadata: sourceEntryMetadataSchema,
});
|
|
28
|
+
|
|
29
|
+
// ─── Article Entry (in manifest) ─────────────────────────────────
|
|
30
|
+
|
|
31
|
+
/**
 * Manifest record for a single article: content hash, timestamps
 * (validated as datetime strings), link lists, tags, summary, word count
 * and category.
 */
export const ArticleEntrySchema = z.object({
  hash: z.string(),
  createdAt: z.string().datetime(),
  lastUpdated: z.string().datetime(),
  derivedFrom: z.string().array(),
  backlinks: z.string().array(),
  forwardLinks: z.string().array(),
  tags: z.string().array(),
  summary: z.string(),
  wordCount: z.number().int().nonnegative(),
  category: ArticleCategorySchema,
});
|
|
43
|
+
|
|
44
|
+
// ─── Manifest ────────────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
/** Vault identity and the provider/model recorded in the manifest. */
const manifestVaultSchema = z.object({
  name: z.string(),
  created: z.string().datetime(),
  lastCompiled: z.string().datetime().nullable(),
  provider: z.string(),
  model: z.string(),
});

/** Aggregate counters stored in the manifest; `lastLintAt` may be `null`. */
const manifestStatsSchema = z.object({
  totalSources: z.number().int().nonnegative(),
  totalArticles: z.number().int().nonnegative(),
  totalWords: z.number().int().nonnegative(),
  lastLintAt: z.string().datetime().nullable(),
});

/**
 * Whole-manifest schema. `version` must equal `MANIFEST_VERSION`;
 * `sources` and `articles` are string-keyed records of their entry schemas.
 */
export const ManifestSchema = z.object({
  version: z.literal(MANIFEST_VERSION),
  vault: manifestVaultSchema,
  sources: z.record(z.string(), SourceEntrySchema),
  articles: z.record(z.string(), ArticleEntrySchema),
  stats: manifestStatsSchema,
});
|
|
64
|
+
|
|
65
|
+
// ─── Vault Config ────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
/** `provider` section: default provider name and model identifiers. */
const vaultConfigProviderSchema = z.object({
  default: z.string().default(DEFAULTS.provider),
  model: z.string().default(DEFAULTS.model),
  fast_model: z.string().default(DEFAULTS.fastModel),
});

/** `compile` section. */
const vaultConfigCompileSchema = z.object({
  auto_index: z.boolean().default(true),
  auto_graph: z.boolean().default(true),
  max_sources_per_pass: z.number().int().positive().default(DEFAULTS.maxSourcesPerPass),
  // Spread so the default is a fresh array rather than the shared constant.
  categories: z.array(z.string()).default([...DEFAULT_CATEGORIES]),
});

/** `ingest` section. */
const vaultConfigIngestSchema = z.object({
  download_images: z.boolean().default(true),
  max_file_size_mb: z.number().positive().default(DEFAULTS.maxFileSizeMb),
  default_category: z.string().default("articles"),
});

/** `watch` section. */
const vaultConfigWatchSchema = z.object({
  enabled: z.boolean().default(false),
  inbox_path: z.string().default("inbox"),
  auto_compile: z.boolean().default(true),
  poll_interval_ms: z.number().int().positive().default(DEFAULTS.watchPollIntervalMs),
});

/** `search` section. */
const vaultConfigSearchSchema = z.object({
  engine: z.string().default("builtin"),
  max_results: z.number().int().positive().default(DEFAULTS.searchMaxResults),
});

/** `query` section. */
const vaultConfigQuerySchema = z.object({
  file_output: z.boolean().default(true),
  auto_file: z.boolean().default(false),
});

/** `cache` section. */
const vaultConfigCacheSchema = z.object({
  enabled: z.boolean().default(true),
  ttl_hours: z.number().int().positive().default(DEFAULTS.cacheTtlHours),
  max_size_mb: z.number().positive().default(DEFAULTS.cacheMaxSizeMb),
});

/**
 * Vault configuration schema.
 *
 * Every leaf setting has a default, but the seven section objects themselves
 * carry no default, so each section key must be present in the parsed input.
 */
export const VaultConfigSchema = z.object({
  provider: vaultConfigProviderSchema,
  compile: vaultConfigCompileSchema,
  ingest: vaultConfigIngestSchema,
  watch: vaultConfigWatchSchema,
  search: vaultConfigSearchSchema,
  query: vaultConfigQuerySchema,
  cache: vaultConfigCacheSchema,
});
|
|
104
|
+
|
|
105
|
+
// ─── Article Frontmatter ─────────────────────────────────────────
|
|
106
|
+
|
|
107
|
+
/**
 * Frontmatter fields of an article file. `created` and `updated` are
 * plain strings here (no datetime format check is applied).
 */
export const ArticleFrontmatterSchema = z.object({
  title: z.string(),
  slug: z.string(),
  category: ArticleCategorySchema,
  tags: z.string().array(),
  sources: z.string().array(),
  created: z.string(),
  updated: z.string(),
  summary: z.string(),
});
|
|
117
|
+
|
|
118
|
+
// ─── LLM Provider Types ─────────────────────────────────────────
|
|
119
|
+
|
|
120
|
+
const MESSAGE_ROLE_VALUES = ["user", "assistant"] as const;

/** Roles a conversation message may carry. */
export const MessageRoleSchema = z.enum(MESSAGE_ROLE_VALUES);

/** A single conversation message: role plus text content. */
export const MessageSchema = z.object({
  role: MessageRoleSchema,
  content: z.string(),
});

/**
 * Parameters for a completion request. `maxTokens` (positive integer) and
 * `temperature` (0 to 2 inclusive) are optional.
 */
export const CompletionParamsSchema = z.object({
  system: z.string(),
  messages: MessageSchema.array(),
  maxTokens: z.number().int().positive().optional(),
  temperature: z.number().min(0).max(2).optional(),
});

/** Non-negative integer token counts for one completion. */
const completionUsageSchema = z.object({
  inputTokens: z.number().int().nonnegative(),
  outputTokens: z.number().int().nonnegative(),
});

const STOP_REASON_VALUES = ["end_turn", "max_tokens", "tool_use"] as const;

/** Result of a completion: generated text, token usage, and stop reason. */
export const CompletionResultSchema = z.object({
  content: z.string(),
  usage: completionUsageSchema,
  stopReason: z.enum(STOP_REASON_VALUES),
});
|
|
142
|
+
|
|
143
|
+
// ─── Compile File Operation ──────────────────────────────────────
|
|
144
|
+
|
|
145
|
+
const FILE_OPERATION_KINDS = ["create", "update", "delete"] as const;

/** One file operation: the kind, the target path, and optional content. */
export const FileOperationSchema = z.object({
  op: z.enum(FILE_OPERATION_KINDS),
  path: z.string(),
  content: z.string().optional(),
});
|
|
150
|
+
|
|
151
|
+
// ─── Search Result ───────────────────────────────────────────────
|
|
152
|
+
|
|
153
|
+
/** A single search hit: path, numeric score, snippet, and optional title. */
export const SearchResultSchema = z.object({
  path: z.string(),
  score: z.number(),
  snippet: z.string(),
  title: z.string().optional(),
});
|
|
159
|
+
|
|
160
|
+
// ─── Ingest Result ───────────────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
/**
 * Result of ingesting one source. `skipped` is always present;
 * `skipReason` is optional.
 */
export const IngestResultSchema = z.object({
  sourceId: z.string(),
  path: z.string(),
  sourceType: SourceTypeSchema,
  title: z.string(),
  wordCount: z.number().int().nonnegative(),
  skipped: z.boolean(),
  skipReason: z.string().optional(),
});
|
|
171
|
+
|
|
172
|
+
// ─── Compile Result ──────────────────────────────────────────────
|
|
173
|
+
|
|
174
|
+
/**
 * Result of a compile pass: non-negative integer counters plus the list of
 * file operations.
 */
export const CompileResultSchema = z.object({
  sourcesCompiled: z.number().int().nonnegative(),
  articlesCreated: z.number().int().nonnegative(),
  articlesUpdated: z.number().int().nonnegative(),
  articlesDeleted: z.number().int().nonnegative(),
  operations: FileOperationSchema.array(),
});
|
|
181
|
+
|
|
182
|
+
// ─── Lint Diagnostic ─────────────────────────────────────────────
|
|
183
|
+
|
|
184
|
+
const LINT_SEVERITY_VALUES = ["error", "warning", "info"] as const;
const LINT_RULE_VALUES = ["orphan", "stale", "missing", "broken-link", "frontmatter"] as const;

/** Severity levels a lint diagnostic can have. */
export const LintSeveritySchema = z.enum(LINT_SEVERITY_VALUES);

/** Identifiers of the lint rules a diagnostic can come from. */
export const LintRuleSchema = z.enum(LINT_RULE_VALUES);

/** One lint finding; `path` is optional. */
export const LintDiagnosticSchema = z.object({
  rule: LintRuleSchema,
  severity: LintSeveritySchema,
  message: z.string(),
  path: z.string().optional(),
  fixable: z.boolean(),
});
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import { afterEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { initVault, writeWiki } from "../vault.js";
|
|
6
|
+
import { SearchIndex } from "./engine.js";
|
|
7
|
+
|
|
8
|
+
/** Directory created by the most recent `makeTempVault()` call. */
let tempDir: string;

// Remove the temporary vault after each test, if one was created.
afterEach(async () => {
  if (!tempDir) return;
  await rm(tempDir, { recursive: true, force: true });
});

/**
 * Create a fresh temporary directory, initialise a vault named "test" in it,
 * remember it for cleanup, and return its path.
 */
async function makeTempVault() {
  const prefix = join(tmpdir(), "kib-search-test-");
  tempDir = await mkdtemp(prefix);
  await initVault(tempDir, { name: "test" });
  return tempDir;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Render a minimal article: frontmatter with `title` and a `slug` (the
 * lower-cased title with each whitespace run replaced by "-"), followed by
 * an H1 heading and the body text.
 */
function articleMd(title: string, content: string): string {
  const slug = title.toLowerCase().split(/\s+/).join("-");
  const lines = ["---", `title: ${title}`, `slug: ${slug}`, "---", "", `# ${title}`, "", content];
  return lines.join("\n");
}
|
|
23
|
+
|
|
24
|
+
describe("SearchIndex", () => {
  /** Build a new index over the vault's "wiki" directory. */
  const buildWikiIndex = async (root: string): Promise<SearchIndex> => {
    const index = new SearchIndex();
    await index.build(root, "wiki");
    return index;
  };

  /** Write `[path, title, body]` articles into the vault, one after another. */
  const writeArticles = async (
    root: string,
    entries: ReadonlyArray<readonly [path: string, title: string, body: string]>,
  ): Promise<void> => {
    for (const [path, title, body] of entries) {
      await writeWiki(root, path, articleMd(title, body));
    }
  };

  test("builds index from wiki files", async () => {
    const root = await makeTempVault();
    await writeArticles(root, [
      [
        "concepts/transformers.md",
        "Transformer Architecture",
        "The transformer is a neural network architecture based on self-attention mechanisms used in deep learning.",
      ],
      [
        "topics/scaling.md",
        "Scaling Laws",
        "Scaling laws describe power-law relationships between compute, data, and model performance in neural networks.",
      ],
    ]);

    const index = await buildWikiIndex(root);

    expect(index.documentCount).toBe(2);
  });

  test("returns relevant results for a query", async () => {
    const root = await makeTempVault();
    await writeArticles(root, [
      [
        "concepts/transformers.md",
        "Transformer Architecture",
        "The transformer is a neural network architecture based on self-attention mechanisms. It was introduced in 2017 by Vaswani et al.",
      ],
      [
        "concepts/attention.md",
        "Attention Mechanisms",
        "Attention mechanisms compute weighted sums over value vectors using query-key compatibility scores. Self-attention is a special case.",
      ],
      [
        "topics/cnn.md",
        "Convolutional Neural Networks",
        "CNNs use convolutional layers to detect spatial patterns in images and other grid-structured data. They are unrelated to attention.",
      ],
    ]);

    const index = await buildWikiIndex(root);
    const results = index.search("self-attention transformer");
    expect(results.length).toBeGreaterThan(0);

    // The transformer article contains both query terms, so it should lead.
    const best = results[0]!;
    expect(best.title).toBe("Transformer Architecture");

    // The attention article should be somewhere in the results as well.
    const titles = results.map((hit) => hit.title);
    expect(titles.some((title) => title === "Attention Mechanisms")).toBe(true);
  });

  test("returns empty results for unmatched query", async () => {
    const root = await makeTempVault();
    await writeArticles(root, [["concepts/test.md", "Test", "Some content about testing."]]);

    const index = await buildWikiIndex(root);

    expect(index.search("quantum computing blockchain")).toHaveLength(0);
  });

  test("respects limit parameter", async () => {
    const root = await makeTempVault();

    // Ten articles that all match "neural".
    const articleNumbers = Array.from({ length: 10 }, (_, i) => i);
    for (const i of articleNumbers) {
      await writeWiki(
        root,
        `concepts/article-${i}.md`,
        articleMd(`Neural Network ${i}`, `Article ${i} about neural networks and deep learning.`),
      );
    }

    const index = await buildWikiIndex(root);

    expect(index.search("neural", { limit: 3 })).toHaveLength(3);
  });

  test("returns results with scores and snippets", async () => {
    const root = await makeTempVault();
    await writeArticles(root, [
      ["concepts/test.md", "Test Article", "This is a test article about knowledge compilation."],
    ]);

    const index = await buildWikiIndex(root);
    const results = index.search("knowledge compilation");
    expect(results.length).toBeGreaterThan(0);

    const first = results[0]!;
    expect(first.score).toBeGreaterThan(0);
    expect(first.snippet).toBeTruthy();
    expect(first.path).toContain("test.md");
  });

  test("handles empty index gracefully", async () => {
    const root = await makeTempVault();
    const index = await buildWikiIndex(root);

    expect(index.search("anything")).toHaveLength(0);
  });

  test("handles empty query gracefully", async () => {
    const root = await makeTempVault();
    await writeArticles(root, [["concepts/test.md", "Test", "Content."]]);

    const index = await buildWikiIndex(root);

    expect(index.search("")).toHaveLength(0);
  });

  test("save and load round-trip preserves search ability", async () => {
    const root = await makeTempVault();
    await writeArticles(root, [
      [
        "concepts/ai.md",
        "Artificial Intelligence",
        "AI is the simulation of human intelligence by machines.",
      ],
    ]);

    // Persist an index built from the wiki...
    const original = await buildWikiIndex(root);
    await original.save(root);

    // ...then restore it into a brand-new instance.
    const restored = new SearchIndex();
    const loaded = await restored.load(root);
    expect(loaded).toBe(true);
    expect(restored.documentCount).toBe(1);

    const results = restored.search("artificial intelligence");
    expect(results.length).toBeGreaterThan(0);
    const first = results[0]!;
    expect(first.title).toBe("Artificial Intelligence");
  });

  test("load returns false for missing index", async () => {
    const root = await makeTempVault();

    const loaded = await new SearchIndex().load(root);

    expect(loaded).toBe(false);
  });

  test("skips INDEX.md and GRAPH.md", async () => {
    const root = await makeTempVault();
    await writeWiki(root, "INDEX.md", "# Index\nindex content");
    await writeWiki(root, "GRAPH.md", "# Graph\ngraph content");
    await writeWiki(root, "concepts/real.md", articleMd("Real Article", "Actual content."));

    const index = await buildWikiIndex(root);

    expect(index.documentCount).toBe(1);
  });

  test("title gets boosted in ranking", async () => {
    const root = await makeTempVault();

    await writeArticles(root, [
      // "transformer" appears in the title.
      ["concepts/transformer.md", "Transformer", "A neural network architecture."],
      // "transformer" appears only in the body.
      [
        "concepts/overview.md",
        "Deep Learning Overview",
        "Various architectures include the transformer and others.",
      ],
    ]);

    const index = await buildWikiIndex(root);
    const results = index.search("transformer");
    expect(results.length).toBe(2);

    // The title match is expected to outrank the body-only match.
    const first = results[0]!;
    expect(first.title).toBe("Transformer");
  });
});
|