ralph-hero-knowledge-index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/.claude-plugin/plugin.json +21 -0
  2. package/.mcp.json +12 -0
  3. package/dist/db.d.ts +30 -0
  4. package/dist/db.js +73 -0
  5. package/dist/db.js.map +1 -0
  6. package/dist/embedder.d.ts +4 -0
  7. package/dist/embedder.js +24 -0
  8. package/dist/embedder.js.map +1 -0
  9. package/dist/hybrid-search.d.ts +13 -0
  10. package/dist/hybrid-search.js +85 -0
  11. package/dist/hybrid-search.js.map +1 -0
  12. package/dist/index.d.ts +14 -0
  13. package/dist/index.js +64 -0
  14. package/dist/index.js.map +1 -0
  15. package/dist/parser.d.ts +18 -0
  16. package/dist/parser.js +39 -0
  17. package/dist/parser.js.map +1 -0
  18. package/dist/reindex.d.ts +1 -0
  19. package/dist/reindex.js +77 -0
  20. package/dist/reindex.js.map +1 -0
  21. package/dist/search.d.ts +23 -0
  22. package/dist/search.js +63 -0
  23. package/dist/search.js.map +1 -0
  24. package/dist/traverse.d.ts +22 -0
  25. package/dist/traverse.js +91 -0
  26. package/dist/traverse.js.map +1 -0
  27. package/dist/vector-search.d.ts +15 -0
  28. package/dist/vector-search.js +52 -0
  29. package/dist/vector-search.js.map +1 -0
  30. package/package.json +27 -0
  31. package/src/__tests__/db.test.ts +51 -0
  32. package/src/__tests__/hybrid-search.test.ts +112 -0
  33. package/src/__tests__/index.test.ts +8 -0
  34. package/src/__tests__/parser.test.ts +100 -0
  35. package/src/__tests__/search.test.ts +92 -0
  36. package/src/__tests__/traverse.test.ts +115 -0
  37. package/src/__tests__/vector-search.test.ts +66 -0
  38. package/src/db.ts +103 -0
  39. package/src/embedder.ts +37 -0
  40. package/src/hybrid-search.ts +102 -0
  41. package/src/index.ts +76 -0
  42. package/src/parser.ts +63 -0
  43. package/src/reindex.ts +89 -0
  44. package/src/search.ts +92 -0
  45. package/src/traverse.ts +130 -0
  46. package/src/vector-search.ts +64 -0
  47. package/tsconfig.json +17 -0
@@ -0,0 +1,66 @@
1
+ import { describe, it, expect, beforeEach } from "vitest";
2
+ import { KnowledgeDB } from "../db.js";
3
+ import { VectorSearch } from "../vector-search.js";
4
+
5
+ let db: KnowledgeDB;
6
+ let vecSearch: VectorSearch;
7
+
8
/**
 * Builds a deterministic 384-dimensional test vector for `seed`.
 *
 * Component `i` starts as `sin(seed * (i + 1) * 0.1)`; the vector is then
 * scaled to unit length unless its norm is zero (e.g. `seed === 0`), in which
 * case it is returned unscaled.
 */
function mockEmbedding(seed: number): Float32Array {
  const raw = new Float32Array(384).map((_, i) => Math.sin(seed * (i + 1) * 0.1));

  // Accumulate in index order so the result matches a plain running sum.
  const length = Math.sqrt(raw.reduce((sum, value) => sum + value * value, 0));

  if (!(length > 0)) {
    return raw;
  }
  return raw.map((value) => value / length);
}
20
+
21
// Each test runs against a fresh in-memory database holding two documents,
// with one mock embedding stored per document.
beforeEach(() => {
  db = new KnowledgeDB(":memory:");
  vecSearch = new VectorSearch(db);
  vecSearch.createIndex();

  const fixtures = [
    {
      id: "doc-1",
      path: "p1",
      title: "Cache Strategy",
      date: "2026-03-08",
      type: "research",
      status: "draft",
      githubIssue: 100,
      content: "caching",
    },
    {
      id: "doc-2",
      path: "p2",
      title: "Auth Tokens",
      date: "2026-03-07",
      type: "plan",
      status: "draft",
      githubIssue: 200,
      content: "auth",
    },
  ];
  for (const fixture of fixtures) {
    db.upsertDocument(fixture);
  }

  // Document id paired with the seed used for its mock embedding.
  const seeds: Array<[string, number]> = [
    ["doc-1", 1],
    ["doc-2", 5],
  ];
  for (const [docId, seed] of seeds) {
    vecSearch.upsertEmbedding(docId, mockEmbedding(seed));
  }
});
48
+
49
// All queries use the same vector that was stored for "doc-1".
describe("VectorSearch", () => {
  it("finds nearest document by vector similarity", () => {
    const hits = vecSearch.search(mockEmbedding(1), 5);
    expect(hits.length).toBeGreaterThanOrEqual(1);
    const nearest = hits[0];
    expect(nearest.id).toBe("doc-1");
  });

  it("returns distance scores", () => {
    const hits = vecSearch.search(mockEmbedding(1), 5);
    const closest = hits[0];
    expect(typeof closest.distance).toBe("number");
    // Results are expected closest-first.
    const runnerUp = hits[1];
    expect(closest.distance).toBeLessThan(runnerUp.distance);
  });

  it("respects limit", () => {
    const hits = vecSearch.search(mockEmbedding(1), 1);
    expect(hits).toHaveLength(1);
  });
});
package/src/db.ts ADDED
@@ -0,0 +1,103 @@
1
+ import Database from "better-sqlite3";
2
+ import type { Database as DatabaseType } from "better-sqlite3";
3
+
4
/**
 * A row of the `documents` table in the shape used by
 * `KnowledgeDB.upsertDocument` and `KnowledgeDB.getDocument`.
 */
export interface DocumentRow {
  /** Primary key of the document. */
  id: string;
  path: string;
  title: string;
  date: string | null;
  type: string | null;
  status: string | null;
  /** Stored in the `github_issue` column. */
  githubIssue: number | null;
  content: string;
}
14
+
15
/**
 * A row of the `relationships` table, with `source_id` / `target_id`
 * exposed as camelCase properties.
 */
export interface RelationshipRow {
  sourceId: string;
  targetId: string;
  /** Relationship type as stored in the `type` column. */
  type: string;
}
20
+
21
/**
 * Thin wrapper around a better-sqlite3 database holding documents, their
 * tags, and typed relationships between documents.
 */
export class KnowledgeDB {
  /** Underlying connection; exposed so other modules can run their own SQL. */
  readonly db: DatabaseType;

  /**
   * Opens (or creates) the database at `dbPath`, switches the journal mode to
   * WAL, and makes sure the schema exists.
   */
  constructor(dbPath: string) {
    this.db = new Database(dbPath);
    this.db.pragma("journal_mode = WAL");
    this.createSchema();
  }

  /** Creates the `documents`, `tags` and `relationships` tables and their indexes if missing. */
  private createSchema(): void {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS documents (
        id TEXT PRIMARY KEY,
        path TEXT NOT NULL,
        title TEXT,
        date TEXT,
        type TEXT,
        status TEXT,
        github_issue INTEGER,
        content TEXT
      );
      CREATE TABLE IF NOT EXISTS tags (
        doc_id TEXT REFERENCES documents(id) ON DELETE CASCADE,
        tag TEXT,
        PRIMARY KEY (doc_id, tag)
      );
      CREATE TABLE IF NOT EXISTS relationships (
        source_id TEXT REFERENCES documents(id) ON DELETE CASCADE,
        target_id TEXT,
        type TEXT CHECK(type IN ('builds_on', 'tensions', 'superseded_by')),
        PRIMARY KEY (source_id, target_id, type)
      );
      CREATE INDEX IF NOT EXISTS idx_rel_target ON relationships(target_id, type);
      CREATE INDEX IF NOT EXISTS idx_tags_tag ON tags(tag);
    `);
  }

  /** Inserts `doc`, or overwrites every column of the existing row with the same `id`. */
  upsertDocument(doc: DocumentRow): void {
    this.db.prepare(`
      INSERT INTO documents (id, path, title, date, type, status, github_issue, content)
      VALUES (@id, @path, @title, @date, @type, @status, @githubIssue, @content)
      ON CONFLICT(id) DO UPDATE SET
        path = @path, title = @title, date = @date, type = @type,
        status = @status, github_issue = @githubIssue, content = @content
    `).run(doc);
  }

  /** Returns the document with the given `id`, or `undefined` when there is none. */
  getDocument(id: string): DocumentRow | undefined {
    return this.db.prepare(
      `SELECT id, path, title, date, type, status, github_issue AS githubIssue, content FROM documents WHERE id = ?`
    ).get(id) as DocumentRow | undefined;
  }

  /**
   * Replaces the full tag set of `docId` with `tags`.
   *
   * Repeated tags are collapsed first because `(doc_id, tag)` is the primary
   * key, and the delete + inserts run in one transaction so a failure cannot
   * leave the document with a partially written tag set.
   */
  setTags(docId: string, tags: string[]): void {
    const uniqueTags = [...new Set(tags)];
    const remove = this.db.prepare("DELETE FROM tags WHERE doc_id = ?");
    const insert = this.db.prepare("INSERT INTO tags (doc_id, tag) VALUES (?, ?)");
    const replaceAll = this.db.transaction(() => {
      remove.run(docId);
      for (const tag of uniqueTags) insert.run(docId, tag);
    });
    replaceAll();
  }

  /** Returns the tags of `docId`, ordered by the `tag` column. */
  getTags(docId: string): string[] {
    return (this.db.prepare("SELECT tag FROM tags WHERE doc_id = ? ORDER BY tag").all(docId) as Array<{ tag: string }>).map(r => r.tag);
  }

  /** Records a relationship edge; an identical existing edge is left untouched (`INSERT OR IGNORE`). */
  addRelationship(sourceId: string, targetId: string, type: string): void {
    this.db.prepare("INSERT OR IGNORE INTO relationships (source_id, target_id, type) VALUES (?, ?, ?)").run(sourceId, targetId, type);
  }

  /** Returns all edges whose source is `sourceId`. */
  getRelationshipsFrom(sourceId: string): RelationshipRow[] {
    return this.db.prepare("SELECT source_id AS sourceId, target_id AS targetId, type FROM relationships WHERE source_id = ?").all(sourceId) as RelationshipRow[];
  }

  /** Returns all edges whose target is `targetId`. */
  getRelationshipsTo(targetId: string): RelationshipRow[] {
    return this.db.prepare("SELECT source_id AS sourceId, target_id AS targetId, type FROM relationships WHERE target_id = ?").all(targetId) as RelationshipRow[];
  }

  /** Deletes every row from `relationships`, `tags` and `documents` (in that order). */
  clearAll(): void {
    this.db.exec("DELETE FROM relationships; DELETE FROM tags; DELETE FROM documents;");
  }

  /** Closes the underlying connection. */
  close(): void {
    this.db.close();
  }
}
@@ -0,0 +1,37 @@
1
+ import {
2
+ pipeline,
3
+ type FeatureExtractionPipeline,
4
+ } from "@huggingface/transformers";
5
+
6
+ const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
7
+ const MAX_CHARS = 500;
8
+
9
/** Pending or completed pipeline load, shared by every caller of `getEmbedder`. */
let embedderPromise: Promise<FeatureExtractionPipeline> | null = null;

/**
 * Returns the shared feature-extraction pipeline for `MODEL_ID`, creating it
 * on first use.
 *
 * The promise itself is cached, so calls that arrive while the first load is
 * still in flight wait for that load instead of each starting their own.
 * If loading fails the cache is cleared, so a later call can try again.
 *
 * @returns the pipeline instance
 */
export async function getEmbedder(): Promise<FeatureExtractionPipeline> {
  if (!embedderPromise) {
    const loading = (async () => {
      // @ts-expect-error pipeline() overload union is too complex for TS
      return (await pipeline(
        "feature-extraction",
        MODEL_ID
      )) as FeatureExtractionPipeline;
    })();
    embedderPromise = loading.catch((err: unknown) => {
      // Do not keep a rejected promise around; allow a retry.
      embedderPromise = null;
      throw err;
    });
  }
  return embedderPromise;
}
21
+
22
/**
 * Embeds `text` with the shared pipeline, using mean pooling and
 * normalisation. Only the first `MAX_CHARS` characters are passed to the model.
 *
 * @returns a copy of the pipeline output's `data` as a `Float32Array`
 */
export async function embed(text: string): Promise<Float32Array> {
  const extractor = await getEmbedder();
  const input = text.slice(0, MAX_CHARS);
  const options = { pooling: "mean", normalize: true } as const;
  const { data } = await extractor(input, options);
  return new Float32Array(data as ArrayLike<number>);
}
31
+
32
/**
 * Joins `title` and `content` with a newline and keeps at most the first
 * `MAX_CHARS` characters of the result.
 */
export function prepareTextForEmbedding(title: string, content: string): string {
  const combined = `${title}\n${content}`;
  return combined.slice(0, MAX_CHARS);
}
@@ -0,0 +1,102 @@
1
+ import type { KnowledgeDB } from "./db.js";
2
+ import type { FtsSearch, SearchOptions, SearchResult } from "./search.js";
3
+ import type { VectorSearch } from "./vector-search.js";
4
+
5
+ export type EmbedFn = (text: string) => Promise<Float32Array>;
6
+
7
/**
 * Combines full-text and vector search results with reciprocal rank fusion:
 * every hit contributes `1 / (RRF_K + rank)` (rank starting at 1) to its
 * document's score, and scores from both result lists are summed.
 */
export class HybridSearch {
  private static readonly RRF_K = 60;

  private readonly db: KnowledgeDB;
  private readonly fts: FtsSearch;
  private readonly vec: VectorSearch;
  private readonly embedFn: EmbedFn;

  constructor(db: KnowledgeDB, fts: FtsSearch, vec: VectorSearch, embedFn: EmbedFn) {
    this.db = db;
    this.fts = fts;
    this.vec = vec;
    this.embedFn = embedFn;
  }

  /**
   * Runs both searches for `query`, fuses their rankings, then applies the
   * superseded / type / tag filters and the limit.
   *
   * Each underlying search is asked for `limit * 2` candidates. Filters are
   * applied after fusion, so fewer than `limit` results may be returned.
   *
   * @param query text handed to the full-text search and to the embedding function
   * @param options filters and limit; `includeSuperseded` defaults to false, `limit` to 20
   * @returns results sorted by fused score, highest first
   */
  async search(query: string, options: SearchOptions = {}): Promise<SearchResult[]> {
    const { type, tags, includeSuperseded = false, limit = 20 } = options;
    const candidateCount = limit * 2;

    // Superseded documents are kept here and filtered after fusion instead.
    const keywordHits = this.fts.search(query, {
      includeSuperseded: true,
      limit: candidateCount,
    });

    const queryVector = await this.embedFn(query);
    const semanticHits = this.vec.search(queryVector, candidateCount);

    // Fused score per document id; keyword hits are inserted first.
    const fused = new Map<string, number>();
    const accumulate = (ranked: ReadonlyArray<{ id: string }>): void => {
      ranked.forEach(({ id }, position) => {
        const contribution = 1 / (HybridSearch.RRF_K + position + 1);
        fused.set(id, (fused.get(id) ?? 0) + contribution);
      });
    };
    accumulate(keywordHits);
    accumulate(semanticHits);

    const keywordById = new Map<string, SearchResult>(
      keywordHits.map((hit): [string, SearchResult] => [hit.id, hit]),
    );

    const merged: SearchResult[] = [];
    for (const [id, score] of fused) {
      const keywordHit = keywordById.get(id);
      if (keywordHit) {
        merged.push({ ...keywordHit, score });
        continue;
      }
      // Found only by vector search: load metadata from the database; no snippet is available.
      const doc = this.db.getDocument(id);
      if (!doc) continue;
      merged.push({
        id: doc.id,
        path: doc.path,
        title: doc.title,
        type: doc.type,
        status: doc.status,
        date: doc.date,
        score,
        snippet: "",
      });
    }

    merged.sort((left, right) => right.score - left.score);

    const wantedTags = tags && tags.length > 0 ? new Set(tags) : null;
    const visible = merged.filter((result) => {
      if (!includeSuperseded && result.status === "superseded") return false;
      if (type && result.type !== type) return false;
      if (wantedTags === null) return true;
      // A document passes when it carries at least one of the requested tags.
      return this.db.getTags(result.id).some((tag) => wantedTags.has(tag));
    });

    return visible.slice(0, limit);
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,76 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { z } from "zod";
4
+ import { KnowledgeDB } from "./db.js";
5
+ import { FtsSearch } from "./search.js";
6
+ import { VectorSearch } from "./vector-search.js";
7
+ import { HybridSearch } from "./hybrid-search.js";
8
+ import { Traverser } from "./traverse.js";
9
+ import { embed } from "./embedder.js";
10
+
11
/**
 * Builds the MCP server together with the database and search components it
 * uses, and registers the `knowledge_search` and `knowledge_traverse` tools.
 *
 * Tool handlers never throw: failures are returned as a text result with
 * `isError: true`.
 *
 * @param dbPath path passed to `KnowledgeDB`
 * @returns the server plus every component created for it
 */
export function createServer(dbPath: string) {
  const server = new McpServer({ name: "ralph-hero-knowledge", version: "0.1.0" });
  const db = new KnowledgeDB(dbPath);
  const fts = new FtsSearch(db);
  const vec = new VectorSearch(db);
  const hybrid = new HybridSearch(db, fts, vec, embed);
  const traverser = new Traverser(db);

  // Successful tool result: the payload pretty-printed as JSON text.
  const jsonResult = (payload: unknown) => ({
    content: [{ type: "text" as const, text: JSON.stringify(payload, null, 2) }],
  });

  // Failed tool result; thrown values that are not Error instances are stringified.
  const errorResult = (e: unknown) => ({
    content: [{ type: "text" as const, text: `Error: ${e instanceof Error ? e.message : String(e)}` }],
    isError: true,
  });

  server.tool(
    "knowledge_search",
    "Search the knowledge base by keyword, semantic similarity, and tags. Returns ranked documents.",
    {
      query: z.string().describe("Search query (keywords or natural language)"),
      tags: z.array(z.string()).optional().describe("Filter by tags"),
      type: z.string().optional().describe("Filter by document type (research, plan, review, idea, report)"),
      // A result count must be a positive integer; zero, negative or fractional values are rejected.
      limit: z.number().int().positive().optional().describe("Max results (default: 10)"),
      includeSuperseded: z.boolean().optional().describe("Include superseded documents (default: false)"),
    },
    async (args) => {
      try {
        const results = await hybrid.search(args.query, {
          tags: args.tags,
          type: args.type,
          limit: args.limit ?? 10,
          includeSuperseded: args.includeSuperseded,
        });
        // Attach each document's tags to its result.
        const enriched = results.map(r => ({ ...r, tags: db.getTags(r.id) }));
        return jsonResult(enriched);
      } catch (e) {
        return errorResult(e);
      }
    },
  );

  server.tool(
    "knowledge_traverse",
    "Walk typed relationship edges (builds_on, tensions, superseded_by) from a document.",
    {
      from: z.string().describe("Document ID (filename without extension)"),
      type: z.enum(["builds_on", "tensions", "superseded_by"]).optional().describe("Filter by relationship type"),
      depth: z.number().optional().describe("Max traversal depth (default: 3)"),
      direction: z.enum(["outgoing", "incoming"]).optional().describe("Edge direction (default: outgoing)"),
    },
    async (args) => {
      try {
        const opts = { type: args.type, depth: args.depth ?? 3 };
        const results = args.direction === "incoming"
          ? traverser.traverseIncoming(args.from, opts)
          : traverser.traverse(args.from, opts);
        return jsonResult(results);
      } catch (e) {
        return errorResult(e);
      }
    },
  );

  return { server, db, fts, vec, hybrid, traverser };
}
69
+
70
// Start the stdio server only when the script being run ends in "index.js".
// The database path comes from RALPH_KNOWLEDGE_DB, falling back to "knowledge.db".
const entryScript = process.argv[1];
if (entryScript?.endsWith("index.js")) {
  const { server } = createServer(process.env.RALPH_KNOWLEDGE_DB ?? "knowledge.db");
  server.connect(new StdioServerTransport()).catch(console.error);
}
package/src/parser.ts ADDED
@@ -0,0 +1,63 @@
1
+ import { parse as parseYaml } from "yaml";
2
+
3
/** A typed edge from one document to another, as extracted by `parseDocument`. */
export interface Relationship {
  /** Id of the document the edge was found in. */
  sourceId: string;
  /** Text taken from inside the `[[...]]` wikilink. */
  targetId: string;
  type: "builds_on" | "tensions" | "superseded_by";
}
8
+
9
/** Result of `parseDocument`: frontmatter metadata, relationships and the body text. */
export interface ParsedDocument {
  /** Id passed to `parseDocument`. */
  id: string;
  /** Path passed to `parseDocument`. */
  path: string;
  /** First `# ` heading of the body, or the id when there is none. */
  title: string;
  date: string | null;
  type: string | null;
  status: string | null;
  /** Frontmatter `github_issue`, kept only when it is a number. */
  githubIssue: number | null;
  tags: string[];
  relationships: Relationship[];
  /** Document text with the frontmatter removed and surrounding whitespace trimmed. */
  content: string;
}
21
+
22
// Leading `---` fenced block; accepts LF or CRLF line endings and an optional BOM.
const FRONTMATTER_RE = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/;
// First level-one markdown heading.
const TITLE_RE = /^# (.+)$/m;
// List items of the form `- builds_on:: [[target]]` / `- tensions:: [[target]]`.
const WIKILINK_REL_RE = /^- (builds_on|tensions):: \[\[(.+?)\]\]/gm;
const SUPERSEDED_BY_RE = /\[\[(.+?)\]\]/;

/**
 * Reduces the inside of a wikilink to the linked document id by dropping a
 * `|alias` and a `#heading` / `#^block` anchor and trimming whitespace.
 */
function wikilinkTarget(inner: string): string {
  return inner.replace(/[|#].*$/, "").trim();
}

/** Keeps strings as-is, stringifies numbers and booleans, and maps everything else to null. */
function scalarOrNull(value: unknown): string | null {
  if (typeof value === "string") return value;
  if (typeof value === "number" || typeof value === "boolean") return String(value);
  return null;
}

/**
 * Extracts the wikilink text from a `superseded_by` frontmatter value.
 * Handles a string containing `[[target]]` and the nested one-element
 * sequence that YAML produces for an unquoted `[[target]]`.
 */
function supersededByLink(value: unknown): string | null {
  if (typeof value === "string") {
    const found = value.match(SUPERSEDED_BY_RE);
    return found ? found[1] : null;
  }
  if (Array.isArray(value) && value.length === 1) {
    const inner: unknown = value[0];
    if (Array.isArray(inner) && inner.length === 1 && typeof inner[0] === "string") {
      return inner[0];
    }
  }
  return null;
}

/**
 * Parses a markdown document with optional YAML frontmatter.
 *
 * - Title: first `# ` heading in the body, otherwise `id`.
 * - Relationships: `builds_on` / `tensions` list items in the body, plus
 *   `superseded_by` from the frontmatter. Link aliases and anchors are
 *   stripped from targets; links whose target ends up empty are skipped.
 * - `type` / `status`: only scalar frontmatter values are kept (as strings).
 *
 * Errors raised by the YAML parser are not caught here.
 *
 * @param id document id, also used as `sourceId` of every relationship
 * @param path stored unchanged on the result
 * @param raw full file contents
 */
export function parseDocument(id: string, path: string, raw: string): ParsedDocument {
  const fmMatch = raw.match(FRONTMATTER_RE);
  const parsedFm: unknown = fmMatch ? parseYaml(fmMatch[1]) : null;
  // Frontmatter that is not a mapping (scalar, list, empty) is treated as absent.
  const frontmatter: Record<string, unknown> =
    typeof parsedFm === "object" && parsedFm !== null && !Array.isArray(parsedFm)
      ? (parsedFm as Record<string, unknown>)
      : {};
  const body = fmMatch ? raw.slice(fmMatch[0].length).trim() : raw.trim();
  const titleMatch = body.match(TITLE_RE);
  const title = titleMatch ? titleMatch[1].trim() : id;

  const relationships: Relationship[] = [];
  // Fresh regex per call: the shared constant is global and would carry lastIndex state.
  const relRe = new RegExp(WIKILINK_REL_RE.source, "gm");
  let match: RegExpExecArray | null;
  while ((match = relRe.exec(body)) !== null) {
    const targetId = wikilinkTarget(match[2]);
    if (targetId === "") continue;
    relationships.push({
      sourceId: id,
      targetId,
      type: match[1] as "builds_on" | "tensions",
    });
  }

  const supersededLink = supersededByLink(frontmatter.superseded_by);
  if (supersededLink !== null) {
    const targetId = wikilinkTarget(supersededLink);
    if (targetId !== "") {
      relationships.push({ sourceId: id, targetId, type: "superseded_by" });
    }
  }

  const tags: string[] = Array.isArray(frontmatter.tags) ? frontmatter.tags.map(String) : [];

  return {
    id, path, title,
    date: frontmatter.date ? String(frontmatter.date) : null,
    type: scalarOrNull(frontmatter.type),
    status: scalarOrNull(frontmatter.status),
    githubIssue: typeof frontmatter.github_issue === "number" ? frontmatter.github_issue : null,
    tags, relationships, content: body,
  };
}
package/src/reindex.ts ADDED
@@ -0,0 +1,89 @@
1
+ import { readFileSync, readdirSync } from "node:fs";
2
+ import { join, relative, basename } from "node:path";
3
+ import { KnowledgeDB } from "./db.js";
4
+ import { FtsSearch } from "./search.js";
5
+ import { VectorSearch } from "./vector-search.js";
6
+ import { embed, prepareTextForEmbedding } from "./embedder.js";
7
+ import { parseDocument } from "./parser.js";
8
+
9
/**
 * Recursively lists every `.md` file under `dir`.
 *
 * Directories whose name starts with "." are not entered. Entries are
 * visited in the order `readdirSync` returns them, descending into each
 * directory as soon as it is encountered.
 */
function findMarkdownFiles(dir: string): string[] {
  const collect = (current: string): string[] =>
    readdirSync(current, { withFileTypes: true }).flatMap((entry) => {
      const entryPath = join(current, entry.name);
      if (entry.isDirectory() && !entry.name.startsWith(".")) {
        return collect(entryPath);
      }
      const isMarkdown = entry.isFile() && entry.name.endsWith(".md");
      return isMarkdown ? [entryPath] : [];
    });

  return collect(dir);
}
24
+
25
/**
 * Rebuilds the whole knowledge index at `dbPath` from the markdown files
 * under `thoughtsDir`.
 *
 * Existing rows and the vector index are cleared first. Each file is then
 * parsed, stored with its tags and relationships, and embedded. A failed
 * embedding is logged and skipped. The full-text index is rebuilt at the end.
 * The database is closed even when indexing fails part-way.
 *
 * Document ids are file names without `.md`, so two files with the same name
 * in different directories share one id; a warning is logged when that happens.
 */
async function reindex(thoughtsDir: string, dbPath: string): Promise<void> {
  console.log(`Indexing ${thoughtsDir} -> ${dbPath}`);

  const db = new KnowledgeDB(dbPath);
  try {
    const fts = new FtsSearch(db);
    const vec = new VectorSearch(db);
    vec.createIndex();

    db.clearAll();
    vec.dropIndex();
    vec.createIndex();

    const files = findMarkdownFiles(thoughtsDir);
    console.log(`Found ${files.length} markdown files`);

    // id -> relative path of the file that first used it.
    const seenIds = new Map<string, string>();

    let indexed = 0;
    for (const filePath of files) {
      const raw = readFileSync(filePath, "utf-8");
      // Stored paths are relative to the parent of thoughtsDir.
      const relPath = relative(join(thoughtsDir, ".."), filePath);
      const id = basename(filePath, ".md");

      const earlierPath = seenIds.get(id);
      if (earlierPath !== undefined) {
        console.warn(`Duplicate document id "${id}": ${relPath} collides with ${earlierPath}`);
      } else {
        seenIds.set(id, relPath);
      }

      const parsed = parseDocument(id, relPath, raw);

      db.upsertDocument({
        id: parsed.id,
        path: parsed.path,
        title: parsed.title,
        date: parsed.date,
        type: parsed.type,
        status: parsed.status,
        githubIssue: parsed.githubIssue,
        content: parsed.content,
      });

      if (parsed.tags.length > 0) {
        db.setTags(parsed.id, parsed.tags);
      }

      for (const rel of parsed.relationships) {
        db.addRelationship(rel.sourceId, rel.targetId, rel.type);
      }

      const text = prepareTextForEmbedding(parsed.title, parsed.content);
      try {
        const embedding = await embed(text);
        vec.upsertEmbedding(parsed.id, embedding);
      } catch (e) {
        // Best effort: the document stays indexed without an embedding.
        const reason = e instanceof Error ? e.message : String(e);
        console.warn(`Failed to embed ${id}: ${reason}`);
      }

      indexed++;
      if (indexed % 50 === 0) {
        console.log(`  ${indexed}/${files.length} indexed`);
      }
    }

    fts.rebuildIndex();

    console.log(`Done. ${indexed} documents indexed.`);
  } finally {
    db.close();
  }
}
86
+
87
// CLI entry: `reindex [thoughtsDir] [dbPath]`.
const thoughtsDir = process.argv[2] ?? "../../thoughts";
const dbPath = process.argv[3] ?? "knowledge.db";
reindex(thoughtsDir, dbPath).catch((err: unknown) => {
  console.error(err);
  // Report the failure to the caller instead of exiting with status 0.
  process.exitCode = 1;
});
package/src/search.ts ADDED
@@ -0,0 +1,92 @@
1
+ import type { KnowledgeDB } from "./db.js";
2
+
3
/** Optional filters and limit accepted by the search classes. */
export interface SearchOptions {
  /** Keep only documents of this type. */
  type?: string;
  /** Keep only documents carrying at least one of these tags. */
  tags?: string[];
  /** When not true, documents with status "superseded" are excluded. */
  includeSuperseded?: boolean;
  /** Maximum number of results. */
  limit?: number;
}
9
+
10
/** One ranked document returned by a search. */
export interface SearchResult {
  id: string;
  path: string;
  title: string;
  type: string | null;
  status: string | null;
  date: string | null;
  /** Ranking value; its meaning depends on the search that produced the result. */
  score: number;
  /** Highlighted excerpt of the content; may be an empty string. */
  snippet: string;
}
20
+
21
/** Full-text search over the `documents` table using an FTS5 virtual table. */
export class FtsSearch {
  private readonly db: KnowledgeDB;

  constructor(db: KnowledgeDB) {
    this.db = db;
  }

  /**
   * Drops and recreates `documents_fts` (columns: title, path, content) and
   * fills it from the current contents of `documents`.
   */
  rebuildIndex(): void {
    this.db.db.exec(`DROP TABLE IF EXISTS documents_fts`);
    this.db.db.exec(`
      CREATE VIRTUAL TABLE documents_fts USING fts5(
        title,
        path,
        content,
        content='documents',
        content_rowid='rowid'
      )
    `);
    this.db.db.exec(`
      INSERT INTO documents_fts(rowid, title, path, content)
      SELECT rowid, title, path, content FROM documents
    `);
  }

  /**
   * Runs `query` as an FTS5 MATCH expression and returns matching documents
   * ordered by `rank` ascending, with a highlighted snippet of the content
   * column.
   *
   * The query string is passed to MATCH unchanged, so it is interpreted with
   * FTS5 query syntax. `rebuildIndex` must have created `documents_fts`.
   *
   * @param query FTS5 match expression
   * @param options `includeSuperseded` defaults to false, `limit` to 20;
   *   `tags` keeps documents that carry at least one of the given tags
   */
  search(query: string, options: SearchOptions = {}): SearchResult[] {
    const { type, tags, includeSuperseded = false, limit = 20 } = options;

    const conditions: string[] = ["documents_fts MATCH @query"];
    const params: Record<string, unknown> = { query, limit };

    if (!includeSuperseded) {
      // IS NOT also keeps rows whose status is NULL.
      conditions.push("d.status IS NOT 'superseded'");
    }

    if (type) {
      conditions.push("d.type = @type");
      params.type = type;
    }

    if (tags && tags.length > 0) {
      const tagPlaceholders = tags.map((_, i) => `@tag${i}`);
      // EXISTS instead of JOIN: a document matching several of the requested
      // tags must still produce a single result row.
      conditions.push(
        `EXISTS (SELECT 1 FROM tags t WHERE t.doc_id = d.id AND t.tag IN (${tagPlaceholders.join(", ")}))`
      );
      tags.forEach((tag, i) => {
        params[`tag${i}`] = tag;
      });
    }

    const whereClause = conditions.join(" AND ");

    const sql = `
      SELECT
        d.id,
        d.path,
        d.title,
        d.type,
        d.status,
        d.date,
        rank AS score,
        snippet(documents_fts, 2, '<b>', '</b>', '...', 32) AS snippet
      FROM documents_fts
      JOIN documents d ON d.rowid = documents_fts.rowid
      WHERE ${whereClause}
      ORDER BY rank ASC
      LIMIT @limit
    `;

    return this.db.db.prepare(sql).all(params) as SearchResult[];
  }
}