npm - ralph-hero-knowledge-index - Versions diffs - 0.1.0 - Mend

ralph-hero-knowledge-index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/.claude-plugin/plugin.json +21 -0
package/.mcp.json +12 -0
package/dist/db.d.ts +30 -0
package/dist/db.js +73 -0
package/dist/db.js.map +1 -0
package/dist/embedder.d.ts +4 -0
package/dist/embedder.js +24 -0
package/dist/embedder.js.map +1 -0
package/dist/hybrid-search.d.ts +13 -0
package/dist/hybrid-search.js +85 -0
package/dist/hybrid-search.js.map +1 -0
package/dist/index.d.ts +14 -0
package/dist/index.js +64 -0
package/dist/index.js.map +1 -0
package/dist/parser.d.ts +18 -0
package/dist/parser.js +39 -0
package/dist/parser.js.map +1 -0
package/dist/reindex.d.ts +1 -0
package/dist/reindex.js +77 -0
package/dist/reindex.js.map +1 -0
package/dist/search.d.ts +23 -0
package/dist/search.js +63 -0
package/dist/search.js.map +1 -0
package/dist/traverse.d.ts +22 -0
package/dist/traverse.js +91 -0
package/dist/traverse.js.map +1 -0
package/dist/vector-search.d.ts +15 -0
package/dist/vector-search.js +52 -0
package/dist/vector-search.js.map +1 -0
package/package.json +27 -0
package/src/__tests__/db.test.ts +51 -0
package/src/__tests__/hybrid-search.test.ts +112 -0
package/src/__tests__/index.test.ts +8 -0
package/src/__tests__/parser.test.ts +100 -0
package/src/__tests__/search.test.ts +92 -0
package/src/__tests__/traverse.test.ts +115 -0
package/src/__tests__/vector-search.test.ts +66 -0
package/src/db.ts +103 -0
package/src/embedder.ts +37 -0
package/src/hybrid-search.ts +102 -0
package/src/index.ts +76 -0
package/src/parser.ts +63 -0
package/src/reindex.ts +89 -0
package/src/search.ts +92 -0
package/src/traverse.ts +130 -0
package/src/vector-search.ts +64 -0
package/tsconfig.json +17 -0

package/src/__tests__/vector-search.test.ts ADDED Viewed

@@ -0,0 +1,66 @@
+import { describe, it, expect, beforeEach } from "vitest";
+import { KnowledgeDB } from "../db.js";
+import { VectorSearch } from "../vector-search.js";
+let db: KnowledgeDB; // In-memory database; reassigned in beforeEach so tests do not share state.
+let vecSearch: VectorSearch; // Vector index over `db`; reassigned and seeded in beforeEach.
+/**
+ * Builds a deterministic 384-dimensional test vector derived from `seed`.
+ * Component i is sin(seed * (i + 1) * 0.1); the vector is then scaled to
+ * unit length unless its norm is zero.
+ */
+function mockEmbedding(seed: number): Float32Array {
+  const embedding = Float32Array.from({ length: 384 }, (_, idx) =>
+    Math.sin(seed * (idx + 1) * 0.1),
+  );
+  // Sum the squares of the stored float32 components in index order.
+  let sumOfSquares = 0;
+  for (const component of embedding) {
+    sumOfSquares += component * component;
+  }
+  const length = Math.sqrt(sumOfSquares);
+  if (length > 0) {
+    embedding.forEach((component, idx) => {
+      embedding[idx] = component / length;
+    });
+  }
+  return embedding;
+}
+// Before every test: open a fresh in-memory database, create the vector
+// index, store two documents and give each of them an embedding.
+beforeEach(() => {
+  db = new KnowledgeDB(":memory:");
+  vecSearch = new VectorSearch(db);
+  vecSearch.createIndex();
+  const fixtures = [
+    {
+      id: "doc-1",
+      path: "p1",
+      title: "Cache Strategy",
+      date: "2026-03-08",
+      type: "research",
+      status: "draft",
+      githubIssue: 100,
+      content: "caching",
+    },
+    {
+      id: "doc-2",
+      path: "p2",
+      title: "Auth Tokens",
+      date: "2026-03-07",
+      type: "plan",
+      status: "draft",
+      githubIssue: 200,
+      content: "auth",
+    },
+  ];
+  for (const fixture of fixtures) {
+    db.upsertDocument(fixture);
+  }
+  // Document id paired with the mockEmbedding seed used for it.
+  const embeddingSeeds: Array<[string, number]> = [
+    ["doc-1", 1],
+    ["doc-2", 5],
+  ];
+  for (const [docId, seed] of embeddingSeeds) {
+    vecSearch.upsertEmbedding(docId, mockEmbedding(seed));
+  }
+});
+describe("VectorSearch", () => {
+  // The query vector is identical to doc-1's stored embedding.
+  it("finds nearest document by vector similarity", () => {
+    const hits = vecSearch.search(mockEmbedding(1), 5);
+    expect(hits.length).toBeGreaterThanOrEqual(1);
+    const [closest] = hits;
+    expect(closest.id).toBe("doc-1");
+  });
+  // The first hit must carry a numeric distance smaller than the second hit's.
+  it("returns distance scores", () => {
+    const [closest, runnerUp] = vecSearch.search(mockEmbedding(1), 5);
+    expect(typeof closest.distance).toBe("number");
+    expect(closest.distance).toBeLessThan(runnerUp.distance);
+  });
+  // Two documents are indexed, but only one may be returned.
+  it("respects limit", () => {
+    const limited = vecSearch.search(mockEmbedding(1), 1);
+    expect(limited).toHaveLength(1);
+  });
+});

package/src/db.ts ADDED Viewed

@@ -0,0 +1,103 @@
+import Database from "better-sqlite3";
+import type { Database as DatabaseType } from "better-sqlite3";
+export interface DocumentRow { // One row of the `documents` table, exposed with camelCase field names.
+  id: string; // Primary key of the row.
+  path: string; // Stored in the NOT NULL `path` column.
+  title: string;
+  date: string | null;
+  type: string | null;
+  status: string | null;
+  githubIssue: number | null; // Persisted in the `github_issue` INTEGER column.
+  content: string;
+}
+export interface RelationshipRow { // One edge of the `relationships` table, exposed with camelCase field names.
+  sourceId: string; // `source_id` column; references documents(id).
+  targetId: string; // `target_id` column; declared without a foreign-key reference.
+  type: string; // The schema CHECK allows 'builds_on', 'tensions' or 'superseded_by'.
+}
+/**
+ * Thin wrapper around a better-sqlite3 database that stores documents,
+ * their tags, and typed relationships between documents.
+ */
+export class KnowledgeDB {
+  /** Underlying connection, exposed so other modules can run their own SQL. */
+  readonly db: DatabaseType;
+  /**
+   * Opens the database at `dbPath`, switches the journal to WAL mode and
+   * creates any missing tables and indexes.
+   */
+  constructor(dbPath: string) {
+    this.db = new Database(dbPath);
+    this.db.pragma("journal_mode = WAL");
+    this.createSchema();
+  }
+  /** Creates the documents, tags and relationships tables plus their indexes if absent. */
+  private createSchema(): void {
+    this.db.exec(`
+      CREATE TABLE IF NOT EXISTS documents (
+        id TEXT PRIMARY KEY,
+        path TEXT NOT NULL,
+        title TEXT,
+        date TEXT,
+        type TEXT,
+        status TEXT,
+        github_issue INTEGER,
+        content TEXT
+      );
+      CREATE TABLE IF NOT EXISTS tags (
+        doc_id TEXT REFERENCES documents(id) ON DELETE CASCADE,
+        tag TEXT,
+        PRIMARY KEY (doc_id, tag)
+      );
+      CREATE TABLE IF NOT EXISTS relationships (
+        source_id TEXT REFERENCES documents(id) ON DELETE CASCADE,
+        target_id TEXT,
+        type TEXT CHECK(type IN ('builds_on', 'tensions', 'superseded_by')),
+        PRIMARY KEY (source_id, target_id, type)
+      );
+      CREATE INDEX IF NOT EXISTS idx_rel_target ON relationships(target_id, type);
+      CREATE INDEX IF NOT EXISTS idx_tags_tag ON tags(tag);
+    `);
+  }
+  /** Inserts `doc`, or overwrites every column of the existing row with the same id. */
+  upsertDocument(doc: DocumentRow): void {
+    this.db.prepare(`
+      INSERT INTO documents (id, path, title, date, type, status, github_issue, content)
+      VALUES (@id, @path, @title, @date, @type, @status, @githubIssue, @content)
+      ON CONFLICT(id) DO UPDATE SET
+        path = @path, title = @title, date = @date, type = @type,
+        status = @status, github_issue = @githubIssue, content = @content
+    `).run(doc);
+  }
+  /** Returns the document with the given id, or undefined when no row matches. */
+  getDocument(id: string): DocumentRow | undefined {
+    return this.db.prepare(
+      `SELECT id, path, title, date, type, status, github_issue AS githubIssue, content FROM documents WHERE id = ?`
+    ).get(id) as DocumentRow | undefined;
+  }
+  /**
+   * Replaces all tags of `docId` with `tags`.
+   * Repeated entries in `tags` are collapsed, because (doc_id, tag) is the
+   * primary key and a duplicate insert would fail. The delete and the inserts
+   * run in one transaction, so a failure leaves the previous tags in place.
+   */
+  setTags(docId: string, tags: string[]): void {
+    const uniqueTags = Array.from(new Set(tags));
+    const removeAll = this.db.prepare("DELETE FROM tags WHERE doc_id = ?");
+    const insert = this.db.prepare("INSERT INTO tags (doc_id, tag) VALUES (?, ?)");
+    const replaceTags = this.db.transaction(() => {
+      removeAll.run(docId);
+      for (const tag of uniqueTags) insert.run(docId, tag);
+    });
+    replaceTags();
+  }
+  /** Returns the tags of `docId` in ascending order. */
+  getTags(docId: string): string[] {
+    const rows = this.db
+      .prepare("SELECT tag FROM tags WHERE doc_id = ? ORDER BY tag")
+      .all(docId) as Array<{ tag: string }>;
+    return rows.map((row) => row.tag);
+  }
+  /** Records an edge from `sourceId` to `targetId`; an identical existing edge is left untouched. */
+  addRelationship(sourceId: string, targetId: string, type: string): void {
+    this.db.prepare("INSERT OR IGNORE INTO relationships (source_id, target_id, type) VALUES (?, ?, ?)").run(sourceId, targetId, type);
+  }
+  /** Returns every edge whose source is `sourceId`. */
+  getRelationshipsFrom(sourceId: string): RelationshipRow[] {
+    return this.db.prepare("SELECT source_id AS sourceId, target_id AS targetId, type FROM relationships WHERE source_id = ?").all(sourceId) as RelationshipRow[];
+  }
+  /** Returns every edge whose target is `targetId`. */
+  getRelationshipsTo(targetId: string): RelationshipRow[] {
+    return this.db.prepare("SELECT source_id AS sourceId, target_id AS targetId, type FROM relationships WHERE target_id = ?").all(targetId) as RelationshipRow[];
+  }
+  /** Deletes all relationships, tags and documents as a single transaction. */
+  clearAll(): void {
+    const wipe = this.db.transaction(() => {
+      this.db.exec("DELETE FROM relationships; DELETE FROM tags; DELETE FROM documents;");
+    });
+    wipe();
+  }
+  /** Closes the underlying connection. */
+  close(): void {
+    this.db.close();
+  }
+}

package/src/embedder.ts ADDED Viewed

@@ -0,0 +1,37 @@
+import {
+  pipeline,
+  type FeatureExtractionPipeline,
+} from "@huggingface/transformers";
+/** Identifier of the model passed to the feature-extraction pipeline. */
+const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
+/** Maximum number of characters of text handed to the model. */
+const MAX_CHARS = 500;
+/**
+ * Pending or completed pipeline load. Caching the promise rather than the
+ * resolved instance lets callers that arrive during the load share it.
+ */
+let embedderPromise: Promise<FeatureExtractionPipeline> | null = null;
+/**
+ * Returns the shared feature-extraction pipeline, creating it on first use.
+ * Concurrent callers receive the same pending load. When the load fails, the
+ * rejection reaches every waiting caller and the cache is cleared so that a
+ * later call tries again.
+ */
+export async function getEmbedder(): Promise<FeatureExtractionPipeline> {
+  if (embedderPromise) {
+    return embedderPromise;
+  }
+  const loading = (async () => {
+    // @ts-expect-error pipeline() overload union is too complex for TS
+    const instance = (await pipeline(
+      "feature-extraction",
+      MODEL_ID
+    )) as FeatureExtractionPipeline;
+    return instance;
+  })();
+  embedderPromise = loading;
+  // Drop a failed load from the cache; callers still observe the rejection
+  // through the promise they were handed.
+  loading.catch(() => {
+    if (embedderPromise === loading) embedderPromise = null;
+  });
+  return loading;
+}
+/**
+ * Computes the embedding of `text` with the shared pipeline.
+ * Only the first MAX_CHARS characters are passed to the model, which is asked
+ * for mean pooling and normalization; the output data is copied into a new
+ * Float32Array.
+ */
+export async function embed(text: string): Promise<Float32Array> {
+  const extractor = await getEmbedder();
+  const input = text.slice(0, MAX_CHARS);
+  const { data } = await extractor(input, { pooling: "mean", normalize: true });
+  return new Float32Array(data as ArrayLike<number>);
+}
+/**
+ * Joins `title` and `content` with a newline and keeps at most the first
+ * MAX_CHARS characters of the result.
+ */
+export function prepareTextForEmbedding(
+  title: string,
+  content: string
+): string {
+  const combined = `${title}\n${content}`;
+  return combined.slice(0, MAX_CHARS);
+}

package/src/hybrid-search.ts ADDED Viewed

@@ -0,0 +1,102 @@
+import type { KnowledgeDB } from "./db.js";
+import type { FtsSearch, SearchOptions, SearchResult } from "./search.js";
+import type { VectorSearch } from "./vector-search.js";
+/** Asynchronous function that turns text into an embedding vector. */
+export type EmbedFn = (text: string) => Promise<Float32Array>;
+/**
+ * Merges full-text and vector search hits with Reciprocal Rank Fusion (RRF)
+ * and applies the superseded/type/tag filters to the fused list.
+ */
+export class HybridSearch {
+  /** Constant k of the RRF contribution 1 / (k + rank), with rank starting at 1. */
+  private static readonly RRF_K = 60;
+  constructor(
+    private readonly db: KnowledgeDB,
+    private readonly fts: FtsSearch,
+    private readonly vec: VectorSearch,
+    private readonly embedFn: EmbedFn,
+  ) {}
+  /**
+   * Searches with both engines and returns at most `limit` fused results,
+   * best score first.
+   * @param query Keywords or free text; also embedded for the vector search.
+   * @param options Filters and limit; `limit` defaults to 20 and superseded
+   *   documents are excluded unless `includeSuperseded` is true.
+   * @returns Results whose `score` is the summed RRF contribution. Hits found
+   *   only by the vector search carry an empty `snippet`.
+   */
+  async search(
+    query: string,
+    options: SearchOptions = {},
+  ): Promise<SearchResult[]> {
+    const { type, tags, includeSuperseded = false, limit = 20 } = options;
+    // Fetch twice the requested amount from each engine because the filters
+    // below run after fusion and may discard candidates.
+    const candidateCount = limit * 2;
+    const ftsResults = this.searchFts(query, candidateCount);
+    const queryEmbedding = await this.embedFn(query);
+    const vecResults = this.vec.search(queryEmbedding, candidateCount);
+    // Accumulate RRF contributions; a document found by both engines gets both.
+    const scores = new Map<string, number>();
+    HybridSearch.addRrfScores(scores, ftsResults.map((r) => r.id));
+    HybridSearch.addRrfScores(scores, vecResults.map((r) => r.id));
+    const ftsById = new Map<string, SearchResult>();
+    for (const r of ftsResults) {
+      ftsById.set(r.id, r);
+    }
+    const combined: SearchResult[] = [];
+    for (const [id, rrfScore] of scores) {
+      const ftsHit = ftsById.get(id);
+      if (ftsHit) {
+        combined.push({ ...ftsHit, score: rrfScore });
+        continue;
+      }
+      // Vector-only hit: load its metadata from the database and skip it
+      // when the document row no longer exists.
+      const doc = this.db.getDocument(id);
+      if (!doc) continue;
+      combined.push({
+        id: doc.id,
+        path: doc.path,
+        title: doc.title,
+        type: doc.type,
+        status: doc.status,
+        date: doc.date,
+        score: rrfScore,
+        snippet: "",
+      });
+    }
+    combined.sort((a, b) => b.score - a.score);
+    let filtered = combined;
+    if (!includeSuperseded) {
+      filtered = filtered.filter((r) => r.status !== "superseded");
+    }
+    if (type) {
+      filtered = filtered.filter((r) => r.type === type);
+    }
+    if (tags && tags.length > 0) {
+      // Keep documents that carry at least one of the requested tags.
+      const tagSet = new Set(tags);
+      filtered = filtered.filter((r) =>
+        this.db.getTags(r.id).some((t) => tagSet.has(t)),
+      );
+    }
+    return filtered.slice(0, limit);
+  }
+  /**
+   * Runs the full-text search, including superseded documents so the caller
+   * can filter after fusion. The query is first used verbatim. If that throws
+   * (FTS5 rejects punctuation outside quoted strings), the search is retried
+   * once with every whitespace-separated term quoted. The original error is
+   * rethrown when the query has no terms, and a failing retry propagates.
+   */
+  private searchFts(query: string, limit: number): SearchResult[] {
+    const ftsOptions: SearchOptions = { includeSuperseded: true, limit };
+    try {
+      return this.fts.search(query, ftsOptions);
+    } catch (err) {
+      const literalQuery = HybridSearch.toLiteralFtsQuery(query);
+      if (literalQuery === "") throw err;
+      return this.fts.search(literalQuery, ftsOptions);
+    }
+  }
+  /**
+   * Rewrites free text as an FTS5 expression of quoted terms joined by OR.
+   * Embedded double quotes are doubled, which is how FTS5 strings escape them.
+   */
+  private static toLiteralFtsQuery(query: string): string {
+    return query
+      .split(/\s+/)
+      .filter((term) => term.length > 0)
+      .map((term) => `"${term.replace(/"/g, '""')}"`)
+      .join(" OR ");
+  }
+  /** Adds 1 / (RRF_K + rank) to the score of each id, where rank is its 1-based position. */
+  private static addRrfScores(scores: Map<string, number>, rankedIds: string[]): void {
+    rankedIds.forEach((id, index) => {
+      const rrfScore = 1 / (HybridSearch.RRF_K + index + 1);
+      scores.set(id, (scores.get(id) ?? 0) + rrfScore);
+    });
+  }
+}

package/src/index.ts ADDED Viewed

@@ -0,0 +1,76 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { z } from "zod";
+import { KnowledgeDB } from "./db.js";
+import { FtsSearch } from "./search.js";
+import { VectorSearch } from "./vector-search.js";
+import { HybridSearch } from "./hybrid-search.js";
+import { Traverser } from "./traverse.js";
+import { embed } from "./embedder.js";
+/**
+ * Builds the MCP server and the services behind it for the database at
+ * `dbPath`, and registers the `knowledge_search` and `knowledge_traverse`
+ * tools. The server is not connected to a transport here.
+ * @returns The server together with the database and search/traversal
+ *   objects it uses.
+ */
+export function createServer(dbPath: string) {
+  const server = new McpServer({ name: "ralph-hero-knowledge", version: "0.1.0" });
+  const db = new KnowledgeDB(dbPath);
+  const fts = new FtsSearch(db);
+  const vec = new VectorSearch(db);
+  const hybrid = new HybridSearch(db, fts, vec, embed);
+  const traverser = new Traverser(db);
+  // Tool output is a single pretty-printed JSON text item.
+  const jsonResult = (value: unknown) => ({
+    content: [{ type: "text" as const, text: JSON.stringify(value, null, 2) }],
+  });
+  // Failures are reported to the client as a flagged text item. Thrown values
+  // that are not Error instances are stringified instead of printing "undefined".
+  const errorResult = (e: unknown) => ({
+    content: [{ type: "text" as const, text: `Error: ${e instanceof Error ? e.message : String(e)}` }],
+    isError: true,
+  });
+  server.tool(
+    "knowledge_search",
+    "Search the knowledge base by keyword, semantic similarity, and tags. Returns ranked documents.",
+    {
+      query: z.string().describe("Search query (keywords or natural language)"),
+      tags: z.array(z.string()).optional().describe("Filter by tags"),
+      type: z.string().optional().describe("Filter by document type (research, plan, review, idea, report)"),
+      // Reject negative or fractional limits up front; they would otherwise
+      // reach the SQL LIMIT clause and the final slice.
+      limit: z.number().int().positive().optional().describe("Max results (default: 10)"),
+      includeSuperseded: z.boolean().optional().describe("Include superseded documents (default: false)"),
+    },
+    async (args) => {
+      try {
+        const results = await hybrid.search(args.query, {
+          tags: args.tags,
+          type: args.type,
+          limit: args.limit ?? 10,
+          includeSuperseded: args.includeSuperseded,
+        });
+        // Attach each document's tags to its search result.
+        const enriched = results.map(r => ({ ...r, tags: db.getTags(r.id) }));
+        return jsonResult(enriched);
+      } catch (e) {
+        return errorResult(e);
+      }
+    },
+  );
+  server.tool(
+    "knowledge_traverse",
+    "Walk typed relationship edges (builds_on, tensions, superseded_by) from a document.",
+    {
+      from: z.string().describe("Document ID (filename without extension)"),
+      type: z.enum(["builds_on", "tensions", "superseded_by"]).optional().describe("Filter by relationship type"),
+      depth: z.number().optional().describe("Max traversal depth (default: 3)"),
+      direction: z.enum(["outgoing", "incoming"]).optional().describe("Edge direction (default: outgoing)"),
+    },
+    async (args) => {
+      try {
+        const opts = { type: args.type, depth: args.depth ?? 3 };
+        const results = args.direction === "incoming"
+          ? traverser.traverseIncoming(args.from, opts)
+          : traverser.traverse(args.from, opts);
+        return jsonResult(results);
+      } catch (e) {
+        return errorResult(e);
+      }
+    },
+  );
+  return { server, db, fts, vec, hybrid, traverser };
+}
+// Start the stdio server only when the script path given to node ends in
+// "index.js"; importing this module from elsewhere just exposes createServer.
+// NOTE(review): the suffix test also matches other script names ending in
+// "index.js" (for example "reindex.js") — confirm no such script imports this module.
+const isMain = process.argv[1]?.endsWith("index.js");
+if (isMain) {
+  // The database location can be overridden through RALPH_KNOWLEDGE_DB.
+  const dbPath = process.env.RALPH_KNOWLEDGE_DB ?? "knowledge.db";
+  const { server } = createServer(dbPath);
+  const transport = new StdioServerTransport();
+  server.connect(transport).catch((err: unknown) => {
+    console.error(err);
+    // Report the failed startup through the exit status instead of exiting with 0.
+    process.exitCode = 1;
+  });
+}

package/src/parser.ts ADDED Viewed

@@ -0,0 +1,63 @@
+import { parse as parseYaml } from "yaml";
+export interface Relationship { // A typed link from one document to another, as extracted by parseDocument.
+  sourceId: string; // Id of the document the link was found in.
+  targetId: string; // Text captured between [[ and ]] of the wikilink.
+  type: "builds_on" | "tensions" | "superseded_by";
+}
+export interface ParsedDocument { // Result of parseDocument: frontmatter metadata plus the document body.
+  id: string;
+  path: string;
+  title: string; // Text of the first "# " heading in the body, or the id when there is none.
+  date: string | null; // Frontmatter `date` converted to a string, or null when absent.
+  type: string | null;
+  status: string | null;
+  githubIssue: number | null; // Frontmatter `github_issue` when it is a number, otherwise null.
+  tags: string[]; // Frontmatter `tags` entries converted to strings; empty when `tags` is not an array.
+  relationships: Relationship[];
+  content: string; // Trimmed text that follows the frontmatter block.
+}
+// Leading YAML frontmatter fenced by `---` lines. A UTF-8 byte-order mark and
+// CRLF line endings are tolerated so such files keep their metadata.
+const FRONTMATTER_RE = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/;
+// First level-1 markdown heading.
+const TITLE_RE = /^# (.+)$/m;
+// List items of the form `- builds_on:: [[target]]` or `- tensions:: [[target]]`.
+const WIKILINK_REL_RE = /^- (builds_on|tensions):: \[\[(.+?)\]\]/gm;
+// First wikilink inside the frontmatter `superseded_by` value.
+const SUPERSEDED_BY_RE = /\[\[(.+?)\]\]/;
+/** Returns strings unchanged, stringifies numbers and booleans, and maps everything else to null. */
+function scalarToString(value: unknown): string | null {
+  if (typeof value === "string") return value;
+  if (typeof value === "number" || typeof value === "boolean") return String(value);
+  return null;
+}
+/**
+ * Parses one markdown document into metadata, relationships and body text.
+ * @param id Document id; also used as the title when the body has no "# " heading.
+ * @param path Path stored on the result unchanged.
+ * @param raw Full file text, optionally starting with YAML frontmatter.
+ * @returns The parsed document. `builds_on`/`tensions` relationships come from
+ *   body list items, `superseded_by` from the frontmatter.
+ * @throws Whatever the YAML parser throws for malformed frontmatter.
+ */
+export function parseDocument(id: string, path: string, raw: string): ParsedDocument {
+  const fmMatch = raw.match(FRONTMATTER_RE);
+  const parsedYaml: unknown = fmMatch ? parseYaml(fmMatch[1]) : null;
+  // Frontmatter that is empty or not a mapping (scalar, list) carries no fields.
+  const frontmatter: Record<string, unknown> =
+    typeof parsedYaml === "object" && parsedYaml !== null && !Array.isArray(parsedYaml)
+      ? (parsedYaml as Record<string, unknown>)
+      : {};
+  const body = fmMatch ? raw.slice(fmMatch[0].length).trim() : raw.trim();
+  const titleMatch = body.match(TITLE_RE);
+  const title = titleMatch ? titleMatch[1].trim() : id;
+  const relationships: Relationship[] = [];
+  // matchAll iterates over a copy of the regex, so the shared global regex
+  // keeps its lastIndex at 0 between calls.
+  for (const match of body.matchAll(WIKILINK_REL_RE)) {
+    relationships.push({
+      sourceId: id,
+      targetId: match[2],
+      type: match[1] as "builds_on" | "tensions",
+    });
+  }
+  const supersededBy = frontmatter.superseded_by;
+  if (typeof supersededBy === "string") {
+    const wlMatch = supersededBy.match(SUPERSEDED_BY_RE);
+    if (wlMatch) {
+      relationships.push({ sourceId: id, targetId: wlMatch[1], type: "superseded_by" });
+    }
+  }
+  const tags: string[] = Array.isArray(frontmatter.tags) ? frontmatter.tags.map(String) : [];
+  return {
+    id, path, title,
+    date: frontmatter.date ? String(frontmatter.date) : null,
+    // Lists or mappings under `type`/`status` are dropped rather than passed
+    // on in fields declared as string | null.
+    type: scalarToString(frontmatter.type),
+    status: scalarToString(frontmatter.status),
+    githubIssue: typeof frontmatter.github_issue === "number" ? frontmatter.github_issue : null,
+    tags, relationships, content: body,
+  };
+}

package/src/reindex.ts ADDED Viewed

@@ -0,0 +1,89 @@
+import { readFileSync, readdirSync } from "node:fs";
+import { join, relative, basename } from "node:path";
+import { KnowledgeDB } from "./db.js";
+import { FtsSearch } from "./search.js";
+import { VectorSearch } from "./vector-search.js";
+import { embed, prepareTextForEmbedding } from "./embedder.js";
+import { parseDocument } from "./parser.js";
+/**
+ * Recursively lists the `.md` files below `dir`.
+ * Directories whose name starts with "." are not entered. Paths are returned
+ * in the order the directory entries are read, with a subdirectory's files
+ * appearing at the position of that subdirectory.
+ */
+function findMarkdownFiles(dir: string): string[] {
+  const collect = (current: string): string[] =>
+    readdirSync(current, { withFileTypes: true }).flatMap((entry) => {
+      const entryPath = join(current, entry.name);
+      if (entry.isDirectory()) {
+        return entry.name.startsWith(".") ? [] : collect(entryPath);
+      }
+      const isMarkdownFile = entry.isFile() && entry.name.endsWith(".md");
+      return isMarkdownFile ? [entryPath] : [];
+    });
+  return collect(dir);
+}
+/**
+ * Rebuilds the whole knowledge index at `dbPath` from the markdown files
+ * below `thoughtsDir`: clears the stored data, re-parses every file, stores
+ * documents, tags, relationships and embeddings, then rebuilds the FTS index.
+ * A file that cannot be read, parsed or stored is reported and skipped, so
+ * one bad file does not leave the database without a rebuilt FTS index.
+ * The database is closed even when indexing fails.
+ */
+async function reindex(thoughtsDir: string, dbPath: string): Promise<void> {
+  console.log(`Indexing ${thoughtsDir} -> ${dbPath}`);
+  const db = new KnowledgeDB(dbPath);
+  try {
+    const fts = new FtsSearch(db);
+    const vec = new VectorSearch(db);
+    vec.createIndex();
+    db.clearAll();
+    vec.dropIndex();
+    vec.createIndex();
+    const files = findMarkdownFiles(thoughtsDir);
+    console.log(`Found ${files.length} markdown files`);
+    // Ids are bare file names, so files with the same name in different
+    // directories collide; remember which path claimed an id first.
+    const pathById = new Map<string, string>();
+    let indexed = 0;
+    let skipped = 0;
+    for (const filePath of files) {
+      // Stored paths are relative to the parent of thoughtsDir.
+      const relPath = relative(join(thoughtsDir, ".."), filePath);
+      const id = basename(filePath, ".md");
+      const earlierPath = pathById.get(id);
+      if (earlierPath !== undefined) {
+        console.warn(`Duplicate document id "${id}": ${relPath} overwrites ${earlierPath}`);
+      }
+      pathById.set(id, relPath);
+      try {
+        const raw = readFileSync(filePath, "utf-8");
+        const parsed = parseDocument(id, relPath, raw);
+        db.upsertDocument({
+          id: parsed.id,
+          path: parsed.path,
+          title: parsed.title,
+          date: parsed.date,
+          type: parsed.type,
+          status: parsed.status,
+          githubIssue: parsed.githubIssue,
+          content: parsed.content,
+        });
+        if (parsed.tags.length > 0) {
+          db.setTags(parsed.id, parsed.tags);
+        }
+        for (const rel of parsed.relationships) {
+          db.addRelationship(rel.sourceId, rel.targetId, rel.type);
+        }
+        const text = prepareTextForEmbedding(parsed.title, parsed.content);
+        // Embedding is best effort: a document without a vector is still stored.
+        try {
+          const embedding = await embed(text);
+          vec.upsertEmbedding(parsed.id, embedding);
+        } catch (e) {
+          console.warn(`Failed to embed ${id}: ${e instanceof Error ? e.message : String(e)}`);
+        }
+        indexed++;
+        if (indexed % 50 === 0) {
+          console.log(`  ${indexed}/${files.length} indexed`);
+        }
+      } catch (e) {
+        skipped++;
+        console.warn(`Skipping ${relPath}: ${e instanceof Error ? e.message : String(e)}`);
+      }
+    }
+    fts.rebuildIndex();
+    console.log(`Done. ${indexed} documents indexed.`);
+    if (skipped > 0) {
+      console.warn(`${skipped} files could not be indexed.`);
+    }
+  } finally {
+    db.close();
+  }
+}
+// Command line: [thoughtsDir] [dbPath]; both fall back to the defaults below.
+const thoughtsDir = process.argv[2] ?? "../../thoughts";
+const dbPath = process.argv[3] ?? "knowledge.db";
+reindex(thoughtsDir, dbPath).catch((err: unknown) => {
+  console.error(err);
+  // Make a failed run visible to the caller through a non-zero exit status.
+  process.exitCode = 1;
+});

package/src/search.ts ADDED Viewed

@@ -0,0 +1,92 @@
+import type { KnowledgeDB } from "./db.js";
+export interface SearchOptions { // Optional filters shared by the search classes.
+  type?: string; // Keep only documents whose type equals this value.
+  tags?: string[]; // Keep only documents carrying at least one of these tags.
+  includeSuperseded?: boolean; // FtsSearch.search defaults this to false, which drops documents with status 'superseded'.
+  limit?: number; // FtsSearch.search defaults this to 20.
+}
+export interface SearchResult { // One ranked search hit with the document's metadata.
+  id: string;
+  path: string;
+  title: string;
+  type: string | null;
+  status: string | null;
+  date: string | null;
+  score: number; // FtsSearch fills this from the FTS5 `rank` column and orders by it ascending.
+  snippet: string; // FtsSearch fills this with a content excerpt in which matches are wrapped in <b>…</b>.
+}
+/**
+ * Full-text search over the `documents` table through an FTS5 virtual table
+ * named `documents_fts`.
+ */
+export class FtsSearch {
+  private readonly db: KnowledgeDB;
+  constructor(db: KnowledgeDB) {
+    this.db = db;
+  }
+  /**
+   * Drops and recreates `documents_fts` as an external-content FTS5 table over
+   * `documents`, then fills it from the current rows. It must be called again
+   * after documents change, because nothing else in this class updates it.
+   */
+  rebuildIndex(): void {
+    this.db.db.exec(`DROP TABLE IF EXISTS documents_fts`);
+    this.db.db.exec(`
+      CREATE VIRTUAL TABLE documents_fts USING fts5(
+        title,
+        path,
+        content,
+        content='documents',
+        content_rowid='rowid'
+      )
+    `);
+    this.db.db.exec(`
+      INSERT INTO documents_fts(rowid, title, path, content)
+      SELECT rowid, title, path, content FROM documents
+    `);
+  }
+  /**
+   * Runs `query` as an FTS5 MATCH expression and returns the best hits.
+   * @param query FTS5 query string; it is bound as a parameter, not interpolated.
+   * @param options Filters and limit; superseded documents are excluded unless
+   *   `includeSuperseded` is true, and `limit` defaults to 20.
+   * @returns Each matching document at most once, ordered by FTS5 rank
+   *   ascending, with a snippet taken from the content column.
+   */
+  search(query: string, options: SearchOptions = {}): SearchResult[] {
+    const { type, tags, includeSuperseded = false, limit = 20 } = options;
+    const conditions: string[] = ["documents_fts MATCH @query"];
+    const params: Record<string, unknown> = { query, limit };
+    if (!includeSuperseded) {
+      // IS NOT keeps rows whose status is NULL.
+      conditions.push("d.status IS NOT 'superseded'");
+    }
+    if (type) {
+      conditions.push("d.type = @type");
+      params.type = type;
+    }
+    if (tags && tags.length > 0) {
+      // EXISTS instead of a JOIN: a join yields one row per matching tag, so a
+      // document carrying several requested tags would be returned repeatedly
+      // and use up the LIMIT.
+      const tagPlaceholders = tags.map((_, i) => `@tag${i}`);
+      conditions.push(
+        `EXISTS (SELECT 1 FROM tags t WHERE t.doc_id = d.id AND t.tag IN (${tagPlaceholders.join(", ")}))`,
+      );
+      tags.forEach((tag, i) => {
+        params[`tag${i}`] = tag;
+      });
+    }
+    const whereClause = conditions.join(" AND ");
+    // snippet(..., 2, ...) takes the excerpt from column index 2 (content).
+    const sql = `
+      SELECT
+        d.id,
+        d.path,
+        d.title,
+        d.type,
+        d.status,
+        d.date,
+        rank AS score,
+        snippet(documents_fts, 2, '<b>', '</b>', '...', 32) AS snippet
+      FROM documents_fts
+      JOIN documents d ON d.rowid = documents_fts.rowid
+      WHERE ${whereClause}
+      ORDER BY rank ASC
+      LIMIT @limit
+    `;
+    return this.db.db.prepare(sql).all(params) as SearchResult[];
+  }
+}