npm - @199-bio/engram - Versions diffs - 0.1.0 - Mend

@199-bio/engram 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/.env.example +19 -0
package/LICENSE +21 -0
package/LIVING_PLAN.md +180 -0
package/PLAN.md +514 -0
package/README.md +304 -0
package/dist/graph/extractor.d.ts.map +1 -0
package/dist/graph/index.d.ts.map +1 -0
package/dist/graph/knowledge-graph.d.ts.map +1 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +473 -0
package/dist/retrieval/colbert.d.ts.map +1 -0
package/dist/retrieval/hybrid.d.ts.map +1 -0
package/dist/retrieval/index.d.ts.map +1 -0
package/dist/storage/database.d.ts.map +1 -0
package/dist/storage/index.d.ts.map +1 -0
package/package.json +62 -0
package/src/graph/extractor.ts +441 -0
package/src/graph/index.ts +2 -0
package/src/graph/knowledge-graph.ts +263 -0
package/src/index.ts +558 -0
package/src/retrieval/colbert-bridge.py +222 -0
package/src/retrieval/colbert.ts +317 -0
package/src/retrieval/hybrid.ts +218 -0
package/src/retrieval/index.ts +2 -0
package/src/storage/database.ts +527 -0
package/src/storage/index.ts +1 -0
package/tests/test-interactive.js +218 -0
package/tests/test-mcp.sh +81 -0
package/tsconfig.json +20 -0

package/src/retrieval/colbert-bridge.py ADDED Viewed

@@ -0,0 +1,222 @@
+#!/usr/bin/env python3
+"""
+ColBERT bridge for Engram
+Uses RAGatouille for state-of-the-art retrieval
+Run as subprocess from Node.js, communicates via JSON over stdin/stdout.
+"""
+import sys
+import json
+import os
+from pathlib import Path
+# Suppress warnings: force TOKENIZERS_PARALLELISM off for this process.
+# NOTE(review): presumably read by the tokenizer library pulled in by RAGatouille,
+# so it is set before that import happens - confirm.
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+def lazy_load_ragatouille():
+    """Lazy load RAGatouille to speed up startup"""
+    try:
+        from ragatouille import RAGPretrainedModel
+        return RAGPretrainedModel
+    except ImportError:
+        return None
+class ColBERTBridge:
+    def __init__(self, index_path: str):
+        self.index_path = Path(index_path)
+        self.index_path.mkdir(parents=True, exist_ok=True)
+        self.model = None
+        self.index = None
+        self.index_name = "engram_index"
+    def _ensure_model(self):
+        """Load model if not already loaded"""
+        if self.model is None:
+            RAGPretrainedModel = lazy_load_ragatouille()
+            if RAGPretrainedModel is None:
+                raise RuntimeError("RAGatouille not installed. Run: pip install ragatouille")
+            self.model = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
+    def _ensure_index(self):
+        """Load existing index if available"""
+        if self.index is None:
+            index_dir = self.index_path / ".ragatouille" / "colbert" / "indexes" / self.index_name
+            if index_dir.exists():
+                RAGPretrainedModel = lazy_load_ragatouille()
+                if RAGPretrainedModel:
+                    try:
+                        self.index = RAGPretrainedModel.from_index(str(index_dir))
+                    except Exception:
+                        pass  # Will recreate index
+    def index_documents(self, documents: list[dict]) -> dict:
+        """
+        Index documents for search
+        documents: [{"id": "...", "content": "..."}]
+        """
+        self._ensure_model()
+        if not documents:
+            return {"success": True, "count": 0}
+        doc_ids = [d["id"] for d in documents]
+        doc_contents = [d["content"] for d in documents]
+        # Index with RAGatouille
+        self.index = self.model.index(
+            collection=doc_contents,
+            document_ids=doc_ids,
+            index_name=self.index_name,
+            max_document_length=512,
+            split_documents=True,
+        )
+        return {"success": True, "count": len(documents)}
+    def add_documents(self, documents: list[dict]) -> dict:
+        """
+        Add documents to existing index
+        """
+        self._ensure_index()
+        if self.index is None:
+            # No existing index, create new
+            return self.index_documents(documents)
+        doc_ids = [d["id"] for d in documents]
+        doc_contents = [d["content"] for d in documents]
+        try:
+            self.index.add_to_index(
+                new_collection=doc_contents,
+                new_document_ids=doc_ids,
+            )
+            return {"success": True, "count": len(documents)}
+        except Exception as e:
+            # Fallback: reindex everything
+            return {"success": False, "error": str(e)}
+    def search(self, query: str, k: int = 10) -> dict:
+        """
+        Search for documents
+        Returns: {"results": [{"id": "...", "score": 0.9, "content": "..."}]}
+        """
+        self._ensure_index()
+        if self.index is None:
+            return {"results": []}
+        try:
+            results = self.index.search(query=query, k=k)
+            formatted = []
+            for r in results:
+                formatted.append({
+                    "id": r.get("document_id", r.get("doc_id", "")),
+                    "score": float(r.get("score", 0)),
+                    "content": r.get("content", ""),
+                })
+            return {"results": formatted}
+        except Exception as e:
+            return {"results": [], "error": str(e)}
+    def rerank(self, query: str, documents: list[dict], k: int = 10) -> dict:
+        """
+        Rerank documents using ColBERT
+        documents: [{"id": "...", "content": "..."}]
+        """
+        self._ensure_model()
+        if not documents:
+            return {"results": []}
+        doc_contents = [d["content"] for d in documents]
+        try:
+            # Use ColBERT as reranker
+            results = self.model.rerank(
+                query=query,
+                documents=doc_contents,
+                k=min(k, len(documents)),
+            )
+            formatted = []
+            for r in results:
+                idx = r.get("result_index", 0)
+                if idx < len(documents):
+                    formatted.append({
+                        "id": documents[idx]["id"],
+                        "score": float(r.get("score", 0)),
+                        "content": documents[idx]["content"],
+                    })
+            return {"results": formatted}
+        except Exception as e:
+            return {"results": [], "error": str(e)}
+    def delete_documents(self, doc_ids: list[str]) -> dict:
+        """
+        Delete documents from index
+        """
+        self._ensure_index()
+        if self.index is None:
+            return {"success": True, "count": 0}
+        try:
+            self.index.delete_from_index(document_ids=doc_ids)
+            return {"success": True, "count": len(doc_ids)}
+        except Exception as e:
+            return {"success": False, "error": str(e)}
+def main():
+    """Main loop - read JSON commands from stdin, write responses to stdout"""
+    index_path = os.environ.get("ENGRAM_INDEX_PATH", os.path.expanduser("~/.engram"))
+    bridge = ColBERTBridge(index_path)
+    # Signal ready
+    print(json.dumps({"status": "ready"}), flush=True)
+    for line in sys.stdin:
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            cmd = json.loads(line)
+            action = cmd.get("action")
+            if action == "index":
+                result = bridge.index_documents(cmd.get("documents", []))
+            elif action == "add":
+                result = bridge.add_documents(cmd.get("documents", []))
+            elif action == "search":
+                result = bridge.search(cmd.get("query", ""), cmd.get("k", 10))
+            elif action == "rerank":
+                result = bridge.rerank(
+                    cmd.get("query", ""),
+                    cmd.get("documents", []),
+                    cmd.get("k", 10)
+                )
+            elif action == "delete":
+                result = bridge.delete_documents(cmd.get("ids", []))
+            elif action == "ping":
+                result = {"status": "ok"}
+            elif action == "quit":
+                break
+            else:
+                result = {"error": f"Unknown action: {action}"}
+            print(json.dumps(result), flush=True)
+        except json.JSONDecodeError as e:
+            print(json.dumps({"error": f"Invalid JSON: {e}"}), flush=True)
+        except Exception as e:
+            print(json.dumps({"error": str(e)}), flush=True)
+if __name__ == "__main__":
+    main()

package/src/retrieval/colbert.ts ADDED Viewed

@@ -0,0 +1,317 @@
+/**
+ * ColBERT retriever - TypeScript wrapper for Python bridge
+ */
+import { spawn, ChildProcess } from "child_process";
+import { createInterface, Interface } from "readline";
+import path from "path";
+import { fileURLToPath } from "url";
+// Derive this module's file and directory paths from import.meta.url
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+// Python bridge is in src/, not dist/ - go up from dist/retrieval to project root, then into src/
+const BRIDGE_PATH = path.join(__dirname, "..", "..", "src", "retrieval", "colbert-bridge.py");
+/** A piece of text that can be indexed, searched, or reranked. */
+export interface Document {
+  /** Identifier supplied by the caller; echoed back in search results. */
+  id: string;
+  /** Text that is indexed and matched against queries. */
+  content: string;
+}
+/** One hit returned by a retriever's search or rerank call. */
+export interface SearchResult {
+  /** Identifier of the matched document. */
+  id: string;
+  /** Relevance score; the scale depends on the retriever that produced it. */
+  score: number;
+  /** Text of the matched document. */
+  content: string;
+}
+/** Shape of a single JSON line received from the Python bridge; every field is optional. */
+interface BridgeResponse {
+  /** "ready" marks the startup signal; a ping is considered successful when this is "ok". */
+  status?: string;
+  /** Outcome flag read for index, add, and delete requests. */
+  success?: boolean;
+  /** Document count read for index, add, and delete requests. */
+  count?: number;
+  /** Hits read for search and rerank requests. */
+  results?: SearchResult[];
+  /** Error message, if the bridge supplied one. */
+  error?: string;
+}
+/**
+ * Client for the Python ColBERT bridge.
+ *
+ * Spawns the bridge as a child process and exchanges one JSON object per line
+ * over stdin/stdout. Responses carry no request id, so they are matched to
+ * requests strictly in the order the requests were sent.
+ */
+export class ColBERTRetriever {
+  private process: ChildProcess | null = null;
+  private readline: Interface | null = null;
+  private pendingRequests: Map<number, {
+    resolve: (value: BridgeResponse) => void;
+    reject: (error: Error) => void;
+  }> = new Map();
+  private requestId = 0;
+  private ready = false;
+  // Readiness is tracked per process so a restart waits for the new "ready" signal
+  private readyPromise: Promise<void> | null = null;
+  private readyResolve: (() => void) | null = null;
+  private readyReject: ((error: Error) => void) | null = null;
+  constructor(private indexPath: string) {}
+  /**
+   * Start the Python bridge process and wait for its ready signal.
+   *
+   * @throws Error when the process cannot be spawned, exits before becoming
+   *   ready, or is stopped while starting.
+   */
+  async start(): Promise<void> {
+    if (this.process && this.readyPromise) {
+      // Already starting or running: share the same readiness signal
+      await this.readyPromise;
+      return;
+    }
+    const child: ChildProcess = spawn("python3", [BRIDGE_PATH], {
+      env: {
+        ...process.env,
+        ENGRAM_INDEX_PATH: this.indexPath,
+      },
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+    this.process = child;
+    this.ready = false;
+    const ready = new Promise<void>((resolve, reject) => {
+      this.readyResolve = resolve;
+      this.readyReject = reject;
+    });
+    this.readyPromise = ready;
+    // Without an "error" listener a failed spawn (e.g. python3 missing) would
+    // surface as an unhandled error event and start() would never settle
+    child.on("error", (error: Error) => {
+      this.teardown(child, new Error(`ColBERT bridge failed: ${error.message}`));
+    });
+    child.on("exit", (code) => {
+      console.error(`[ColBERT] Process exited with code ${code}`);
+      this.teardown(child, new Error(`ColBERT bridge exited with code ${code}`));
+    });
+    child.stdin?.on("error", (error: Error) => {
+      // Writes to a dying process fail here; cleanup happens in the exit/error handlers
+      console.error(`[ColBERT] stdin error: ${error.message}`);
+    });
+    child.stderr?.on("data", (data) => {
+      // Log Python errors for debugging
+      console.error(`[ColBERT] ${data.toString()}`);
+    });
+    if (child.stdout) {
+      const lines = createInterface({
+        input: child.stdout,
+        crlfDelay: Infinity,
+      });
+      lines.on("line", (line) => {
+        this.handleLine(line);
+      });
+      this.readline = lines;
+    }
+    // Wait for ready signal
+    await ready;
+  }
+  /**
+   * Release everything tied to `child` and fail whoever is still waiting on it.
+   * Events from a process that was already replaced or stopped are ignored.
+   */
+  private teardown(child: ChildProcess, reason: Error): void {
+    if (this.process !== child) return;
+    this.process = null;
+    this.ready = false;
+    this.readline?.close();
+    this.readline = null;
+    this.readyPromise = null;
+    // No-op when the ready signal was already received
+    this.readyReject?.(reason);
+    this.readyResolve = null;
+    this.readyReject = null;
+    for (const { reject } of this.pendingRequests.values()) {
+      reject(reason);
+    }
+    this.pendingRequests.clear();
+  }
+  /** Parse one stdout line from the bridge and dispatch it. */
+  private handleLine(line: string): void {
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(line);
+    } catch {
+      console.error(`[ColBERT] Failed to parse: ${line}`);
+      return;
+    }
+    // Only JSON objects are protocol messages; anything else is stray output
+    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+      console.error(`[ColBERT] Failed to parse: ${line}`);
+      return;
+    }
+    const response = parsed as BridgeResponse;
+    // Check for ready signal
+    if (response.status === "ready") {
+      this.ready = true;
+      this.readyResolve?.();
+      return;
+    }
+    // Handle response (simple protocol - responses come in order)
+    const oldest = this.pendingRequests.entries().next();
+    if (!oldest.done) {
+      const [id, { resolve }] = oldest.value;
+      this.pendingRequests.delete(id);
+      resolve(response);
+    }
+  }
+  /**
+   * Send one command to the bridge, starting it first when necessary.
+   *
+   * @returns The next response line from the bridge.
+   * @throws Error when the bridge is unavailable or goes away before answering.
+   */
+  private async send(command: Record<string, unknown>): Promise<BridgeResponse> {
+    if (!this.process || !this.ready) {
+      await this.start();
+    }
+    const stdin = this.process?.stdin;
+    if (!stdin) {
+      throw new Error("ColBERT bridge is not running");
+    }
+    return new Promise((resolve, reject) => {
+      const id = this.requestId++;
+      this.pendingRequests.set(id, { resolve, reject });
+      const json = JSON.stringify(command) + "\n";
+      stdin.write(json, (error) => {
+        // A failed write never reached the bridge, so no response will arrive for it
+        if (error && this.pendingRequests.delete(id)) {
+          reject(error);
+        }
+      });
+    });
+  }
+  /**
+   * Index documents for search
+   */
+  async index(documents: Document[]): Promise<{ success: boolean; count: number }> {
+    const response = await this.send({
+      action: "index",
+      documents,
+    });
+    return {
+      success: response.success ?? false,
+      count: response.count ?? 0,
+    };
+  }
+  /**
+   * Add documents to existing index
+   */
+  async add(documents: Document[]): Promise<{ success: boolean; count: number }> {
+    const response = await this.send({
+      action: "add",
+      documents,
+    });
+    return {
+      success: response.success ?? false,
+      count: response.count ?? 0,
+    };
+  }
+  /**
+   * Search for documents
+   */
+  async search(query: string, k: number = 10): Promise<SearchResult[]> {
+    const response = await this.send({
+      action: "search",
+      query,
+      k,
+    });
+    return response.results ?? [];
+  }
+  /**
+   * Rerank documents using ColBERT
+   */
+  async rerank(query: string, documents: Document[], k: number = 10): Promise<SearchResult[]> {
+    const response = await this.send({
+      action: "rerank",
+      query,
+      documents,
+      k,
+    });
+    return response.results ?? [];
+  }
+  /**
+   * Delete documents from index
+   */
+  async delete(ids: string[]): Promise<{ success: boolean; count: number }> {
+    const response = await this.send({
+      action: "delete",
+      ids,
+    });
+    return {
+      success: response.success ?? false,
+      count: response.count ?? 0,
+    };
+  }
+  /**
+   * Check if bridge is ready
+   *
+   * @returns true when the bridge answers with status "ok"; false on any failure.
+   */
+  async ping(): Promise<boolean> {
+    try {
+      const response = await this.send({ action: "ping" });
+      return response.status === "ok";
+    } catch {
+      return false;
+    }
+  }
+  /**
+   * Stop the Python bridge
+   *
+   * Requests still in flight are rejected.
+   */
+  async stop(): Promise<void> {
+    const child = this.process;
+    if (!child) return;
+    try {
+      // Best-effort polite shutdown. Do not wait for a reply: the bridge may
+      // exit without answering, which would block shutdown forever.
+      child.stdin?.write(JSON.stringify({ action: "quit" }) + "\n");
+    } catch {
+      // Ignore errors during shutdown
+    }
+    this.teardown(child, new Error("ColBERT bridge stopped"));
+    child.kill();
+  }
+}
+/**
+ * Fallback retriever when ColBERT is not available
+ * Scores each document by the fraction of query terms it contains
+ * (case-insensitive substring match); documents matching no term are omitted.
+ */
+export class SimpleRetriever {
+  private documents: Map<string, Document> = new Map();
+  /** Store documents in memory; an existing document with the same id is replaced. */
+  async index(documents: Document[]): Promise<{ success: boolean; count: number }> {
+    for (const doc of documents) {
+      this.documents.set(doc.id, doc);
+    }
+    return { success: true, count: documents.length };
+  }
+  /** Same as index(): there is no separate incremental path. */
+  async add(documents: Document[]): Promise<{ success: boolean; count: number }> {
+    return this.index(documents);
+  }
+  /** Return up to k stored documents, best score first. */
+  async search(query: string, k: number = 10): Promise<SearchResult[]> {
+    return this.rank(query, this.documents.values(), k);
+  }
+  /** Score only the given documents; the stored collection is not touched. */
+  async rerank(query: string, documents: Document[], k: number = 10): Promise<SearchResult[]> {
+    // Deduplicate by id (last one wins)
+    const candidates = new Map<string, Document>();
+    for (const doc of documents) {
+      candidates.set(doc.id, doc);
+    }
+    return this.rank(query, candidates.values(), k);
+  }
+  /** Remove documents by id; count reports how many were actually present. */
+  async delete(ids: string[]): Promise<{ success: boolean; count: number }> {
+    let count = 0;
+    for (const id of ids) {
+      if (this.documents.delete(id)) {
+        count++;
+      }
+    }
+    return { success: true, count };
+  }
+  /** Score `docs` against `query` and return the k best matches. */
+  private rank(query: string, docs: Iterable<Document>, k: number): SearchResult[] {
+    // Drop empty terms: "" is a substring of everything and would match every document
+    const queryTerms = query
+      .toLowerCase()
+      .split(/\s+/)
+      .filter((term) => term.length > 0);
+    if (queryTerms.length === 0) {
+      return [];
+    }
+    const results: SearchResult[] = [];
+    for (const doc of docs) {
+      const contentLower = doc.content.toLowerCase();
+      let score = 0;
+      for (const term of queryTerms) {
+        if (contentLower.includes(term)) {
+          score += 1;
+        }
+      }
+      if (score > 0) {
+        results.push({ id: doc.id, score: score / queryTerms.length, content: doc.content });
+      }
+    }
+    return results
+      .sort((a, b) => b.score - a.score)
+      .slice(0, k);
+  }
+}
+/**
+ * Create the best available retriever
+ *
+ * Tries to start the ColBERT bridge and confirms it with a ping; on any
+ * failure falls back to the in-memory SimpleRetriever.
+ *
+ * @param indexPath Directory handed to the ColBERT bridge for its index.
+ */
+export async function createRetriever(indexPath: string): Promise<ColBERTRetriever | SimpleRetriever> {
+  const colbert = new ColBERTRetriever(indexPath);
+  try {
+    await colbert.start();
+    if (await colbert.ping()) {
+      console.error("[Engram] Using ColBERT retriever");
+      return colbert;
+    }
+  } catch (error) {
+    console.error("[Engram] ColBERT not available, using simple retriever:", error);
+  }
+  // Falling back: ask the bridge to shut down so its process is not leaked.
+  // Deliberately not awaited - the fallback must not depend on the bridge answering.
+  void colbert.stop().catch(() => undefined);
+  console.error("[Engram] Using simple fallback retriever");
+  return new SimpleRetriever();
+}