npm - @100xprompt/chitta - Versions diffs - 0.1.2 → 0.1.4 - Mend

@100xprompt/chitta 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +6 -0
package/package.json +16 -20
package/src/embedded/graph-query.ts +29 -0
package/src/embedded/reranker.ts +3 -1
package/src/mcp/backend.ts +15 -0
package/src/mcp/tools/get-context.ts +19 -4

package/README.md CHANGED Viewed

@@ -100,6 +100,12 @@ bunx @100xprompt/chitta install --print         # just print the MCP config to p
 Options: `--project` (write project-scoped config instead of global) · `--user-id <id> --org-id <id>`
 (bake identity into the config) · `--list` (show all tools) · `uninstall`.
+**Optional extras** (kept out of the default install so `bunx` stays lightweight — the core
+runs great with the built-in fast hashing embedder):
+- Real semantic embeddings: `bun add @huggingface/transformers` then set `CONTEXT_EMBEDDINGS=real`
+  (the default `auto` already uses them when present, else falls back to hashing).
+- Encryption at rest: `bun add libsql` then set `CONTEXT_DB_KEY=<key>` (transparent AES whole-file).
 **Supported tools (15):** Claude Code, Claude Desktop, Cursor, VS Code (Copilot), Windsurf,
 Zed, Cline, Roo Code, Codex CLI, Gemini CLI, opencode, Kiro, Amp, Factory Droid, Kilo Code.
 Skill (not just MCP) is installed for the ones that support it (Claude Code, Cursor, Gemini,

package/package.json CHANGED Viewed

@@ -1,15 +1,23 @@
 {
   "name": "@100xprompt/chitta",
-  "version": "0.1.2",
-  "description": "Chitta - permission-aware memory for AI agents: a knowledge-graph + vector memory MCP server with per-user access control. Runs on Bun. By 100xprompt.",
-  "type": "module",
-  "license": "MIT",
-  "engines": {
-    "bun": ">=1.0.0"
+  "version": "0.1.4",
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.0",
+    "sqlite-vec": "^0.1.9",
+    "tree-sitter-wasms": "^0.1.13",
+    "web-tree-sitter": "0.24.7"
+  },
+  "devDependencies": {
+    "@types/bun": "latest",
+    "typescript": "^5.6.0"
   },
   "bin": {
     "chitta": "./src/bin.ts"
   },
+  "description": "Chitta - permission-aware memory for AI agents: a knowledge-graph + vector memory MCP server with per-user access control. Runs on Bun. By 100xprompt.",
+  "engines": {
+    "bun": ">=1.0.0"
+  },
   "files": [
     "src",
     "assets",
@@ -31,6 +39,7 @@
     "access-control",
     "ai-agents"
   ],
+  "license": "MIT",
   "publishConfig": {
     "access": "public"
   },
@@ -42,18 +51,5 @@
     "install:tools": "bun run src/bin.ts install",
     "cli": "bun run src/embedded/cli.ts"
   },
-  "dependencies": {
-    "@modelcontextprotocol/sdk": "^1.0.0",
-    "sqlite-vec": "^0.1.9",
-    "tree-sitter-wasms": "^0.1.13",
-    "web-tree-sitter": "0.24.7"
-  },
-  "optionalDependencies": {
-    "@huggingface/transformers": "^4.2.0",
-    "libsql": "^0.5.29"
-  },
-  "devDependencies": {
-    "@types/bun": "latest",
-    "typescript": "^5.6.0"
-  }
+  "type": "module"
 }

package/src/embedded/graph-query.ts CHANGED Viewed

@@ -58,6 +58,35 @@ export class GraphQueryService {
     return neighborsOf(ids, byId, adj, relation)
   }
+  /** Entity-centric recall for a FREE-TEXT query ("everything about Elon Musk").
+   *
+   *  Resolves the entity NAMED in the question - any in-scope entity whose label (3+
+   *  chars) occurs, case-insensitively, as a substring of the query - anchors on the most
+   *  specific (longest-label) match plus every matched label that is a fragment of it
+   *  (e.g. "Elon Musk" + "Musk"), and returns their typed neighborhood - the same edge
+   *  set as neighbors(), but driven by natural language instead of an exact entity name.
+   *  Powers get_context's graph-facts section, which is what closes the gap between
+   *  ranked retrieval (lossy) and the typed graph (complete).
+   *
+   *  @param query  free-text question scanned for entity labels
+   *  @param userId passed to scope() to select the graph visible to the caller
+   *  @param orgId  passed to scope() to select the graph visible to the caller
+   *  @param limit  maximum number of neighbors returned (default 40). The neighborhood is
+   *                truncated to this many entries, so it is only complete when it fits.
+   *                Negative values are treated as 0.
+   *  @returns the (possibly truncated) neighborhood, or null when the scope holds no
+   *           entities or none of them is named in the query */
+  async neighborsForQuery(query: string, userId: string, orgId: string, limit = 40): Promise<NeighborResult | null> {
+    const { entities, byId, adj } = await this.scope(userId, orgId)
+    if (entities.length === 0) return null
+    const q = query.toLowerCase()
+    // entities literally named in the query, most specific (longest label) first;
+    // each label is lowercased once and reused for the alias check below
+    const named = entities
+      .filter((e) => e.label.length >= 3)
+      .map((e) => ({ id: e.id, size: e.label.length, lower: e.label.toLowerCase() }))
+      .filter((m) => q.includes(m.lower))
+      .sort((a, b) => b.size - a.size)
+    const top = named[0]
+    if (!top) return null
+    // anchor on the most specific named entity + any alias/fragment of it that also
+    // matched (e.g. "Elon Musk" + "Musk"), so fragmented references are unified.
+    const ids = named
+      .filter((m) => m.lower === top.lower || top.lower.includes(m.lower) || m.lower.includes(top.lower))
+      .map((m) => m.id)
+    const result = neighborsOf([...new Set(ids)], byId, adj)
+    // slice(0, n) with a negative n drops entries from the END instead of capping the
+    // list, so clamp at 0 to keep "limit" meaning "at most this many".
+    result.neighbors = result.neighbors.slice(0, Math.max(0, limit))
+    return result
+  }
   /** Shortest relation chain between two entities (undirected BFS, hub-avoiding).
    *  Answers "how are X and Y related?" - the single most useful graph query. */
   async pathBetween(a: string, b: string, userId: string, orgId: string): Promise<PathResult> {

package/src/embedded/reranker.ts CHANGED Viewed

@@ -28,7 +28,9 @@ export class CrossEncoderReranker implements Reranker {
     if (!this.loading) {
       this.loading = (async () => {
         try {
-          const t: any = await import("@huggingface/transformers")
+          // Optional dep — indirect the specifier so tsc doesn't require it to resolve.
+          const spec = "@huggingface/transformers"
+          const t: any = await import(spec as string)
           this.tokenizer = await t.AutoTokenizer.from_pretrained(this.modelId)
           this.model = await t.AutoModelForSequenceClassification.from_pretrained(this.modelId, { quantized: true })
         } catch {

package/src/mcp/backend.ts CHANGED Viewed

@@ -45,6 +45,10 @@ export interface ContextBackend {
   query(q: string, limit?: number): Promise<RetrievalResponse>
   /** KGQA: exact answer from the typed graph, or null to fall back to ranked. */
   ask?: (q: string) => Promise<ExactAnswer | null>
+  /** Typed-graph neighborhood of the entity named in a free-text query - the same edge
+   *  set as context_relate, capped at `limit` - as readable fact lines. Null when no
+   *  entity is named (or it has no neighbors). Lets get_context reach graph-query completeness for breadth recall. */
+  relatedFacts?: (q: string, limit?: number) => Promise<{ entity: string; facts: string[] } | null>
   ingest?: (doc: IngestDoc) => Promise<{ recordId: string; chunks: number; entities: number }>
   /** The accessible knowledge graph (entities + relations). Local mode only. */
   graph?: () => Promise<KnowledgeGraph>
@@ -104,6 +108,17 @@ export function resolveBackend(): ContextBackend {
     // reconcile() heals embedder/dim drift once before any vector op (ingest already does)
     query: async (q, limit) => (await ctx.reconcile(), ctx.searchWithGraph(q, ctx.userId, ctx.orgId, undefined, limit)), // vector + ACL + GraphRAG
     ask: async (q) => (await ctx.reconcile(), ctx.ask(q, ctx.userId, ctx.orgId)), // KGQA: exact answer from the typed graph
+    // Full typed neighborhood of the entity named in the query, as fact lines. This is
+    // what closes get_context's completeness gap vs context_relate for breadth recall.
+    relatedFacts: async (q, limit) => {
+      const n = await ctx.graphQuery.neighborsForQuery(q, ctx.userId, ctx.orgId, limit)
+      if (!n || n.neighbors.length === 0) return null
+      const facts = n.neighbors.map((nb) => {
+        const rel = nb.relation.replace(/_/g, " ")
+        return nb.direction === "out" ? `${n.entity} ${rel} ${nb.label}` : `${nb.label} ${rel} ${n.entity}`
+      })
+      return { entity: n.entity, facts }
+    },
     ingest: (doc) => ctx.authorizedIngest(ctx.userId, doc), // write-side authorization + ownership
     graph: async () => {
       const accessible = await ctx.graph.getAccessibleVirtualRecordIds({ userId: ctx.userId, orgId: ctx.orgId })

package/src/mcp/tools/get-context.ts CHANGED Viewed

@@ -47,17 +47,32 @@ async function handler(args: Record<string, unknown>, backend: ContextBackend):
     }
   }
-  // (2) Full ranked recall (vector + BM25 + GraphRAG), breadth-aware.
+  // (2) Graph-complete recall: when the query NAMES an entity, fold in that entity's
+  // full typed neighborhood (every relation, like context_relate). Ranked retrieval is
+  // inherently lossy (topk-capped, similarity-ordered), so it misses graph neighbors that
+  // aren't lexically/semantically close to the query — this is what made breadth recall
+  // ("everything about X") top out at ~73%. The typed graph is complete, so adding it
+  // closes the gap. Gated to breadth queries or when KGQA found no precise answer, so a
+  // narrow factual question stays focused. (For an exhaustive map, context_graph remains.)
+  let graphFacts = ""
+  if (backend.relatedFacts && (BREADTH.test(query) || !highlight)) {
+    const rel = await backend.relatedFacts(query, limit && limit > 0 ? limit : 40)
+    if (rel && rel.facts.length) {
+      const body = rel.facts.map((f) => `• ${sanitizeText(f)}`).join("\n")
+      graphFacts = `Related facts about ${sanitizeText(rel.entity)} (from the knowledge graph):\n${body}`
+    }
+  }
+  // (3) Full ranked recall (vector + BM25 + GraphRAG), breadth-aware.
   const res = await backend.query(query, limit)
   const recalled =
     res.status === RetrievalStatus.SUCCESS && res.searchResults.length
       ? renderRecalled(res.searchResults.map((r) => ({ content: r.content, source: r.metadata.recordName ?? "untitled" })))
       : ""
+  const sections = [highlight, graphFacts, recalled].filter(Boolean)
   let text: string
-  if (highlight && recalled) text = `${highlight}\n\n---\n\n${recalled}`
-  else if (highlight) text = highlight
-  else if (recalled) text = recalled
+  if (sections.length) text = sections.join("\n\n---\n\n")
   else
     text =
       res.status === RetrievalStatus.ACCESSIBLE_RECORDS_NOT_FOUND