@100xprompt/chitta 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -100,6 +100,12 @@ bunx @100xprompt/chitta install --print # just print the MCP config to p
100
100
  Options: `--project` (write project-scoped config instead of global) · `--user-id <id> --org-id <id>`
101
101
  (bake identity into the config) · `--list` (show all tools) · `uninstall`.
102
102
 
103
+ **Optional extras** (kept out of the default install so `bunx` stays lightweight — the core
104
+ runs great with the built-in fast hashing embedder):
105
+ - Real semantic embeddings: `bun add @huggingface/transformers` then set `CONTEXT_EMBEDDINGS=real`
106
+ (the default `auto` already uses them when present, else falls back to hashing).
107
+ - Encryption at rest: `bun add libsql`, then set `CONTEXT_DB_KEY=<key>` (transparent, whole-file AES encryption).
108
+
103
109
  **Supported tools (15):** Claude Code, Claude Desktop, Cursor, VS Code (Copilot), Windsurf,
104
110
  Zed, Cline, Roo Code, Codex CLI, Gemini CLI, opencode, Kiro, Amp, Factory Droid, Kilo Code.
105
111
  Skill (not just MCP) is installed for the ones that support it (Claude Code, Cursor, Gemini,
package/package.json CHANGED
@@ -1,15 +1,23 @@
1
1
  {
2
2
  "name": "@100xprompt/chitta",
3
- "version": "0.1.2",
4
- "description": "Chitta - permission-aware memory for AI agents: a knowledge-graph + vector memory MCP server with per-user access control. Runs on Bun. By 100xprompt.",
5
- "type": "module",
6
- "license": "MIT",
7
- "engines": {
8
- "bun": ">=1.0.0"
3
+ "version": "0.1.4",
4
+ "dependencies": {
5
+ "@modelcontextprotocol/sdk": "^1.0.0",
6
+ "sqlite-vec": "^0.1.9",
7
+ "tree-sitter-wasms": "^0.1.13",
8
+ "web-tree-sitter": "0.24.7"
9
+ },
10
+ "devDependencies": {
11
+ "@types/bun": "latest",
12
+ "typescript": "^5.6.0"
9
13
  },
10
14
  "bin": {
11
15
  "chitta": "./src/bin.ts"
12
16
  },
17
+ "description": "Chitta - permission-aware memory for AI agents: a knowledge-graph + vector memory MCP server with per-user access control. Runs on Bun. By 100xprompt.",
18
+ "engines": {
19
+ "bun": ">=1.0.0"
20
+ },
13
21
  "files": [
14
22
  "src",
15
23
  "assets",
@@ -31,6 +39,7 @@
31
39
  "access-control",
32
40
  "ai-agents"
33
41
  ],
42
+ "license": "MIT",
34
43
  "publishConfig": {
35
44
  "access": "public"
36
45
  },
@@ -42,18 +51,5 @@
42
51
  "install:tools": "bun run src/bin.ts install",
43
52
  "cli": "bun run src/embedded/cli.ts"
44
53
  },
45
- "dependencies": {
46
- "@modelcontextprotocol/sdk": "^1.0.0",
47
- "sqlite-vec": "^0.1.9",
48
- "tree-sitter-wasms": "^0.1.13",
49
- "web-tree-sitter": "0.24.7"
50
- },
51
- "optionalDependencies": {
52
- "@huggingface/transformers": "^4.2.0",
53
- "libsql": "^0.5.29"
54
- },
55
- "devDependencies": {
56
- "@types/bun": "latest",
57
- "typescript": "^5.6.0"
58
- }
54
+ "type": "module"
59
55
  }
@@ -58,6 +58,35 @@ export class GraphQueryService {
58
58
  return neighborsOf(ids, byId, adj, relation)
59
59
  }
60
60
 
61
+ /** Entity-centric recall for a FREE-TEXT query ("everything about Elon Musk"):
62
+ * resolve the entity NAMED in the question (its label occurs in the text), anchor on
63
+ * the most specific match plus its aliases, and return the full typed neighborhood -
64
+ * the same 100%-complete edge set as neighbors(), but driven by natural language
65
+ * instead of an exact entity name. Powers get_context's graph-facts section, which is
66
+ * what closes the gap between ranked retrieval (lossy) and the typed graph (complete). */
67
+ async neighborsForQuery(query: string, userId: string, orgId: string, limit = 40): Promise<NeighborResult | null> {
68
+ const { entities, byId, adj } = await this.scope(userId, orgId)
69
+ if (entities.length === 0) return null // no entities in this user/org scope
70
+ const q = query.toLowerCase() // lower-cased once so every label test below is case-insensitive
71
+ // entities literally named in the query, most specific (longest label) first
72
+ const named = entities
73
+ .filter((e) => e.label.length >= 3 && q.includes(e.label.toLowerCase())) // labels under 3 chars are skipped; plain substring test, no word-boundary check
74
+ .sort((a, b) => b.label.length - a.label.length)
75
+ if (named.length === 0) return null // the query names no in-scope entity
76
+ // anchor on the most specific named entity + any alias/fragment of it that also
77
+ // matched (e.g. "Elon Musk" + "Musk"), so fragmented references are unified.
78
+ const topL = named[0].label.toLowerCase() // longest matched label, lower-cased
79
+ const ids = named
80
+ .filter((e) => {
81
+ const l = e.label.toLowerCase()
82
+ return l === topL || topL.includes(l) || l.includes(topL) // same label, or one label contains the other
83
+ })
84
+ .map((e) => e.id)
85
+ const result = neighborsOf([...new Set(ids)], byId, adj) // ids de-duplicated before the lookup
86
+ result.neighbors = result.neighbors.slice(0, limit) // for a positive limit, keeps only the first `limit` entries; NOTE(review): a negative limit would instead drop entries from the end
87
+ return result
88
+ }
89
+
61
90
  /** Shortest relation chain between two entities (undirected BFS, hub-avoiding).
62
91
  * Answers "how are X and Y related?" - the single most useful graph query. */
63
92
  async pathBetween(a: string, b: string, userId: string, orgId: string): Promise<PathResult> {
@@ -28,7 +28,9 @@ export class CrossEncoderReranker implements Reranker {
28
28
  if (!this.loading) {
29
29
  this.loading = (async () => {
30
30
  try {
31
- const t: any = await import("@huggingface/transformers")
31
+ // Optional dep — indirect the specifier so tsc doesn't require it to resolve.
32
+ const spec = "@huggingface/transformers"
33
+ const t: any = await import(spec as string)
32
34
  this.tokenizer = await t.AutoTokenizer.from_pretrained(this.modelId)
33
35
  this.model = await t.AutoModelForSequenceClassification.from_pretrained(this.modelId, { quantized: true })
34
36
  } catch {
@@ -45,6 +45,10 @@ export interface ContextBackend {
45
45
  query(q: string, limit?: number): Promise<RetrievalResponse>
46
46
  /** KGQA: exact answer from the typed graph, or null to fall back to ranked. */
47
47
  ask?: (q: string) => Promise<ExactAnswer | null>
48
+ /** Full typed-graph neighborhood of the entity named in a free-text query - the
49
+ * complete edge set (same as context_relate), as readable fact lines. Null when no
50
+ * entity is named. Lets get_context reach graph-query completeness for breadth recall. */
51
+ relatedFacts?: (q: string, limit?: number) => Promise<{ entity: string; facts: string[] } | null>
48
52
  ingest?: (doc: IngestDoc) => Promise<{ recordId: string; chunks: number; entities: number }>
49
53
  /** The accessible knowledge graph (entities + relations). Local mode only. */
50
54
  graph?: () => Promise<KnowledgeGraph>
@@ -104,6 +108,17 @@ export function resolveBackend(): ContextBackend {
104
108
  // reconcile() heals embedder/dim drift once before any vector op (ingest already does)
105
109
  query: async (q, limit) => (await ctx.reconcile(), ctx.searchWithGraph(q, ctx.userId, ctx.orgId, undefined, limit)), // vector + ACL + GraphRAG
106
110
  ask: async (q) => (await ctx.reconcile(), ctx.ask(q, ctx.userId, ctx.orgId)), // KGQA: exact answer from the typed graph
111
+ // Full typed neighborhood of the entity named in the query, as fact lines. This is
112
+ // what closes get_context's completeness gap vs context_relate for breadth recall.
113
+ relatedFacts: async (q, limit) => {
114
+ const n = await ctx.graphQuery.neighborsForQuery(q, ctx.userId, ctx.orgId, limit)
115
+ if (!n || n.neighbors.length === 0) return null
116
+ const facts = n.neighbors.map((nb) => {
117
+ const rel = nb.relation.replace(/_/g, " ")
118
+ return nb.direction === "out" ? `${n.entity} ${rel} ${nb.label}` : `${nb.label} ${rel} ${n.entity}`
119
+ })
120
+ return { entity: n.entity, facts }
121
+ },
107
122
  ingest: (doc) => ctx.authorizedIngest(ctx.userId, doc), // write-side authorization + ownership
108
123
  graph: async () => {
109
124
  const accessible = await ctx.graph.getAccessibleVirtualRecordIds({ userId: ctx.userId, orgId: ctx.orgId })
@@ -47,17 +47,32 @@ async function handler(args: Record<string, unknown>, backend: ContextBackend):
47
47
  }
48
48
  }
49
49
 
50
- // (2) Full ranked recall (vector + BM25 + GraphRAG), breadth-aware.
50
+ // (2) Graph-complete recall: when the query NAMES an entity, fold in that entity's
51
+ // full typed neighborhood (every relation, like context_relate). Ranked retrieval is
52
+ // inherently lossy (topk-capped, similarity-ordered), so it misses graph neighbors that
53
+ // aren't lexically/semantically close to the query — this is what made breadth recall
54
+ // ("everything about X") top out at ~73%. The typed graph is complete, so adding it
55
+ // closes the gap. Gated to breadth queries or when KGQA found no precise answer, so a
56
+ // narrow factual question stays focused. (For an exhaustive map, context_graph remains.)
57
+ let graphFacts = ""
58
+ if (backend.relatedFacts && (BREADTH.test(query) || !highlight)) {
59
+ const rel = await backend.relatedFacts(query, limit && limit > 0 ? limit : 40)
60
+ if (rel && rel.facts.length) {
61
+ const body = rel.facts.map((f) => `• ${sanitizeText(f)}`).join("\n")
62
+ graphFacts = `Related facts about ${sanitizeText(rel.entity)} (from the knowledge graph):\n${body}`
63
+ }
64
+ }
65
+
66
+ // (3) Full ranked recall (vector + BM25 + GraphRAG), breadth-aware.
51
67
  const res = await backend.query(query, limit)
52
68
  const recalled =
53
69
  res.status === RetrievalStatus.SUCCESS && res.searchResults.length
54
70
  ? renderRecalled(res.searchResults.map((r) => ({ content: r.content, source: r.metadata.recordName ?? "untitled" })))
55
71
  : ""
56
72
 
73
+ const sections = [highlight, graphFacts, recalled].filter(Boolean)
57
74
  let text: string
58
- if (highlight && recalled) text = `${highlight}\n\n---\n\n${recalled}`
59
- else if (highlight) text = highlight
60
- else if (recalled) text = recalled
75
+ if (sections.length) text = sections.join("\n\n---\n\n")
61
76
  else
62
77
  text =
63
78
  res.status === RetrievalStatus.ACCESSIBLE_RECORDS_NOT_FOUND