@100xprompt/chitta 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +203 -0
  3. package/assets/rules/claude-md.md +9 -0
  4. package/assets/skill/SKILL.md +47 -0
  5. package/package.json +48 -0
  6. package/src/README.md +124 -0
  7. package/src/arango-client.ts +67 -0
  8. package/src/arango-graph-provider.ts +364 -0
  9. package/src/bin.ts +27 -0
  10. package/src/config-env.ts +53 -0
  11. package/src/embedded/authorizer.ts +89 -0
  12. package/src/embedded/cli.ts +86 -0
  13. package/src/embedded/code-extractor.ts +9 -0
  14. package/src/embedded/demo.ts +36 -0
  15. package/src/embedded/extract.ts +12 -0
  16. package/src/embedded/extractors/code.ts +308 -0
  17. package/src/embedded/extractors/deterministic.ts +63 -0
  18. package/src/embedded/extractors/llm.ts +151 -0
  19. package/src/embedded/extractors/text-hygiene.ts +54 -0
  20. package/src/embedded/extractors/types.ts +34 -0
  21. package/src/embedded/graph/acl-paths.ts +96 -0
  22. package/src/embedded/graph/adjacency.ts +61 -0
  23. package/src/embedded/graph/centrality.ts +23 -0
  24. package/src/embedded/graph/communities.ts +46 -0
  25. package/src/embedded/graph/cypher.ts +17 -0
  26. package/src/embedded/graph/impact.ts +24 -0
  27. package/src/embedded/graph/knowledge-graph.ts +108 -0
  28. package/src/embedded/graph/pagerank.ts +57 -0
  29. package/src/embedded/graph/sql-access.ts +13 -0
  30. package/src/embedded/graph/traversal.ts +73 -0
  31. package/src/embedded/graph/types.ts +35 -0
  32. package/src/embedded/graph-query.ts +126 -0
  33. package/src/embedded/index.ts +171 -0
  34. package/src/embedded/ingest.ts +262 -0
  35. package/src/embedded/kgqa/answer-paths.ts +197 -0
  36. package/src/embedded/kgqa/entity-link.ts +13 -0
  37. package/src/embedded/kgqa/intent.ts +14 -0
  38. package/src/embedded/kgqa/predicates.ts +9 -0
  39. package/src/embedded/kgqa/preference.ts +20 -0
  40. package/src/embedded/kgqa/select.ts +99 -0
  41. package/src/embedded/kgqa/text.ts +16 -0
  42. package/src/embedded/kgqa/types.ts +6 -0
  43. package/src/embedded/kgqa-service.ts +122 -0
  44. package/src/embedded/llm-extractor.ts +10 -0
  45. package/src/embedded/local-embeddings.ts +36 -0
  46. package/src/embedded/personal.ts +100 -0
  47. package/src/embedded/reranker.ts +62 -0
  48. package/src/embedded/retrieval/decay-stage.ts +59 -0
  49. package/src/embedded/retrieval/diversity.ts +37 -0
  50. package/src/embedded/retrieval/fuse.ts +52 -0
  51. package/src/embedded/retrieval/graph-stage.ts +45 -0
  52. package/src/embedded/retrieval/hybrid-retriever.ts +80 -0
  53. package/src/embedded/retrieval/keyword-stage.ts +27 -0
  54. package/src/embedded/retrieval/passage.ts +44 -0
  55. package/src/embedded/retrieval/rerank-stage.ts +31 -0
  56. package/src/embedded/retrieval/trace.ts +31 -0
  57. package/src/embedded/retrieval/vector-stage.ts +15 -0
  58. package/src/embedded/sqlite-graph-provider.ts +119 -0
  59. package/src/embedded/sqlite-store.ts +95 -0
  60. package/src/embedded/sqlite-vec-service.ts +122 -0
  61. package/src/embedded/store/chunks.ts +61 -0
  62. package/src/embedded/store/fts.ts +50 -0
  63. package/src/embedded/store/nodes-edges.ts +112 -0
  64. package/src/embedded/store/salience.ts +37 -0
  65. package/src/embedded/store/schema.ts +109 -0
  66. package/src/embedded/transformers-embeddings.ts +100 -0
  67. package/src/embeddings.ts +51 -0
  68. package/src/eval/goldset.ts +46 -0
  69. package/src/eval/harness.ts +65 -0
  70. package/src/eval/metrics.ts +38 -0
  71. package/src/http/server.ts +93 -0
  72. package/src/index.ts +44 -0
  73. package/src/install/index.ts +139 -0
  74. package/src/install/platforms.ts +126 -0
  75. package/src/install/skill.ts +46 -0
  76. package/src/install/writers.ts +82 -0
  77. package/src/mcp/backend.ts +129 -0
  78. package/src/mcp/server.ts +83 -0
  79. package/src/mcp/tools/context-about.ts +69 -0
  80. package/src/mcp/tools/context-graph.ts +23 -0
  81. package/src/mcp/tools/context-ingest.ts +88 -0
  82. package/src/mcp/tools/context-rebuild.ts +22 -0
  83. package/src/mcp/tools/context-relate.ts +88 -0
  84. package/src/mcp/tools/get-context.ts +52 -0
  85. package/src/mcp/tools/index.ts +40 -0
  86. package/src/mcp/tools/types.ts +33 -0
  87. package/src/permission.ts +72 -0
  88. package/src/provider.ts +65 -0
  89. package/src/qdrant-vector.ts +76 -0
  90. package/src/retrieval.ts +218 -0
  91. package/src/service.ts +40 -0
  92. package/src/types.ts +91 -0
@@ -0,0 +1,364 @@
1
+ // Ported from PipesHub `services/graph_db/arango/arango_http_provider.py`
2
+ // (get_accessible_virtual_record_ids + _get_virtual_ids_for_connector +
3
+ // _get_kb_virtual_ids + _get_user_app_ids).
4
+ //
5
+ // This is the moat. The AQL is preserved verbatim from the source - the eight
6
+ // permission paths (direct / group×2 / org×2 / record-group inheritance×2 /
7
+ // anyone) and the two KB paths (direct / team). Do not "simplify" a path without
8
+ // understanding which access route it represents; each one is a way a user can
9
+ // legitimately reach a record, and dropping one silently denies access while
10
+ // loosening one silently leaks data.
11
+
12
+ import type { ArangoClient, GraphProvider } from "./provider"
13
+ import type { AccessibleMap, MetadataFilters, RecordDoc, RetrievalFilters, UserDoc } from "./types"
14
+
15
+ // Arango edge/vertex collection names, matching the source schema.
16
+ const C = { // collection names used by the queries in this file
17
+ USERS: "users", // document collection; bound as @@users to look a user up by userId
18
+ RECORDS: "records", // document collection; bound as @@records
19
+ ANYONE: "anyone", // document collection; bound as @@anyone, matched on organization + file_key
20
+ PERMISSION: "permissions", // edge collection traversed from users, groups, orgs and teams
21
+ BELONGS_TO: "belongsTo", // edge collection traversed user -> group/org and kb -> record
22
+ INHERIT_PERMISSIONS: "inheritPermissions", // edge collection traversed INBOUND from a record group
23
+ BELONGS_TO_DEPARTMENT: "belongsToDepartment", // facet edge, traversed OUTBOUND from a record
24
+ BELONGS_TO_CATEGORY: "belongsToCategory", // facet edge; also used for the three subcategory facets
25
+ BELONGS_TO_LANGUAGE: "belongsToLanguage", // facet edge, traversed OUTBOUND from a record
26
+ BELONGS_TO_TOPIC: "belongsToTopic", // facet edge, traversed OUTBOUND from a record
27
+ } as const
28
+
29
+ const COMPLETED_STATUS = "COMPLETED" // bound as @completedStatus; every ACL path filters record.indexingStatus on it
30
+
31
// Turns the optional metadata facets into AQL FILTER fragments plus the bind vars
// they reference. Both the connector query and the KB query splice `clause` into
// each of their record subqueries, so the fragments refer to a variable named
// `record`.
//
// Returns an empty clause and no bind vars when nothing is selected. A facet with
// an undefined or empty value list contributes nothing.
function buildMetadataFilters(metadataFilters?: MetadataFilters): {
  clause: string
  bindVars: Record<string, unknown>
} {
  const bindVars: Record<string, unknown> = {}
  if (!metadataFilters) return { clause: "", bindVars }

  // One row per facet: [selected values, edge collection, attribute on the facet vertex, bind var name].
  const facets: Array<[string[] | undefined, string, string, string]> = [
    [metadataFilters.departments, C.BELONGS_TO_DEPARTMENT, "departmentName", "departmentNames"],
    [metadataFilters.categories, C.BELONGS_TO_CATEGORY, "name", "categoryNames"],
    [metadataFilters.subcategories1, C.BELONGS_TO_CATEGORY, "name", "subcat1Names"],
    [metadataFilters.subcategories2, C.BELONGS_TO_CATEGORY, "name", "subcat2Names"],
    [metadataFilters.subcategories3, C.BELONGS_TO_CATEGORY, "name", "subcat3Names"],
    [metadataFilters.languages, C.BELONGS_TO_LANGUAGE, "name", "languageNames"],
    [metadataFilters.topics, C.BELONGS_TO_TOPIC, "name", "topicNames"],
  ]

  const fragments: string[] = []
  for (const [values, edge, field, bindName] of facets) {
    if (!values || values.length === 0) continue
    // The record passes when at least one outbound facet vertex matches a selected value.
    fragments.push(`
      FILTER LENGTH(
        FOR x IN OUTBOUND record._id ${edge}
          FILTER x.${field} IN @${bindName}
          LIMIT 1
          RETURN 1
      ) > 0`)
    bindVars[bindName] = values
  }

  return { clause: fragments.join("\n"), bindVars }
}
66
+
67
// Folds query rows of {virtualRecordId, recordId} into a virtualRecordId -> recordId
// map. Rows that are falsy or lack either id are skipped; a later row overwrites an
// earlier one with the same virtualRecordId. A null/undefined `rows` yields {}.
function rowsToMap(rows: any[]): AccessibleMap {
  const accessible: AccessibleMap = {}
  if (rows == null) return accessible

  for (const row of rows) {
    if (!row) continue
    const virtualId = row.virtualRecordId
    if (!virtualId) continue
    const recordId = row.recordId
    if (recordId) accessible[virtualId] = recordId
  }
  return accessible
}
74
+
75
/**
 * GraphProvider backed by ArangoDB. Resolves which records a user may read by
 * running the connector and KB permission queries through the injected client.
 *
 * The AQL text in the two ACL methods is kept exactly as written; only the
 * TypeScript around it is organised.
 */
export class ArangoGraphProvider implements GraphProvider {
  constructor(
    private readonly client: ArangoClient,
    private readonly log: { error: (m: string, ...a: unknown[]) => void; debug?: (m: string) => void } = {
      error: () => {},
    },
  ) {}

  /** Looks up the user document whose `userId` field matches; null when none is found. */
  async getUserByUserId(userId: string): Promise<UserDoc | null> {
    const aql = `FOR user IN @@users FILTER user.userId == @userId LIMIT 1 RETURN user`
    const matches = await this.client.executeAql(aql, { userId, "@users": C.USERS })
    return matches?.[0] ?? null
  }

  /**
   * Apps reachable from the user over one permission edge in either direction.
   * Kept as a seam - wire to the source's get_user_apps traversal. An empty result
   * means "connectors contribute nothing"; KB paths still run.
   */
  async getUserApps(userKey: string): Promise<Array<{ _key?: string; id?: string }>> {
    const startVertex = `${C.USERS}/${userKey}`
    const apps = await this.client.executeAql(
      `FOR app IN 1..1 ANY @userKey @@permission
        FILTER IS_SAME_COLLECTION("apps", app)
        RETURN app`,
      { userKey: startVertex, "@permission": C.PERMISSION },
    )
    return apps ?? []
  }

  /** Keys (falling back to `id`) of the user's apps; [] when the user or their key is unknown. */
  private async getUserAppIds(userId: string): Promise<string[]> {
    const user = await this.getUserByUserId(userId)
    const userKey = user ? (user._key ?? user.id) : undefined
    if (!userKey) return []

    const appIds: string[] = []
    for (const app of await this.getUserApps(userKey)) {
      const appId = app._key ?? app.id
      if (appId) appIds.push(appId)
    }
    return appIds
  }

  // --- THE moat orchestration: union of connector paths + KB path, deduped. ---
  /**
   * Builds the virtualRecordId -> recordId map of everything the user can read.
   *
   * Which queries run depends on the `apps` / `kb` filters:
   *   - apps + kb : the user's connectors that are in `apps`, then the listed KBs
   *   - kb only   : the listed KBs
   *   - neither   : all of the user's connectors, then all KBs
   *   - apps only : the user's connectors that are in `apps`
   * App ids starting with "knowledgeBase_" are never queried as connectors.
   * The remaining filter fields are passed on as metadata facets.
   *
   * Any failure is logged and produces an empty (deny-all) result.
   */
  async getAccessibleVirtualRecordIds(args: {
    userId: string
    orgId: string
    filters?: RetrievalFilters
  }): Promise<AccessibleMap> {
    const { userId, orgId } = args
    const filters = args.filters ?? {}
    try {
      const userAppIds = await this.getUserAppIds(userId)
      const { kb: _kb, apps: _apps, ...metadataFilters } = filters
      const kbIds = filters.kb
      const connectorIdsFilter = filters.apps

      const hasKbFilter = Boolean(kbIds && kbIds.length)
      const hasAppFilter = Boolean(connectorIdsFilter && connectorIdsFilter.length)

      // Connector queries are skipped only for "kb filter without app filter";
      // the KB query is skipped only for "app filter without kb filter".
      const runConnectors = hasAppFilter || !hasKbFilter
      const runKb = hasKbFilter || !hasAppFilter

      const pending: Promise<AccessibleMap>[] = []
      if (runConnectors) {
        const candidates = hasAppFilter
          ? userAppIds.filter((appId) => connectorIdsFilter!.includes(appId))
          : userAppIds
        for (const cid of candidates) {
          if (cid.startsWith("knowledgeBase_")) continue
          pending.push(this.getVirtualIdsForConnector(userId, orgId, cid, metadataFilters))
        }
      }
      // Queued after the connectors so that connector results win the dedup below.
      if (runKb) {
        pending.push(this.getKbVirtualIds(userId, orgId, hasKbFilter ? kbIds : undefined, metadataFilters))
      }

      if (pending.length === 0) return {}

      const outcomes = await Promise.allSettled(pending)
      const merged: AccessibleMap = {}
      for (const outcome of outcomes) {
        if (outcome.status === "rejected") {
          this.log.error(`accessible-ids task failed: ${String(outcome.reason)}`)
          continue
        }
        // First writer wins per virtualRecordId - mirrors the source's dedup so a
        // record reachable via several paths resolves to a single recordId.
        for (const vid of Object.keys(outcome.value)) {
          if (vid in merged) continue
          merged[vid] = outcome.value[vid]
        }
      }
      return merged
    } catch (e) {
      this.log.error(`getAccessibleVirtualRecordIds failed: ${String(e)}`)
      return {}
    }
  }

  // The eight permission paths for one connector.
  /**
   * Runs the eight-path ACL query for one connector and returns the deduplicated
   * virtualRecordId -> recordId map. Only records with the completed indexing
   * status and the given connectorId qualify. A query failure is logged and
   * yields {}.
   */
  private async getVirtualIdsForConnector(
    userId: string,
    orgId: string,
    connectorId: string,
    metadataFilters?: MetadataFilters,
  ): Promise<AccessibleMap> {
    const { clause, bindVars: mdBind } = buildMetadataFilters(metadataFilters)
    const aql = `
      LET userDoc = FIRST(FOR user IN @@users FILTER user.userId == @userId RETURN user)

      LET directRecords = (
        FOR record IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET groupRecords = (
        FOR group IN 1..1 ANY userDoc._id ${C.BELONGS_TO}
        FOR record IN 1..1 ANY group._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET groupRecordsPermissionEdge = (
        FOR group IN 1..1 ANY userDoc._id ${C.PERMISSION}
        FOR record IN 1..1 ANY group._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET orgRecords = (
        FOR org IN 1..1 ANY userDoc._id ${C.BELONGS_TO}
        FOR record IN 1..1 ANY org._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET orgRecordGroupRecords = (
        FOR org IN 1..1 ANY userDoc._id ${C.BELONGS_TO}
        FOR recordGroup IN 1..1 ANY org._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("recordGroups", recordGroup)
        FOR record IN 0..2 INBOUND recordGroup._id ${C.INHERIT_PERMISSIONS}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET recordGroupRecords = (
        FOR group IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("groups", group) OR IS_SAME_COLLECTION("roles", group)
        FOR recordGroup IN 1..1 ANY group._id ${C.PERMISSION}
        FOR record IN 0..5 INBOUND recordGroup._id ${C.INHERIT_PERMISSIONS}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET inheritedRecordGroupRecords = (
        FOR recordGroup IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("recordGroups", recordGroup)
        FOR record IN 0..5 INBOUND recordGroup._id ${C.INHERIT_PERMISSIONS}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET anyoneRecords = (
        FOR anyone IN @@anyone
          FILTER anyone.organization == @orgId
        FOR record IN @@records
          FILTER record._key == anyone.file_key
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET allPairs = UNION(
        directRecords, groupRecords, groupRecordsPermissionEdge,
        orgRecords, orgRecordGroupRecords, recordGroupRecords,
        inheritedRecordGroupRecords, anyoneRecords
      )
      FOR pair IN allPairs
        FILTER pair != null AND pair.virtualRecordId != null AND pair.recordId != null
        COLLECT virtualRecordId = pair.virtualRecordId INTO groups
        LET recordId = FIRST(groups).pair.recordId
        FILTER recordId != null
        RETURN {virtualRecordId: virtualRecordId, recordId: recordId}`

    const bindVars = {
      userId,
      orgId,
      connectorId,
      completedStatus: COMPLETED_STATUS,
      "@users": C.USERS,
      "@records": C.RECORDS,
      "@anyone": C.ANYONE,
      ...mdBind,
    }

    try {
      return rowsToMap(await this.client.executeAql(aql, bindVars))
    } catch (e) {
      this.log.error(`connector ${connectorId} acl query failed: ${String(e)}`)
      return {}
    }
  }

  // KB (RecordGroup) paths: direct membership + team membership.
  /**
   * Runs the two-path KB query (direct permission edge, and user -> team -> KB)
   * over records with origin "UPLOAD" and the completed indexing status. When
   * `kbIds` is non-empty the KBs are restricted to those keys. The org id is
   * accepted for signature parity but not used by the query. A query failure is
   * logged and yields {}.
   */
  private async getKbVirtualIds(
    userId: string,
    _orgId: string,
    kbIds?: string[],
    metadataFilters?: MetadataFilters,
  ): Promise<AccessibleMap> {
    const { clause, bindVars: mdBind } = buildMetadataFilters(metadataFilters)
    const restrictToKbs = Boolean(kbIds && kbIds.length)
    const kbFilter = restrictToKbs ? "FILTER kb._key IN @kb_ids" : ""
    const aql = `
      LET userDoc = FIRST(FOR user IN @@users FILTER user.userId == @userId RETURN user)

      LET directKbRecords = (
        FOR kb IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("recordGroups", kb)
          ${kbFilter}
        FOR record IN 1..1 ANY kb._id ${C.BELONGS_TO}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.origin == "UPLOAD"
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET teamKbRecords = (
        FOR team, userTeamEdge IN 1..1 OUTBOUND userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("teams", team)
          FILTER userTeamEdge.type == "USER"
        FOR kb, teamKbEdge IN 1..1 OUTBOUND team._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("recordGroups", kb)
          FILTER teamKbEdge.type == "TEAM"
          ${kbFilter}
        FOR record IN 1..1 ANY kb._id ${C.BELONGS_TO}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.origin == "UPLOAD"
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET allKbPairs = UNION(directKbRecords, teamKbRecords)
      FOR pair IN allKbPairs
        FILTER pair != null AND pair.virtualRecordId != null AND pair.recordId != null
        COLLECT virtualRecordId = pair.virtualRecordId INTO groups
        LET recordId = FIRST(groups).pair.recordId
        FILTER recordId != null
        RETURN {virtualRecordId: virtualRecordId, recordId: recordId}`

    try {
      const bindVars: Record<string, unknown> = {
        userId,
        completedStatus: COMPLETED_STATUS,
        "@users": C.USERS,
        ...mdBind,
      }
      // Bind @kb_ids only when the query text actually references it.
      if (restrictToKbs) bindVars.kb_ids = kbIds
      return rowsToMap(await this.client.executeAql(aql, bindVars))
    } catch (e) {
      this.log.error(`kb acl query failed: ${String(e)}`)
      return {}
    }
  }

  /** Fetches the record documents with the given keys that also belong to `orgId`. */
  async getRecordsByRecordIds(recordIds: string[], orgId: string): Promise<RecordDoc[]> {
    if (recordIds.length === 0) return []
    const aql = `FOR record IN @@records FILTER record._key IN @recordIds AND record.orgId == @orgId RETURN record`
    const records = await this.client.executeAql(aql, { "@records": C.RECORDS, recordIds, orgId })
    return records ?? []
  }

  /** Fetches one document by `_key` from the named collection; null when absent. */
  async getDocument(recordId: string, collection: string): Promise<RecordDoc | null> {
    const aql = `FOR d IN @@col FILTER d._key == @recordId LIMIT 1 RETURN d`
    const found = await this.client.executeAql(aql, { "@col": collection, recordId })
    return found?.[0] ?? null
  }
}
package/src/bin.ts ADDED
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env bun
2
+ // Chitta entrypoint dispatcher. One binary, three modes:
3
+ // chitta → MCP stdio server (what MCP clients launch; the default)
4
+ // chitta install [...] → wire Chitta into AI tools (MCP config + Skill)
5
+ // chitta ingest|query|... → the embedded CLI
6
+ // Sub-entrypoints self-run on import, so we route by dynamic import (no eager start).
7
+ export {} // module marker (enables top-level await)
8
+
9
+ // Arg layout differs by how we're launched:
10
+ // `bun run src/bin.ts <args>` → argv = [bun, /path/bin.ts, ...args] (args at index 2)
11
+ // compiled `./chitta <args>` → argv = [/path/chitta, ...args] (args at index 1)
12
+ // (the Node shim execs the compiled binary, so it hits the compiled layout too)
13
+ // Detect via whether argv[1] is a script file, then NORMALIZE process.argv to the canonical
14
+ // [exec, "chitta", ...args] so every downstream module's process.argv.slice(2) is correct.
15
// A script launch puts the entry file at argv[1]; otherwise user args start there.
const entryArg = process.argv[1]
const launchedFromScript = entryArg ? /\.(ts|js|mjs|cjs)$/.test(entryArg) : false
const userArgs = process.argv.slice(launchedFromScript ? 2 : 1)

// Rewrite argv to [exec, "chitta", ...args] so downstream modules can rely on slice(2).
process.argv = [process.argv[0], "chitta", ...userArgs]

const cmd = userArgs[0]
const wantsMcpServer = !cmd || cmd.startsWith("-")
const wantsInstaller = cmd === "install" || cmd === "uninstall"

// Each sub-entrypoint runs on import, so exactly one of them is loaded.
if (wantsMcpServer) {
  await import("./mcp/server") // bare invocation (or flags) = MCP server
} else if (wantsInstaller) {
  await import("./install/index")
} else {
  await import("./embedded/cli") // ingest | query | user-add | ...
}
@@ -0,0 +1,53 @@
1
+ // Env-based config loader for the context layer. Dep-free so it stays testable.
2
+ // v0 source of truth; later this can be superseded by the CLI config + per-session
3
+ // identity (packages/identity) without touching the moat.
4
+
5
+ import type { ContextConfig } from "./service"
6
+
7
+ export interface ContextIdentity { // the asking user, as returned by loadContextIdentityFromEnv
8
+ userId: string // read from CONTEXT_USER_ID
9
+ orgId: string // read from CONTEXT_ORG_ID
10
+ }
11
+
12
+ type Env = Record<string, string | undefined> // env-var map; a variable that is not set reads as undefined
13
+
14
/**
 * Builds the context-layer config from an env-var map.
 *
 * Returns null when any required backend var (CONTEXT_ARANGO_URL,
 * CONTEXT_QDRANT_URL, CONTEXT_EMBED_URL, CONTEXT_COLLECTION) is missing or empty,
 * which callers treat as "feature off".
 *
 * Defaulted settings (database, dense model) fall back when the variable is unset
 * OR empty, matching how the required variables treat an empty string. Purely
 * optional settings (credentials, API key, sparse endpoint) are passed through
 * unchanged.
 */
export function loadContextConfigFromEnv(env: Env): ContextConfig | null {
  const arangoUrl = env.CONTEXT_ARANGO_URL
  const qdrantUrl = env.CONTEXT_QDRANT_URL
  const denseEndpoint = env.CONTEXT_EMBED_URL
  const collectionName = env.CONTEXT_COLLECTION
  if (!arangoUrl || !qdrantUrl || !denseEndpoint || !collectionName) return null

  return {
    arango: {
      url: arangoUrl,
      // `||` rather than `??`: an exported-but-empty variable must not produce an
      // empty database name.
      database: env.CONTEXT_ARANGO_DB || "_system",
      username: env.CONTEXT_ARANGO_USER,
      password: env.CONTEXT_ARANGO_PASSWORD,
    },
    qdrant: { url: qdrantUrl, apiKey: env.CONTEXT_QDRANT_API_KEY },
    embeddings: {
      denseEndpoint,
      // Same reasoning as the database name: empty means "use the default model".
      denseModel: env.CONTEXT_EMBED_MODEL || "BAAI/bge-small-en-v1.5",
      sparseEndpoint: env.CONTEXT_SPARSE_URL,
    },
    collectionName,
  }
}
38
+
39
/**
 * Reads the asking user's identity from CONTEXT_USER_ID and CONTEXT_ORG_ID.
 * Returns null unless both are set to a non-empty value. v0 reads env;
 * production wires this to packages/identity.
 */
export function loadContextIdentityFromEnv(env: Env): ContextIdentity | null {
  const { CONTEXT_USER_ID: userId, CONTEXT_ORG_ID: orgId } = env
  return userId && orgId ? { userId, orgId } : null
}
46
+
47
+ /** Names of the env vars, for help text / diagnostics. */
48
+ export const REQUIRED_CONTEXT_ENV = [ // the four vars loadContextConfigFromEnv needs to return a config
49
+ "CONTEXT_ARANGO_URL", // becomes arango.url
50
+ "CONTEXT_QDRANT_URL", // becomes qdrant.url
51
+ "CONTEXT_EMBED_URL", // becomes embeddings.denseEndpoint
52
+ "CONTEXT_COLLECTION", // becomes collectionName
53
+ ] as const
@@ -0,0 +1,89 @@
1
+ // Write-side access control - the mutation counterpart to the read ACL. Answers
2
+ // "who can CREATE / MODIFY / DELETE / SHARE what", which read-filtering doesn't.
3
+ //
4
+ // Roles (per user): admin (full), editor (create + manage own), viewer (read only).
5
+ // Ownership: the creator owns a record; only owner or admin may modify/delete it.
6
+ // Grant validation: a non-admin can only share within their own org/groups - they
7
+ // cannot grant access to principals or orgs outside their scope (no over-sharing).
8
+
9
+ import type { SqliteStore } from "./sqlite-store"
10
+
11
+ export type Role = "admin" | "editor" | "viewer" // per-user role stored in the user node's JSON data; "viewer" when absent
12
+
13
/**
 * Thrown by the write-side checks when a user is not allowed to perform a
 * create, modify/delete, or grant operation. The message says what was refused.
 */
export class AuthorizationError extends Error {
  constructor(message: string) {
    super(message)
    // Set the name explicitly so logs and `err.name` checks can tell this apart
    // from a plain Error.
    this.name = "AuthorizationError"
  }
}
19
+
20
/**
 * Write-side authorization backed by the embedded store's `nodes` and `edges`
 * tables.
 *
 * Every lookup fails closed: a missing user, unparseable node data, or an
 * unrecognised role value is treated as the least-privileged case ("viewer",
 * no owner) rather than as a grant or an unrelated exception.
 */
export class Authorizer {
  constructor(private readonly store: SqliteStore) {}

  /**
   * Runs a single-row `SELECT data ...` lookup and parses the JSON `data` column.
   * Returns null when there is no row, the JSON is malformed, or the payload is
   * not an object, so callers never have to deal with a parse error.
   */
  private nodeData(sql: string, id: string): Record<string, unknown> | null {
    const row = this.store.db.query(sql).get(id) as { data: string } | null | undefined
    if (!row) return null
    try {
      const parsed: unknown = JSON.parse(row.data)
      return parsed !== null && typeof parsed === "object" ? (parsed as Record<string, unknown>) : null
    } catch {
      return null
    }
  }

  /**
   * Role of the given user. Only the exact values "admin" and "editor" elevate;
   * anything else (no user, no role, a typo such as "Viewer" or "guest") is
   * "viewer". Without this check an unknown role string would pass
   * `canCreate`, which only tests for `!== "viewer"`.
   */
  roleOf(userId: string): Role {
    const role = this.nodeData("SELECT data FROM nodes WHERE id = ? AND coll = 'users' LIMIT 1", userId)?.role
    return role === "admin" || role === "editor" ? role : "viewer"
  }

  /** `ownerId` stored on the record node, or null when the record or the field is missing. */
  ownerOf(recordId: string): string | null {
    const ownerId = this.nodeData("SELECT data FROM nodes WHERE id = ? AND coll = 'records' LIMIT 1", recordId)?.ownerId
    return typeof ownerId === "string" ? ownerId : null
  }

  /** May this user create new records? (editor or admin) */
  canCreate(userId: string): boolean {
    return this.roleOf(userId) !== "viewer"
  }

  /** May this user modify/delete this record? (admin, or the record's owner) */
  canModify(userId: string, recordId: string): boolean {
    return this.roleOf(userId) === "admin" || this.ownerOf(recordId) === userId
  }

  /** Targets of the user's outgoing `belongsTo` edges. */
  private memberships(userId: string): Set<string> {
    const rows = this.store.db.query("SELECT dst FROM edges WHERE src = ? AND label = 'belongsTo'").all(userId) as Array<{
      dst: string
    }>
    return new Set(rows.map((r) => r.dst))
  }

  /** True when a `belongsTo` edge runs from the principal to the org. */
  private belongsToOrg(principal: string, orgId: string): boolean {
    return !!this.store.db
      .query("SELECT 1 FROM edges WHERE src = ? AND dst = ? AND label = 'belongsTo' LIMIT 1")
      .get(principal, orgId)
  }

  /**
   * A non-admin may only grant to themselves, their own groups/teams, principals
   * in the same org, or share-with-own-org. Admins are not restricted.
   *
   * @throws AuthorizationError on the first out-of-scope org or principal.
   */
  assertCanGrant(userId: string, orgId: string, principals: string[], shareWithOrg?: string): void {
    if (this.roleOf(userId) === "admin") return
    if (shareWithOrg && shareWithOrg !== orgId) {
      throw new AuthorizationError(`cannot share to org '${shareWithOrg}' - outside your org`)
    }
    const mine = this.memberships(userId)
    for (const p of principals) {
      const ok = p === userId || p === orgId || mine.has(p) || this.belongsToOrg(p, orgId)
      if (!ok) throw new AuthorizationError(`cannot grant access to '${p}' - outside your scope`)
    }
  }

  /**
   * Throws unless the user may create with the requested sharing.
   *
   * @throws AuthorizationError when the user is a viewer or a grant is out of scope.
   */
  assertCanCreate(userId: string, orgId: string, principals: string[], shareWithOrg?: string): void {
    if (!this.canCreate(userId)) {
      throw new AuthorizationError(`user '${userId}' (role: ${this.roleOf(userId)}) is not permitted to create records`)
    }
    this.assertCanGrant(userId, orgId, principals, shareWithOrg)
  }

  /**
   * Throws unless the user is an admin or owns the record.
   *
   * @throws AuthorizationError naming the record's owner ("none" when it has none).
   */
  assertCanModify(userId: string, recordId: string): void {
    if (!this.canModify(userId, recordId)) {
      const owner = this.ownerOf(recordId) ?? "none"
      throw new AuthorizationError(`user '${userId}' may not modify/delete '${recordId}' (owner: ${owner})`)
    }
  }
}
@@ -0,0 +1,86 @@
1
+ // Standalone context CLI - the complete system in one runnable program.
2
+ // Persists to a real .db file, so ingest and query work across invocations.
3
+ //
4
+ // bun run cli.ts user-add alice --org org1
5
+ // bun run cli.ts ingest --id doc1 --org org1 --name "Notes" --share-user alice --text "hello world"
6
+ // bun run cli.ts query "hello" --user alice --org org1
7
+ //
8
+ // Compile to a single binary:
9
+ // bun build cli.ts --compile --outfile ctx
10
+
11
+ import { buildEmbeddedContext } from "./index"
12
+
13
// Value following the first occurrence of `flag` on the command line. Falls back
// to `fallback` when the flag is absent or is the last argument.
function arg(flag: string, fallback?: string): string | undefined {
  const { argv } = process
  const flagAt = argv.indexOf(flag)
  if (flagAt < 0) return fallback
  const valueAt = flagAt + 1
  return valueAt < argv.length ? argv[valueAt] : fallback
}
17
// True when `flag` appears anywhere on the command line.
function has(flag: string): boolean {
  return process.argv.indexOf(flag) !== -1
}
20
+
21
// Raised for bad command-line input so the top-level handler can print a short
// message instead of a stack trace.
class CliUsageError extends Error {}

/**
 * CLI entry point. Opens the embedded context at `--db` (or $CONTEXT_DB, or
 * "context.db"), runs the command named by argv[2], and always closes the store
 * afterwards - including when the command throws.
 *
 * Required operands and flags are validated up front; a missing one raises
 * CliUsageError rather than passing `undefined` into the store.
 */
async function main() {
  const cmd = process.argv[2]
  const dbPath = arg("--db", process.env.CONTEXT_DB ?? "context.db")!
  const ctx = buildEmbeddedContext({ path: dbPath })

  // The operand right after the command (argv[3]). A flag in that slot means the
  // operand was left out.
  const operand = (what: string): string => {
    const value = process.argv[3]
    if (!value || value.startsWith("--")) throw new CliUsageError(`${cmd}: missing ${what}`)
    return value
  }
  const requiredFlag = (flag: string): string => {
    const value = arg(flag)
    if (!value) throw new CliUsageError(`${cmd}: missing required ${flag} <value>`)
    return value
  }

  try {
    switch (cmd) {
      case "user-add": {
        const userId = operand("user id")
        const org = arg("--org", "org1")!
        ctx.ingestor.registerUser(userId, org, arg("--email"))
        console.log(`user '${userId}' added to org '${org}'`)
        break
      }
      case "group-add": {
        const groupId = operand("group id")
        ctx.ingestor.registerGroup(groupId)
        console.log(`group '${groupId}' added`)
        break
      }
      case "member-add": {
        const member = operand("member id")
        const group = requiredFlag("--group")
        ctx.ingestor.addMembership(member, group)
        console.log(`'${member}' added to group '${group}'`)
        break
      }
      case "ingest": {
        const file = arg("--file")
        const out = await ctx.ingestor.ingest({
          recordId: arg("--id") ?? `rec-${Date.now().toString(36)}`,
          orgId: arg("--org", "org1")!,
          recordName: arg("--name", "Untitled")!,
          // Inline --text wins over --file; with neither, the text is empty.
          text: arg("--text") ?? (file ? await Bun.file(file).text() : ""),
          permittedPrincipals: [arg("--share-user"), arg("--share-group")].filter(Boolean) as string[],
          shareWithOrg: has("--share-org") ? arg("--org", "org1") : undefined,
        })
        console.log(`ingested '${out.recordId}' (${out.chunks} chunks)`)
        break
      }
      case "query": {
        const question = operand("query text")
        const userId = requiredFlag("--user")
        const limit = Number(arg("--limit", "5"))
        if (!Number.isInteger(limit) || limit <= 0) {
          throw new CliUsageError("query: --limit must be a positive integer")
        }
        const res = await ctx.retrieval.searchWithFilters({
          queries: [question],
          userId,
          orgId: arg("--org", "org1")!,
          limit,
        })
        console.log(`status: ${res.status}`)
        for (const r of res.searchResults) {
          console.log(`  • [${r.metadata.recordName}] ${r.content.slice(0, 80)}`)
        }
        if (res.searchResults.length === 0) console.log("  (no accessible context)")
        break
      }
      case "rebuild-graph": {
        const res = await ctx.rebuildGraph()
        console.log(`rebuilt knowledge graph: ${res.records} records → ${res.entities} concept-mentions`)
        break
      }
      case "reindex-vectors": {
        const n = await ctx.reindex()
        console.log(`re-embedded ${n} chunks and rebuilt the vector index`)
        break
      }
      default:
        console.log("commands: user-add | group-add | member-add | ingest | query | rebuild-graph | reindex-vectors")
    }
  } finally {
    // Release the database handle even when the command failed.
    ctx.store.close()
  }
}

// Report failures and exit non-zero instead of leaving the rejection unhandled.
main().catch((err: unknown) => {
  if (err instanceof CliUsageError) console.error(`error: ${err.message}`)
  else console.error(err)
  process.exitCode = 1
})
@@ -0,0 +1,9 @@
1
+ // Code → graph extractor (the Graphify capability, ported TS-native). Parses source
2
+ // with tree-sitter (WASM grammars) into the SAME entity/edge shape every other
3
+ // extractor produces - so code nodes get ACL, vectors, temporal edges, and graph
4
+ // algorithms for free. STRICT SUPERSET of Graphify in one embedded store.
5
+ //
6
+ // Thin facade: the implementation lives in ./extractors/code and is re-exported here
7
+ // so existing imports keep resolving unchanged. Public API is preserved exactly.
8
+
9
+ export { CodeExtractor } from "./extractors/code"
@@ -0,0 +1,36 @@
1
+ // Single-binary demo. Compile with:
2
+ // bun build src/context/embedded/demo.ts --compile --outfile ctxdemo
3
+ // Then run ./ctxdemo - one self-contained executable, no servers, no Python.
4
+
5
+ import { buildEmbeddedContext, LocalHashEmbeddings } from "./index"
6
+
7
// Seeds an in-memory store with one org, two users, one org-public record and
// one record shared only with alice, then runs the same query as each user and
// prints which record names came back.
async function main() {
  const ctx = buildEmbeddedContext({ path: ":memory:" })
  const emb = new LocalHashEmbeddings()
  const ORG = "org1"

  // Record document as a fully indexed Slack record.
  function slackRecord(id: string, vid: string, name: string) {
    return {
      _key: id,
      virtualRecordId: vid,
      orgId: ORG,
      indexingStatus: "COMPLETED",
      origin: "CONNECTOR",
      recordName: name,
      mimeType: "text/plain",
      connectorId: "slack",
      connectorName: "slack",
    }
  }

  const { store } = ctx
  const people = ["alice", "bob"]

  // Org and users first, then their membership edges.
  store.addNode("org1", "organizations", { name: "Acme" })
  for (const person of people) {
    store.addNode(person, "users", { userId: person, email: `${person}@acme.co` })
  }
  for (const person of people) {
    store.addEdge(person, "org1", "belongsTo")
  }

  // Public record: reachable through the "anyone" entry for the org.
  store.addNode("recPub", "records", slackRecord("recPub", "vPub", "Q3 Revenue Report"))
  store.addNode("anyPub", "anyone", { organization: ORG, file_key: "recPub" })

  // Secret record: a single direct permission edge from alice.
  store.addNode("recSec", "records", slackRecord("recSec", "vSec", "Secret Merger Plans"))
  store.addEdge("alice", "recSec", "permissions")

  const pubVector = await emb.embedDense("quarterly revenue report")
  store.addChunk("p1", "vPub", ORG, "quarterly revenue report Q3", pubVector)
  const secVector = await emb.embedDense("secret merger plans globex")
  store.addChunk("p2", "vSec", ORG, "secret merger plans globex", secVector)

  for (const person of people) {
    const res = await ctx.retrieval.searchWithFilters({ queries: ["secret merger plans"], userId: person, orgId: ORG })
    const names = res.searchResults.map((hit) => hit.metadata.recordName)
    console.log(`${person} → [${names.join(", ") || "nothing"}]`)
  }
  console.log("(expected: alice sees the Secret doc; bob does NOT - ACL enforced in one binary)")
}

main()