@100xprompt/chitta 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +203 -0
- package/assets/rules/claude-md.md +9 -0
- package/assets/skill/SKILL.md +47 -0
- package/package.json +48 -0
- package/src/README.md +124 -0
- package/src/arango-client.ts +67 -0
- package/src/arango-graph-provider.ts +364 -0
- package/src/bin.ts +27 -0
- package/src/config-env.ts +53 -0
- package/src/embedded/authorizer.ts +89 -0
- package/src/embedded/cli.ts +86 -0
- package/src/embedded/code-extractor.ts +9 -0
- package/src/embedded/demo.ts +36 -0
- package/src/embedded/extract.ts +12 -0
- package/src/embedded/extractors/code.ts +308 -0
- package/src/embedded/extractors/deterministic.ts +63 -0
- package/src/embedded/extractors/llm.ts +151 -0
- package/src/embedded/extractors/text-hygiene.ts +54 -0
- package/src/embedded/extractors/types.ts +34 -0
- package/src/embedded/graph/acl-paths.ts +96 -0
- package/src/embedded/graph/adjacency.ts +61 -0
- package/src/embedded/graph/centrality.ts +23 -0
- package/src/embedded/graph/communities.ts +46 -0
- package/src/embedded/graph/cypher.ts +17 -0
- package/src/embedded/graph/impact.ts +24 -0
- package/src/embedded/graph/knowledge-graph.ts +108 -0
- package/src/embedded/graph/pagerank.ts +57 -0
- package/src/embedded/graph/sql-access.ts +13 -0
- package/src/embedded/graph/traversal.ts +73 -0
- package/src/embedded/graph/types.ts +35 -0
- package/src/embedded/graph-query.ts +126 -0
- package/src/embedded/index.ts +171 -0
- package/src/embedded/ingest.ts +262 -0
- package/src/embedded/kgqa/answer-paths.ts +197 -0
- package/src/embedded/kgqa/entity-link.ts +13 -0
- package/src/embedded/kgqa/intent.ts +14 -0
- package/src/embedded/kgqa/predicates.ts +9 -0
- package/src/embedded/kgqa/preference.ts +20 -0
- package/src/embedded/kgqa/select.ts +99 -0
- package/src/embedded/kgqa/text.ts +16 -0
- package/src/embedded/kgqa/types.ts +6 -0
- package/src/embedded/kgqa-service.ts +122 -0
- package/src/embedded/llm-extractor.ts +10 -0
- package/src/embedded/local-embeddings.ts +36 -0
- package/src/embedded/personal.ts +100 -0
- package/src/embedded/reranker.ts +62 -0
- package/src/embedded/retrieval/decay-stage.ts +59 -0
- package/src/embedded/retrieval/diversity.ts +37 -0
- package/src/embedded/retrieval/fuse.ts +52 -0
- package/src/embedded/retrieval/graph-stage.ts +45 -0
- package/src/embedded/retrieval/hybrid-retriever.ts +80 -0
- package/src/embedded/retrieval/keyword-stage.ts +27 -0
- package/src/embedded/retrieval/passage.ts +44 -0
- package/src/embedded/retrieval/rerank-stage.ts +31 -0
- package/src/embedded/retrieval/trace.ts +31 -0
- package/src/embedded/retrieval/vector-stage.ts +15 -0
- package/src/embedded/sqlite-graph-provider.ts +119 -0
- package/src/embedded/sqlite-store.ts +95 -0
- package/src/embedded/sqlite-vec-service.ts +122 -0
- package/src/embedded/store/chunks.ts +61 -0
- package/src/embedded/store/fts.ts +50 -0
- package/src/embedded/store/nodes-edges.ts +112 -0
- package/src/embedded/store/salience.ts +37 -0
- package/src/embedded/store/schema.ts +109 -0
- package/src/embedded/transformers-embeddings.ts +100 -0
- package/src/embeddings.ts +51 -0
- package/src/eval/goldset.ts +46 -0
- package/src/eval/harness.ts +65 -0
- package/src/eval/metrics.ts +38 -0
- package/src/http/server.ts +93 -0
- package/src/index.ts +44 -0
- package/src/install/index.ts +139 -0
- package/src/install/platforms.ts +126 -0
- package/src/install/skill.ts +46 -0
- package/src/install/writers.ts +82 -0
- package/src/mcp/backend.ts +129 -0
- package/src/mcp/server.ts +83 -0
- package/src/mcp/tools/context-about.ts +69 -0
- package/src/mcp/tools/context-graph.ts +23 -0
- package/src/mcp/tools/context-ingest.ts +88 -0
- package/src/mcp/tools/context-rebuild.ts +22 -0
- package/src/mcp/tools/context-relate.ts +88 -0
- package/src/mcp/tools/get-context.ts +52 -0
- package/src/mcp/tools/index.ts +40 -0
- package/src/mcp/tools/types.ts +33 -0
- package/src/permission.ts +72 -0
- package/src/provider.ts +65 -0
- package/src/qdrant-vector.ts +76 -0
- package/src/retrieval.ts +218 -0
- package/src/service.ts +40 -0
- package/src/types.ts +91 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
// Ported from PipesHub `services/graph_db/arango/arango_http_provider.py`
|
|
2
|
+
// (get_accessible_virtual_record_ids + _get_virtual_ids_for_connector +
|
|
3
|
+
// _get_kb_virtual_ids + _get_user_app_ids).
|
|
4
|
+
//
|
|
5
|
+
// This is the moat. The AQL is preserved verbatim from the source - the eight
|
|
6
|
+
// permission paths (direct / group×2 / org×2 / record-group inheritance×2 /
|
|
7
|
+
// anyone) and the two KB paths (direct / team). Do not "simplify" a path without
|
|
8
|
+
// understanding which access route it represents; each one is a way a user can
|
|
9
|
+
// legitimately reach a record, and dropping one silently denies access while
|
|
10
|
+
// loosening one silently leaks data.
|
|
11
|
+
|
|
12
|
+
import type { ArangoClient, GraphProvider } from "./provider"
|
|
13
|
+
import type { AccessibleMap, MetadataFilters, RecordDoc, RetrievalFilters, UserDoc } from "./types"
|
|
14
|
+
|
|
15
|
+
// Arango collection names, matching the source schema. They are either bound as
// @@collection variables or interpolated into traversals, so every value must be
// the exact collection name in the database.
const C = {
  // Document collections iterated directly (`FOR x IN @@...`).
  USERS: "users",
  RECORDS: "records",
  ANYONE: "anyone",

  // Edge collections walked by the permission / membership traversals.
  PERMISSION: "permissions",
  BELONGS_TO: "belongsTo",
  INHERIT_PERMISSIONS: "inheritPermissions",

  // Edge collections walked by the metadata-facet filters.
  BELONGS_TO_DEPARTMENT: "belongsToDepartment",
  BELONGS_TO_CATEGORY: "belongsToCategory",
  BELONGS_TO_LANGUAGE: "belongsToLanguage",
  BELONGS_TO_TOPIC: "belongsToTopic",
} as const

// Value `record.indexingStatus` must equal for a record to be returned by the ACL queries.
const COMPLETED_STATUS = "COMPLETED"
|
|
30
|
+
|
|
31
|
+
/**
 * Builds the optional metadata-facet FILTER lines and their bind variables.
 * Shared by the connector and KB queries so both honor the
 * department / category / language / topic facets.
 *
 * Each non-empty facet contributes one FILTER that keeps `record` only when at
 * least one OUTBOUND neighbour over the facet's edge collection has the matched
 * attribute in the requested list. Facets that are absent or empty contribute
 * nothing.
 *
 * @param metadataFilters facet value lists; omit to get an empty clause
 * @returns `clause` (FILTER lines joined by newlines, "" when no facet is
 *          active) and `bindVars` (one entry per active facet)
 */
function buildMetadataFilters(metadataFilters?: MetadataFilters): {
  clause: string
  bindVars: Record<string, unknown>
} {
  const bindVars: Record<string, unknown> = {}
  if (!metadataFilters) return { clause: "", bindVars }

  // [requested values, edge collection, attribute matched on the neighbour, bind-variable name]
  // All subcategory levels traverse the category edge and match on `name`.
  const facets: Array<[string[] | undefined, string, string, string]> = [
    [metadataFilters.departments, C.BELONGS_TO_DEPARTMENT, "departmentName", "departmentNames"],
    [metadataFilters.categories, C.BELONGS_TO_CATEGORY, "name", "categoryNames"],
    [metadataFilters.subcategories1, C.BELONGS_TO_CATEGORY, "name", "subcat1Names"],
    [metadataFilters.subcategories2, C.BELONGS_TO_CATEGORY, "name", "subcat2Names"],
    [metadataFilters.subcategories3, C.BELONGS_TO_CATEGORY, "name", "subcat3Names"],
    [metadataFilters.languages, C.BELONGS_TO_LANGUAGE, "name", "languageNames"],
    [metadataFilters.topics, C.BELONGS_TO_TOPIC, "name", "topicNames"],
  ]

  const clauses: string[] = []
  for (const [values, edge, field, bindName] of facets) {
    if (!values || values.length === 0) continue
    clauses.push(`
        FILTER LENGTH(
          FOR x IN OUTBOUND record._id ${edge}
            FILTER x.${field} IN @${bindName}
            LIMIT 1
            RETURN 1
        ) > 0`)
    bindVars[bindName] = values
  }
  return { clause: clauses.join("\n"), bindVars }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Folds `{ virtualRecordId, recordId }` result rows into a
 * virtualRecordId → recordId map. Rows that are falsy or lack either id are
 * skipped; when a virtualRecordId repeats, the later row overwrites the earlier.
 * A null/undefined `rows` yields an empty map.
 */
function rowsToMap(rows: any[]): AccessibleMap {
  const initial: AccessibleMap = {}
  return (rows ?? []).reduce((acc: AccessibleMap, row) => {
    if (row?.virtualRecordId && row.recordId) acc[row.virtualRecordId] = row.recordId
    return acc
  }, initial)
}
|
|
74
|
+
|
|
75
|
+
/**
 * GraphProvider implementation that answers "which records may this user read"
 * by running AQL against ArangoDB through the injected client.
 *
 * The AQL in `getVirtualIdsForConnector` and `getKbVirtualIds` is kept verbatim;
 * each LET block is one access route. Do not merge or drop a route without
 * understanding which grant it represents.
 */
export class ArangoGraphProvider implements GraphProvider {
  /**
   * @param client executes AQL with bind variables
   * @param log    error sink; defaults to a no-op logger
   */
  constructor(
    private readonly client: ArangoClient,
    private readonly log: { error: (m: string, ...a: unknown[]) => void; debug?: (m: string) => void } = {
      error: () => {},
    },
  ) {}

  /** Returns the first user document whose `userId` field equals `userId`, or null. */
  async getUserByUserId(userId: string): Promise<UserDoc | null> {
    const rows = await this.client.executeAql(
      `FOR user IN @@users FILTER user.userId == @userId LIMIT 1 RETURN user`,
      { userId, "@users": C.USERS },
    )
    return rows?.[0] ?? null
  }

  /**
   * Apps one hop away from the user over the permission edge (either direction).
   * Kept as a seam - wire to the source's get_user_apps traversal. An empty
   * result means "connectors contribute nothing"; KB paths still run.
   *
   * @param userKey the user's document key (without the collection prefix)
   */
  async getUserApps(userKey: string): Promise<Array<{ _key?: string; id?: string }>> {
    const rows = await this.client.executeAql(
      `FOR app IN 1..1 ANY @userKey @@permission
        FILTER IS_SAME_COLLECTION("apps", app)
        RETURN app`,
      { userKey: `${C.USERS}/${userKey}`, "@permission": C.PERMISSION },
    )
    return rows ?? []
  }

  /** Resolves the user, then returns the `_key` (or `id`) of every app they can reach. */
  private async getUserAppIds(userId: string): Promise<string[]> {
    const user = await this.getUserByUserId(userId)
    if (!user) return []
    const userKey = user._key ?? user.id
    if (!userKey) return []
    const apps = await this.getUserApps(userKey)
    return apps.map((a) => a._key ?? a.id).filter((x): x is string => Boolean(x))
  }

  /**
   * THE moat orchestration: union of connector paths + KB path, deduped.
   *
   * Which queries run depends on the `kb` / `apps` filters:
   *   - both set      → the user's apps that are also in `apps`, plus the listed KBs
   *   - only `kb`     → the listed KBs
   *   - only `apps`   → the user's apps that are also in `apps`
   *   - neither       → all of the user's apps, plus all KBs
   * App ids starting with "knowledgeBase_" are never queried as connectors.
   * Remaining filter fields are forwarded as metadata facets.
   *
   * @returns virtualRecordId → recordId; `{}` when nothing is accessible or on
   *          failure (failures are logged, never thrown)
   */
  async getAccessibleVirtualRecordIds(args: {
    userId: string
    orgId: string
    filters?: RetrievalFilters
  }): Promise<AccessibleMap> {
    const { userId, orgId } = args
    const filters = args.filters ?? {}
    try {
      const userAppIds = await this.getUserAppIds(userId)
      const { kb: kbIds, apps: connectorIdsFilter, ...metadataFilters } = filters

      const hasKbFilter = Boolean(kbIds && kbIds.length)
      const hasAppFilter = Boolean(connectorIdsFilter && connectorIdsFilter.length)

      const tasks: Promise<AccessibleMap>[] = []

      // Connector paths run unless the caller restricted the search to KBs only.
      if (hasAppFilter || !hasKbFilter) {
        const requested = hasAppFilter ? new Set(connectorIdsFilter ?? []) : null
        for (const cid of userAppIds) {
          if (requested && !requested.has(cid)) continue
          if (cid.startsWith("knowledgeBase_")) continue
          tasks.push(this.getVirtualIdsForConnector(userId, orgId, cid, metadataFilters))
        }
      }

      // KB paths run unless the caller restricted the search to connectors only.
      if (hasKbFilter || !hasAppFilter) {
        tasks.push(this.getKbVirtualIds(userId, orgId, hasKbFilter ? kbIds : undefined, metadataFilters))
      }

      if (tasks.length === 0) return {}

      const results = await Promise.allSettled(tasks)
      const merged: AccessibleMap = {}
      for (const r of results) {
        if (r.status !== "fulfilled") {
          this.log.error(`accessible-ids task failed: ${String(r.reason)}`)
          continue
        }
        // First writer wins per virtualRecordId - mirrors the source's dedup so a
        // record reachable via several paths resolves to a single recordId.
        // Own-property check, not `in`: `in` also matches inherited names such as
        // "constructor", which would silently drop a record with that id.
        for (const [vid, rid] of Object.entries(r.value)) {
          if (!Object.prototype.hasOwnProperty.call(merged, vid)) merged[vid] = rid
        }
      }
      return merged
    } catch (e) {
      this.log.error(`getAccessibleVirtualRecordIds failed: ${String(e)}`)
      return {}
    }
  }

  /**
   * The eight permission paths for one connector: direct, group (via belongsTo),
   * group (via permission edge), org, org → record group, group/role → record
   * group, direct record group, and "anyone" within the org. Only records of
   * `connectorId` with a completed indexing status are returned. Query failures
   * are logged and yield `{}`.
   */
  private async getVirtualIdsForConnector(
    userId: string,
    orgId: string,
    connectorId: string,
    metadataFilters?: MetadataFilters,
  ): Promise<AccessibleMap> {
    const { clause, bindVars: mdBind } = buildMetadataFilters(metadataFilters)
    const query = `
      LET userDoc = FIRST(FOR user IN @@users FILTER user.userId == @userId RETURN user)

      LET directRecords = (
        FOR record IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("records", record)
          FILTER record.connectorId == @connectorId
          FILTER record.indexingStatus == @completedStatus
          ${clause}
          RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET groupRecords = (
        FOR group IN 1..1 ANY userDoc._id ${C.BELONGS_TO}
          FOR record IN 1..1 ANY group._id ${C.PERMISSION}
            FILTER IS_SAME_COLLECTION("records", record)
            FILTER record.connectorId == @connectorId
            FILTER record.indexingStatus == @completedStatus
            ${clause}
            RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET groupRecordsPermissionEdge = (
        FOR group IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FOR record IN 1..1 ANY group._id ${C.PERMISSION}
            FILTER IS_SAME_COLLECTION("records", record)
            FILTER record.connectorId == @connectorId
            FILTER record.indexingStatus == @completedStatus
            ${clause}
            RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET orgRecords = (
        FOR org IN 1..1 ANY userDoc._id ${C.BELONGS_TO}
          FOR record IN 1..1 ANY org._id ${C.PERMISSION}
            FILTER IS_SAME_COLLECTION("records", record)
            FILTER record.connectorId == @connectorId
            FILTER record.indexingStatus == @completedStatus
            ${clause}
            RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET orgRecordGroupRecords = (
        FOR org IN 1..1 ANY userDoc._id ${C.BELONGS_TO}
          FOR recordGroup IN 1..1 ANY org._id ${C.PERMISSION}
            FILTER IS_SAME_COLLECTION("recordGroups", recordGroup)
            FOR record IN 0..2 INBOUND recordGroup._id ${C.INHERIT_PERMISSIONS}
              FILTER IS_SAME_COLLECTION("records", record)
              FILTER record.connectorId == @connectorId
              FILTER record.indexingStatus == @completedStatus
              ${clause}
              RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET recordGroupRecords = (
        FOR group IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("groups", group) OR IS_SAME_COLLECTION("roles", group)
          FOR recordGroup IN 1..1 ANY group._id ${C.PERMISSION}
            FOR record IN 0..5 INBOUND recordGroup._id ${C.INHERIT_PERMISSIONS}
              FILTER IS_SAME_COLLECTION("records", record)
              FILTER record.connectorId == @connectorId
              FILTER record.indexingStatus == @completedStatus
              ${clause}
              RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET inheritedRecordGroupRecords = (
        FOR recordGroup IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("recordGroups", recordGroup)
          FOR record IN 0..5 INBOUND recordGroup._id ${C.INHERIT_PERMISSIONS}
            FILTER IS_SAME_COLLECTION("records", record)
            FILTER record.connectorId == @connectorId
            FILTER record.indexingStatus == @completedStatus
            ${clause}
            RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET anyoneRecords = (
        FOR anyone IN @@anyone
          FILTER anyone.organization == @orgId
          FOR record IN @@records
            FILTER record._key == anyone.file_key
            FILTER record.connectorId == @connectorId
            FILTER record.indexingStatus == @completedStatus
            ${clause}
            RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET allPairs = UNION(
        directRecords, groupRecords, groupRecordsPermissionEdge,
        orgRecords, orgRecordGroupRecords, recordGroupRecords,
        inheritedRecordGroupRecords, anyoneRecords
      )
      FOR pair IN allPairs
        FILTER pair != null AND pair.virtualRecordId != null AND pair.recordId != null
        COLLECT virtualRecordId = pair.virtualRecordId INTO groups
        LET recordId = FIRST(groups).pair.recordId
        FILTER recordId != null
        RETURN {virtualRecordId: virtualRecordId, recordId: recordId}`

    try {
      const rows = await this.client.executeAql(query, {
        userId,
        orgId,
        connectorId,
        completedStatus: COMPLETED_STATUS,
        "@users": C.USERS,
        "@records": C.RECORDS,
        "@anyone": C.ANYONE,
        ...mdBind,
      })
      return rowsToMap(rows)
    } catch (e) {
      this.log.error(`connector ${connectorId} acl query failed: ${String(e)}`)
      return {}
    }
  }

  /**
   * KB (RecordGroup) paths: direct membership + team membership. Only records
   * with origin "UPLOAD" and a completed indexing status are returned. When
   * `kbIds` is non-empty the KBs are restricted to those keys. `_orgId` is
   * accepted for signature parity and not used by the query. Query failures are
   * logged and yield `{}`.
   */
  private async getKbVirtualIds(
    userId: string,
    _orgId: string,
    kbIds?: string[],
    metadataFilters?: MetadataFilters,
  ): Promise<AccessibleMap> {
    const { clause, bindVars: mdBind } = buildMetadataFilters(metadataFilters)
    const kbFilter = kbIds && kbIds.length ? "FILTER kb._key IN @kb_ids" : ""
    const query = `
      LET userDoc = FIRST(FOR user IN @@users FILTER user.userId == @userId RETURN user)

      LET directKbRecords = (
        FOR kb IN 1..1 ANY userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("recordGroups", kb)
          ${kbFilter}
          FOR record IN 1..1 ANY kb._id ${C.BELONGS_TO}
            FILTER IS_SAME_COLLECTION("records", record)
            FILTER record.origin == "UPLOAD"
            FILTER record.indexingStatus == @completedStatus
            ${clause}
            RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET teamKbRecords = (
        FOR team, userTeamEdge IN 1..1 OUTBOUND userDoc._id ${C.PERMISSION}
          FILTER IS_SAME_COLLECTION("teams", team)
          FILTER userTeamEdge.type == "USER"
          FOR kb, teamKbEdge IN 1..1 OUTBOUND team._id ${C.PERMISSION}
            FILTER IS_SAME_COLLECTION("recordGroups", kb)
            FILTER teamKbEdge.type == "TEAM"
            ${kbFilter}
            FOR record IN 1..1 ANY kb._id ${C.BELONGS_TO}
              FILTER IS_SAME_COLLECTION("records", record)
              FILTER record.origin == "UPLOAD"
              FILTER record.indexingStatus == @completedStatus
              ${clause}
              RETURN {virtualRecordId: record.virtualRecordId, recordId: record._key}
      )
      LET allKbPairs = UNION(directKbRecords, teamKbRecords)
      FOR pair IN allKbPairs
        FILTER pair != null AND pair.virtualRecordId != null AND pair.recordId != null
        COLLECT virtualRecordId = pair.virtualRecordId INTO groups
        LET recordId = FIRST(groups).pair.recordId
        FILTER recordId != null
        RETURN {virtualRecordId: virtualRecordId, recordId: recordId}`

    try {
      const bind: Record<string, unknown> = {
        userId,
        completedStatus: COMPLETED_STATUS,
        "@users": C.USERS,
        ...mdBind,
      }
      // Bind kb_ids only when the query references it; an unused bind var is an AQL error.
      if (kbIds && kbIds.length) bind.kb_ids = kbIds
      const rows = await this.client.executeAql(query, bind)
      return rowsToMap(rows)
    } catch (e) {
      this.log.error(`kb acl query failed: ${String(e)}`)
      return {}
    }
  }

  /** Fetches the record documents with the given keys that belong to `orgId`. */
  async getRecordsByRecordIds(recordIds: string[], orgId: string): Promise<RecordDoc[]> {
    if (recordIds.length === 0) return []
    const rows = await this.client.executeAql(
      `FOR record IN @@records FILTER record._key IN @recordIds AND record.orgId == @orgId RETURN record`,
      { "@records": C.RECORDS, recordIds, orgId },
    )
    return rows ?? []
  }

  /** Fetches one document by key from an arbitrary collection, or null if absent. */
  async getDocument(recordId: string, collection: string): Promise<RecordDoc | null> {
    const rows = await this.client.executeAql(
      `FOR d IN @@col FILTER d._key == @recordId LIMIT 1 RETURN d`,
      { "@col": collection, recordId },
    )
    return rows?.[0] ?? null
  }
}
|
package/src/bin.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
// Chitta entrypoint dispatcher. One binary, three modes:
|
|
3
|
+
// chitta → MCP stdio server (what MCP clients launch; the default)
|
|
4
|
+
// chitta install [...] → wire Chitta into AI tools (MCP config + Skill)
|
|
5
|
+
// chitta ingest|query|... → the embedded CLI
|
|
6
|
+
// Sub-entrypoints self-run on import, so we route by dynamic import (no eager start).
|
|
7
|
+
export {} // module marker (enables top-level await)

// Where the user's arguments start depends on how we were launched:
//   `bun run src/bin.ts <args>` → argv = [bun, /path/bin.ts, ...args]  (args at index 2)
//   compiled `./chitta <args>`  → argv = [/path/chitta, ...args]       (args at index 1)
//   (the Node shim execs the compiled binary, so it hits the compiled layout too)
// A script-file extension on argv[1] tells the two apart. process.argv is then rewritten
// to the canonical [exec, "chitta", ...args] so every downstream module's
// process.argv.slice(2) is correct.
const SCRIPT_FILE = /\.(ts|js|mjs|cjs)$/
const secondArg = process.argv[1]
const launchedFromScript = typeof secondArg === "string" && SCRIPT_FILE.test(secondArg)
const userArgs = process.argv.slice(launchedFromScript ? 2 : 1)
process.argv = [process.argv[0], "chitta", ...userArgs]

const [cmd] = userArgs
const wantsMcpServer = !cmd || cmd.startsWith("-") // bare invocation (or flags) = MCP server
const wantsInstaller = cmd === "install" || cmd === "uninstall"

// Import specifiers stay literal so a bundler can resolve them statically.
if (wantsMcpServer) {
  await import("./mcp/server")
} else if (wantsInstaller) {
  await import("./install/index")
} else {
  await import("./embedded/cli") // ingest | query | user-add | ...
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// Env-based config loader for the context layer. Dep-free so it stays testable.
|
|
2
|
+
// v0 source of truth; later this can be superseded by the CLI config + per-session
|
|
3
|
+
// identity (packages/identity) without touching the moat.
|
|
4
|
+
|
|
5
|
+
import type { ContextConfig } from "./service"
|
|
6
|
+
|
|
7
|
+
/** Identity of the caller on whose behalf context is retrieved. */
export interface ContextIdentity {
  /** Id of the asking user. */
  userId: string
  /** Id of the organisation the request is scoped to. */
  orgId: string
}
|
|
11
|
+
|
|
12
|
+
type Env = Record<string, string | undefined>

/**
 * Builds the context-layer backend config from environment variables.
 *
 * Required: CONTEXT_ARANGO_URL, CONTEXT_QDRANT_URL, CONTEXT_EMBED_URL,
 * CONTEXT_COLLECTION. If any of them is missing or empty the feature is
 * considered off and null is returned.
 *
 * Optional: CONTEXT_ARANGO_DB (default "_system"), CONTEXT_ARANGO_USER,
 * CONTEXT_ARANGO_PASSWORD, CONTEXT_QDRANT_API_KEY, CONTEXT_EMBED_MODEL
 * (default "BAAI/bge-small-en-v1.5"), CONTEXT_SPARSE_URL.
 *
 * @param env environment map to read (e.g. `process.env`)
 * @returns the config, or null when a required variable is not set
 */
export function loadContextConfigFromEnv(env: Env): ContextConfig | null {
  const arangoUrl = env.CONTEXT_ARANGO_URL
  const qdrantUrl = env.CONTEXT_QDRANT_URL
  const denseEndpoint = env.CONTEXT_EMBED_URL
  const collectionName = env.CONTEXT_COLLECTION
  if (!arangoUrl || !qdrantUrl || !denseEndpoint || !collectionName) return null

  return {
    arango: {
      url: arangoUrl,
      // `||`, not `??`: an empty value counts as unset, exactly like the required
      // variables above, so `CONTEXT_ARANGO_DB=` still falls back to the default.
      database: env.CONTEXT_ARANGO_DB || "_system",
      username: env.CONTEXT_ARANGO_USER,
      // Passed through untouched: an empty password can be a legitimate credential.
      password: env.CONTEXT_ARANGO_PASSWORD,
    },
    qdrant: { url: qdrantUrl, apiKey: env.CONTEXT_QDRANT_API_KEY },
    embeddings: {
      denseEndpoint,
      denseModel: env.CONTEXT_EMBED_MODEL || "BAAI/bge-small-en-v1.5",
      sparseEndpoint: env.CONTEXT_SPARSE_URL,
    },
    collectionName,
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * The asking user. v0 reads env; production wires this to packages/identity.
 *
 * Reads CONTEXT_USER_ID and CONTEXT_ORG_ID and returns null unless both are
 * set to a non-empty value.
 */
export function loadContextIdentityFromEnv(env: Env): ContextIdentity | null {
  const { CONTEXT_USER_ID: userId, CONTEXT_ORG_ID: orgId } = env
  return userId && orgId ? { userId, orgId } : null
}
|
|
46
|
+
|
|
47
|
+
/**
 * Names of the environment variables that must all be set for the context
 * backend to be enabled - for help text / diagnostics.
 */
export const REQUIRED_CONTEXT_ENV = [
  "CONTEXT_ARANGO_URL", // graph store
  "CONTEXT_QDRANT_URL", // vector store
  "CONTEXT_EMBED_URL", // dense embedding endpoint
  "CONTEXT_COLLECTION", // collection name
] as const
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// Write-side access control - the mutation counterpart to the read ACL. Answers
|
|
2
|
+
// "who can CREATE / MODIFY / DELETE / SHARE what", which read-filtering doesn't.
|
|
3
|
+
//
|
|
4
|
+
// Roles (per user): admin (full), editor (create + manage own), viewer (read only).
|
|
5
|
+
// Ownership: the creator owns a record; only owner or admin may modify/delete it.
|
|
6
|
+
// Grant validation: a non-admin can only share within their own org/groups - they
|
|
7
|
+
// cannot grant access to principals or orgs outside their scope (no over-sharing).
|
|
8
|
+
|
|
9
|
+
import type { SqliteStore } from "./sqlite-store"
|
|
10
|
+
|
|
11
|
+
export type Role = "admin" | "editor" | "viewer"
|
|
12
|
+
|
|
13
|
+
/** Thrown when a write-side authorization check rejects an operation. */
export class AuthorizationError extends Error {
  // Lets callers and logs tell authorization denials apart from other errors.
  name = "AuthorizationError"

  constructor(message: string) {
    super(message)
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Write-side authorization checks, answered from the `nodes` and `edges`
 * tables of the embedded SQLite store.
 *
 * Roles are read from the `role` field of the user's node data. A user that is
 * missing, has no role, or has a role outside admin/editor/viewer is treated as
 * "viewer" (least privilege), so a typo in stored data can never grant rights.
 */
export class Authorizer {
  /** Role values this class recognises. */
  private static readonly KNOWN_ROLES: readonly string[] = ["admin", "editor", "viewer"]

  constructor(private readonly store: SqliteStore) {}

  /**
   * Runs a single-row `SELECT data ...` lookup and parses the JSON payload.
   *
   * @returns null when no row matches; `{}` when the payload is valid JSON but
   *          not an object
   * @throws SyntaxError when the stored payload is not valid JSON
   */
  private readNodeData(sql: string, id: string): Record<string, unknown> | null {
    const row = this.store.db.query(sql).get(id) as { data: string } | null | undefined
    if (!row) return null
    const parsed: unknown = JSON.parse(row.data)
    return typeof parsed === "object" && parsed !== null ? (parsed as Record<string, unknown>) : {}
  }

  /** Role of the user; "viewer" when the user or a recognised role is absent. */
  roleOf(userId: string): Role {
    const role = this.readNodeData("SELECT data FROM nodes WHERE id = ? AND coll = 'users' LIMIT 1", userId)?.role
    return typeof role === "string" && Authorizer.KNOWN_ROLES.includes(role) ? (role as Role) : "viewer"
  }

  /** `ownerId` stored on the record's node, or null when the record or owner is absent. */
  ownerOf(recordId: string): string | null {
    const owner = this.readNodeData("SELECT data FROM nodes WHERE id = ? AND coll = 'records' LIMIT 1", recordId)
      ?.ownerId
    return typeof owner === "string" ? owner : null
  }

  /** May this user create new records? (editor or admin) */
  canCreate(userId: string): boolean {
    // Explicit allow-list rather than "anything but viewer".
    const role = this.roleOf(userId)
    return role === "admin" || role === "editor"
  }

  /** May this user modify/delete this record? (admin, or the record's owner) */
  canModify(userId: string, recordId: string): boolean {
    return this.roleOf(userId) === "admin" || this.ownerOf(recordId) === userId
  }

  /** Destinations of every `belongsTo` edge leaving the user. */
  private memberships(userId: string): Set<string> {
    const rows = this.store.db.query("SELECT dst FROM edges WHERE src = ? AND label = 'belongsTo'").all(userId) as Array<{
      dst: string
    }>
    return new Set(rows.map((r) => r.dst))
  }

  /** True when a `belongsTo` edge runs from `principal` to `orgId`. */
  private belongsToOrg(principal: string, orgId: string): boolean {
    return !!this.store.db
      .query("SELECT 1 FROM edges WHERE src = ? AND dst = ? AND label = 'belongsTo' LIMIT 1")
      .get(principal, orgId)
  }

  /**
   * A non-admin may only grant to themselves, their own groups/teams, principals
   * in the same org, or share-with-own-org. Admins may grant anything.
   *
   * @throws AuthorizationError on any out-of-scope grant
   */
  assertCanGrant(userId: string, orgId: string, principals: string[], shareWithOrg?: string): void {
    if (this.roleOf(userId) === "admin") return
    if (shareWithOrg && shareWithOrg !== orgId) {
      throw new AuthorizationError(`cannot share to org '${shareWithOrg}' - outside your org`)
    }
    const mine = this.memberships(userId)
    for (const p of principals) {
      const ok = p === userId || p === orgId || mine.has(p) || this.belongsToOrg(p, orgId)
      if (!ok) throw new AuthorizationError(`cannot grant access to '${p}' - outside your scope`)
    }
  }

  /**
   * Throws unless the user may create with the requested sharing.
   *
   * @throws AuthorizationError when the role forbids creating, or a grant is out of scope
   */
  assertCanCreate(userId: string, orgId: string, principals: string[], shareWithOrg?: string): void {
    if (!this.canCreate(userId)) {
      throw new AuthorizationError(`user '${userId}' (role: ${this.roleOf(userId)}) is not permitted to create records`)
    }
    this.assertCanGrant(userId, orgId, principals, shareWithOrg)
  }

  /**
   * Throws unless the user is an admin or owns the record.
   *
   * @throws AuthorizationError naming the record's owner ("none" when unowned)
   */
  assertCanModify(userId: string, recordId: string): void {
    if (!this.canModify(userId, recordId)) {
      const owner = this.ownerOf(recordId) ?? "none"
      throw new AuthorizationError(`user '${userId}' may not modify/delete '${recordId}' (owner: ${owner})`)
    }
  }
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
// Standalone context CLI - the complete system in one runnable program.
|
|
2
|
+
// Persists to a real .db file, so ingest and query work across invocations.
|
|
3
|
+
//
|
|
4
|
+
// bun run cli.ts user-add alice --org org1
|
|
5
|
+
// bun run cli.ts ingest --id doc1 --org org1 --name "Notes" --share-user alice --text "hello world"
|
|
6
|
+
// bun run cli.ts query "hello" --user alice --org org1
|
|
7
|
+
//
|
|
8
|
+
// Compile to a single binary:
|
|
9
|
+
// bun build cli.ts --compile --outfile ctx
|
|
10
|
+
|
|
11
|
+
import { buildEmbeddedContext } from "./index"
|
|
12
|
+
|
|
13
|
+
/**
 * Returns the token that follows `flag` in process.argv. Falls back to
 * `fallback` when the flag is absent or is the last token.
 */
function arg(flag: string, fallback?: string): string | undefined {
  const at = process.argv.indexOf(flag)
  if (at < 0) return fallback
  const valueAt = at + 1
  if (valueAt >= process.argv.length) return fallback
  return process.argv[valueAt]
}
|
|
17
|
+
/** True when `flag` appears anywhere in process.argv. */
function has(flag: string): boolean {
  return process.argv.indexOf(flag) !== -1
}
|
|
20
|
+
|
|
21
|
+
/** Returns `value`, or throws a usage error naming `what` when it is missing or empty. */
function requireValue(value: string | undefined, what: string): string {
  if (value === undefined || value === "") {
    throw new Error(`missing required ${what}`)
  }
  return value
}

/**
 * CLI entry point. Reads the command from `process.argv[2]`, opens the embedded
 * context at `--db` (falling back to `$CONTEXT_DB`, then "context.db"), runs the
 * command, and always closes the store afterwards.
 *
 * Commands: user-add | group-add | member-add | ingest | query | rebuild-graph |
 * reindex-vectors. Any other command prints the command list.
 *
 * @throws Error when a required positional argument or flag is missing, or when
 *   `--limit` is not a positive integer.
 */
async function main(): Promise<void> {
  const cmd = process.argv[2]
  const dbPath = arg("--db") ?? process.env.CONTEXT_DB ?? "context.db"
  const ctx = buildEmbeddedContext({ path: dbPath })

  // try/finally so the store is closed even when a command fails part-way.
  try {
    switch (cmd) {
      case "user-add": {
        const userId = requireValue(process.argv[3], "user id (usage: user-add <id> [--org <org>] [--email <email>])")
        const org = arg("--org") ?? "org1"
        ctx.ingestor.registerUser(userId, org, arg("--email"))
        console.log(`user '${userId}' added to org '${org}'`)
        break
      }
      case "group-add": {
        const groupId = requireValue(process.argv[3], "group id (usage: group-add <id>)")
        ctx.ingestor.registerGroup(groupId)
        console.log(`group '${groupId}' added`)
        break
      }
      case "member-add": {
        const memberId = requireValue(process.argv[3], "member id (usage: member-add <id> --group <group>)")
        const groupId = requireValue(arg("--group"), "--group <group>")
        ctx.ingestor.addMembership(memberId, groupId)
        console.log(`'${memberId}' added to group '${groupId}'`)
        break
      }
      case "ingest": {
        // Inline --text wins; otherwise read --file; otherwise ingest empty text.
        const filePath = arg("--file")
        const text = arg("--text") ?? (filePath ? await Bun.file(filePath).text() : "")
        const out = await ctx.ingestor.ingest({
          recordId: arg("--id") ?? `rec-${Date.now().toString(36)}`,
          orgId: arg("--org") ?? "org1",
          recordName: arg("--name") ?? "Untitled",
          text,
          permittedPrincipals: [arg("--share-user"), arg("--share-group")].filter(Boolean) as string[],
          shareWithOrg: has("--share-org") ? arg("--org", "org1") : undefined,
        })
        console.log(`ingested '${out.recordId}' (${out.chunks} chunks)`)
        break
      }
      case "query": {
        const queryText = requireValue(process.argv[3], "query text (usage: query <text> --user <id>)")
        const userId = requireValue(arg("--user"), "--user <id>")
        const rawLimit = arg("--limit") ?? "5"
        const limit = Number(rawLimit)
        if (!Number.isInteger(limit) || limit <= 0) {
          throw new Error(`--limit must be a positive integer, got '${rawLimit}'`)
        }
        const res = await ctx.retrieval.searchWithFilters({
          queries: [queryText],
          userId,
          orgId: arg("--org") ?? "org1",
          limit,
        })
        console.log(`status: ${res.status}`)
        for (const r of res.searchResults) {
          console.log(`  • [${r.metadata.recordName}] ${r.content.slice(0, 80)}`)
        }
        if (res.searchResults.length === 0) console.log("  (no accessible context)")
        break
      }
      case "rebuild-graph": {
        const res = await ctx.rebuildGraph()
        console.log(`rebuilt knowledge graph: ${res.records} records → ${res.entities} concept-mentions`)
        break
      }
      case "reindex-vectors": {
        const n = await ctx.reindex()
        console.log(`re-embedded ${n} chunks and rebuilt the vector index`)
        break
      }
      default:
        console.log("commands: user-add | group-add | member-add | ingest | query | rebuild-graph | reindex-vectors")
    }
  } finally {
    ctx.store.close()
  }
}
|
|
85
|
+
|
|
86
|
+
// Run the CLI; report a failure and exit non-zero instead of leaving the promise floating.
main().catch((err: unknown) => {
  console.error(err instanceof Error ? err.message : err)
  process.exitCode = 1
})
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// Code → graph extractor (the Graphify capability, ported TS-native). Parses source
|
|
2
|
+
// with tree-sitter (WASM grammars) into the SAME entity/edge shape every other
|
|
3
|
+
// extractor produces - so code nodes get ACL, vectors, temporal edges, and graph
|
|
4
|
+
// algorithms for free. STRICT SUPERSET of Graphify in one embedded store.
|
|
5
|
+
//
|
|
6
|
+
// Thin facade: the implementation lives in ./extractors/code and is re-exported here
|
|
7
|
+
// so existing imports keep resolving unchanged. Public API is preserved exactly.
|
|
8
|
+
|
|
9
|
+
export { CodeExtractor } from "./extractors/code"
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// Single-binary demo. Compile with:
|
|
2
|
+
// bun build src/embedded/demo.ts --compile --outfile ctxdemo
|
|
3
|
+
// Then run ./ctxdemo - one self-contained executable, no servers, no Python.
|
|
4
|
+
|
|
5
|
+
import { buildEmbeddedContext, LocalHashEmbeddings } from "./index"
|
|
6
|
+
|
|
7
|
+
/**
 * Demo entry point. Builds an in-memory embedded context, seeds it with one
 * organization, two users (alice, bob), and two records with one chunk each,
 * then runs the same search as each user and prints the record names returned.
 *
 * Seeded access data: an `anyone` node referencing `recPub`, and a
 * `permissions` edge from alice to `recSec`. The final log line states the
 * expected outcome; nothing here asserts it.
 */
async function main(): Promise<void> {
  const ctx = buildEmbeddedContext({ path: ":memory:" })
  // try/finally so the store is closed even if seeding or a search throws.
  try {
    const emb = new LocalHashEmbeddings()
    const ORG = "org1"
    // Builds the record-node payload shared by both demo records.
    const rec = (id: string, vid: string, name: string) => ({
      _key: id, virtualRecordId: vid, orgId: ORG, indexingStatus: "COMPLETED",
      origin: "CONNECTOR", recordName: name, mimeType: "text/plain", connectorId: "slack", connectorName: "slack",
    })

    // Organization and its two users.
    ctx.store.addNode(ORG, "organizations", { name: "Acme" })
    ctx.store.addNode("alice", "users", { userId: "alice", email: "alice@acme.co" })
    ctx.store.addNode("bob", "users", { userId: "bob", email: "bob@acme.co" })
    ctx.store.addEdge("alice", ORG, "belongsTo")
    ctx.store.addEdge("bob", ORG, "belongsTo")

    // Records plus their access data.
    ctx.store.addNode("recPub", "records", rec("recPub", "vPub", "Q3 Revenue Report"))
    ctx.store.addNode("anyPub", "anyone", { organization: ORG, file_key: "recPub" })
    ctx.store.addNode("recSec", "records", rec("recSec", "vSec", "Secret Merger Plans"))
    ctx.store.addEdge("alice", "recSec", "permissions")

    // One chunk per record, keyed by the record's virtualRecordId.
    ctx.store.addChunk("p1", "vPub", ORG, "quarterly revenue report Q3", await emb.embedDense("quarterly revenue report"))
    ctx.store.addChunk("p2", "vSec", ORG, "secret merger plans globex", await emb.embedDense("secret merger plans globex"))

    for (const user of ["alice", "bob"]) {
      const res = await ctx.retrieval.searchWithFilters({ queries: ["secret merger plans"], userId: user, orgId: ORG })
      const names = res.searchResults.map((r) => r.metadata.recordName)
      console.log(`${user} → [${names.join(", ") || "nothing"}]`)
    }
    console.log("(expected: alice sees the Secret doc; bob does NOT - ACL enforced in one binary)")
  } finally {
    ctx.store.close()
  }
}
|
|
35
|
+
|
|
36
|
+
// Run the demo; report a failure and exit non-zero instead of leaving the promise floating.
main().catch((err: unknown) => {
  console.error(err)
  process.exitCode = 1
})
|