@100xprompt/chitta 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +203 -0
  3. package/assets/rules/claude-md.md +9 -0
  4. package/assets/skill/SKILL.md +47 -0
  5. package/package.json +48 -0
  6. package/src/README.md +124 -0
  7. package/src/arango-client.ts +67 -0
  8. package/src/arango-graph-provider.ts +364 -0
  9. package/src/bin.ts +27 -0
  10. package/src/config-env.ts +53 -0
  11. package/src/embedded/authorizer.ts +89 -0
  12. package/src/embedded/cli.ts +86 -0
  13. package/src/embedded/code-extractor.ts +9 -0
  14. package/src/embedded/demo.ts +36 -0
  15. package/src/embedded/extract.ts +12 -0
  16. package/src/embedded/extractors/code.ts +308 -0
  17. package/src/embedded/extractors/deterministic.ts +63 -0
  18. package/src/embedded/extractors/llm.ts +151 -0
  19. package/src/embedded/extractors/text-hygiene.ts +54 -0
  20. package/src/embedded/extractors/types.ts +34 -0
  21. package/src/embedded/graph/acl-paths.ts +96 -0
  22. package/src/embedded/graph/adjacency.ts +61 -0
  23. package/src/embedded/graph/centrality.ts +23 -0
  24. package/src/embedded/graph/communities.ts +46 -0
  25. package/src/embedded/graph/cypher.ts +17 -0
  26. package/src/embedded/graph/impact.ts +24 -0
  27. package/src/embedded/graph/knowledge-graph.ts +108 -0
  28. package/src/embedded/graph/pagerank.ts +57 -0
  29. package/src/embedded/graph/sql-access.ts +13 -0
  30. package/src/embedded/graph/traversal.ts +73 -0
  31. package/src/embedded/graph/types.ts +35 -0
  32. package/src/embedded/graph-query.ts +126 -0
  33. package/src/embedded/index.ts +171 -0
  34. package/src/embedded/ingest.ts +262 -0
  35. package/src/embedded/kgqa/answer-paths.ts +197 -0
  36. package/src/embedded/kgqa/entity-link.ts +13 -0
  37. package/src/embedded/kgqa/intent.ts +14 -0
  38. package/src/embedded/kgqa/predicates.ts +9 -0
  39. package/src/embedded/kgqa/preference.ts +20 -0
  40. package/src/embedded/kgqa/select.ts +99 -0
  41. package/src/embedded/kgqa/text.ts +16 -0
  42. package/src/embedded/kgqa/types.ts +6 -0
  43. package/src/embedded/kgqa-service.ts +122 -0
  44. package/src/embedded/llm-extractor.ts +10 -0
  45. package/src/embedded/local-embeddings.ts +36 -0
  46. package/src/embedded/personal.ts +100 -0
  47. package/src/embedded/reranker.ts +62 -0
  48. package/src/embedded/retrieval/decay-stage.ts +59 -0
  49. package/src/embedded/retrieval/diversity.ts +37 -0
  50. package/src/embedded/retrieval/fuse.ts +52 -0
  51. package/src/embedded/retrieval/graph-stage.ts +45 -0
  52. package/src/embedded/retrieval/hybrid-retriever.ts +80 -0
  53. package/src/embedded/retrieval/keyword-stage.ts +27 -0
  54. package/src/embedded/retrieval/passage.ts +44 -0
  55. package/src/embedded/retrieval/rerank-stage.ts +31 -0
  56. package/src/embedded/retrieval/trace.ts +31 -0
  57. package/src/embedded/retrieval/vector-stage.ts +15 -0
  58. package/src/embedded/sqlite-graph-provider.ts +119 -0
  59. package/src/embedded/sqlite-store.ts +95 -0
  60. package/src/embedded/sqlite-vec-service.ts +122 -0
  61. package/src/embedded/store/chunks.ts +61 -0
  62. package/src/embedded/store/fts.ts +50 -0
  63. package/src/embedded/store/nodes-edges.ts +112 -0
  64. package/src/embedded/store/salience.ts +37 -0
  65. package/src/embedded/store/schema.ts +109 -0
  66. package/src/embedded/transformers-embeddings.ts +100 -0
  67. package/src/embeddings.ts +51 -0
  68. package/src/eval/goldset.ts +46 -0
  69. package/src/eval/harness.ts +65 -0
  70. package/src/eval/metrics.ts +38 -0
  71. package/src/http/server.ts +93 -0
  72. package/src/index.ts +44 -0
  73. package/src/install/index.ts +139 -0
  74. package/src/install/platforms.ts +126 -0
  75. package/src/install/skill.ts +46 -0
  76. package/src/install/writers.ts +82 -0
  77. package/src/mcp/backend.ts +129 -0
  78. package/src/mcp/server.ts +83 -0
  79. package/src/mcp/tools/context-about.ts +69 -0
  80. package/src/mcp/tools/context-graph.ts +23 -0
  81. package/src/mcp/tools/context-ingest.ts +88 -0
  82. package/src/mcp/tools/context-rebuild.ts +22 -0
  83. package/src/mcp/tools/context-relate.ts +88 -0
  84. package/src/mcp/tools/get-context.ts +52 -0
  85. package/src/mcp/tools/index.ts +40 -0
  86. package/src/mcp/tools/types.ts +33 -0
  87. package/src/permission.ts +72 -0
  88. package/src/provider.ts +65 -0
  89. package/src/qdrant-vector.ts +76 -0
  90. package/src/retrieval.ts +218 -0
  91. package/src/service.ts +40 -0
  92. package/src/types.ts +91 -0
@@ -0,0 +1,96 @@
1
+ // ACL permission paths - the Arango-AQL traversal ported to SQL over the generic
2
+ // node/edge tables. These eight-path-collapsed-to-five helpers are the access
3
+ // moat; the provider unions their results and dedupes first-writer-wins.
4
+ //
5
+ // • principals = the user + every group/role/org/team they belong to or are
6
+ // permissioned to (one hop).
7
+ // • directRecords = records permissioned to any principal.
8
+ // • recordGroups = record-groups permissioned to any principal, then both:
9
+ // - inheritedRecords (recursive descent over inheritPermissions), and
10
+ // - kbRecords (records that belong to those groups, origin=UPLOAD).
11
+ // • anyoneRecords = org-wide shared records.
12
+ // Same invariant as the Arango port; only the query language differs.
13
+
14
+ import type { UserDoc } from "../../types"
15
+ import { COMPLETED, type Pair, type SqlAccess } from "./sql-access"
16
+
17
/**
 * Fetch the `users` node whose JSON payload carries the given `userId`.
 *
 * @param sql - SQL access seam used to run the lookup.
 * @param userId - Value matched against `$.userId` inside the node's JSON `data`.
 * @returns The parsed `data` document with `id` and `_key` both set to the node
 *   id, or `null` when the query yields no row.
 */
export function userRow(sql: SqlAccess, userId: string): (UserDoc & { id: string }) | null {
  const matches = sql.rows<{ id: string; data: string }>(
    "SELECT id, data FROM nodes WHERE coll = 'users' AND json_extract(data, '$.userId') = ? LIMIT 1",
    [userId],
  )
  const first = matches[0]
  if (!first) return null
  const doc = JSON.parse(first.data) as UserDoc
  return { ...doc, id: first.id, _key: first.id }
}
25
+
26
/**
 * Collect the principal ids that act on behalf of a user: the user itself, every
 * `belongsTo` target, and every `permissions` target that is a group, role,
 * organization or team node (one hop each).
 *
 * @param sql - SQL access seam used to run both edge queries.
 * @param userId - Node id used as the `src` of the edges.
 * @returns De-duplicated ids, the user first, in discovery order.
 */
export function principalIds(sql: SqlAccess, userId: string): string[] {
  const ids = new Set<string>([userId])
  const membership = sql.rows<{ dst: string }>("SELECT dst FROM edges WHERE src = ? AND label = 'belongsTo'", [userId])
  const granted = sql.rows<{ dst: string }>(
    `SELECT e.dst AS dst FROM edges e JOIN nodes n ON n.id = e.dst
     WHERE e.src = ? AND e.label = 'permissions'
       AND n.coll IN ('groups','roles','organizations','teams')`,
    [userId],
  )
  for (const row of membership) ids.add(row.dst)
  for (const row of granted) ids.add(row.dst)
  return [...ids]
}
36
+
37
/**
 * Records reachable through a direct `permissions` edge from any principal.
 *
 * Only records whose `$.indexingStatus` equals `COMPLETED` are returned. When
 * `apps` is non-empty the result is further limited to records whose
 * `$.connectorId` is one of the given values.
 *
 * @param sql - SQL access seam.
 * @param principals - Edge source ids; an empty list short-circuits to `[]`.
 * @param apps - Optional connector-id filter.
 * @returns `{ rid, vid }` pairs (record id and its `$.virtualRecordId`).
 */
export function recordsPermissionedTo(sql: SqlAccess, principals: string[], apps?: string[]): Pair[] {
  if (principals.length === 0) return []
  const connectorIds = apps ?? []
  const connectorFilter =
    connectorIds.length > 0 ? ` AND json_extract(r.data,'$.connectorId') IN (${sql.ph(connectorIds.length)})` : ""
  const query = `SELECT r.id AS rid, json_extract(r.data,'$.virtualRecordId') AS vid
     FROM edges e JOIN nodes r ON r.id = e.dst AND r.coll = 'records'
     WHERE e.label = 'permissions' AND e.src IN (${sql.ph(principals.length)})
       AND json_extract(r.data,'$.indexingStatus') = ?${connectorFilter}`
  // Parameter order mirrors placeholder order: principals, status, then connectors.
  return sql.rows<Pair>(query, [...principals, COMPLETED, ...connectorIds])
}
48
+
49
/**
 * Ids of `recordGroups` nodes that any principal has a `permissions` edge to.
 *
 * @param sql - SQL access seam.
 * @param principals - Edge source ids; an empty list short-circuits to `[]`.
 * @param kb - Optional allow-list; when non-empty only these group ids can match.
 * @returns Distinct record-group ids.
 */
export function recordGroupsPermissionedTo(sql: SqlAccess, principals: string[], kb?: string[]): string[] {
  if (principals.length === 0) return []
  const only = kb ?? []
  const restrict = only.length > 0 ? ` AND n.id IN (${sql.ph(only.length)})` : ""
  const found = sql.rows<{ id: string }>(
    `SELECT DISTINCT n.id AS id FROM edges e JOIN nodes n ON n.id = e.dst AND n.coll = 'recordGroups'
     WHERE e.label = 'permissions' AND e.src IN (${sql.ph(principals.length)})${restrict}`,
    [...principals, ...only],
  )
  return found.map((row) => row.id)
}
60
+
61
/**
 * Records that inherit permissions, directly or transitively, from any of the
 * given record groups.
 *
 * A recursive CTE walks `inheritPermissions` edges from `dst` to `src` starting
 * at the groups; the reached ids are then narrowed to `records` nodes whose
 * `$.indexingStatus` equals `COMPLETED`.
 *
 * @param sql - SQL access seam.
 * @param recordGroups - Starting group ids; an empty list short-circuits to `[]`.
 * @returns `{ rid, vid }` pairs for the inheriting records.
 */
export function recordsInheritingFrom(sql: SqlAccess, recordGroups: string[]): Pair[] {
  if (recordGroups.length === 0) return []
  const query = `WITH RECURSIVE descend(id) AS (
       SELECT src FROM edges WHERE label = 'inheritPermissions' AND dst IN (${sql.ph(recordGroups.length)})
       UNION
       SELECT e.src FROM edges e JOIN descend d ON e.dst = d.id WHERE e.label = 'inheritPermissions'
     )
     SELECT r.id AS rid, json_extract(r.data,'$.virtualRecordId') AS vid
     FROM nodes r JOIN descend ON r.id = descend.id
     WHERE r.coll = 'records' AND json_extract(r.data,'$.indexingStatus') = ?`
  const params: unknown[] = [...recordGroups, COMPLETED]
  return sql.rows<Pair>(query, params)
}
75
+
76
/**
 * Uploaded records that belong to any of the given record groups.
 *
 * Matches `records` nodes with a `belongsTo` edge into one of the groups, whose
 * `$.origin` is `'UPLOAD'` and whose `$.indexingStatus` equals `COMPLETED`.
 *
 * @param sql - SQL access seam.
 * @param recordGroups - Group ids; an empty list short-circuits to `[]`.
 * @returns `{ rid, vid }` pairs for the matching records.
 */
export function kbRecords(sql: SqlAccess, recordGroups: string[]): Pair[] {
  if (recordGroups.length === 0) return []
  const query = `SELECT r.id AS rid, json_extract(r.data,'$.virtualRecordId') AS vid
     FROM edges e JOIN nodes r ON r.id = e.src AND r.coll = 'records'
     WHERE e.label = 'belongsTo' AND e.dst IN (${sql.ph(recordGroups.length)})
       AND json_extract(r.data,'$.origin') = 'UPLOAD'
       AND json_extract(r.data,'$.indexingStatus') = ?`
  const params: unknown[] = [...recordGroups, COMPLETED]
  return sql.rows<Pair>(query, params)
}
87
+
88
/**
 * Records shared through `anyone` nodes of the given organization.
 *
 * Each `anyone` node whose `$.organization` equals `orgId` is joined to the
 * `records` node named by its `$.file_key`; only records whose
 * `$.indexingStatus` equals `COMPLETED` are returned.
 *
 * @param sql - SQL access seam.
 * @param orgId - Organization value matched on the `anyone` node.
 * @returns `{ rid, vid }` pairs for the shared records.
 */
export function anyoneRecords(sql: SqlAccess, orgId: string): Pair[] {
  const query = `SELECT r.id AS rid, json_extract(r.data,'$.virtualRecordId') AS vid
     FROM nodes a JOIN nodes r ON r.id = json_extract(a.data,'$.file_key') AND r.coll = 'records'
     WHERE a.coll = 'anyone' AND json_extract(a.data,'$.organization') = ?
       AND json_extract(r.data,'$.indexingStatus') = ?`
  const params: unknown[] = [orgId, COMPLETED]
  return sql.rows<Pair>(query, params)
}
@@ -0,0 +1,61 @@
1
+ // Adjacency / scope helpers - build the ACL-scoped subgraph and the small
2
+ // resolution utilities (free-text → entity ids, label lookup, hub threshold)
3
+ // that every traversal shares. Pure over the provider-returned {entities,
4
+ // relations}; the ACL filtering itself happens upstream in the provider.
5
+
6
+ import { slugify, entityId } from "../extract"
7
+ import type { Adj, Entity, Relation } from "./types"
8
+
9
/**
 * Index entities by id and derive an adjacency list from the relations.
 *
 * A relation is kept only when both endpoints are known entities. Each kept
 * relation contributes two entries: an `"out"` edge on its `from` node and an
 * `"in"` edge on its `to` node. Every list is then ordered with typed edges
 * ahead of the generic `"relates_to"` ones and, within each class, by
 * descending weight.
 *
 * @param entities - Entities of the scoped graph.
 * @param relations - Relations of the scoped graph.
 * @returns `byId` (id → entity) and `adj` (id → ordered edge list).
 */
export function buildAdjacency(
  entities: Entity[],
  relations: Relation[],
): { byId: Map<string, Entity>; adj: Map<string, Adj[]> } {
  const byId = new Map<string, Entity>()
  for (const entity of entities) byId.set(entity.id, entity)

  const adj = new Map<string, Adj[]>()
  const link = (owner: string, edge: Adj): void => {
    const bucket = adj.get(owner)
    if (bucket) bucket.push(edge)
    else adj.set(owner, [edge])
  }

  for (const rel of relations) {
    const bothKnown = byId.has(rel.from) && byId.has(rel.to)
    if (!bothKnown) continue
    link(rel.from, { to: rel.to, type: rel.type, weight: rel.weight, dir: "out" })
    link(rel.to, { to: rel.from, type: rel.type, weight: rel.weight, dir: "in" })
  }

  // Generic co-occurrence edges sort last so precise predicates lead each list.
  const genericRank = (type: string): number => (type === "relates_to" ? 1 : 0)
  for (const edges of adj.values()) {
    edges.sort((left, right) => {
      const byKind = genericRank(left.type) - genericRank(right.type)
      if (byKind !== 0) return byKind
      return right.weight - left.weight
    })
  }
  return { byId, adj }
}
36
+
37
/**
 * Map a free-text name onto entity ids from the supplied set.
 *
 * Exact matches win: an entity whose id equals `entityId(slugify(name))` or
 * whose lower-cased label equals the trimmed, lower-cased name. Only when there
 * is no exact match does it fall back to containment: the label contains the
 * query, or (for slugs of at least 3 characters) the id contains the slug.
 *
 * @param name - Free-text entity name.
 * @param entities - Candidate entities.
 * @returns De-duplicated matching ids; `[]` for a blank name or no match.
 */
export function resolveIds(name: string, entities: Entity[]): string[] {
  const needle = name.trim().toLowerCase()
  if (needle === "") return []
  const slug = slugify(name)
  const wantedId = entityId(slug)
  const uniqueIds = (hits: Entity[]): string[] => [...new Set(hits.map((hit) => hit.id))]

  const exact = entities.filter((ent) => ent.id === wantedId || ent.label.toLowerCase() === needle)
  if (exact.length > 0) return uniqueIds(exact)

  // Very short slugs are skipped for id containment.
  const slugUsable = slug.length >= 3
  const partial = entities.filter(
    (ent) => ent.label.toLowerCase().includes(needle) || (slugUsable && ent.id.includes(slug)),
  )
  return uniqueIds(partial)
}
49
+
50
/**
 * Display label for an entity id.
 *
 * @param id - Entity id to look up.
 * @param byId - id → entity index.
 * @returns The entity's label, or the id itself when the entity (or its label)
 *   is missing.
 */
export function labelOf(id: string, byId: Map<string, Entity>): string {
  const entity = byId.get(id)
  return entity?.label ?? id
}
53
+
54
/**
 * Degree above which a node counts as a hub that traversals should not expand.
 *
 * Computed as `max(50, p99)`, where `p99` is the degree found at index
 * `floor(count * 0.99)` (clamped to the last index) of the ascending degree list.
 *
 * @param adj - id → edge list; a node's degree is its list length.
 * @returns The threshold; `50` when the adjacency is empty.
 */
export function hubThreshold(adj: Map<string, Adj[]>): number {
  const FLOOR = 50
  if (adj.size === 0) return FLOOR
  const degrees: number[] = []
  for (const edges of adj.values()) degrees.push(edges.length)
  degrees.sort((x, y) => x - y)
  const at = Math.min(degrees.length - 1, Math.floor(degrees.length * 0.99))
  return Math.max(FLOOR, degrees[at])
}
@@ -0,0 +1,23 @@
1
+ // Centrality - hub entities ranked by total edge weight (then degree). Pure over
2
+ // the scoped adjacency. "What are the central things I know about."
3
+
4
+ import { labelOf } from "./adjacency"
5
+ import type { Adj, Entity } from "./types"
6
+
7
/**
 * Rank every node that has adjacency entries by how connected it is.
 *
 * `strength` is the sum of a node's edge weights and `degree` its edge count;
 * ordering is by strength descending, then degree descending.
 *
 * @param byId - id → entity index used for labels.
 * @param adj - id → edge list.
 * @param limit - Maximum number of rows returned (default 10).
 * @returns The top `limit` rows as `{ label, degree, strength }`.
 */
export function centralEntities(
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
  limit = 10,
): Array<{ label: string; degree: number; strength: number }> {
  const ranked = [...adj].map(([id, edges]) => {
    let strength = 0
    for (const edge of edges) strength += edge.weight
    return { label: labelOf(id, byId), degree: edges.length, strength }
  })
  ranked.sort((left, right) => {
    const byStrength = right.strength - left.strength
    return byStrength || right.degree - left.degree
  })
  return ranked.slice(0, limit)
}
@@ -0,0 +1,46 @@
1
+ // Communities - connected clusters of related entities (Graphify's god-node /
2
+ // community view), via union-find over live edges. Each cluster's `hub` is its
3
+ // most-connected member. Pure over the scoped subgraph (ACL-scoped upstream).
4
+
5
+ import { labelOf } from "./adjacency"
6
+ import type { Adj, Entity, Relation } from "./types"
7
+
8
/**
 * Group entities into connected clusters using union-find over the relations.
 *
 * Relations with an endpoint outside `entities` are ignored. Clusters smaller
 * than `minSize` are dropped. Each cluster's `hub` is the member with the
 * longest adjacency list (the earliest member wins ties).
 *
 * @param entities - Entities to cluster.
 * @param relations - Relations joining them.
 * @param byId - id → entity index used for labels.
 * @param adj - id → edge list used to pick each hub.
 * @param minSize - Smallest cluster size to report (default 2).
 * @returns Clusters as `{ size, hub, members }` (labels), largest first.
 */
export function detectCommunities(
  entities: Entity[],
  relations: Relation[],
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
  minSize = 2,
): Array<{ size: number; hub: string; members: string[] }> {
  // Every entity starts as the root of its own singleton set.
  const parent = new Map<string, string>()
  for (const entity of entities) parent.set(entity.id, entity.id)

  const rootOf = (start: string): string => {
    // Pass 1: climb to the representative.
    let root = start
    let up = parent.get(root) as string
    while (up !== root) {
      root = up
      up = parent.get(root) as string
    }
    // Pass 2: path compression - repoint every node on the way at the root.
    let cursor = start
    let next = parent.get(cursor) as string
    while (next !== root) {
      parent.set(cursor, root)
      cursor = next
      next = parent.get(cursor) as string
    }
    return root
  }

  for (const rel of relations) {
    const known = parent.has(rel.from) && parent.has(rel.to)
    if (!known) continue
    const fromRoot = rootOf(rel.from)
    const toRoot = rootOf(rel.to)
    parent.set(fromRoot, toRoot)
  }

  const clusters = new Map<string, string[]>()
  for (const entity of entities) {
    const root = rootOf(entity.id)
    const bucket = clusters.get(root)
    if (bucket) bucket.push(entity.id)
    else clusters.set(root, [entity.id])
  }

  const degreeOf = (id: string): number => adj.get(id)?.length ?? 0
  const out: Array<{ size: number; hub: string; members: string[] }> = []
  for (const ids of clusters.values()) {
    if (ids.length < minSize) continue
    let hubId = ids[0]
    for (const id of ids) {
      if (degreeOf(id) > degreeOf(hubId)) hubId = id
    }
    out.push({ size: ids.length, hub: labelOf(hubId, byId), members: ids.map((id) => labelOf(id, byId)) })
  }
  out.sort((left, right) => right.size - left.size)
  return out
}
@@ -0,0 +1,17 @@
1
+ // Cypher export (Graphify's to_cypher) - render the already ACL-filtered subgraph
2
+ // as idempotent MERGE statements for Neo4j interop. Pure; ACL filtering happens
3
+ // upstream so this only ever sees what the user may access.
4
+
5
+ import type { Entity, Relation } from "./types"
6
+
7
/**
 * Render a subgraph as Cypher `MERGE` statements, one statement per line.
 *
 * Entities become `MERGE (n:<Label> {id, label})`, where the label is the entity
 * type with every character outside `[A-Za-z0-9_]` replaced by `_` (`Entity`
 * when that leaves nothing). Relations whose endpoints are both in `byId` become
 * a `MATCH … MERGE (a)-[:<TYPE> {weight}]->(b)` statement, the type being
 * upper-cased and sanitized the same way (`RELATES_TO` when empty).
 *
 * Sanitized names that would start with a digit are prefixed with `_`, because
 * an unquoted Cypher name may not begin with a number. String values have
 * backslashes, single quotes, and CR/LF escaped, so a value can neither end its
 * literal nor split a statement across lines of the newline-joined output.
 *
 * @param entities - Entities to emit.
 * @param relations - Relations to emit.
 * @param byId - id → entity index used to drop relations with unknown endpoints.
 * @returns The statements joined with `"\n"` (empty string when there are none).
 */
export function toCypher(entities: Entity[], relations: Relation[], byId: Map<string, Entity>): string {
  // Backslash first so the escapes added afterwards are not themselves doubled.
  const esc = (s: string) =>
    s.replace(/\\/g, "\\\\").replace(/'/g, "\\'").replace(/\r/g, "\\r").replace(/\n/g, "\\n")
  // Unquoted Cypher names must not start with a digit.
  const leadSafe = (name: string) => (/^[0-9]/.test(name) ? `_${name}` : name)
  const lines: string[] = []
  for (const e of entities) {
    const label = leadSafe(e.type.replace(/[^A-Za-z0-9_]/g, "_") || "Entity")
    lines.push(`MERGE (n:${label} {id:'${esc(e.id)}', label:'${esc(e.label)}'});`)
  }
  for (const r of relations) {
    if (!byId.has(r.from) || !byId.has(r.to)) continue
    const rel = leadSafe((r.type || "RELATES_TO").toUpperCase().replace(/[^A-Z0-9_]/g, "_"))
    lines.push(`MATCH (a {id:'${esc(r.from)}'}),(b {id:'${esc(r.to)}'}) MERGE (a)-[:${rel} {weight:${r.weight}}]->(b);`)
  }
  return lines.join("\n")
}
@@ -0,0 +1,24 @@
1
+ // Impact / reverse-reference - given resolved entity ids, the directly connected
2
+ // entities. The "which records mention it" half lives in the provider
3
+ // (recordsMentioning); the orchestrator joins the two. Pure over the scoped adj.
4
+
5
+ import type { ImpactResult } from "../graph-query"
6
+ import { labelOf } from "./adjacency"
7
+ import type { Adj, Entity } from "./types"
8
+
9
/**
 * List the distinct neighbors of the given entity ids.
 *
 * Neighbors are reported in encounter order (ids in order, each id's edges in
 * adjacency order); a neighbor already reported is skipped, so the first edge
 * seen decides the `relation` shown for it.
 *
 * @param ids - Resolved entity ids to expand.
 * @param byId - id → entity index used for labels.
 * @param adj - id → edge list.
 * @returns `{ label, relation }` per distinct neighbor.
 */
export function connectedEntities(
  ids: string[],
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
): ImpactResult["connectedEntities"] {
  const visited = new Set<string>()
  const result: ImpactResult["connectedEntities"] = []
  for (const id of ids) {
    const edges = adj.get(id)
    if (!edges) continue
    for (const edge of edges) {
      if (visited.has(edge.to)) continue
      visited.add(edge.to)
      result.push({ label: labelOf(edge.to, byId), relation: edge.type })
    }
  }
  return result
}
@@ -0,0 +1,108 @@
1
+ // Knowledge-graph assembly + GraphRAG expansion over the ACL-filtered record set.
2
+ //
3
+ // SECURITY INVARIANT (provenance leak-guard): in `getKnowledgeGraph`, a relation
4
+ // surfaces ONLY if a record the user may access ASSERTED it (provenance ∩
5
+ // accessible ≠ ∅). Endpoint visibility is NOT enough - two visible entities can
6
+ // still have a relationship stated only in a record the user can't see. Fail
7
+ // closed: no provenance match ⇒ hidden. Preserve this exactly.
8
+
9
+ import type { SqlAccess } from "./sql-access"
10
+
11
/**
 * Names of accessible records that mention at least one of the given entities.
 *
 * Follows `mentions` edges whose source is in `accessibleRecordIds` and whose
 * target is in `entityIds`, then reads `recordName` from each record's JSON
 * `data`. Records with a missing or empty name are left out.
 *
 * @param sql - SQL access seam.
 * @param entityIds - Entity ids to look for.
 * @param accessibleRecordIds - Record ids the caller may see.
 * @returns Record names; `[]` when either id list is empty.
 */
export function recordsMentioning(sql: SqlAccess, entityIds: string[], accessibleRecordIds: string[]): string[] {
  if (entityIds.length === 0 || accessibleRecordIds.length === 0) return []
  const query = `SELECT DISTINCT n.data AS data
     FROM edges m JOIN nodes n ON n.id = m.src AND n.coll = 'records'
     WHERE m.label = 'mentions' AND m.dst IN (${sql.ph(entityIds.length)}) AND m.src IN (${sql.ph(accessibleRecordIds.length)})`
  const names: string[] = []
  for (const row of sql.rows<{ data: string }>(query, [...entityIds, ...accessibleRecordIds])) {
    const name = (JSON.parse(row.data) as { recordName?: string }).recordName ?? ""
    if (name) names.push(name)
  }
  return names
}
22
+
23
/**
 * GraphRAG hop: find other accessible records connected to the seed records
 * through shared or neighboring entities.
 *
 * Steps: seed records → entities they mention → entities one live
 * (`expired_at IS NULL`) non-structural edge away, in either direction and of
 * any predicate → records mentioning any entity in that set, ordered by mention
 * count. Results are then limited to `accessibleRecordIds` and the seeds are
 * removed.
 *
 * @param sql - SQL access seam.
 * @param seedRecordIds - Records to expand from.
 * @param accessibleRecordIds - Record ids the caller may see.
 * @param limit - Maximum number of ids returned (default 5).
 * @returns Related record ids; `[]` when either input list is empty or the
 *   seeds mention no entity.
 */
export function getRelatedRecordIds(
  sql: SqlAccess,
  seedRecordIds: string[],
  accessibleRecordIds: string[],
  limit = 5,
): string[] {
  if (seedRecordIds.length === 0 || accessibleRecordIds.length === 0) return []

  // 1) Entities mentioned by the seed records.
  const seedEntityRows = sql.rows<{ dst: string }>(
    `SELECT DISTINCT dst FROM edges WHERE label = 'mentions' AND src IN (${sql.ph(seedRecordIds.length)})`,
    seedRecordIds,
  )
  const seedEntities = seedEntityRows.map((row) => row.dst)
  if (seedEntities.length === 0) return []

  // 2) Widen by one hop over live entity→entity edges, both directions.
  const marks = sql.ph(seedEntities.length)
  const concepts = new Set<string>(seedEntities)
  const hops = sql.rows<{ e: string }>(
    `SELECT dst AS e FROM edges WHERE label NOT IN ('mentions','permissions','belongsTo','inheritPermissions') AND expired_at IS NULL AND src IN (${marks})
     UNION SELECT src AS e FROM edges WHERE label NOT IN ('mentions','permissions','belongsTo','inheritPermissions') AND expired_at IS NULL AND dst IN (${marks})`,
    [...seedEntities, ...seedEntities],
  )
  for (const hop of hops) concepts.add(hop.e)

  // 3) Records mentioning any of those entities, most mentions first.
  const conceptIds = [...concepts]
  const seedSet = new Set(seedRecordIds)
  const allowed = new Set(accessibleRecordIds)
  const ranked = sql.rows<{ src: string }>(
    `SELECT src, COUNT(*) c FROM edges
     WHERE label = 'mentions' AND dst IN (${sql.ph(conceptIds.length)})
     GROUP BY src ORDER BY c DESC`,
    conceptIds,
  )
  const picked: string[] = []
  for (const row of ranked) {
    if (allowed.has(row.src) && !seedSet.has(row.src)) picked.push(row.src)
  }
  return picked.slice(0, limit)
}
66
+
67
/**
 * The knowledge graph exposed by the given (already ACL-filtered) records: the
 * entities those records mention plus the live relations among them.
 *
 * SECURITY: a relation surfaces only when BOTH endpoints are mentioned by the
 * given records AND at least one id in its `provenance` JSON array is one of
 * `recordIds`. Endpoint visibility alone is not enough. The check fails closed:
 * provenance that is empty, not valid JSON, or not an array hides the edge
 * rather than throwing and aborting the whole graph.
 *
 * @param sql - SQL access seam.
 * @param recordIds - Record ids the caller may access.
 * @returns `entities` (label defaults to the id, type to `"CONCEPT"`) and
 *   `relations` in descending-weight query order; both empty when `recordIds`
 *   is empty.
 */
export function getKnowledgeGraph(
  sql: SqlAccess,
  recordIds: string[],
): {
  entities: Array<{ id: string; label: string; type: string }>
  relations: Array<{ from: string; to: string; type: string; weight: number }>
} {
  if (recordIds.length === 0) return { entities: [], relations: [] }
  const ph = sql.ph(recordIds.length)
  const ents = sql
    .rows<{ id: string; data: string }>(
      `SELECT DISTINCT e.id AS id, e.data AS data
       FROM edges m JOIN nodes e ON e.id = m.dst AND e.coll = 'entities'
       WHERE m.label = 'mentions' AND m.src IN (${ph})`,
      recordIds,
    )
    .map((r) => {
      const d = JSON.parse(r.data) as { label?: string; type?: string }
      return { id: r.id, label: d.label ?? r.id, type: d.type ?? "CONCEPT" }
    })
  const ids = new Set(ents.map((e) => e.id))
  const accessible = new Set(recordIds)

  // Provenance leak-guard: true only when an accessible record asserted the
  // edge. Anything unparseable counts as "no provenance" and stays hidden.
  const assertedByAccessible = (provenance: string | null | undefined): boolean => {
    if (!provenance) return false
    let parsed: unknown
    try {
      parsed = JSON.parse(provenance)
    } catch {
      return false
    }
    return Array.isArray(parsed) && parsed.some((p) => typeof p === "string" && accessible.has(p))
  }

  // Typed relations: entity→entity edges of any predicate (structural labels
  // excluded), LIVE only - superseded facts stay in history but never surface.
  const relations = sql
    .rows<{ src: string; dst: string; label: string; weight: number; provenance: string }>(
      `SELECT DISTINCT src, dst, label, weight, provenance FROM edges
       WHERE label NOT IN ('mentions','permissions','belongsTo','inheritPermissions') AND expired_at IS NULL
       ORDER BY weight DESC`,
      [],
    )
    .filter((r) => ids.has(r.src) && ids.has(r.dst) && assertedByAccessible(r.provenance))
    .map((r) => ({ from: r.src, to: r.dst, type: r.label, weight: r.weight }))
  return { entities: ents, relations }
}
@@ -0,0 +1,57 @@
1
+ // Personalized PageRank multi-hop walk (HippoRAG-style). Seeds activation mass on
2
+ // the query's entities and spreads it over the ACL-scoped, weighted typed graph;
3
+ // a node reachable via MANY paths scores higher than a near dead-end. Pure TS
4
+ // power-iteration (sub-ms at our scale). Returns ranked related entities (seeds
5
+ // excluded). Edge weight (frequency≈confidence) steers the flow.
6
+
7
+ import type { Adj, Entity } from "./types"
8
+
9
/**
 * Personalized PageRank by power iteration over the weighted adjacency.
 *
 * Restart mass is placed on the seed entities (`1 / seedIds.size` for each seed
 * present in `entities`). Every iteration a node keeps `(1 - alpha)` of its
 * restart mass and receives `alpha` of each neighbor's score, split in
 * proportion to edge weight. Edges to ids outside `entities` are skipped.
 *
 * @param entities - Nodes taking part in the walk.
 * @param byId - id → entity index used for labels and types.
 * @param adj - id → edge list.
 * @param seedIds - Entity ids carrying the restart mass.
 * @param opts - `alpha` (share of score passed along edges), `iters` (number of
 *   iterations), `limit` (maximum rows returned).
 * @returns Non-seed entities with a positive score, highest first, as
 *   `{ label, score, type }`; `[]` when there are no entities or no seeds.
 */
export function personalizedPageRank(
  entities: Entity[],
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
  seedIds: Set<string>,
  opts: { alpha: number; iters: number; limit: number },
): Array<{ label: string; score: number; type: string }> {
  if (entities.length === 0 || seedIds.size === 0) return []
  const { alpha, iters, limit } = opts

  const ids = entities.map((entity) => entity.id)
  const n = ids.length
  const idx = new Map<string, number>()
  ids.forEach((id, position) => idx.set(id, position))

  // Total edge weight per node; 1 stands in for a zero total so the division
  // below is always defined.
  const outWeight = new Float64Array(n)
  for (const id of ids) {
    const total = (adj.get(id) ?? []).reduce((sum, edge) => sum + edge.weight, 0)
    outWeight[idx.get(id) as number] = total || 1
  }

  // Restart distribution: equal mass on each seed.
  const teleport = new Float64Array(n)
  const seedShare = 1 / seedIds.size
  for (const seed of seedIds) {
    const at = idx.get(seed)
    if (at !== undefined) teleport[at] = seedShare
  }

  let rank = Float64Array.from(teleport)
  for (let round = 0; round < iters; round++) {
    const next = teleport.map((mass) => (1 - alpha) * mass) // restart to seeds
    for (const id of ids) {
      const i = idx.get(id) as number
      const held = rank[i]
      if (held === 0) continue
      const perUnit = (alpha * held) / outWeight[i]
      for (const edge of adj.get(id) ?? []) {
        const j = idx.get(edge.to)
        if (j !== undefined) next[j] += perUnit * edge.weight
      }
    }
    rank = next
  }

  const candidates: Array<{ id: string; label: string; type: string; score: number }> = []
  ids.forEach((id, position) => {
    const score = rank[position]
    if (seedIds.has(id) || !(score > 0)) return
    const entity = byId.get(id)
    candidates.push({ id, label: entity?.label ?? id, type: entity?.type ?? "", score })
  })
  candidates.sort((left, right) => right.score - left.score)
  return candidates.slice(0, limit).map(({ label, score, type }) => ({ label, score, type }))
}
@@ -0,0 +1,13 @@
1
+ // Tiny SQL-access seam shared by the provider's decomposed query modules. The
2
+ // provider passes itself (its `rows` + `ph` helpers over bun:sqlite) so the
3
+ // permission-path and knowledge-graph functions stay pure of any class state.
4
+
5
/**
 * Minimal SQL surface handed to the query helper modules, so they depend on two
 * functions rather than on a concrete provider.
 */
export interface SqlAccess {
  /**
   * Execute `sql` with positional `params` and return every resulting row.
   * The row type `T` is a caller-supplied assertion about the row shape.
   */
  rows<T = any>(sql: string, params: unknown[]): T[]
  /** Produce `n` comma-separated positional placeholders, e.g. `"?,?,?"` for 3. */
  ph(n: number): string
}
11
+
12
/** The `indexingStatus` value the record queries compare against. */
export const COMPLETED = "COMPLETED"

/** One query hit: the record node id plus its `virtualRecordId` (may be null). */
export type Pair = {
  rid: string
  vid: string | null
}
@@ -0,0 +1,73 @@
1
+ // Traversal - direct neighbors and shortest relation chains (undirected,
2
+ // hub-avoiding BFS). Pure over the scoped subgraph; results are labeled by the
3
+ // caller-provided byId map. ACL-safe by construction (the subgraph already
4
+ // excludes inaccessible entities/edges).
5
+
6
+ import type { NeighborResult, PathResult } from "../graph-query"
7
+ import { hubThreshold, labelOf } from "./adjacency"
8
+ import type { Adj, Entity } from "./types"
9
+
10
/**
 * Direct neighbors of the given entity ids.
 *
 * Edges are de-duplicated on (target, relation, direction). When `relation` is
 * given, only edges of exactly that type are kept. The result is ordered with
 * typed relations ahead of the generic `"relates_to"` and, within each class,
 * by descending weight.
 *
 * @param ids - Resolved entity ids; the first one names the result's `entity`.
 * @param byId - id → entity index used for labels.
 * @param adj - id → edge list.
 * @param relation - Optional exact relation type to keep.
 * @returns The entity label and its neighbor rows.
 */
export function neighborsOf(
  ids: string[],
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
  relation?: string,
): NeighborResult {
  const emitted = new Set<string>()
  const neighbors: NeighborResult["neighbors"] = []
  const candidates = ids.flatMap((id) => adj.get(id) ?? [])
  for (const edge of candidates) {
    if (relation && edge.type !== relation) continue
    const fingerprint = `${edge.to}|${edge.type}|${edge.dir}`
    if (emitted.has(fingerprint)) continue
    emitted.add(fingerprint)
    neighbors.push({ label: labelOf(edge.to, byId), relation: edge.type, direction: edge.dir, weight: edge.weight })
  }
  // Real predicates lead; the generic co-occurrence relation goes last.
  const genericLast = (rel: string): number => (rel === "relates_to" ? 1 : 0)
  neighbors.sort((a, b) => genericLast(a.relation) - genericLast(b.relation) || b.weight - a.weight)
  return { entity: labelOf(ids[0], byId), neighbors }
}
33
+
34
/**
 * Shortest relation chain between two id sets: breadth-first search over the
 * adjacency, treating edges as undirected (the adjacency holds both directions).
 *
 * Nodes whose degree exceeds `hubThreshold(adj)` are not expanded unless they
 * are start nodes, but they can still be reached and matched as a goal. The
 * search stops at the first goal dequeued.
 *
 * The frontier is read through a moving head index and start membership is
 * kept in a Set, so the search does not re-index the queue on every dequeue.
 *
 * @param startIds - Ids to search from.
 * @param goalIds - Ids that end the search.
 * @param byId - id → entity index used for step labels.
 * @param adj - id → edge list.
 * @returns `{ found, hops, steps }`; `found: false` with no steps when either
 *   side is empty or no goal is reachable. A start that is itself a goal gives
 *   `found: true` with zero hops.
 */
export function shortestPath(
  startIds: string[],
  goalIds: Set<string>,
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
): PathResult {
  if (startIds.length === 0 || goalIds.size === 0) return { found: false, hops: 0, steps: [] }
  const hub = hubThreshold(adj)
  const prev = new Map<string, { from: string; type: string }>()
  const starts = new Set<string>(startIds)
  const queue: string[] = [...startIds]
  const visited = new Set<string>(startIds)
  let hitGoal: string | null = null
  // `queue` only grows; `head` marks the next node to process (O(1) dequeue).
  for (let head = 0; head < queue.length; head++) {
    const cur = queue[head] as string
    if (goalIds.has(cur)) {
      hitGoal = cur
      break
    }
    const edges = adj.get(cur) ?? []
    // don't EXPAND through a hub (but it can still be a goal, handled above)
    if (edges.length > hub && !starts.has(cur)) continue
    for (const e of edges) {
      if (visited.has(e.to)) continue
      visited.add(e.to)
      prev.set(e.to, { from: cur, type: e.type })
      queue.push(e.to)
    }
  }
  // Explicit null check so an empty-string id still counts as a hit.
  if (hitGoal === null) return { found: false, hops: 0, steps: [] }
  // Walk the predecessor links back to a start, then flip into start→goal order.
  const chain: Array<{ from: string; relation: string; to: string }> = []
  let node = hitGoal
  while (prev.has(node)) {
    const p = prev.get(node) as { from: string; type: string }
    chain.push({ from: labelOf(p.from, byId), relation: p.type, to: labelOf(node, byId) })
    node = p.from
  }
  chain.reverse()
  return { found: true, hops: chain.length, steps: chain }
}
@@ -0,0 +1,35 @@
1
+ // Shared in-memory graph shapes used across the decomposed graph-query modules.
2
+ // These mirror the structures GraphQueryService builds from the provider's
3
+ // ACL-scoped {entities, relations}; kept here so the pure-function modules can
4
+ // be typed without depending on the orchestrating class.
5
+
6
/** A graph node as the graph-query helpers see it. */
export interface Entity {
  /** Identifier used as the key in id maps and as a relation endpoint. */
  id: string
  /** Display text for the entity. */
  label: string
  /** Type name of the entity. */
  type: string
}
11
+
12
/** A directed, weighted edge between two entities. */
export interface Relation {
  /** Id of the source entity. */
  from: string
  /** Id of the target entity. */
  to: string
  /** Predicate / relation name. */
  type: string
  /** Numeric edge weight used for ordering and ranking. */
  weight: number
}
18
+
19
/** One adjacency-list entry: an edge as seen from the node that owns the list. */
export interface Adj {
  /** Id of the entity at the other end of the edge. */
  to: string
  /** Predicate / relation name. */
  type: string
  /** Numeric edge weight. */
  weight: number
  /** `"out"` when the owning node is the edge's source, `"in"` when it is the target. */
  dir: "out" | "in"
}
25
+
26
/**
 * The ACL-scoped subgraph handed to the traversal helpers: the entities and
 * LIVE relations the user may see, together with the structures derived from
 * them. Traversals are meant to work only over this, so no query crosses a
 * permission boundary.
 */
export interface ScopedGraph {
  /** Entities in scope. */
  entities: Entity[]
  /** Relations in scope. */
  relations: Relation[]
  /** id → entity index. */
  byId: Map<string, Entity>
  /** id → adjacency list. */
  adj: Map<string, Adj[]>
  /** Ids of the accessible records. */
  recordIds: string[]
}