@100xprompt/chitta 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +203 -0
- package/assets/rules/claude-md.md +9 -0
- package/assets/skill/SKILL.md +47 -0
- package/package.json +48 -0
- package/src/README.md +124 -0
- package/src/arango-client.ts +67 -0
- package/src/arango-graph-provider.ts +364 -0
- package/src/bin.ts +27 -0
- package/src/config-env.ts +53 -0
- package/src/embedded/authorizer.ts +89 -0
- package/src/embedded/cli.ts +86 -0
- package/src/embedded/code-extractor.ts +9 -0
- package/src/embedded/demo.ts +36 -0
- package/src/embedded/extract.ts +12 -0
- package/src/embedded/extractors/code.ts +308 -0
- package/src/embedded/extractors/deterministic.ts +63 -0
- package/src/embedded/extractors/llm.ts +151 -0
- package/src/embedded/extractors/text-hygiene.ts +54 -0
- package/src/embedded/extractors/types.ts +34 -0
- package/src/embedded/graph/acl-paths.ts +96 -0
- package/src/embedded/graph/adjacency.ts +61 -0
- package/src/embedded/graph/centrality.ts +23 -0
- package/src/embedded/graph/communities.ts +46 -0
- package/src/embedded/graph/cypher.ts +17 -0
- package/src/embedded/graph/impact.ts +24 -0
- package/src/embedded/graph/knowledge-graph.ts +108 -0
- package/src/embedded/graph/pagerank.ts +57 -0
- package/src/embedded/graph/sql-access.ts +13 -0
- package/src/embedded/graph/traversal.ts +73 -0
- package/src/embedded/graph/types.ts +35 -0
- package/src/embedded/graph-query.ts +126 -0
- package/src/embedded/index.ts +171 -0
- package/src/embedded/ingest.ts +262 -0
- package/src/embedded/kgqa/answer-paths.ts +197 -0
- package/src/embedded/kgqa/entity-link.ts +13 -0
- package/src/embedded/kgqa/intent.ts +14 -0
- package/src/embedded/kgqa/predicates.ts +9 -0
- package/src/embedded/kgqa/preference.ts +20 -0
- package/src/embedded/kgqa/select.ts +99 -0
- package/src/embedded/kgqa/text.ts +16 -0
- package/src/embedded/kgqa/types.ts +6 -0
- package/src/embedded/kgqa-service.ts +122 -0
- package/src/embedded/llm-extractor.ts +10 -0
- package/src/embedded/local-embeddings.ts +36 -0
- package/src/embedded/personal.ts +100 -0
- package/src/embedded/reranker.ts +62 -0
- package/src/embedded/retrieval/decay-stage.ts +59 -0
- package/src/embedded/retrieval/diversity.ts +37 -0
- package/src/embedded/retrieval/fuse.ts +52 -0
- package/src/embedded/retrieval/graph-stage.ts +45 -0
- package/src/embedded/retrieval/hybrid-retriever.ts +80 -0
- package/src/embedded/retrieval/keyword-stage.ts +27 -0
- package/src/embedded/retrieval/passage.ts +44 -0
- package/src/embedded/retrieval/rerank-stage.ts +31 -0
- package/src/embedded/retrieval/trace.ts +31 -0
- package/src/embedded/retrieval/vector-stage.ts +15 -0
- package/src/embedded/sqlite-graph-provider.ts +119 -0
- package/src/embedded/sqlite-store.ts +95 -0
- package/src/embedded/sqlite-vec-service.ts +122 -0
- package/src/embedded/store/chunks.ts +61 -0
- package/src/embedded/store/fts.ts +50 -0
- package/src/embedded/store/nodes-edges.ts +112 -0
- package/src/embedded/store/salience.ts +37 -0
- package/src/embedded/store/schema.ts +109 -0
- package/src/embedded/transformers-embeddings.ts +100 -0
- package/src/embeddings.ts +51 -0
- package/src/eval/goldset.ts +46 -0
- package/src/eval/harness.ts +65 -0
- package/src/eval/metrics.ts +38 -0
- package/src/http/server.ts +93 -0
- package/src/index.ts +44 -0
- package/src/install/index.ts +139 -0
- package/src/install/platforms.ts +126 -0
- package/src/install/skill.ts +46 -0
- package/src/install/writers.ts +82 -0
- package/src/mcp/backend.ts +129 -0
- package/src/mcp/server.ts +83 -0
- package/src/mcp/tools/context-about.ts +69 -0
- package/src/mcp/tools/context-graph.ts +23 -0
- package/src/mcp/tools/context-ingest.ts +88 -0
- package/src/mcp/tools/context-rebuild.ts +22 -0
- package/src/mcp/tools/context-relate.ts +88 -0
- package/src/mcp/tools/get-context.ts +52 -0
- package/src/mcp/tools/index.ts +40 -0
- package/src/mcp/tools/types.ts +33 -0
- package/src/permission.ts +72 -0
- package/src/provider.ts +65 -0
- package/src/qdrant-vector.ts +76 -0
- package/src/retrieval.ts +218 -0
- package/src/service.ts +40 -0
- package/src/types.ts +91 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
// ACL permission paths - the Arango-AQL traversal ported to SQL over the generic
|
|
2
|
+
// node/edge tables. These eight-path-collapsed-to-five helpers are the access
|
|
3
|
+
// moat; the provider unions their results and dedupes first-writer-wins.
|
|
4
|
+
//
|
|
5
|
+
// • principals = the user + every group/role/org/team they belong to or are
|
|
6
|
+
// permissioned to (one hop).
|
|
7
|
+
// • directRecords = records permissioned to any principal.
|
|
8
|
+
// • recordGroups = record-groups permissioned to any principal, then both:
|
|
9
|
+
// - inheritedRecords (recursive descent over inheritPermissions), and
|
|
10
|
+
// - kbRecords (records that belong to those groups, origin=UPLOAD).
|
|
11
|
+
// • anyoneRecords = org-wide shared records.
|
|
12
|
+
// Same invariant as the Arango port; only the query language differs.
|
|
13
|
+
|
|
14
|
+
import type { UserDoc } from "../../types"
|
|
15
|
+
import { COMPLETED, type Pair, type SqlAccess } from "./sql-access"
|
|
16
|
+
|
|
17
|
+
/**
 * Look up the `users` node whose JSON payload carries the given `userId`.
 *
 * @param sql - SQL access seam used to run the lookup.
 * @param userId - Value compared against `$.userId` inside the node's `data` JSON.
 * @returns The parsed user document with `id` and `_key` both set to the node id,
 *          or `null` when no row matches.
 */
export function userRow(sql: SqlAccess, userId: string): (UserDoc & { id: string }) | null {
  const query = "SELECT id, data FROM nodes WHERE coll = 'users' AND json_extract(data, '$.userId') = ? LIMIT 1"
  const [match] = sql.rows<{ id: string; data: string }>(query, [userId])
  if (!match) return null
  const doc = JSON.parse(match.data) as UserDoc
  // The node id overrides any `id` / `_key` carried inside the stored payload.
  return { ...doc, id: match.id, _key: match.id }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Collect the principal ids that act on behalf of a user: the user itself, every
 * `belongsTo` target of the user, and every `permissions` target of the user that
 * is a node in `groups`, `roles`, `organizations` or `teams` (one hop only).
 *
 * @param sql - SQL access seam.
 * @param userId - Node id used as the `src` of the edges.
 * @returns De-duplicated ids, user first, then memberships, then permissioned principals.
 */
export function principalIds(sql: SqlAccess, userId: string): string[] {
  const memberships = sql.rows<{ dst: string }>("SELECT dst FROM edges WHERE src = ? AND label = 'belongsTo'", [userId])
  const granted = sql.rows<{ dst: string }>(
    `SELECT e.dst AS dst FROM edges e JOIN nodes n ON n.id = e.dst
     WHERE e.src = ? AND e.label = 'permissions'
       AND n.coll IN ('groups','roles','organizations','teams')`,
    [userId],
  )
  const ordered: string[] = [userId]
  for (const row of memberships) ordered.push(row.dst)
  for (const row of granted) ordered.push(row.dst)
  // A Set keeps first-insertion order, so the user id always stays in front.
  return Array.from(new Set(ordered))
}
|
|
36
|
+
|
|
37
|
+
/**
 * Records that have a `permissions` edge from any of the given principals and whose
 * `$.indexingStatus` equals `COMPLETED`.
 *
 * @param sql - SQL access seam.
 * @param principals - Principal ids used as edge sources; an empty list short-circuits to `[]`.
 * @param apps - Optional connector ids; when non-empty, records are further restricted
 *               to those whose `$.connectorId` is in the list.
 * @returns `(rid, vid)` pairs exactly as returned by the query (no de-duplication here).
 */
export function recordsPermissionedTo(sql: SqlAccess, principals: string[], apps?: string[]): Pair[] {
  if (principals.length === 0) return []
  const connectorIds = apps ?? []
  const connectorFilter =
    connectorIds.length > 0 ? ` AND json_extract(r.data,'$.connectorId') IN (${sql.ph(connectorIds.length)})` : ""
  const query = `SELECT r.id AS rid, json_extract(r.data,'$.virtualRecordId') AS vid
     FROM edges e JOIN nodes r ON r.id = e.dst AND r.coll = 'records'
     WHERE e.label = 'permissions' AND e.src IN (${sql.ph(principals.length)})
       AND json_extract(r.data,'$.indexingStatus') = ?${connectorFilter}`
  // Parameter order mirrors placeholder order: principals, status, then connector ids.
  return sql.rows<Pair>(query, [...principals, COMPLETED, ...connectorIds])
}
|
|
48
|
+
|
|
49
|
+
/**
 * Ids of `recordGroups` nodes that have a `permissions` edge from any of the given principals.
 *
 * @param sql - SQL access seam.
 * @param principals - Principal ids used as edge sources; an empty list short-circuits to `[]`.
 * @param kb - Optional allow-list of record-group ids; when non-empty only those ids can be returned.
 * @returns Distinct record-group ids.
 */
export function recordGroupsPermissionedTo(sql: SqlAccess, principals: string[], kb?: string[]): string[] {
  if (principals.length === 0) return []
  const allowed = kb ?? []
  const allowFilter = allowed.length > 0 ? ` AND n.id IN (${sql.ph(allowed.length)})` : ""
  const query = `SELECT DISTINCT n.id AS id FROM edges e JOIN nodes n ON n.id = e.dst AND n.coll = 'recordGroups'
     WHERE e.label = 'permissions' AND e.src IN (${sql.ph(principals.length)})${allowFilter}`
  const ids: string[] = []
  for (const row of sql.rows<{ id: string }>(query, [...principals, ...allowed])) ids.push(row.id)
  return ids
}
|
|
60
|
+
|
|
61
|
+
/**
 * Records reachable from the given record groups by walking `inheritPermissions`
 * edges from `dst` back to `src`, transitively, keeping only `records` nodes whose
 * `$.indexingStatus` equals `COMPLETED`.
 *
 * @param sql - SQL access seam.
 * @param recordGroups - Starting record-group ids; an empty list short-circuits to `[]`.
 * @returns `(rid, vid)` pairs for every matching descendant record.
 */
export function recordsInheritingFrom(sql: SqlAccess, recordGroups: string[]): Pair[] {
  if (recordGroups.length === 0) return []
  const roots = sql.ph(recordGroups.length)
  // UNION (not UNION ALL) de-duplicates visited ids, so a cycle of edges cannot recurse forever.
  const query = `WITH RECURSIVE descend(id) AS (
       SELECT src FROM edges WHERE label = 'inheritPermissions' AND dst IN (${roots})
       UNION
       SELECT e.src FROM edges e JOIN descend d ON e.dst = d.id WHERE e.label = 'inheritPermissions'
     )
     SELECT r.id AS rid, json_extract(r.data,'$.virtualRecordId') AS vid
     FROM nodes r JOIN descend ON r.id = descend.id
     WHERE r.coll = 'records' AND json_extract(r.data,'$.indexingStatus') = ?`
  const params = [...recordGroups, COMPLETED]
  return sql.rows<Pair>(query, params)
}
|
|
75
|
+
|
|
76
|
+
/**
 * Records that `belongsTo` one of the given record groups, have `$.origin` equal to
 * `'UPLOAD'`, and whose `$.indexingStatus` equals `COMPLETED`.
 *
 * @param sql - SQL access seam.
 * @param recordGroups - Record-group ids used as edge targets; an empty list short-circuits to `[]`.
 * @returns `(rid, vid)` pairs as returned by the query.
 */
export function kbRecords(sql: SqlAccess, recordGroups: string[]): Pair[] {
  if (recordGroups.length === 0) return []
  const groupSlots = sql.ph(recordGroups.length)
  const query = `SELECT r.id AS rid, json_extract(r.data,'$.virtualRecordId') AS vid
     FROM edges e JOIN nodes r ON r.id = e.src AND r.coll = 'records'
     WHERE e.label = 'belongsTo' AND e.dst IN (${groupSlots})
       AND json_extract(r.data,'$.origin') = 'UPLOAD'
       AND json_extract(r.data,'$.indexingStatus') = ?`
  const params = [...recordGroups, COMPLETED]
  return sql.rows<Pair>(query, params)
}
|
|
87
|
+
|
|
88
|
+
/**
 * Records referenced by `anyone` nodes of the given organization: each `anyone`
 * node's `$.file_key` is joined to a `records` node id, and only records whose
 * `$.indexingStatus` equals `COMPLETED` are kept.
 *
 * @param sql - SQL access seam.
 * @param orgId - Value compared against `$.organization` on the `anyone` nodes.
 * @returns `(rid, vid)` pairs as returned by the query.
 */
export function anyoneRecords(sql: SqlAccess, orgId: string): Pair[] {
  const query = `SELECT r.id AS rid, json_extract(r.data,'$.virtualRecordId') AS vid
     FROM nodes a JOIN nodes r ON r.id = json_extract(a.data,'$.file_key') AND r.coll = 'records'
     WHERE a.coll = 'anyone' AND json_extract(a.data,'$.organization') = ?
       AND json_extract(r.data,'$.indexingStatus') = ?`
  const params = [orgId, COMPLETED]
  return sql.rows<Pair>(query, params)
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// Adjacency / scope helpers - build the ACL-scoped subgraph and the small
|
|
2
|
+
// resolution utilities (free-text → entity ids, label lookup, hub threshold)
|
|
3
|
+
// that every traversal shares. Pure over the provider-returned {entities,
|
|
4
|
+
// relations}; the ACL filtering itself happens upstream in the provider.
|
|
5
|
+
|
|
6
|
+
import { slugify, entityId } from "../extract"
|
|
7
|
+
import type { Adj, Entity, Relation } from "./types"
|
|
8
|
+
|
|
9
|
+
/**
 * Index the scoped subgraph for traversal.
 *
 * Produces an id→entity map plus, per entity, a list of incident edges. Every
 * relation contributes two entries: an `"out"` entry on its `from` side and an
 * `"in"` entry on its `to` side. Relations with an endpoint missing from
 * `entities` are skipped. Each list is ordered with typed predicates ahead of the
 * generic `"relates_to"`, and by descending weight within each group.
 *
 * @param entities - Entities in scope.
 * @param relations - Relations in scope.
 * @returns `byId` (id→entity) and `adj` (id→incident edges); entities without edges have no `adj` entry.
 */
export function buildAdjacency(
  entities: Entity[],
  relations: Relation[],
): { byId: Map<string, Entity>; adj: Map<string, Adj[]> } {
  const byId = new Map<string, Entity>()
  for (const entity of entities) byId.set(entity.id, entity)

  const adj = new Map<string, Adj[]>()
  const attach = (owner: string, edge: Adj): void => {
    const bucket = adj.get(owner)
    if (bucket) bucket.push(edge)
    else adj.set(owner, [edge])
  }
  for (const { from, to, type, weight } of relations) {
    const bothKnown = byId.has(from) && byId.has(to)
    if (!bothKnown) continue
    attach(from, { to, type, weight, dir: "out" })
    attach(to, { to: from, type, weight, dir: "in" })
  }

  // Generic co-occurrence edges sort last so neighbors and BFS predecessors
  // prefer a precise predicate whenever one exists.
  const typedFirstThenHeaviest = (a: Adj, b: Adj): number => {
    const genericRank = Number(a.type === "relates_to") - Number(b.type === "relates_to")
    return genericRank !== 0 ? genericRank : b.weight - a.weight
  }
  adj.forEach((edges) => edges.sort(typedFirstThenHeaviest))
  return { byId, adj }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Map a free-text name onto entity ids from the supplied set.
 *
 * Exact matches win: an entity whose id equals `entityId(slugify(name))` or whose
 * lower-cased label equals the trimmed, lower-cased name. Only when there is no
 * exact match does it fall back to containment: the label contains the name, or
 * (for slugs of at least 3 characters) the id contains the slug.
 *
 * @param name - Free-text name; blank input yields `[]`.
 * @param entities - Candidate entities.
 * @returns Distinct matching ids in entity order, or `[]` when nothing matches.
 */
export function resolveIds(name: string, entities: Entity[]): string[] {
  const needle = name.trim().toLowerCase()
  if (!needle) return []
  const slug = slugify(name)
  const wantedId = entityId(slug)

  const exact = new Set<string>()
  for (const entity of entities) {
    if (entity.id === wantedId || entity.label.toLowerCase() === needle) exact.add(entity.id)
  }
  if (exact.size > 0) return [...exact]

  // Very short slugs would match almost any id, so id containment needs >= 3 chars.
  const slugUsable = slug.length >= 3
  const loose = new Set<string>()
  for (const entity of entities) {
    const labelHit = entity.label.toLowerCase().includes(needle)
    if (labelHit || (slugUsable && entity.id.includes(slug))) loose.add(entity.id)
  }
  return [...loose]
}
|
|
49
|
+
|
|
50
|
+
/**
 * Display label for an entity id.
 *
 * @param id - Entity id to look up.
 * @param byId - id→entity map.
 * @returns The entity's label, or the id itself when the entity is unknown or has no label.
 */
export function labelOf(id: string, byId: Map<string, Entity>): string {
  const entity = byId.get(id)
  return entity?.label ?? id
}
|
|
53
|
+
|
|
54
|
+
/**
 * Degree above which a node counts as a hub that traversals should not expand through.
 *
 * The threshold is the degree found at index `floor(n * 0.99)` of the ascending
 * degree list (clamped to the last index), but never lower than 50.
 *
 * @param adj - id→incident-edge lists; a node's degree is its list length.
 * @returns `max(50, p99 degree)`, or 50 for an empty map.
 */
export function hubThreshold(adj: Map<string, Adj[]>): number {
  const floor = 50
  if (adj.size === 0) return floor
  const ascending = Array.from(adj.values(), (edges) => edges.length).sort((x, y) => x - y)
  const lastIndex = ascending.length - 1
  const p99Index = Math.min(lastIndex, Math.floor(ascending.length * 0.99))
  return Math.max(floor, ascending[p99Index])
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
// Centrality - hub entities ranked by total edge weight (then degree). Pure over
|
|
2
|
+
// the scoped adjacency. "What are the central things I know about."
|
|
3
|
+
|
|
4
|
+
import { labelOf } from "./adjacency"
|
|
5
|
+
import type { Adj, Entity } from "./types"
|
|
6
|
+
|
|
7
|
+
/**
 * Rank entities that have at least one edge by connectivity.
 *
 * @param byId - id→entity map used for labels.
 * @param adj - id→incident-edge lists.
 * @param limit - Maximum number of rows returned (default 10).
 * @returns Rows of `{ label, degree, strength }`, where `degree` is the edge count and
 *          `strength` the sum of edge weights; ordered by strength, then degree, both descending.
 */
export function centralEntities(
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
  limit = 10,
): Array<{ label: string; degree: number; strength: number }> {
  const ranked = Array.from(adj, ([id, edges]) => {
    let strength = 0
    for (const edge of edges) strength += edge.weight
    return { label: labelOf(id, byId), degree: edges.length, strength }
  })
  ranked.sort((a, b) => {
    const byStrength = b.strength - a.strength
    return byStrength !== 0 ? byStrength : b.degree - a.degree
  })
  return ranked.slice(0, limit)
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// Communities - connected clusters of related entities (Graphify's god-node /
|
|
2
|
+
// community view), via union-find over live edges. Each cluster's `hub` is its
|
|
3
|
+
// most-connected member. Pure over the scoped subgraph (ACL-scoped upstream).
|
|
4
|
+
|
|
5
|
+
import { labelOf } from "./adjacency"
|
|
6
|
+
import type { Adj, Entity, Relation } from "./types"
|
|
7
|
+
|
|
8
|
+
/**
 * Group entities into connected components using union-find over the relations.
 *
 * @param entities - Entities in scope; each starts as its own component.
 * @param relations - Relations in scope; a relation merges the components of its two
 *                    endpoints when both endpoints are known entities.
 * @param byId - id→entity map used for labels.
 * @param adj - id→incident-edge lists; list length decides each component's hub.
 * @param minSize - Components with fewer members are dropped (default 2).
 * @returns One row per component with its `size`, the label of its highest-degree
 *          member (`hub`; the first member wins ties) and all member labels,
 *          ordered by size descending.
 */
export function detectCommunities(
  entities: Entity[],
  relations: Relation[],
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
  minSize = 2,
): Array<{ size: number; hub: string; members: string[] }> {
  const parent = new Map<string, string>()
  entities.forEach((entity) => parent.set(entity.id, entity.id))

  const rootOf = (start: string): string => {
    let root = start
    while (parent.get(root) !== root) root = parent.get(root) as string
    // Second pass: point every node on the walked path straight at the root.
    let cursor = start
    while (parent.get(cursor) !== root) {
      const up = parent.get(cursor) as string
      parent.set(cursor, root)
      cursor = up
    }
    return root
  }

  for (const { from, to } of relations) {
    if (parent.has(from) && parent.has(to)) parent.set(rootOf(from), rootOf(to))
  }

  const membersByRoot = new Map<string, string[]>()
  for (const { id } of entities) {
    const root = rootOf(id)
    const bucket = membersByRoot.get(root)
    if (bucket) bucket.push(id)
    else membersByRoot.set(root, [id])
  }

  const degreeOf = (id: string): number => adj.get(id)?.length ?? 0
  const communities: Array<{ size: number; hub: string; members: string[] }> = []
  membersByRoot.forEach((ids) => {
    if (ids.length < minSize) return
    let hubId = ids[0]
    for (const id of ids) {
      if (degreeOf(id) > degreeOf(hubId)) hubId = id
    }
    communities.push({
      size: ids.length,
      hub: labelOf(hubId, byId),
      members: ids.map((id) => labelOf(id, byId)),
    })
  })
  communities.sort((a, b) => b.size - a.size)
  return communities
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// Cypher export (Graphify's to_cypher) - render the already ACL-filtered subgraph
|
|
2
|
+
// as idempotent MERGE statements for Neo4j interop. Pure; ACL filtering happens
|
|
3
|
+
// upstream so this only ever sees what the user may access.
|
|
4
|
+
|
|
5
|
+
import type { Entity, Relation } from "./types"
|
|
6
|
+
|
|
7
|
+
/**
 * Render the subgraph as Cypher `MERGE` statements, one per line.
 *
 * Entities become `MERGE (n:<Label> {id, label})`; relations whose endpoints are
 * both in `byId` become `MATCH ... MERGE (a)-[:<TYPE> {weight}]->(b)`.
 *
 * Node labels and relationship types are emitted unquoted, so they are sanitised
 * to `[A-Za-z0-9_]`. An unquoted Cypher name may not begin with a digit, so a
 * sanitised name that does (e.g. type `"3D Model"`) is prefixed with `_`.
 *
 * @param entities - Entities to emit; an empty `type` falls back to the label `Entity`.
 * @param relations - Relations to emit; an empty `type` falls back to `RELATES_TO`.
 * @param byId - id→entity map used to drop relations with a missing endpoint.
 * @returns The statements joined with `\n` (empty string when there is nothing to emit).
 */
export function toCypher(entities: Entity[], relations: Relation[], byId: Map<string, Entity>): string {
  // String-literal escaping: backslashes first, then single quotes.
  const esc = (s: string) => s.replace(/\\/g, "\\\\").replace(/'/g, "\\'")
  // Guard against identifiers such as `3D_Model`, which Cypher rejects when unquoted.
  const safeName = (name: string) => (/^[0-9]/.test(name) ? `_${name}` : name)

  const lines: string[] = []
  for (const e of entities) {
    const label = safeName(e.type.replace(/[^A-Za-z0-9_]/g, "_") || "Entity")
    lines.push(`MERGE (n:${label} {id:'${esc(e.id)}', label:'${esc(e.label)}'});`)
  }
  for (const r of relations) {
    if (!byId.has(r.from) || !byId.has(r.to)) continue
    const rel = safeName((r.type || "RELATES_TO").toUpperCase().replace(/[^A-Z0-9_]/g, "_"))
    lines.push(`MATCH (a {id:'${esc(r.from)}'}),(b {id:'${esc(r.to)}'}) MERGE (a)-[:${rel} {weight:${r.weight}}]->(b);`)
  }
  return lines.join("\n")
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// Impact / reverse-reference - given resolved entity ids, the directly connected
|
|
2
|
+
// entities. The "which records mention it" half lives in the provider
|
|
3
|
+
// (recordsMentioning); the orchestrator joins the two. Pure over the scoped adj.
|
|
4
|
+
|
|
5
|
+
import type { ImpactResult } from "../graph-query"
|
|
6
|
+
import { labelOf } from "./adjacency"
|
|
7
|
+
import type { Adj, Entity } from "./types"
|
|
8
|
+
|
|
9
|
+
/**
 * Distinct neighbors of the given entity ids.
 *
 * Edges are visited id by id in adjacency order; each neighbor is reported once,
 * with the relation of the first edge that reached it.
 *
 * @param ids - Entity ids whose adjacency lists are scanned.
 * @param byId - id→entity map used for labels.
 * @param adj - id→incident-edge lists.
 * @returns `{ label, relation }` rows in first-seen order.
 */
export function connectedEntities(
  ids: string[],
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
): ImpactResult["connectedEntities"] {
  // Map insertion order doubles as the first-seen order of neighbors.
  const firstRelation = new Map<string, string>()
  for (const id of ids) {
    const edges = adj.get(id)
    if (!edges) continue
    for (const edge of edges) {
      if (!firstRelation.has(edge.to)) firstRelation.set(edge.to, edge.type)
    }
  }
  return Array.from(firstRelation, ([neighborId, relation]) => ({ label: labelOf(neighborId, byId), relation }))
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
// Knowledge-graph assembly + GraphRAG expansion over the ACL-filtered record set.
|
|
2
|
+
//
|
|
3
|
+
// SECURITY INVARIANT (provenance leak-guard): in `getKnowledgeGraph`, a relation
|
|
4
|
+
// surfaces ONLY if a record the user may access ASSERTED it (provenance ∩
|
|
5
|
+
// accessible ≠ ∅). Endpoint visibility is NOT enough - two visible entities can
|
|
6
|
+
// still have a relationship stated only in a record the user can't see. Fail
|
|
7
|
+
// closed: no provenance match ⇒ hidden. Preserve this exactly.
|
|
8
|
+
|
|
9
|
+
import type { SqlAccess } from "./sql-access"
|
|
10
|
+
|
|
11
|
+
/**
 * Names of accessible records that have a `mentions` edge to any of the given entities.
 *
 * @param sql - SQL access seam.
 * @param entityIds - Entity ids used as `mentions` targets.
 * @param accessibleRecordIds - Record ids the result is restricted to.
 * @returns The non-empty `recordName` values of the matching records; `[]` when either input list is empty.
 */
export function recordsMentioning(sql: SqlAccess, entityIds: string[], accessibleRecordIds: string[]): string[] {
  if (entityIds.length === 0 || accessibleRecordIds.length === 0) return []
  const query = `SELECT DISTINCT n.data AS data
     FROM edges m JOIN nodes n ON n.id = m.src AND n.coll = 'records'
     WHERE m.label = 'mentions' AND m.dst IN (${sql.ph(entityIds.length)}) AND m.src IN (${sql.ph(accessibleRecordIds.length)})`
  const names: string[] = []
  for (const row of sql.rows<{ data: string }>(query, [...entityIds, ...accessibleRecordIds])) {
    const name = (JSON.parse(row.data) as { recordName?: string }).recordName ?? ""
    // Records without a usable name are left out of the result.
    if (name) names.push(name)
  }
  return names
}
|
|
22
|
+
|
|
23
|
+
/**
 * GraphRAG hop: find other accessible records that share concepts with the seeds.
 *
 * Steps: seed records → entities they mention → those entities plus their
 * one-hop neighbors over live (`expired_at IS NULL`) edges of any label other
 * than `mentions` / `permissions` / `belongsTo` / `inheritPermissions` → records
 * mentioning any of that entity set, most mentions first.
 *
 * @param sql - SQL access seam.
 * @param seedRecordIds - Records to expand from; never part of the result.
 * @param accessibleRecordIds - Records the result is restricted to.
 * @param limit - Maximum number of ids returned (default 5).
 * @returns Related record ids; `[]` when either input is empty or the seeds mention no entity.
 */
export function getRelatedRecordIds(
  sql: SqlAccess,
  seedRecordIds: string[],
  accessibleRecordIds: string[],
  limit = 5,
): string[] {
  if (seedRecordIds.length === 0 || accessibleRecordIds.length === 0) return []

  const seedEntityRows = sql.rows<{ dst: string }>(
    `SELECT DISTINCT dst FROM edges WHERE label = 'mentions' AND src IN (${sql.ph(seedRecordIds.length)})`,
    seedRecordIds,
  )
  const seedEntities = seedEntityRows.map((row) => row.dst)
  if (seedEntities.length === 0) return []

  // The same placeholder run is used twice: once for `src IN`, once for `dst IN`.
  const slots = sql.ph(seedEntities.length)
  const neighborRows = sql.rows<{ e: string }>(
    `SELECT dst AS e FROM edges WHERE label NOT IN ('mentions','permissions','belongsTo','inheritPermissions') AND expired_at IS NULL AND src IN (${slots})
     UNION SELECT src AS e FROM edges WHERE label NOT IN ('mentions','permissions','belongsTo','inheritPermissions') AND expired_at IS NULL AND dst IN (${slots})`,
    [...seedEntities, ...seedEntities],
  )
  const conceptSet = new Set<string>(seedEntities)
  for (const row of neighborRows) conceptSet.add(row.e)
  const concepts = [...conceptSet]

  const rankedRows = sql.rows<{ src: string }>(
    `SELECT src, COUNT(*) c FROM edges
     WHERE label = 'mentions' AND dst IN (${sql.ph(concepts.length)})
     GROUP BY src ORDER BY c DESC`,
    concepts,
  )
  // The query is not ACL-aware, so accessibility and seed exclusion are enforced here.
  const seedSet = new Set(seedRecordIds)
  const accessible = new Set(accessibleRecordIds)
  const candidates = rankedRows.map((row) => row.src).filter((id) => accessible.has(id) && !seedSet.has(id))
  return candidates.slice(0, limit)
}
|
|
66
|
+
|
|
67
|
+
/** Parse an edge's `provenance` column into record ids; anything that is not a JSON array yields `[]`. */
function provenanceRecordIds(raw: string | null | undefined): string[] {
  if (!raw) return []
  try {
    const parsed: unknown = JSON.parse(raw)
    return Array.isArray(parsed) ? parsed.filter((p): p is string => typeof p === "string") : []
  } catch {
    // Unreadable provenance counts as "no provenance", which keeps the edge hidden.
    return []
  }
}

/** Parse an entity node's `data` payload; malformed or non-object JSON yields `{}`. */
function entityPayload(raw: string): { label?: string; type?: string } {
  try {
    const parsed: unknown = JSON.parse(raw)
    return parsed !== null && typeof parsed === "object" ? (parsed as { label?: string; type?: string }) : {}
  } catch {
    return {}
  }
}

/**
 * The knowledge graph exposed by the given (already ACL-filtered) records:
 * the entities those records mention and the live relations among them.
 *
 * SECURITY INVARIANT (provenance leak-guard): a relation surfaces only when both
 * endpoints are among the returned entities AND at least one record id in its
 * `provenance` is in `recordIds`. Endpoint visibility alone is not enough. The
 * check fails closed: missing, malformed or non-array provenance hides the edge
 * instead of aborting the whole query.
 *
 * @param sql - SQL access seam.
 * @param recordIds - Record ids the user may access.
 * @returns `entities` (label defaults to the id, type to `"CONCEPT"`) and `relations`
 *          in the query's `weight DESC` order; both empty when `recordIds` is empty.
 */
export function getKnowledgeGraph(
  sql: SqlAccess,
  recordIds: string[],
): {
  entities: Array<{ id: string; label: string; type: string }>
  relations: Array<{ from: string; to: string; type: string; weight: number }>
} {
  if (recordIds.length === 0) return { entities: [], relations: [] }
  const ph = sql.ph(recordIds.length)
  const ents = sql
    .rows<{ id: string; data: string }>(
      `SELECT DISTINCT e.id AS id, e.data AS data
       FROM edges m JOIN nodes e ON e.id = m.dst AND e.coll = 'entities'
       WHERE m.label = 'mentions' AND m.src IN (${ph})`,
      recordIds,
    )
    .map((r) => {
      const d = entityPayload(r.data)
      return { id: r.id, label: d.label ?? r.id, type: d.type ?? "CONCEPT" }
    })
  const ids = new Set(ents.map((e) => e.id))
  const accessible = new Set(recordIds)
  // Entity→entity edges of any predicate (structural labels excluded), LIVE only:
  // superseded facts stay in history but never surface as current. The provenance
  // filter below is applied per edge, after the endpoint check.
  const relations = sql
    .rows<{ src: string; dst: string; label: string; weight: number; provenance: string }>(
      `SELECT DISTINCT src, dst, label, weight, provenance FROM edges
       WHERE label NOT IN ('mentions','permissions','belongsTo','inheritPermissions') AND expired_at IS NULL
       ORDER BY weight DESC`,
      [],
    )
    .filter((r) => ids.has(r.src) && ids.has(r.dst) && provenanceRecordIds(r.provenance).some((p) => accessible.has(p)))
    .map((r) => ({ from: r.src, to: r.dst, type: r.label, weight: r.weight }))
  return { entities: ents, relations }
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// Personalized PageRank multi-hop walk (HippoRAG-style). Seeds activation mass on
|
|
2
|
+
// the query's entities and spreads it over the ACL-scoped, weighted typed graph;
|
|
3
|
+
// a node reachable via MANY paths scores higher than a near dead-end. Pure TS
|
|
4
|
+
// power-iteration (sub-ms at our scale). Returns ranked related entities (seeds
|
|
5
|
+
// excluded). Edge weight (frequency≈confidence) steers the flow.
|
|
6
|
+
|
|
7
|
+
import type { Adj, Entity } from "./types"
|
|
8
|
+
|
|
9
|
+
/**
 * Personalized PageRank by power iteration over the weighted adjacency.
 *
 * All restart mass sits on the seed entities. Each round a node keeps
 * `(1 - alpha)` of its restart mass and forwards `alpha` of its current score
 * to its neighbors in proportion to edge weight.
 *
 * @param entities - Entities in scope; their order defines the score vector layout.
 * @param byId - id→entity map used for labels and types.
 * @param adj - id→incident-edge lists (each relation appears on both endpoints).
 * @param seedIds - Entities that receive the restart mass; excluded from the result.
 * @param opts - `alpha` (share forwarded along edges), `iters` (rounds), `limit` (max rows).
 * @returns Non-seed entities with a positive score, highest first, as `{ label, score, type }`;
 *          `[]` when there are no entities or no seeds.
 */
export function personalizedPageRank(
  entities: Entity[],
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
  seedIds: Set<string>,
  opts: { alpha: number; iters: number; limit: number },
): Array<{ label: string; score: number; type: string }> {
  const { alpha, iters, limit } = opts
  if (entities.length === 0 || seedIds.size === 0) return []

  const ids = entities.map((entity) => entity.id)
  const n = ids.length
  const indexOf = new Map<string, number>()
  ids.forEach((id, position) => indexOf.set(id, position))

  // Total incident weight per node; isolated nodes get 1 so the division below is safe.
  const outWeight = new Float64Array(n)
  for (const id of ids) {
    const total = (adj.get(id) ?? []).reduce((sum, edge) => sum + edge.weight, 0)
    outWeight[indexOf.get(id) as number] = total || 1
  }

  // Restart vector: equal mass per seed; seeds outside the entity set contribute nothing.
  const seedMass = 1 / seedIds.size
  const restart = new Float64Array(n)
  seedIds.forEach((seed) => {
    const at = indexOf.get(seed)
    if (at !== undefined) restart[at] = seedMass
  })

  let rank = Float64Array.from(restart)
  const keep = 1 - alpha
  for (let round = 0; round < iters; round++) {
    const next = restart.map((mass) => keep * mass)
    for (const id of ids) {
      const from = indexOf.get(id) as number
      const held = rank[from]
      if (held === 0) continue
      const share = (alpha * held) / outWeight[from]
      for (const edge of adj.get(id) ?? []) {
        const into = indexOf.get(edge.to)
        if (into !== undefined) next[into] += share * edge.weight
      }
    }
    rank = next
  }

  const ranked: Array<{ label: string; score: number; type: string }> = []
  ids.forEach((id, position) => {
    const score = rank[position]
    if (seedIds.has(id) || !(score > 0)) return
    const entity = byId.get(id)
    ranked.push({ label: entity?.label ?? id, score, type: entity?.type ?? "" })
  })
  ranked.sort((a, b) => b.score - a.score)
  return ranked.slice(0, limit)
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// Tiny SQL-access seam shared by the provider's decomposed query modules. The
|
|
2
|
+
// provider passes itself (its `rows` + `ph` helpers over bun:sqlite) so the
|
|
3
|
+
// permission-path and knowledge-graph functions stay pure of any class state.
|
|
4
|
+
|
|
5
|
+
/**
 * Minimal query surface the graph helper modules need from the provider.
 * Implementations supply both members; the helpers keep no state of their own.
 */
export interface SqlAccess {
  /**
   * Execute `sql` with positional `params` and return every row.
   * The row shape `T` is asserted by the caller; it is not validated here.
   */
  rows<T = any>(sql: string, params: unknown[]): T[]
  /** Produce `n` comma-separated positional placeholders, e.g. `"?,?,?"` for 3. */
  ph(n: number): string
}
|
|
11
|
+
|
|
12
|
+
/** Status value the permission-path queries compare a record's `$.indexingStatus` against. */
export const COMPLETED = "COMPLETED"

/** One query result row: the record node id (`rid`) and its `$.virtualRecordId` (`vid`), which may be absent. */
export type Pair = { rid: string; vid: string | null }
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
// Traversal - direct neighbors and shortest relation chains (undirected,
|
|
2
|
+
// hub-avoiding BFS). Pure over the scoped subgraph; results are labeled by the
|
|
3
|
+
// caller-provided byId map. ACL-safe by construction (the subgraph already
|
|
4
|
+
// excludes inaccessible entities/edges).
|
|
5
|
+
|
|
6
|
+
import type { NeighborResult, PathResult } from "../graph-query"
|
|
7
|
+
import { hubThreshold, labelOf } from "./adjacency"
|
|
8
|
+
import type { Adj, Entity } from "./types"
|
|
9
|
+
|
|
10
|
+
/**
 * Direct neighbors of the given entity ids.
 *
 * Edges are de-duplicated on (neighbor, relation, direction). The result is
 * ordered with typed relations ahead of the generic `"relates_to"`, then by
 * descending weight.
 *
 * @param ids - Resolved entity ids; the first one supplies the `entity` label of the result.
 * @param byId - id→entity map used for labels.
 * @param adj - id→incident-edge lists.
 * @param relation - When given (and non-empty), only edges of exactly this type are kept.
 * @returns `{ entity, neighbors }`.
 */
export function neighborsOf(
  ids: string[],
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
  relation?: string,
): NeighborResult {
  const emitted = new Set<string>()
  const neighbors: NeighborResult["neighbors"] = []
  const incident = ids.flatMap((id) => adj.get(id) ?? [])
  for (const edge of incident) {
    if (relation && edge.type !== relation) continue
    const fingerprint = `${edge.to}|${edge.type}|${edge.dir}`
    if (emitted.has(fingerprint)) continue
    emitted.add(fingerprint)
    neighbors.push({ label: labelOf(edge.to, byId), relation: edge.type, direction: edge.dir, weight: edge.weight })
  }
  // Real predicates lead; generic co-occurrence trails regardless of weight.
  const genericRank = (rel: string): number => (rel === "relates_to" ? 1 : 0)
  neighbors.sort((a, b) => {
    const byKind = genericRank(a.relation) - genericRank(b.relation)
    return byKind !== 0 ? byKind : b.weight - a.weight
  })
  return { entity: labelOf(ids[0], byId), neighbors }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Shortest relation chain between two id sets (undirected BFS, hub-avoiding).
 * Answers "how are X and Y related?".
 *
 * The search starts from every start id at once and stops at the first goal id
 * dequeued. A node whose degree exceeds `hubThreshold(adj)` can be reached and
 * can be a goal, but is not expanded unless it is itself a start id.
 *
 * The queue is read through a moving head index and start membership is
 * checked against a Set, so the search stays linear in nodes plus edges.
 *
 * @param startIds - Ids to search from; an empty list yields "not found".
 * @param goalIds - Ids that end the search; an empty set yields "not found".
 * @param byId - id→entity map used to label the steps.
 * @param adj - id→incident-edge lists.
 * @returns `{ found, hops, steps }`; `steps` runs from a start to the goal, and
 *          `hops` is 0 with no steps when a start id is itself a goal.
 */
export function shortestPath(
  startIds: string[],
  goalIds: Set<string>,
  byId: Map<string, Entity>,
  adj: Map<string, Adj[]>,
): PathResult {
  if (startIds.length === 0 || goalIds.size === 0) return { found: false, hops: 0, steps: [] }
  const hub = hubThreshold(adj)
  const starts = new Set<string>(startIds)
  const prev = new Map<string, { from: string; type: string }>()
  const queue: string[] = [...startIds]
  const visited = new Set<string>(startIds)
  let hitGoal: string | null = null
  // `queue` only grows; `head` marks the next unread slot (no O(n) shift per pop).
  for (let head = 0; head < queue.length; head++) {
    const cur = queue[head] as string
    if (goalIds.has(cur)) {
      hitGoal = cur
      break
    }
    const edges = adj.get(cur) ?? []
    // don't EXPAND through a hub (but it can still be a goal, handled above)
    if (edges.length > hub && !starts.has(cur)) continue
    for (const e of edges) {
      if (visited.has(e.to)) continue
      visited.add(e.to)
      prev.set(e.to, { from: cur, type: e.type })
      queue.push(e.to)
    }
  }
  // Compare against null explicitly so an empty-string id still counts as a hit.
  if (hitGoal === null) return { found: false, hops: 0, steps: [] }
  // Walk predecessors goal→start, then flip once to get start→goal order.
  const chain: Array<{ from: string; relation: string; to: string }> = []
  let node = hitGoal
  for (let p = prev.get(node); p !== undefined; p = prev.get(node)) {
    chain.push({ from: labelOf(p.from, byId), relation: p.type, to: labelOf(node, byId) })
    node = p.from
  }
  chain.reverse()
  return { found: true, hops: chain.length, steps: chain }
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Shared in-memory graph shapes used across the decomposed graph-query modules.
|
|
2
|
+
// These mirror the structures GraphQueryService builds from the provider's
|
|
3
|
+
// ACL-scoped {entities, relations}; kept here so the pure-function modules can
|
|
4
|
+
// be typed without depending on the orchestrating class.
|
|
5
|
+
|
|
6
|
+
/** A graph node as seen by the in-memory query helpers. */
export interface Entity {
  /** Unique node id; used as the key in the id→entity map and in adjacency lists. */
  id: string
  /** Display text for the entity. */
  label: string
  /** Category string for the entity. */
  type: string
}
|
|
11
|
+
|
|
12
|
+
/** A directed, weighted edge between two entities. */
export interface Relation {
  /** Id of the source entity. */
  from: string
  /** Id of the target entity. */
  to: string
  /** Predicate name; `"relates_to"` is treated as the generic one by the helpers. */
  type: string
  /** Edge weight; heavier edges sort first. */
  weight: number
}
|
|
18
|
+
|
|
19
|
+
/** One entry in an entity's adjacency list: an incident edge seen from that entity. */
export interface Adj {
  /** Id of the entity at the other end of the edge. */
  to: string
  /** Predicate name of the underlying relation. */
  type: string
  /** Weight of the underlying relation. */
  weight: number
  /** `"out"` when the owning entity is the relation's `from`, `"in"` when it is the `to`. */
  dir: "out" | "in"
}
|
|
25
|
+
|
|
26
|
+
/**
 * The ACL-scoped subgraph handed to every traversal: the entities and LIVE
 * relations the user may see, together with the structures derived from them.
 * Traversals are meant to operate on this object only, so that no query
 * crosses a permission boundary.
 */
export interface ScopedGraph {
  /** Entities in scope. */
  entities: Entity[]
  /** Relations in scope. */
  relations: Relation[]
  /** id→entity lookup derived from `entities`. */
  byId: Map<string, Entity>
  /** id→incident-edge lists derived from `relations`. */
  adj: Map<string, Adj[]>
  /** Ids of the records the user may access. */
  recordIds: string[]
}
|