@restormel/graphrag-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ import type { HybridCandidate } from "./hybrid-candidate-generation.js";
2
+ import { type RetrievalOriginBalanceKey } from "./kg-balance.js";
3
/** Argumentative role assigned to a seed candidate; seed selection keeps per-role counts and quotas keyed by it. */
export type SeedRole =
    | 'support'
    | 'objection'
    | 'reply'
    | 'definition_distinction';
4
/**
 * A hybrid-retrieval candidate carrying the extra fields seed selection reads.
 */
export interface SeedCandidate extends HybridCandidate {
    /** Claim type label; compared case-insensitively when a role is derived for the candidate. */
    claim_type: string;
    /** Optional embedding vector; when absent or null, text-based fallbacks are used instead. */
    embedding?: Array<number> | null;
    /** Optional: used by inquiry-time KG balance (retrieval). */
    domain?: string;
    /** Optional source URL. NOTE(review): presumably also consumed by KG balance - confirm against callers. */
    source_url?: string | null;
    /** Optional source type label. NOTE(review): presumably also consumed by KG balance - confirm against callers. */
    source_source_type?: string | null;
}
12
/**
 * Diagnostics describing how a seed set was chosen and how it compares with
 * the baseline (the leading candidates in input order).
 */
export interface SeedBalanceStats {
    /** Which strategy ran; the `kg_balance` variant is reported when KG balance options were supplied. */
    selection_strategy: 'mmr_quota_v1' | 'mmr_quota_kg_balance_v1';
    /** Relevance-vs-diversity weight used in scoring. */
    mmr_lambda: number;
    /** Number of candidates per role in the considered pool. */
    role_counts_pool: Record<SeedRole, number>;
    /** Number of selected seeds per role. */
    role_counts_selected: Record<SeedRole, number>;
    /** Per-role quotas after adapting the defaults to what the pool can supply. */
    role_quotas: Record<SeedRole, number>;
    /** Roles whose selected count reached their quota. */
    quota_satisfied_roles: Array<SeedRole>;
    /** Mean pairwise similarity of the baseline set. */
    avg_pairwise_similarity_before: number;
    /** Mean pairwise similarity of the selected set. */
    avg_pairwise_similarity_after: number;
    /** Whether the baseline contains at least one objection and at least one reply. */
    objection_reply_presence_before: boolean;
    /** Whether the selected set contains at least one objection and at least one reply. */
    objection_reply_presence_after: boolean;
    /** Whether the baseline is dominated by a single role (or is empty). */
    mono_perspective_before: boolean;
    /** Whether the selected set is dominated by a single role (or is empty). */
    mono_perspective_after: boolean;
    /** Present when retrieval applies ideal SEP/Gutenberg/domain balance (not DB snapshot metrics). */
    kg_balance?: {
        /** Copy of the ideal origin mix that was requested. */
        ideal_origin: Record<RetrievalOriginBalanceKey, number>;
        /** Selected seeds per origin. */
        selected_origin_counts: Record<RetrievalOriginBalanceKey, number>;
        /** Sorted list of the domains supplied as present in the pool. */
        domains_in_pool: Array<string>;
        /** Selected seeds per domain key. */
        selected_domain_counts: Record<string, number>;
    };
}
33
/** Outcome of seed-set construction: the chosen seeds plus selection diagnostics. */
export interface SeedSetConstructionResult<T extends SeedCandidate> {
    /** Selected seeds, in the order they were picked. */
    seeds: Array<T>;
    /** Diagnostics for the selection. */
    stats: SeedBalanceStats;
}
37
/**
 * Builds a role-balanced, diversity-aware seed set from ranked candidates.
 *
 * @param params.candidates Ranked candidate pool.
 * @param params.topK Maximum number of seeds to return.
 * @param params.queryEmbedding Optional query embedding used to score relevance.
 * @param params.mmrLambda Optional relevance-vs-diversity weight.
 * @param params.kgBalance Optional knowledge-graph balance configuration.
 * @returns The selected seeds together with selection statistics.
 */
export declare function constructSeedSet<T extends SeedCandidate>(params: {
    candidates: Array<T>;
    topK: number;
    queryEmbedding?: Array<number>;
    mmrLambda?: number;
    /**
     * Optional inquiry-time balance: ideal origin mix + uniform domain targets among domains
     * present in the candidate pool (see `knowledgeGraphRetrievalBalance.ts`).
     */
    kgBalance?: {
        idealOrigin: Record<RetrievalOriginBalanceKey, number>;
        domainsInPool: Set<string>;
        getOrigin: (c: T) => RetrievalOriginBalanceKey;
        getDomainKey: (c: T) => string;
    };
}): SeedSetConstructionResult<T>;
53
+ //# sourceMappingURL=seed-set-constructor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"seed-set-constructor.d.ts","sourceRoot":"","sources":["../src/seed-set-constructor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kCAAkC,CAAC;AACxE,OAAO,EAEL,KAAK,yBAAyB,EAC/B,MAAM,iBAAiB,CAAC;AAEzB,MAAM,MAAM,QAAQ,GAAG,SAAS,GAAG,WAAW,GAAG,OAAO,GAAG,wBAAwB,CAAC;AAEpF,MAAM,WAAW,aAAc,SAAQ,eAAe;IACpD,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IAC5B,6DAA6D;IAC7D,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,kBAAkB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACpC;AAED,MAAM,WAAW,gBAAgB;IAC/B,kBAAkB,EAAE,cAAc,GAAG,yBAAyB,CAAC;IAC/D,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC3C,oBAAoB,EAAE,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC/C,WAAW,EAAE,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACtC,qBAAqB,EAAE,QAAQ,EAAE,CAAC;IAClC,8BAA8B,EAAE,MAAM,CAAC;IACvC,6BAA6B,EAAE,MAAM,CAAC;IACtC,+BAA+B,EAAE,OAAO,CAAC;IACzC,8BAA8B,EAAE,OAAO,CAAC;IACxC,uBAAuB,EAAE,OAAO,CAAC;IACjC,sBAAsB,EAAE,OAAO,CAAC;IAChC,mGAAmG;IACnG,UAAU,CAAC,EAAE;QACX,YAAY,EAAE,MAAM,CAAC,yBAAyB,EAAE,MAAM,CAAC,CAAC;QACxD,sBAAsB,EAAE,MAAM,CAAC,yBAAyB,EAAE,MAAM,CAAC,CAAC;QAClE,eAAe,EAAE,MAAM,EAAE,CAAC;QAC1B,sBAAsB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAChD,CAAC;CACH;AAED,MAAM,WAAW,yBAAyB,CAAC,CAAC,SAAS,aAAa;IAChE,KAAK,EAAE,CAAC,EAAE,CAAC;IACX,KAAK,EAAE,gBAAgB,CAAC;CACzB;AAgJD,wBAAgB,gBAAgB,CAAC,CAAC,SAAS,aAAa,EAAE,MAAM,EAAE;IAChE,UAAU,EAAE,CAAC,EAAE,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,SAAS,CAAC,EAAE;QACV,WAAW,EAAE,MAAM,CAAC,yBAAyB,EAAE,MAAM,CAAC,CAAC;QACvD,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,SAAS,EAAE,CAAC,CAAC,EAAE,CAAC,KAAK,yBAAyB,CAAC;QAC/C,YAAY,EAAE,CAAC,CAAC,EAAE,CAAC,KAAK,MAAM,CAAC;KAChC,CAAC;CACH,GAAG,yBAAyB,CAAC,CAAC,CAAC,CAwH/B"}
@@ -0,0 +1,247 @@
1
+ import { computeKgBalanceMultiplier, } from "./kg-balance.js";
2
/** Fixed order in which seed roles are iterated when counting, filling quotas and reporting. */
const ROLE_ORDER = [
    'support',
    'objection',
    'reply',
    'definition_distinction',
];
3
/**
 * Tokenizes text for overlap comparison: lowercases it, turns every character
 * that is not a word character, whitespace or hyphen into a space, then
 * returns the non-empty whitespace-separated tokens.
 */
function normalizeText(text) {
    const lowered = text.toLowerCase();
    const cleaned = lowered.replace(/[^\w\s-]/g, ' ');
    const tokens = [];
    for (const piece of cleaned.split(/\s+/)) {
        if (piece) {
            tokens.push(piece);
        }
    }
    return tokens;
}
10
/**
 * Cosine similarity of two numeric vectors.
 *
 * Returns 0 when either vector is missing, empty, of a different length than
 * the other, or has zero magnitude.
 */
function cosineSimilarity(a, b) {
    const comparable = Boolean(a) && Boolean(b) && a.length > 0 && b.length > 0 && a.length === b.length;
    if (!comparable) {
        return 0;
    }
    let dotProduct = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (let idx = 0; idx < a.length; idx += 1) {
        const x = a[idx];
        const y = b[idx];
        dotProduct += x * y;
        squaredA += x * x;
        squaredB += y * y;
    }
    const degenerate = squaredA === 0 || squaredB === 0;
    return degenerate ? 0 : dotProduct / (Math.sqrt(squaredA) * Math.sqrt(squaredB));
}
25
/**
 * Lexical similarity of two texts: the number of distinct shared tokens
 * divided by the size of the larger distinct-token set. Returns 0 when either
 * text yields no tokens.
 */
function tokenOverlapSimilarity(aText, bText) {
    const left = new Set(normalizeText(aText));
    const right = new Set(normalizeText(bText));
    if (left.size === 0 || right.size === 0) {
        return 0;
    }
    const shared = [...left].filter((token) => right.has(token)).length;
    return shared / Math.max(left.size, right.size);
}
37
/**
 * Similarity between two candidates, in the range [0, 1].
 *
 * When both candidates carry usable embeddings (arrays of equal, non-zero
 * length with at least one non-zero component each) the cosine similarity is
 * authoritative and is clamped at 0. Token overlap of the texts is used only
 * when embeddings are missing or unusable.
 *
 * Previously any cosine <= 0 was treated as "no embedding", so pairs whose
 * vectors were orthogonal or opposed were scored by lexical overlap instead.
 */
function pairwiseSimilarity(a, b) {
    const left = a.embedding;
    const right = b.embedding;
    const isNonZero = (value) => value !== 0;
    const comparable = Array.isArray(left)
        && Array.isArray(right)
        && left.length > 0
        && left.length === right.length
        && left.some(isNonZero)
        && right.some(isNonZero);
    if (comparable) {
        // Clamp so the result keeps the non-negative range callers already rely on.
        return Math.max(0, cosineSimilarity(left, right));
    }
    return tokenOverlapSimilarity(a.text, b.text);
}
43
/**
 * Derives the seed role of a candidate from its claim type, compared
 * case-insensitively. Candidates that are neither objections, replies nor
 * definitions count as `definition_distinction` when their text contains the
 * token "distinction", and as `support` otherwise.
 */
function roleForCandidate(candidate) {
    switch (candidate.claim_type.toLowerCase()) {
        case 'objection':
            return 'objection';
        case 'response':
        case 'reply':
            return 'reply';
        case 'definition':
            return 'definition_distinction';
        default:
            break;
    }
    const mentionsDistinction = normalizeText(candidate.text).includes('distinction');
    return mentionsDistinction ? 'definition_distinction' : 'support';
}
54
/** Returns a fresh per-role counter object with every role set to zero. */
function makeEmptyRoleCounts() {
    const roles = ['support', 'objection', 'reply', 'definition_distinction'];
    return Object.fromEntries(roles.map((role) => [role, 0]));
}
62
/** Returns a fresh per-origin counter object (`sep`, `gutenberg`, `other`) with every count at zero. */
function makeEmptyOriginCounts() {
    const zeroed = {
        sep: 0,
        gutenberg: 0,
        other: 0,
    };
    return zeroed;
}
65
/**
 * Mean similarity over every unordered pair of candidates.
 * Returns 0 when fewer than two candidates are given.
 */
function averagePairwiseSimilarity(candidates) {
    const count = candidates.length;
    if (count < 2) {
        return 0;
    }
    let total = 0;
    let pairCount = 0;
    let first = 0;
    while (first < count) {
        let second = first + 1;
        while (second < count) {
            total += pairwiseSimilarity(candidates[first], candidates[second]);
            pairCount += 1;
            second += 1;
        }
        first += 1;
    }
    if (pairCount === 0) {
        return 0;
    }
    return total / pairCount;
}
78
/** True when the counts include at least one objection and at least one reply. */
function hasObjectionReplyPresence(roleCounts) {
    const hasObjection = roleCounts.objection > 0;
    const hasReply = roleCounts.reply > 0;
    return hasObjection && hasReply;
}
81
/**
 * Reports whether a set is dominated by one role: an empty set, a set made up
 * entirely of `support`, or a set where the largest role holds at least 85%
 * of `total`.
 */
function isMonoPerspective(roleCounts, total) {
    if (total === 0) {
        return true;
    }
    if (roleCounts.support >= total) {
        return true;
    }
    const largest = Math.max(...ROLE_ORDER.map((role) => roleCounts[role]));
    return largest >= total * 0.85;
}
86
/**
 * Default per-role quotas for a seed set of size `topK`.
 *
 * - topK >= 4: one seed per role, remaining slots go to `support`.
 * - topK === 3: support, objection, reply.
 * - topK === 2: support, objection.
 * - any other value of at least 1: support only.
 * - topK below 1 (zero, negative or NaN): no quotas at all. This case used
 *   to fall through to the "support only" branch and request one seed for a
 *   set that may hold none.
 *
 * @param topK Target seed-set size.
 * @returns A fresh quota object keyed by role.
 */
function computeDefaultQuotas(topK) {
    const quotas = { support: 0, objection: 0, reply: 0, definition_distinction: 0 };
    // `!(topK >= 1)` also catches NaN, which fails every comparison.
    if (!(topK >= 1)) {
        return quotas;
    }
    if (topK >= 4) {
        quotas.support = 1;
        quotas.objection = 1;
        quotas.reply = 1;
        quotas.definition_distinction = 1;
        quotas.support += topK - 4;
    }
    else if (topK === 3) {
        quotas.support = 1;
        quotas.objection = 1;
        quotas.reply = 1;
    }
    else if (topK === 2) {
        quotas.support = 1;
        quotas.objection = 1;
    }
    else {
        quotas.support = 1;
    }
    return quotas;
}
109
/**
 * Fits role quotas to what the pool can actually supply.
 *
 * Each quota is first capped at the number of pool candidates with that role.
 * If the capped quotas sum to less than `topK`, spare capacity is handed out
 * one seed at a time in round-robin role order until `topK` is reached or no
 * role has candidates left. The input `quotas` object is not modified.
 */
function adaptQuotasToPool(quotas, poolCounts, topK) {
    const adapted = { ...quotas };
    for (const role of ROLE_ORDER) {
        adapted[role] = Math.min(adapted[role], poolCounts[role]);
    }
    let used = ROLE_ORDER.reduce((sum, role) => sum + adapted[role], 0);
    const fillOrder = ['support', 'objection', 'reply', 'definition_distinction'];
    let progressed = true;
    while (used < topK && progressed) {
        progressed = false;
        for (const role of fillOrder) {
            if (adapted[role] >= poolCounts[role]) {
                continue;
            }
            adapted[role] += 1;
            used += 1;
            progressed = true;
            if (used >= topK) {
                break;
            }
        }
    }
    return adapted;
}
135
/**
 * Relevance of a candidate to the query, never negative.
 *
 * When the candidate and the query both have usable embeddings (arrays of
 * equal, non-zero length with at least one non-zero component each) the
 * cosine similarity is used, clamped at 0. Otherwise the candidate's
 * `confidence` is used, clamped at 0; a missing or non-numeric confidence
 * counts as 0.
 *
 * Previously a cosine <= 0 fell back to `confidence`, so a candidate whose
 * embedding was unrelated or opposed to the query could outrank relevant
 * ones. A non-numeric confidence also produced NaN, which can never win a
 * `score > bestScore` comparison.
 */
function relevanceToQuery(candidate, queryEmbedding) {
    const own = candidate.embedding;
    const isNonZero = (value) => value !== 0;
    const comparable = Array.isArray(own)
        && Array.isArray(queryEmbedding)
        && own.length > 0
        && own.length === queryEmbedding.length
        && own.some(isNonZero)
        && queryEmbedding.some(isNonZero);
    if (comparable) {
        return Math.max(0, cosineSimilarity(own, queryEmbedding));
    }
    const confidence = Number(candidate.confidence);
    return Number.isFinite(confidence) ? Math.max(0, confidence) : 0;
}
141
/**
 * Selects up to `topK` seeds from `candidates`, balancing query relevance,
 * diversity and argumentative role coverage.
 *
 * Steps:
 * 1. Only the first `4 * topK` candidates (input order) are considered.
 * 2. Each candidate gets a role; default role quotas are adapted to the pool.
 * 3. Seeds are picked greedily. While any role quota is unmet, only
 *    candidates of unmet roles compete (if there are any). Each candidate is
 *    scored as `mmrLambda * relevance * balanceMult - (1 - mmrLambda) * maxSimilarity`,
 *    where `maxSimilarity` is its highest similarity to an already selected
 *    seed and `balanceMult` comes from `computeKgBalanceMultiplier` once at
 *    least one seed has been selected (it is 1 otherwise).
 * 4. "Before" statistics describe the baseline, i.e. the first `targetSize`
 *    candidates in input order; "after" statistics describe the selection.
 *
 * `topK` is clamped to a non-negative integer. A negative value used to reach
 * `slice(0, negative)`, which drops items from the END of the array, and
 * produced a negative target size; a fractional value selected one seed too
 * many.
 *
 * @param params.candidates Ranked candidate pool.
 * @param params.topK Maximum number of seeds to return.
 * @param params.queryEmbedding Optional query embedding for relevance scoring.
 * @param params.mmrLambda Relevance-vs-diversity weight; defaults to 0.72.
 * @param params.kgBalance Optional origin/domain balance configuration.
 * @returns `{ seeds, stats }`; `stats.kg_balance` is present only when
 *   `kgBalance` was given and at least one seed was selected.
 */
export function constructSeedSet(params) {
    const { candidates, topK, queryEmbedding, mmrLambda = 0.72, kgBalance } = params;
    const requestedK = typeof topK === 'number' && !Number.isNaN(topK)
        ? Math.max(0, Math.floor(topK))
        : 0;
    const cappedCandidates = candidates.slice(0, requestedK * 4);
    const targetSize = Math.min(requestedK, cappedCandidates.length);
    const roleCountsPool = makeEmptyRoleCounts();
    const candidateRole = new Map();
    for (const candidate of cappedCandidates) {
        const role = roleForCandidate(candidate);
        candidateRole.set(candidate.id, role);
        roleCountsPool[role] += 1;
    }
    const defaultQuotas = computeDefaultQuotas(targetSize);
    const adaptedQuotas = adaptQuotasToPool(defaultQuotas, roleCountsPool, targetSize);
    const selected = [];
    const selectedIds = new Set();
    const roleCountsSelected = makeEmptyRoleCounts();
    const selectedOriginCounts = makeEmptyOriginCounts();
    const selectedDomainCounts = new Map();
    // Relevance does not depend on what has been selected, so compute it once.
    const relevanceById = new Map();
    for (const candidate of cappedCandidates) {
        relevanceById.set(candidate.id, relevanceToQuery(candidate, queryEmbedding));
    }
    while (selected.length < targetSize) {
        const remaining = cappedCandidates.filter((candidate) => !selectedIds.has(candidate.id));
        if (remaining.length === 0)
            break;
        // Restrict the competition to roles that still owe seeds, when possible.
        const unmetRoles = ROLE_ORDER.filter((role) => roleCountsSelected[role] < adaptedQuotas[role]);
        const roleRestricted = unmetRoles.length > 0
            ? remaining.filter((candidate) => unmetRoles.includes(candidateRole.get(candidate.id) ?? 'support'))
            : remaining;
        const pool = roleRestricted.length > 0 ? roleRestricted : remaining;
        let best = null;
        let bestScore = Number.NEGATIVE_INFINITY;
        const totalSel = selected.length;
        for (const candidate of pool) {
            const relevance = relevanceById.get(candidate.id) ?? 0;
            let maxSimilarity = 0;
            for (const chosen of selected) {
                maxSimilarity = Math.max(maxSimilarity, pairwiseSimilarity(candidate, chosen));
            }
            // The first pick is never balance-adjusted: there is nothing to balance against yet.
            let balanceMult = 1;
            if (kgBalance && totalSel > 0) {
                balanceMult = computeKgBalanceMultiplier({
                    origin: kgBalance.getOrigin(candidate),
                    domain: kgBalance.getDomainKey(candidate),
                    selectedOriginCounts,
                    selectedDomainCounts,
                    totalSelected: totalSel,
                    idealOrigin: kgBalance.idealOrigin,
                    domainsInPool: kgBalance.domainsInPool
                });
            }
            const score = mmrLambda * relevance * balanceMult - (1 - mmrLambda) * maxSimilarity;
            if (score > bestScore) {
                best = candidate;
                bestScore = score;
            }
        }
        if (!best)
            break;
        selected.push(best);
        selectedIds.add(best.id);
        const role = candidateRole.get(best.id) ?? 'support';
        roleCountsSelected[role] += 1;
        if (kgBalance) {
            const o = kgBalance.getOrigin(best);
            // `?? 0` keeps the counter numeric if the callback returns an origin key not pre-seeded above.
            selectedOriginCounts[o] = (selectedOriginCounts[o] ?? 0) + 1;
            const dk = kgBalance.getDomainKey(best);
            selectedDomainCounts.set(dk, (selectedDomainCounts.get(dk) ?? 0) + 1);
        }
    }
    const baseline = cappedCandidates.slice(0, targetSize);
    const baselineRoleCounts = makeEmptyRoleCounts();
    for (const candidate of baseline) {
        baselineRoleCounts[candidateRole.get(candidate.id) ?? 'support'] += 1;
    }
    const quotaSatisfiedRoles = ROLE_ORDER.filter((role) => roleCountsSelected[role] >= adaptedQuotas[role]);
    const kgStats = kgBalance && selected.length > 0
        ? {
            ideal_origin: { ...kgBalance.idealOrigin },
            selected_origin_counts: { ...selectedOriginCounts },
            domains_in_pool: [...kgBalance.domainsInPool].sort(),
            selected_domain_counts: Object.fromEntries([...selectedDomainCounts.entries()].sort((a, b) => a[0].localeCompare(b[0])))
        }
        : undefined;
    return {
        seeds: selected,
        stats: {
            selection_strategy: kgBalance ? 'mmr_quota_kg_balance_v1' : 'mmr_quota_v1',
            mmr_lambda: mmrLambda,
            role_counts_pool: roleCountsPool,
            role_counts_selected: roleCountsSelected,
            role_quotas: adaptedQuotas,
            quota_satisfied_roles: quotaSatisfiedRoles,
            avg_pairwise_similarity_before: averagePairwiseSimilarity(baseline),
            avg_pairwise_similarity_after: averagePairwiseSimilarity(selected),
            objection_reply_presence_before: hasObjectionReplyPresence(baselineRoleCounts),
            objection_reply_presence_after: hasObjectionReplyPresence(roleCountsSelected),
            mono_perspective_before: isMonoPerspective(baselineRoleCounts, baseline.length),
            mono_perspective_after: isMonoPerspective(roleCountsSelected, selected.length),
            ...(kgStats ? { kg_balance: kgStats } : {})
        }
    };
}
@@ -0,0 +1,39 @@
1
+ import type { GraphStore } from "./ports.js";
2
/** True when the `RETRIEVAL_USE_BM25` environment variable is `1`, `true` or `yes` (trimmed, case-insensitive). */
export declare function isRetrievalBm25Enabled(): boolean;

/** True when the `RETRIEVAL_NATIVE_GRAPH` environment variable is `1`, `true` or `yes` (trimmed, case-insensitive). */
export declare function isRetrievalNativeGraphEnabled(): boolean;

/**
 * Full-text search over claim text, ordered by confidence (highest first).
 * Resolves to an empty array when `terms` is empty or the query fails.
 *
 * @param params.terms Search terms.
 * @param params.limit Maximum number of rows requested.
 * @param params.reviewFilter Query condition fragment inserted verbatim into the WHERE clause.
 */
export declare function fetchBm25ClaimCandidates(
    store: GraphStore,
    params: {
        terms: string[];
        limit: number;
        reviewFilter: string;
    },
): Promise<{
    id: string;
    text: string;
    confidence: number;
}[]>;

/**
 * Ids of claims linked to the seed claims through graph edges.
 * Resolves to an empty array when native graph retrieval is disabled,
 * `seedIds` is empty, or the query fails.
 */
export declare function fetchNativeGraphNeighbors(
    store: GraphStore,
    params: {
        seedIds: string[];
        limit: number;
    },
): Promise<string[]>;

/** Defines the claim text analyzer and search index if they do not exist; does nothing unless BM25 retrieval is enabled. */
export declare function ensureClaimSearchIndex(db: {
    query: (sql: string) => Promise<unknown>;
}): Promise<void>;

/** True when the `RETRIEVAL_PASSAGE_GROUNDED` environment variable is `1`, `true` or `yes` (trimmed, case-insensitive). */
export declare function isRetrievalPassageGroundedEnabled(): boolean;

/** True when the `RETRIEVAL_TAXONOMY_ROUTING` environment variable is `1`, `true` or `yes` (trimmed, case-insensitive). */
export declare function isRetrievalTaxonomyRoutingEnabled(): boolean;

/** True when the `KG_ENFORCE_PASSAGE_ON_ACCEPT` environment variable is `1`, `true` or `yes` (trimmed, case-insensitive). */
export declare function isKgEnforcePassageOnAcceptEnabled(): boolean;

/**
 * Ids of claims grounded in the passages nearest to `queryEmbedding`, at most `limit` of them.
 * Resolves to an empty array when passage-grounded retrieval is disabled or the query fails.
 *
 * NOTE(review): `reviewFilter` is part of the signature but the shipped
 * implementation does not appear to reference it - confirm whether filtering is intended.
 */
export declare function fetchPassageGroundedClaimIds(
    store: GraphStore,
    params: {
        queryEmbedding: number[];
        limit: number;
        reviewFilter: string;
    },
): Promise<string[]>;

/**
 * Ids of claims whose linked subject or author matches the search terms.
 * Resolves to an empty array when taxonomy routing is disabled, `terms` is
 * empty, or the query fails.
 *
 * @param params.reviewFilter Query condition fragment inserted verbatim into the WHERE clause.
 */
export declare function fetchTaxonomySeedClaimIds(
    store: GraphStore,
    params: {
        terms: string[];
        limit: number;
        reviewFilter: string;
    },
): Promise<string[]>;

/** Defines the passage embedding vector index if it does not exist; does nothing unless passage-grounded retrieval is enabled. */
export declare function ensurePassageEmbeddingIndex(db: {
    query: (sql: string) => Promise<unknown>;
}): Promise<void>;

/** Defines the database event that rejects accepted claims without a linked passage; does nothing unless enforcement is enabled. */
export declare function ensureClaimAcceptPassageEvent(db: {
    query: (sql: string) => Promise<unknown>;
}): Promise<void>;
39
+ //# sourceMappingURL=surreal-retrieval-enhancements.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"surreal-retrieval-enhancements.d.ts","sourceRoot":"","sources":["../src/surreal-retrieval-enhancements.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE7C,wBAAgB,sBAAsB,IAAI,OAAO,CAGhD;AAED,wBAAgB,6BAA6B,IAAI,OAAO,CAGvD;AAED,wBAAsB,wBAAwB,CAC5C,KAAK,EAAE,UAAU,EACjB,MAAM,EAAE;IACN,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;CACtB,GACA,OAAO,CAAC,KAAK,CAAC;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAelE;AAED,wBAAsB,yBAAyB,CAC7C,KAAK,EAAE,UAAU,EACjB,MAAM,EAAE;IACN,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;CACf,GACA,OAAO,CAAC,MAAM,EAAE,CAAC,CA2BnB;AAED,wBAAsB,sBAAsB,CAAC,EAAE,EAAE;IAAE,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAM5G;AAED,wBAAgB,iCAAiC,IAAI,OAAO,CAG3D;AAED,wBAAgB,iCAAiC,IAAI,OAAO,CAG3D;AAED,wBAAgB,iCAAiC,IAAI,OAAO,CAG3D;AAED,wBAAsB,4BAA4B,CAChD,KAAK,EAAE,UAAU,EACjB,MAAM,EAAE;IACN,cAAc,EAAE,MAAM,EAAE,CAAC;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;CACtB,GACA,OAAO,CAAC,MAAM,EAAE,CAAC,CAuBnB;AAED,wBAAsB,yBAAyB,CAC7C,KAAK,EAAE,UAAU,EACjB,MAAM,EAAE;IACN,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;CACtB,GACA,OAAO,CAAC,MAAM,EAAE,CAAC,CAwBnB;AAED,wBAAsB,2BAA2B,CAAC,EAAE,EAAE;IAAE,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAKjH;AAED,wBAAsB,6BAA6B,CAAC,EAAE,EAAE;IAAE,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAA;CAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAWnH"}
@@ -0,0 +1,139 @@
1
/** Feature flag: true when `RETRIEVAL_USE_BM25` is "1", "true" or "yes" after trimming and lowercasing. */
export function isRetrievalBm25Enabled() {
    const raw = process.env.RETRIEVAL_USE_BM25 ?? "";
    const normalized = raw.trim().toLowerCase();
    return ["1", "true", "yes"].includes(normalized);
}
5
/** Feature flag: true when `RETRIEVAL_NATIVE_GRAPH` is "1", "true" or "yes" after trimming and lowercasing. */
export function isRetrievalNativeGraphEnabled() {
    const raw = process.env.RETRIEVAL_NATIVE_GRAPH ?? "";
    const normalized = raw.trim().toLowerCase();
    return ["1", "true", "yes"].includes(normalized);
}
9
/**
 * Runs a full-text (`@@`) search over claim text and returns the rows the
 * store yields, ordered by confidence (highest first).
 *
 * At most the first 8 terms are used, joined with spaces. Returns an empty
 * array when `params.terms` is empty, when the store yields a nullish result,
 * or when the query throws (best-effort lookup).
 *
 * NOTE(review): `params.reviewFilter` is interpolated verbatim into the query
 * text - it must only ever come from trusted code, never from user input.
 */
export async function fetchBm25ClaimCandidates(store, params) {
    if (params.terms.length === 0) {
        return [];
    }
    const searchQuery = params.terms.slice(0, 8).join(" ");
    const bindings = { q: searchQuery, limit: params.limit };
    try {
        const sql = `SELECT id, text, confidence FROM claim
       WHERE text @@ $q AND ${params.reviewFilter}
       ORDER BY confidence DESC
       LIMIT $limit`;
        const result = await store.query(sql, bindings);
        return result ?? [];
    }
    catch {
        return [];
    }
}
24
/**
 * Collects the distinct ids of claims linked to the seed claims via
 * `depends_on`, `supports`, `responds_to` (outgoing) and `contradicts`
 * (incoming) edges.
 *
 * Returns an empty array when native graph retrieval is disabled, when
 * `params.seedIds` is empty, or when the query throws (best-effort lookup).
 *
 * The query's `LIMIT` bounds the number of seed rows, not the number of
 * neighbours, so the collected ids are additionally capped at `params.limit`
 * here; previously the result could be arbitrarily larger than the limit.
 *
 * @param store Graph store exposing `query(sql, bindings)`.
 * @param params.seedIds Ids of the seed claims.
 * @param params.limit Maximum number of neighbour ids to return.
 */
export async function fetchNativeGraphNeighbors(store, params) {
    if (!isRetrievalNativeGraphEnabled() || params.seedIds.length === 0)
        return [];
    // A missing or non-finite limit leaves the result uncapped, as before.
    const cap = Number.isFinite(params.limit) ? Math.max(0, params.limit) : Infinity;
    try {
        const rows = await store.query(`SELECT array::distinct(
         array::flatten([
           ->depends_on->claim,
           ->supports->claim,
           <-contradicts<-claim,
           ->responds_to->claim
         ])
       ) AS neighbors
       FROM claim
       WHERE id INSIDE $seed_ids
       LIMIT $limit`, { seed_ids: params.seedIds, limit: params.limit });
        const out = new Set();
        for (const row of rows ?? []) {
            for (const n of row.neighbors ?? []) {
                // Only entries exposing a string `id` are kept.
                if (typeof n?.id === "string")
                    out.add(n.id);
            }
        }
        return [...out].slice(0, cap);
    }
    catch {
        return [];
    }
}
52
/**
 * Defines the `claim_english` analyzer and the `claim_search` BM25 index on
 * `claim.text`, both with `IF NOT EXISTS`. Does nothing unless BM25 retrieval
 * is enabled. Query errors propagate to the caller.
 */
export async function ensureClaimSearchIndex(db) {
    const enabled = isRetrievalBm25Enabled();
    if (enabled) {
        const ddl = `
    DEFINE ANALYZER IF NOT EXISTS claim_english TOKENIZERS blank,class FILTERS lowercase,ascii;
    DEFINE INDEX IF NOT EXISTS claim_search ON claim FIELDS text SEARCH ANALYZER claim_english BM25;
  `;
        await db.query(ddl);
    }
}
60
/** Feature flag: true when `RETRIEVAL_PASSAGE_GROUNDED` is "1", "true" or "yes" after trimming and lowercasing. */
export function isRetrievalPassageGroundedEnabled() {
    const raw = process.env.RETRIEVAL_PASSAGE_GROUNDED ?? "";
    const normalized = raw.trim().toLowerCase();
    return ["1", "true", "yes"].includes(normalized);
}
64
/** Feature flag: true when `RETRIEVAL_TAXONOMY_ROUTING` is "1", "true" or "yes" after trimming and lowercasing. */
export function isRetrievalTaxonomyRoutingEnabled() {
    const raw = process.env.RETRIEVAL_TAXONOMY_ROUTING ?? "";
    const normalized = raw.trim().toLowerCase();
    return ["1", "true", "yes"].includes(normalized);
}
68
/** Feature flag: true when `KG_ENFORCE_PASSAGE_ON_ACCEPT` is "1", "true" or "yes" after trimming and lowercasing. */
export function isKgEnforcePassageOnAcceptEnabled() {
    const raw = process.env.KG_ENFORCE_PASSAGE_ON_ACCEPT ?? "";
    const normalized = raw.trim().toLowerCase();
    return ["1", "true", "yes"].includes(normalized);
}
72
/**
 * Finds the passages nearest to the query embedding and returns the distinct
 * ids of claims grounded in them, at most `params.limit` ids.
 *
 * Returns an empty array when passage-grounded retrieval is disabled, when no
 * query embedding is supplied (new guard, matching the empty-input guards of
 * the sibling fetchers and avoiding a query that cannot succeed), or when the
 * query throws (best-effort lookup).
 *
 * NOTE(review): `params.reviewFilter` is accepted by the declared signature
 * but is not applied by this query, so claims in any review state can be
 * returned - confirm whether filtering is intended here.
 *
 * @param store Graph store exposing `query(sql, bindings)`.
 * @param params.queryEmbedding Query embedding vector.
 * @param params.limit Number of nearest passages requested and cap on returned ids.
 */
export async function fetchPassageGroundedClaimIds(store, params) {
    if (!isRetrievalPassageGroundedEnabled())
        return [];
    if (!params.queryEmbedding?.length)
        return [];
    try {
        const rows = await store.query(`SELECT array::distinct(array::flatten(<-grounded_in<-claim.id)) AS claim_ids
       FROM (
         SELECT id FROM passage
         WHERE embedding <|$limit,64|> $query_embedding
         LIMIT $limit
       )
       WHERE claim_ids != NONE`, { query_embedding: params.queryEmbedding, limit: params.limit });
        const out = new Set();
        for (const row of rows ?? []) {
            for (const id of row.claim_ids ?? []) {
                // Only string ids are kept.
                if (typeof id === "string")
                    out.add(id);
            }
        }
        return [...out].slice(0, params.limit);
    }
    catch {
        return [];
    }
}
96
/**
 * Returns ids of claims whose linked subject (`about_subject`) or author
 * (`authored`) fuzzy-matches one of the search terms, at most `params.limit`
 * ids.
 *
 * Up to the first 6 terms are lowercased and de-duplicated, and empty terms
 * are dropped. Each remaining term is queried in order until the limit is
 * reached. Previously the needle list was built but only its first entry was
 * ever sent to the store, so every other term was silently ignored.
 *
 * Returns an empty array when taxonomy routing is disabled or `params.terms`
 * is empty. If a query throws, the ids collected so far are returned
 * (best-effort lookup); a failure on the first term therefore yields an empty
 * array, as before.
 *
 * NOTE(review): `params.reviewFilter` is interpolated verbatim into the query
 * text - it must only ever come from trusted code, never from user input.
 *
 * @param store Graph store exposing `query(sql, bindings)`.
 * @param params.terms Search terms.
 * @param params.limit Maximum number of ids to return.
 * @param params.reviewFilter Condition fragment for the WHERE clause.
 */
export async function fetchTaxonomySeedClaimIds(store, params) {
    if (!isRetrievalTaxonomyRoutingEnabled() || params.terms.length === 0)
        return [];
    const needles = [...new Set(params.terms.slice(0, 6).map((t) => t.toLowerCase()).filter(Boolean))];
    const found = new Set();
    // Sequential on purpose: earlier terms take priority and later queries are
    // skipped once the limit is reached.
    for (const needle of needles) {
        if (found.size >= params.limit)
            break;
        try {
            const rows = await store.query(`SELECT id FROM claim
         WHERE ${params.reviewFilter}
           AND (
             id IN (
               SELECT VALUE in FROM about_subject
               WHERE out.name ~ $needle OR out.slug ~ $needle
             )
             OR id IN (
               SELECT VALUE in FROM authored
               WHERE out.name ~ $needle OR out.canonical_name ~ $needle
             )
           )
         LIMIT $limit`, { needle, limit: params.limit });
            for (const row of rows ?? []) {
                // Skip rows without an id instead of emitting "undefined" / "null".
                if (row?.id == null)
                    continue;
                const id = String(row.id);
                if (id)
                    found.add(id);
            }
        }
        catch {
            break;
        }
    }
    return [...found].slice(0, params.limit);
}
120
/**
 * Defines the `passage_embedding` HNSW index (768 dimensions, cosine
 * distance) on `passage.embedding` with `IF NOT EXISTS`. Does nothing unless
 * passage-grounded retrieval is enabled. Query errors propagate to the caller.
 */
export async function ensurePassageEmbeddingIndex(db) {
    const enabled = isRetrievalPassageGroundedEnabled();
    if (enabled) {
        const ddl = `
    DEFINE INDEX IF NOT EXISTS passage_embedding ON passage FIELDS embedding HNSW DIMENSION 768 DIST COSINE;
  `;
        await db.query(ddl);
    }
}
127
/**
 * Defines the `claim_accept_requires_passage` event on `claim` with
 * `IF NOT EXISTS`. The event fires on CREATE and UPDATE and throws when a
 * claim's `review_state` is 'accepted' but the passage count it checks is 0.
 * Does nothing unless enforcement is enabled. Query errors propagate to the
 * caller.
 */
export async function ensureClaimAcceptPassageEvent(db) {
    const enabled = isKgEnforcePassageOnAcceptEnabled();
    if (enabled) {
        const ddl = `
    DEFINE EVENT IF NOT EXISTS claim_accept_requires_passage ON claim
    WHEN $event = 'CREATE' OR $event = 'UPDATE'
    THEN {
      IF $after.review_state = 'accepted' AND count(<-grounded_in<-passage WHERE in = $after.id) = 0 {
        THROW 'accepted claims must link to at least one passage';
      };
    };
  `;
        await db.query(ddl);
    }
}
package/package.json ADDED
@@ -0,0 +1,44 @@
1
+ {
2
+ "name": "@restormel/graphrag-core",
3
+ "version": "0.1.0",
4
+ "description": "Knowledge Retrieve pipeline — graph-aware RAG with injected GraphStore and embedding ports (no Surreal driver in package).",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/Allotment-Technology-Ltd/restormel-keys.git",
9
+ "directory": "packages/graphrag-core"
10
+ },
11
+ "type": "module",
12
+ "main": "./dist/index.js",
13
+ "types": "./dist/index.d.ts",
14
+ "files": [
15
+ "dist",
16
+ "README.md",
17
+ "CHANGELOG.md"
18
+ ],
19
+ "exports": {
20
+ ".": {
21
+ "types": "./dist/index.d.ts",
22
+ "import": "./dist/index.js"
23
+ }
24
+ },
25
+ "engines": {
26
+ "node": ">=20"
27
+ },
28
+ "dependencies": {
29
+ "@restormel/contracts": "0.1.1",
30
+ "@restormel/graph-core": "0.1.2"
31
+ },
32
+ "devDependencies": {
33
+ "typescript": "^5.7.0",
34
+ "vitest": "^4.1.0"
35
+ },
36
+ "publishConfig": {
37
+ "access": "public"
38
+ },
39
+ "scripts": {
40
+ "build": "tsc -b tsconfig.json",
41
+ "typecheck": "tsc -b tsconfig.json",
42
+ "test": "vitest run"
43
+ }
44
+ }