@restormel/graphrag-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1401 @@
1
+ /**
2
+ * SOPHIA — Argument-Aware Retrieval
3
+ *
4
+ * Core differentiator: not just semantic similarity, but graph traversal
5
+ * that assembles complete argumentative chains from the knowledge graph.
6
+ *
7
+ * Retrieval pipeline:
8
+ * 1. Embed query via Voyage AI
9
+ * 2. Vector search for top-K semantically similar claims
10
+ * 3. Graph traversal for each seed claim (depends_on, supports, contradicts, responds_to, defines, qualifies, part_of)
11
+ * 4. Deduplicate claims
12
+ * 5. Resolve inter-claim relations
13
+ * 6. Fetch argument structure (conclusion + key premises)
14
+ * 7. Return assembled RetrievalResult
15
+ *
16
+ * Graceful degradation: never throws — returns empty result on any failure
17
+ * so the three-pass engine can still work without the graph.
18
+ */
19
+ import { detectCorpusLevelQuery, extractLexicalTerms, fuseHybridCandidates } from './hybrid-candidate-generation.js';
20
+ import { IDEAL_RETRIEVAL_ORIGIN_FRACTIONS, isRetrievalKgBalanceEnabled, } from './kg-balance.js';
21
+ import { constructSeedSet } from './seed-set-constructor.js';
22
+ import { fetchBm25ClaimCandidates, fetchNativeGraphNeighbors, fetchPassageGroundedClaimIds, fetchTaxonomySeedClaimIds, isRetrievalPassageGroundedEnabled, isRetrievalTaxonomyRoutingEnabled, isRetrievalBm25Enabled } from './surreal-retrieval-enhancements.js';
23
/**
 * Resolve the `ef` argument of the SurrealDB KNN operator `<|k,ef|>`
 * (ef tunes HNSW/ANN search breadth — see the Surreal vector docs).
 *
 * The value is read from the `RETRIEVAL_KNN_EF` environment variable, parsed as a
 * base-10 integer, and clamped to the inclusive range 16–512. When the variable
 * is unset or does not parse to a finite number, 64 is used.
 *
 * @returns {number} The ef value to embed in the KNN operator.
 */
function retrievalDenseKnnEf() {
  const FALLBACK_EF = 64;
  const LOWER_BOUND = 16;
  const UPPER_BOUND = 512;
  const configured = process.env.RETRIEVAL_KNN_EF ?? '64';
  const parsed = Number.parseInt(configured.trim(), 10);
  // parseInt yields NaN for junk and Infinity for absurdly long digit runs; both fall back.
  if (!Number.isFinite(parsed)) {
    return FALLBACK_EF;
  }
  const cappedAbove = Math.min(UPPER_BOUND, parsed);
  return Math.max(LOWER_BOUND, cappedAbove);
}
31
/**
 * Build the SurrealDB KNN operator text `<|k,ef|>`.
 *
 * @param {number} k - Requested neighbour count; truncated toward zero and raised to at least 1.
 * @returns {string} Operator string whose `ef` part comes from `retrievalDenseKnnEf()`.
 */
function surrealKnnOperator(k) {
  const wholeK = Math.trunc(k);
  const neighbourCount = Math.max(1, wholeK);
  const searchBreadth = retrievalDenseKnnEf();
  return `<|${neighbourCount},${searchBreadth}|>`;
}
36
/**
 * Produce the SurrealQL predicate that restricts claims by `verification_state`.
 *
 * Flagged claims are always excluded. When the trusted graph is active and the
 * `RETRIEVAL_REQUIRE_VERIFIED` environment variable is `1`, `true` or `yes`
 * (case-insensitive, surrounding whitespace ignored), the predicate additionally
 * requires `verification_state = 'validated'`.
 *
 * @param {boolean} trustedGraphActive - Whether the caller considers the trusted graph active.
 * @returns {string} A predicate fragment suitable for AND-ing into a WHERE clause.
 */
function claimVerificationSqlFilter(trustedGraphActive) {
  const excludeFlagged = `(verification_state = NONE OR verification_state != 'flagged')`;
  if (!trustedGraphActive) {
    return excludeFlagged;
  }
  const setting = (process.env.RETRIEVAL_REQUIRE_VERIFIED ?? '').trim().toLowerCase();
  const optedIn = ['1', 'true', 'yes'].includes(setting);
  return optedIn ? `${excludeFlagged} AND verification_state = 'validated'` : excludeFlagged;
}
45
/**
 * Baseline retrieval result with no claims, relations or arguments.
 *
 * `retrieveContext` spreads it into degraded results and also returns it directly
 * when no candidates are found.
 * NOTE(review): because it can be returned by reference, callers should presumably
 * treat the result as read-only — confirm no consumer mutates these arrays.
 */
const EMPTY_RESULT = {
  claims: [],
  relations: [],
  arguments: [],
  seed_claim_ids: [],
  thinker_context: null,
  degraded: false,
};

/**
 * Relation edge tables walked during beam traversal, each paired with the
 * `edgePrior` multiplier applied to a neighbour's beam score.
 */
const RELATION_TRAVERSAL_BEAM_SPECS = [
  ['supports', 1.04],
  ['contradicts', 1.16],
  ['depends_on', 0.92],
  ['responds_to', 1.2],
  ['defines', 0.9],
  ['qualifies', 0.88],
  ['refines', 0.86],
  ['exemplifies', 0.82],
].map(([table, edgePrior]) => ({ table, edgePrior }));

/**
 * Relation edge tables paired with the `relationType` label they are reported under.
 * `refines` is labelled `qualifies` and `exemplifies` is labelled `supports`.
 */
const RELATION_FETCH_SPECS = [
  ['supports', 'supports'],
  ['contradicts', 'contradicts'],
  ['depends_on', 'depends_on'],
  ['responds_to', 'responds_to'],
  ['defines', 'defines'],
  ['qualifies', 'qualifies'],
  ['refines', 'qualifies'],
  ['exemplifies', 'supports'],
].map(([table, relationType]) => ({ table, relationType }));
73
/** Normalized claim types treated as a thesis. */
const THESIS_CLAIM_TYPES = new Set(['thesis', 'conclusion']);
/** Normalized claim types treated as an objection. */
const OBJECTION_CLAIM_TYPES = new Set(['objection', 'counterargument', 'counter_argument']);
/** Normalized claim types treated as a reply to an objection. */
const REPLY_CLAIM_TYPES = new Set(['response', 'reply', 'rebuttal']);

/**
 * Canonicalize a claim type for set membership checks.
 *
 * @param {unknown} claimType - Raw `claim_type` value from a claim row.
 * @returns {string} The trimmed, lower-cased type, or `''` when the value is not a string.
 */
function normalizeClaimType(claimType) {
  // Rows with a missing or non-string claim_type must not crash classification;
  // an empty string matches none of the type sets.
  if (typeof claimType !== 'string') {
    return '';
  }
  return claimType.trim().toLowerCase();
}

/**
 * @param {unknown} claimType - Raw claim type.
 * @returns {boolean} True when the normalized type is in `THESIS_CLAIM_TYPES`.
 */
function isThesisClaimType(claimType) {
  return THESIS_CLAIM_TYPES.has(normalizeClaimType(claimType));
}

/**
 * @param {unknown} claimType - Raw claim type.
 * @returns {boolean} True when the normalized type is in `OBJECTION_CLAIM_TYPES`.
 */
function isObjectionClaimType(claimType) {
  return OBJECTION_CLAIM_TYPES.has(normalizeClaimType(claimType));
}

/**
 * @param {unknown} claimType - Raw claim type.
 * @returns {boolean} True when the normalized type is in `REPLY_CLAIM_TYPES`.
 */
function isReplyClaimType(claimType) {
  return REPLY_CLAIM_TYPES.has(normalizeClaimType(claimType));
}
88
/**
 * Pick the ids of the claims that best serve as thesis anchors.
 *
 * Claims typed as a thesis are preferred. When none exist, claims typed
 * `premise`, `support` or `methodological` are used instead. Within either
 * group, seed claims rank ahead of non-seeds, then higher confidence first
 * (missing confidence counts as 0).
 *
 * @param {{ claims: Array<object>, seedClaimIds: Iterable<string>, limit: number }} params
 *   `claims` are read for `id`, `claim_type` and `confidence`.
 * @returns {string[]} At most `limit` claim ids; empty when there are no claims or `limit <= 0`.
 */
function selectMajorThesisIds(params) {
  const { claims, seedClaimIds, limit } = params;
  if (claims.length === 0 || limit <= 0) {
    return [];
  }

  const seedIds = new Set(seedClaimIds);
  const bySeedThenConfidence = (left, right) => {
    const seedDelta = Number(seedIds.has(right.id)) - Number(seedIds.has(left.id));
    if (seedDelta !== 0) {
      return seedDelta;
    }
    return (right.confidence ?? 0) - (left.confidence ?? 0);
  };
  const rank = (predicate) => claims.filter(predicate).sort(bySeedThenConfidence);
  const topIds = (ranked) => ranked.slice(0, limit).map((claim) => claim.id);

  const rankedTheses = rank((claim) => isThesisClaimType(claim.claim_type));
  if (rankedTheses.length > 0) {
    return topIds(rankedTheses);
  }

  // Fallback when claim typing is sparse: treat top seed/supportive claims as thesis anchors.
  const supportiveTypes = ['premise', 'support', 'methodological'];
  const rankedFallback = rank((claim) => supportiveTypes.includes(normalizeClaimType(claim.claim_type)));
  return topIds(rankedFallback);
}
120
/**
 * Compute the minimum neighbour confidence for a traversal hop.
 *
 * The base is first clamped to [0.2, 0.85]; each hop beyond the first adds 0.08,
 * and the result is clamped to [0.2, 0.9].
 *
 * @param {number} baseThreshold - Starting confidence threshold.
 * @param {number} hop - 1-based hop number.
 * @returns {number} Threshold for the given hop.
 */
function computeHopConfidenceThreshold(baseThreshold, hop) {
  const clamp = (value, low, high) => Math.max(low, Math.min(high, value));
  const boundedBase = clamp(baseThreshold, 0.2, 0.85);
  const hopIncrease = (hop - 1) * 0.08;
  return clamp(boundedBase + hopIncrease, 0.2, 0.9);
}
124
/**
 * Weight a traversal neighbour by how its domain relates to the query and anchor.
 *
 * - Neighbour domain unknown: 0.92.
 * - Target domain set: 1.05 when the neighbour matches it, otherwise 0.72.
 * - No target but anchor domain set: 1.0 on a match, otherwise 0.84.
 * - Neither target nor anchor domain: 0.92.
 *
 * @param {{ targetDomain?: string, anchorDomain?: string, neighborDomain?: string }} params
 * @returns {number} Multiplicative weight for the neighbour's score.
 */
function computeDomainExpansionWeight(params) {
  const { targetDomain, anchorDomain, neighborDomain } = params;
  const NEUTRAL_WEIGHT = 0.92;
  if (!neighborDomain) {
    return NEUTRAL_WEIGHT;
  }
  if (targetDomain) {
    return neighborDomain === targetDomain ? 1.05 : 0.72;
  }
  if (anchorDomain) {
    return neighborDomain === anchorDomain ? 1.0 : 0.84;
  }
  return NEUTRAL_WEIGHT;
}
136
/**
 * Translate a relation's `strength` label into a score multiplier.
 *
 * @param {unknown} strength - Strength label from a relation row; matched case-insensitively.
 * @returns {number} 1.08 for `strong`, 0.86 for `weak`, and 1 for anything else,
 *   including missing, empty or non-string values.
 */
function parseRelationStrengthWeight(strength) {
  // A non-string value (e.g. a numeric strength) previously threw on toLowerCase();
  // treat it like an unknown label and apply the neutral weight.
  if (typeof strength !== 'string') {
    return 1;
  }
  switch (strength.toLowerCase()) {
    case 'strong':
      return 1.08;
    case 'weak':
      return 0.86;
    default:
      return 1;
  }
}
146
/**
 * Convert a raw thinker node into a normalized summary record.
 *
 * @param {unknown} node - Candidate thinker row.
 * @returns {{ wikidata_id: string, name: string, birth_year: number|null,
 *   death_year: number|null, traditions: string[] } | null}
 *   The summary, or null when `node` is not an object or has no non-blank string `name`.
 *   Non-string `wikidata_id` becomes `''`, non-number years become null, and
 *   `traditions` keeps only non-blank strings (empty when not an array).
 */
function toThinkerSummary(node) {
  if (node === null || typeof node !== 'object') {
    return null;
  }

  const {
    wikidata_id: rawId,
    name: rawName,
    birth_year: rawBirth,
    death_year: rawDeath,
    traditions: rawTraditions,
  } = node;

  const name = typeof rawName === 'string' ? rawName.trim() : '';
  if (name === '') {
    return null;
  }

  const yearOrNull = (value) => (typeof value === 'number' ? value : null);
  const isNonBlankString = (value) => typeof value === 'string' && value.trim().length > 0;
  const traditions = Array.isArray(rawTraditions) ? rawTraditions.filter(isNonBlankString) : [];

  return {
    wikidata_id: typeof rawId === 'string' ? rawId : '',
    name,
    birth_year: yearOrNull(rawBirth),
    death_year: yearOrNull(rawDeath),
    traditions,
  };
}
164
/**
 * De-duplicate thinkers across the three context lists and cap the total kept.
 *
 * Lists are processed in the order `direct_authors`, `influences`, `teachers`.
 * A thinker is identified by `wikidata_id`, or by lower-cased `name` when the id
 * is empty. A thinker already kept in an earlier position is skipped; once
 * `maxNodes` distinct thinkers have been kept overall, the current list stops
 * at the first new thinker.
 *
 * @param {{ direct_authors: Array<object>, influences: Array<object>, teachers: Array<object> }} context
 * @param {number} [maxNodes=10] - Maximum number of distinct thinkers across all lists.
 * @returns {{ direct_authors: Array<object>, influences: Array<object>, teachers: Array<object> }}
 */
function capThinkerContext(context, maxNodes = 10) {
  const keptKeys = new Set();

  function keepUnseen(thinkers) {
    const kept = [];
    for (const thinker of thinkers) {
      const identity = thinker.wikidata_id || thinker.name.toLowerCase();
      if (!keptKeys.has(identity)) {
        if (keptKeys.size >= maxNodes) {
          break;
        }
        keptKeys.add(identity);
        kept.push(thinker);
      }
    }
    return kept;
  }

  const direct_authors = keepUnseen(context.direct_authors);
  const influences = keepUnseen(context.influences);
  const teachers = keepUnseen(context.teachers);
  return { direct_authors, influences, teachers };
}
185
/**
 * Look up the thinkers connected to the sources of the given claims.
 *
 * Runs one SurrealQL script that collects the authors of the claims' sources,
 * plus the thinkers those authors are linked to via `influenced_by` and
 * `student_of`. Rows are normalized with `toThinkerSummary` and the combined
 * context is capped at 10 distinct thinkers via `capThinkerContext`.
 *
 * Query or processing failures never propagate: they are logged with
 * `console.debug` and null is returned.
 *
 * @param {{ query: Function }} store - Store whose `query(surql, params)` returns a promise.
 *   NOTE(review): assumes the resolved value is an array whose first element is the
 *   RETURN object — confirm against the store implementation.
 * @param {string[]} claimIds - Claim ids whose sources should be inspected.
 * @returns {Promise<object|null>} Capped thinker context, or null when `claimIds` is
 *   empty or not an array, nothing usable is found, or the lookup fails.
 */
async function fetchThinkerContext(store, claimIds) {
  if (!Array.isArray(claimIds) || claimIds.length === 0) {
    return null;
  }

  const summarizeAll = (entries) =>
    (entries ?? [])
      .map((entry) => toThinkerSummary(entry))
      .filter((summary) => summary !== null);

  try {
    const result = await store.query(`LET $source_ids = array::distinct((SELECT VALUE source FROM claim WHERE id INSIDE $claim_ids));
LET $author_rows = (SELECT <-authored<-thinker AS thinkers FROM $source_ids FETCH thinkers);
LET $direct_authors = array::flatten($author_rows.thinkers);
LET $influence_rows = (SELECT ->influenced_by->thinker AS thinkers FROM $direct_authors.id FETCH thinkers);
LET $teacher_rows = (SELECT ->student_of->thinker AS thinkers FROM $direct_authors.id FETCH thinkers);
RETURN {
direct_authors: $direct_authors,
influences: array::flatten($influence_rows.thinkers),
teachers: array::flatten($teacher_rows.thinkers)
};`, { claim_ids: claimIds });

    const payload = Array.isArray(result) ? result[0] : null;
    if (!payload) {
      return null;
    }

    const direct_authors = summarizeAll(payload.direct_authors);
    const influences = summarizeAll(payload.influences);
    const teachers = summarizeAll(payload.teachers);
    const foundCount = direct_authors.length + influences.length + teachers.length;
    if (foundCount === 0) {
      return null;
    }
    return capThinkerContext({ direct_authors, influences, teachers }, 10);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    const haystack = message.toLowerCase();
    const mentionsAny = (needles) => needles.some((needle) => haystack.includes(needle));
    // An error naming a thinker-graph table together with a "missing/invalid" hint is
    // reported as the thinker graph being unavailable.
    const thinkerGraphMissing =
      mentionsAny(['authored', 'thinker', 'influenced_by', 'student_of']) &&
      mentionsAny(['table', 'record', 'not found', 'does not exist', 'invalid']);
    if (thinkerGraphMissing) {
      console.debug('[RETRIEVAL] Thinker enrichment unavailable (missing thinker graph tables); returning null');
    } else {
      console.debug('[RETRIEVAL] Thinker enrichment failed; returning null:', message);
    }
    return null;
  }
}
239
/**
 * Render a thinker as `Name (birth-death), Tradition`.
 *
 * The year span is omitted only when both years are null; a single missing year
 * is shown as `?`. Only the first tradition is appended, when there is one.
 *
 * @param {{ name: string, birth_year: number|null, death_year: number|null, traditions: string[] }} thinker
 * @returns {string} Display label.
 */
function formatThinkerDisplayName(thinker) {
  const { name, birth_year: born, death_year: died, traditions } = thinker;
  const hasAnyYear = born !== null || died !== null;
  const lifespan = hasAnyYear ? ` (${born ?? '?'}-${died ?? '?'})` : '';
  const school = traditions.length === 0 ? '' : `, ${traditions[0]}`;
  return `${name}${lifespan}${school}`;
}
246
/**
 * Render a thinker context as a plain-text advisory block.
 *
 * Thinkers with a blank name are dropped and influences are limited to the first
 * five. The block starts with two header lines and a blank line, followed by one
 * line per non-empty group (authors, influences, teachers), joined with newlines.
 *
 * @param {{ direct_authors: Array<object>, influences: Array<object>, teachers: Array<object> } | null | undefined} context
 * @returns {string} The formatted block, or `''` when `context` is falsy or no group has a named thinker.
 */
export function formatThinkerContextBlock(context) {
  if (!context) {
    return '';
  }

  const withNames = (thinkers) => thinkers.filter((thinker) => thinker.name.trim().length > 0);
  const sections = [
    ['Authors of retrieved sources', withNames(context.direct_authors)],
    ['Influences in this lineage', withNames(context.influences).slice(0, 5)],
    ['Teachers in this lineage', withNames(context.teachers)],
  ];
  if (sections.every(([, thinkers]) => thinkers.length === 0)) {
    return '';
  }

  const header = [
    'PHILOSOPHICAL LINEAGE CONTEXT (advisory — heuristic data from Wikidata)',
    '(sourced from Wikidata thinker graph — advisory context only)',
    '',
  ];
  const body = sections
    .filter(([, thinkers]) => thinkers.length > 0)
    .map(([label, thinkers]) => {
      const names = thinkers.map((thinker) => formatThinkerDisplayName(thinker)).join(', ');
      return `${label}: ${names}`;
    });
  return [...header, ...body].join('\n');
}
270
+ // ─── Main retrieval function ───────────────────────────────────────────────
271
+ /**
272
+ * Retrieve structured philosophical context from the argument graph.
273
+ *
274
+ * Assembles complete argumentative chains by:
275
+ * 1. Finding semantically similar claims via vector search
276
+ * 2. Traversing the graph for supporting/contradicting/dependent claims
277
+ * 3. Resolving arguments those claims participate in
278
+ *
279
+ * Never throws — returns empty result on any failure.
280
+ */
281
+ export async function retrieveContext(userQuery, deps, options = {}) {
282
+ const { store, embedder, resolveOriginBucket } = deps;
283
+ const { topK = 5, domain, minConfidence = 0, maxHops, maxClaims, hybridMode = 'auto', enrichWithThinkerContext = false } = options;
284
+ const traversalMaxHops = Math.max(1, maxHops ?? (topK >= 10 ? 3 : topK <= 3 ? 1 : 2));
285
+ const traversalClaimCap = Math.max(topK, maxClaims ?? (topK >= 10 ? 120 : topK <= 3 ? 32 : 72));
286
+ try {
287
+ // ── Step 1: Embed the query ──────────────────────────────────
288
+ let queryEmbedding;
289
+ try {
290
+ console.log('[RETRIEVAL] Embedding query:', userQuery.substring(0, 50) + '...');
291
+ queryEmbedding = await embedder.embedQuery(userQuery);
292
+ console.log('[RETRIEVAL] ✓ Query embedding received:', queryEmbedding.length, 'dimensions');
293
+ }
294
+ catch (err) {
295
+ console.error('[RETRIEVAL] Embedding API failed:', err instanceof Error ? err.message : err);
296
+ return {
297
+ ...EMPTY_RESULT,
298
+ degraded: true,
299
+ degraded_reason: 'embedding_unavailable'
300
+ };
301
+ }
302
+ // ── Step 2: Hybrid candidate generation (dense + lexical) ───
303
+ // Dense path: vector index (HNSW or MTREE) + KNN `<|k,ef|>`.
304
+ // Lexical path: exact-term matching for philosophy-specific phrases.
305
+ // Fusion: reciprocal-rank fusion + lightweight rerank.
306
+ const densePool = domain || minConfidence > 0 ? topK * 4 : topK * 3;
307
+ const lexicalTerms = hybridMode === 'dense_only' ? [] : extractLexicalTerms(userQuery);
308
+ const corpusLevelQuery = hybridMode === 'dense_only' ? false : detectCorpusLevelQuery(userQuery);
309
+ const queryDecomposition = {
310
+ focus_mode: corpusLevelQuery ? 'corpus_overview' : 'focused',
311
+ domain_filter: domain,
312
+ hybrid_mode: hybridMode,
313
+ corpus_level_query: corpusLevelQuery,
314
+ lexical_terms: lexicalTerms.slice(0, 16),
315
+ lexical_term_count: lexicalTerms.length
316
+ };
317
+ const lexicalPool = lexicalTerms.length === 0 ? 0 : corpusLevelQuery ? topK * 8 : topK * 4;
318
+ const acceptedClaimRows = await store.query(`SELECT count() AS count FROM claim WHERE review_state = 'accepted' GROUP ALL`).catch(() => []);
319
+ const trustedGraphActive = (acceptedClaimRows[0]?.count ?? 0) > 0;
320
+ const claimReviewFilter = trustedGraphActive
321
+ ? `review_state = 'accepted'`
322
+ : `(review_state = NONE OR review_state IN ['candidate', 'needs_review', 'accepted'])`;
323
+ const relationReviewFilter = trustedGraphActive
324
+ ? `review_state = 'accepted'`
325
+ : `(review_state = NONE OR review_state IN ['candidate', 'needs_review', 'accepted'])`;
326
+ // Stage 3.2: traversal beam only follows trusted edges.
327
+ const traversalRelationReviewFilter = `review_state = 'accepted'`;
328
+ const argumentClaimReviewFilter = trustedGraphActive
329
+ ? `in.review_state = 'accepted'`
330
+ : `(in.review_state = NONE OR in.review_state IN ['candidate', 'needs_review', 'accepted'])`;
331
+ const postFilters = [];
332
+ if (domain)
333
+ postFilters.push('domain = $domain');
334
+ if (minConfidence > 0)
335
+ postFilters.push('confidence >= $minConfidence');
336
+ postFilters.push(claimReviewFilter);
337
+ postFilters.push(claimVerificationSqlFilter(trustedGraphActive));
338
+ const postWhere = postFilters.length > 0 ? `WHERE ${postFilters.join(' AND ')}` : '';
339
+ const sourcePassageIntegrityCache = new Map();
340
+ const sourceIdPart = (sourceId) => sourceId.includes(':') ? sourceId.split(':').slice(1).join(':') : sourceId;
341
+ const sourceHasPassageCoverage = (sourceId) => {
342
+ if (!sourceId)
343
+ return Promise.resolve(false);
344
+ const existing = sourcePassageIntegrityCache.get(sourceId);
345
+ if (existing)
346
+ return existing;
347
+ const pending = (async () => {
348
+ const sid = sourceIdPart(sourceId);
349
+ const passageRows = await store.query(`SELECT id FROM passage WHERE source = type::record('source', $sid) LIMIT 1`, { sid }).catch(() => []);
350
+ return passageRows.length > 0;
351
+ })();
352
+ sourcePassageIntegrityCache.set(sourceId, pending);
353
+ return pending;
354
+ };
355
+ let seedClaims;
356
+ let seedPoolCount = 0;
357
+ const rejectedClaimsByKey = new Map();
358
+ const rejectedRelations = [];
359
+ const addRejectedClaim = (candidate) => {
360
+ const key = `${candidate.id}|${candidate.reason_code}`;
361
+ if (rejectedClaimsByKey.has(key))
362
+ return;
363
+ rejectedClaimsByKey.set(key, candidate);
364
+ };
365
+ const rowProjection = `SELECT
366
+ id,
367
+ text,
368
+ claim_type,
369
+ domain,
370
+ confidence,
371
+ embedding,
372
+ position_in_source,
373
+ review_state,
374
+ section_context,
375
+ source.id AS source_id,
376
+ source.url AS source_url,
377
+ source.source_type AS source_source_type,
378
+ source.title AS source_title,
379
+ source.author AS source_author`;
380
+ const sharedParams = {
381
+ ...(domain ? { domain } : {}),
382
+ ...(minConfidence > 0 ? { minConfidence } : {})
383
+ };
384
+ let denseSeedClaims = [];
385
+ let lexicalSeedClaims = [];
386
+ try {
387
+ const knnOp = surrealKnnOperator(densePool);
388
+ console.log('[RETRIEVAL] Dense candidate generation topK=', topK, 'knn=', knnOp);
389
+ const denseSurql = (op) => `${rowProjection}
390
+ FROM (
391
+ SELECT *
392
+ FROM claim
393
+ WHERE embedding ${op} $query_embedding
394
+ )
395
+ ${postWhere}
396
+ LIMIT ${densePool}`;
397
+ const denseParams = {
398
+ query_embedding: queryEmbedding,
399
+ ...sharedParams
400
+ };
401
+ try {
402
+ denseSeedClaims = await store.query(denseSurql(knnOp), denseParams);
403
+ }
404
+ catch (knnErr) {
405
+ const legacyOp = `<|${Math.max(1, Math.trunc(densePool))}|>`;
406
+ console.warn('[RETRIEVAL] KNN two-arg operator failed; retrying legacy', legacyOp, knnErr instanceof Error ? knnErr.message : knnErr);
407
+ denseSeedClaims = await store.query(denseSurql(legacyOp), denseParams);
408
+ }
409
+ console.log('[RETRIEVAL] ✓ Dense candidates:', denseSeedClaims?.length || 0);
410
+ }
411
+ catch (dbErr) {
412
+ if (store.isDatabaseUnavailable(dbErr)) {
413
+ console.warn('[RETRIEVAL] Database unavailable during dense candidate retrieval');
414
+ return {
415
+ ...EMPTY_RESULT,
416
+ degraded: true,
417
+ degraded_reason: 'database_unavailable'
418
+ };
419
+ }
420
+ throw dbErr;
421
+ }
422
+ if (hybridMode !== 'dense_only' && lexicalPool > 0) {
423
+ try {
424
+ const lexicalTermClauses = lexicalTerms.map((_term, idx) => `(text ~ $term_${idx} OR section_context ~ $term_${idx})`);
425
+ const lexicalWhere = `WHERE (${lexicalTermClauses.join(' OR ')}) AND ${postFilters.join(' AND ')}`;
426
+ const lexicalParams = { ...sharedParams };
427
+ for (const [idx, term] of lexicalTerms.entries()) {
428
+ lexicalParams[`term_${idx}`] = term;
429
+ }
430
+ lexicalSeedClaims = await store.query(`${rowProjection}
431
+ FROM claim
432
+ ${lexicalWhere}
433
+ ORDER BY confidence DESC
434
+ LIMIT ${lexicalPool}`, lexicalParams);
435
+ if (isRetrievalBm25Enabled() && lexicalTerms.length > 0) {
436
+ const bm25Rows = await fetchBm25ClaimCandidates(store, {
437
+ terms: lexicalTerms,
438
+ limit: lexicalPool,
439
+ reviewFilter: postFilters.join(' AND ')
440
+ });
441
+ if (bm25Rows.length > 0) {
442
+ const existing = new Set(lexicalSeedClaims.map((r) => String(r.id)));
443
+ for (const row of bm25Rows) {
444
+ if (existing.has(String(row.id)))
445
+ continue;
446
+ lexicalSeedClaims.push({
447
+ id: row.id,
448
+ text: row.text,
449
+ claim_type: 'premise',
450
+ domain: 'ethics',
451
+ confidence: row.confidence ?? 0.5,
452
+ position_in_source: 0,
453
+ section_context: null,
454
+ source_id: '',
455
+ source_title: 'Unknown',
456
+ source_author: []
457
+ });
458
+ }
459
+ }
460
+ }
461
+ console.log('[RETRIEVAL] ✓ Lexical candidates:', lexicalSeedClaims?.length || 0, 'terms=', lexicalTerms.length);
462
+ }
463
+ catch (lexicalErr) {
464
+ console.warn('[RETRIEVAL] Lexical candidate generation failed (continuing with dense only):', lexicalErr instanceof Error ? lexicalErr.message : lexicalErr);
465
+ }
466
+ }
467
+ if (isRetrievalTaxonomyRoutingEnabled() && lexicalTerms.length > 0) {
468
+ try {
469
+ const taxonomyIds = await fetchTaxonomySeedClaimIds(store, {
470
+ terms: lexicalTerms,
471
+ limit: Math.max(4, topK),
472
+ reviewFilter: postFilters.join(' AND ')
473
+ });
474
+ if (taxonomyIds.length > 0) {
475
+ const taxonomyRows = await store.query(`${rowProjection}
476
+ FROM claim
477
+ WHERE id INSIDE $ids AND ${postFilters.join(' AND ')}
478
+ LIMIT $limit`, { ids: taxonomyIds, limit: taxonomyIds.length });
479
+ const existing = new Set([
480
+ ...denseSeedClaims.map((r) => String(r.id)),
481
+ ...lexicalSeedClaims.map((r) => String(r.id))
482
+ ]);
483
+ for (const row of taxonomyRows ?? []) {
484
+ if (existing.has(String(row.id)))
485
+ continue;
486
+ denseSeedClaims.push(row);
487
+ existing.add(String(row.id));
488
+ }
489
+ console.log('[RETRIEVAL] ✓ Taxonomy-routed seeds:', taxonomyRows?.length ?? 0);
490
+ }
491
+ }
492
+ catch (taxonomyErr) {
493
+ console.warn('[RETRIEVAL] Taxonomy routing failed (continuing):', taxonomyErr instanceof Error ? taxonomyErr.message : taxonomyErr);
494
+ }
495
+ }
496
+ let passageGroundedClaimIds = [];
497
+ if (isRetrievalPassageGroundedEnabled()) {
498
+ try {
499
+ passageGroundedClaimIds = await fetchPassageGroundedClaimIds(store, {
500
+ queryEmbedding,
501
+ limit: Math.max(4, topK),
502
+ reviewFilter: postFilters.join(' AND ')
503
+ });
504
+ if (passageGroundedClaimIds.length > 0) {
505
+ const passageRows = await store.query(`${rowProjection}
506
+ FROM claim
507
+ WHERE id INSIDE $ids AND ${postFilters.join(' AND ')}
508
+ LIMIT $limit`, { ids: passageGroundedClaimIds, limit: passageGroundedClaimIds.length });
509
+ const existing = new Set([
510
+ ...denseSeedClaims.map((r) => String(r.id)),
511
+ ...lexicalSeedClaims.map((r) => String(r.id))
512
+ ]);
513
+ for (const row of passageRows ?? []) {
514
+ if (existing.has(String(row.id)))
515
+ continue;
516
+ denseSeedClaims.push(row);
517
+ existing.add(String(row.id));
518
+ }
519
+ console.log('[RETRIEVAL] ✓ Passage-grounded seeds:', passageRows?.length ?? 0);
520
+ }
521
+ }
522
+ catch (passageErr) {
523
+ console.warn('[RETRIEVAL] Passage-grounded retrieval failed (continuing):', passageErr instanceof Error ? passageErr.message : passageErr);
524
+ }
525
+ }
526
+ if ((!denseSeedClaims || denseSeedClaims.length === 0) && lexicalSeedClaims.length === 0) {
527
+ console.log('[RETRIEVAL] No candidates found in dense or lexical retrieval');
528
+ return EMPTY_RESULT;
529
+ }
530
+ if (hybridMode === 'dense_only') {
531
+ seedClaims = denseSeedClaims;
532
+ seedPoolCount = denseSeedClaims.length;
533
+ }
534
+ else {
535
+ const fusion = fuseHybridCandidates({
536
+ dense: denseSeedClaims,
537
+ lexical: lexicalSeedClaims,
538
+ lexicalTerms,
539
+ poolSize: Math.max(topK * 4, topK),
540
+ corpusLevelQuery
541
+ });
542
+ seedClaims = fusion.ranked;
543
+ seedPoolCount = fusion.fusedCount;
544
+ }
545
+ if (!seedClaims || seedClaims.length === 0) {
546
+ console.log('[RETRIEVAL] Hybrid fusion returned no candidates');
547
+ return EMPTY_RESULT;
548
+ }
549
+ console.log(`[RETRIEVAL] Candidate generation mode=${hybridMode} dense=${denseSeedClaims.length} lexical=${lexicalSeedClaims.length} fused=${seedPoolCount} corpusLevel=${corpusLevelQuery}`);
550
+ const seedPool = [...seedClaims];
551
+ const vettedSeedPool = [];
552
+ for (const seed of seedPool) {
553
+ const sourceOk = await sourceHasPassageCoverage(seed.source_id);
554
+ if (!sourceOk) {
555
+ addRejectedClaim({
556
+ id: typeof seed.id === 'object' ? String(seed.id) : seed.id,
557
+ text: seed.text,
558
+ source_title: seed.source_title ?? 'Unknown',
559
+ confidence: seed.confidence,
560
+ reason_code: 'source_integrity_gate',
561
+ considered_in: 'seed_pool'
562
+ });
563
+ continue;
564
+ }
565
+ vettedSeedPool.push(seed);
566
+ }
567
+ if (vettedSeedPool.length === 0) {
568
+ return {
569
+ ...EMPTY_RESULT,
570
+ degraded: true,
571
+ degraded_reason: 'source_integrity_gate'
572
+ };
573
+ }
574
+ const domainsInPool = new Set();
575
+ for (const s of vettedSeedPool) {
576
+ domainsInPool.add(String(s.domain ?? 'unknown'));
577
+ }
578
+ const seedSet = constructSeedSet({
579
+ candidates: vettedSeedPool,
580
+ topK,
581
+ queryEmbedding,
582
+ ...(isRetrievalKgBalanceEnabled()
583
+ ? {
584
+ kgBalance: {
585
+ idealOrigin: IDEAL_RETRIEVAL_ORIGIN_FRACTIONS,
586
+ domainsInPool,
587
+ getOrigin: (c) => resolveOriginBucket(c.source_url ?? null, c.source_source_type ?? null),
588
+ getDomainKey: (c) => String(c.domain ?? 'unknown')
589
+ }
590
+ }
591
+ : {})
592
+ });
593
+ seedClaims = seedSet.seeds;
594
+ console.log(`[RETRIEVAL] Found ${seedClaims.length} seed claims`);
595
+ const seedClaimIds = seedClaims.map((seed) => typeof seed.id === 'object' ? String(seed.id) : seed.id);
596
+ const seedTrace = seedClaims.map((seed) => ({
597
+ id: typeof seed.id === 'object' ? String(seed.id) : seed.id,
598
+ claim_type: seed.claim_type,
599
+ domain: seed.domain,
600
+ source_title: seed.source_title ?? 'Unknown',
601
+ confidence: seed.confidence ?? 0
602
+ }));
603
+ const selectedSeedIds = new Set(seedClaimIds);
604
+ for (const candidate of vettedSeedPool) {
605
+ const candidateId = typeof candidate.id === 'object' ? String(candidate.id) : candidate.id;
606
+ if (selectedSeedIds.has(candidateId))
607
+ continue;
608
+ addRejectedClaim({
609
+ id: candidateId,
610
+ text: candidate.text,
611
+ source_title: candidate.source_title ?? 'Unknown',
612
+ confidence: candidate.confidence,
613
+ reason_code: 'seed_pool_pruned',
614
+ considered_in: 'seed_pool'
615
+ });
616
+ }
617
+ const allGraphClaims = new Map();
618
+ const argumentIds = new Set();
619
+ // Add seed claims to the map first
620
+ for (const seed of seedClaims) {
621
+ const id = typeof seed.id === 'object' ? String(seed.id) : seed.id;
622
+ allGraphClaims.set(id, {
623
+ id,
624
+ text: seed.text,
625
+ claim_type: seed.claim_type,
626
+ domain: seed.domain,
627
+ source_title: seed.source_title ?? 'Unknown',
628
+ source_author: seed.source_author ?? [],
629
+ confidence: seed.confidence,
630
+ position_in_source: seed.position_in_source ?? 0
631
+ });
632
+ }
633
+ const resolveSource = (claim) => {
634
+ if (claim.source && typeof claim.source === 'object' && 'title' in claim.source) {
635
+ return {
636
+ id: claim.source.id,
637
+ title: claim.source.title,
638
+ author: claim.source.author ?? []
639
+ };
640
+ }
641
+ return { title: 'Unknown', author: [] };
642
+ };
643
+ const toClaimId = (idValue) => {
644
+ if (!idValue)
645
+ return null;
646
+ if (typeof idValue === 'string')
647
+ return idValue;
648
+ return String(idValue);
649
+ };
650
+ const claimProjection = `{id, text, claim_type, domain, confidence, position_in_source, review_state, verification_state, source.{id, title, author}}`;
651
+ const passesTraversalClaimGate = (claim) => {
652
+ if (claim.review_state === 'rejected' || claim.review_state === 'merged')
653
+ return false;
654
+ if (claim.verification_state === 'flagged')
655
+ return false;
656
+ if (trustedGraphActive && claim.review_state !== 'accepted')
657
+ return false;
658
+ const raw = (process.env.RETRIEVAL_REQUIRE_VERIFIED ?? '').trim().toLowerCase();
659
+ if (trustedGraphActive &&
660
+ (raw === '1' || raw === 'true' || raw === 'yes') &&
661
+ claim.verification_state !== 'validated') {
662
+ return false;
663
+ }
664
+ return true;
665
+ };
666
+ const maxNewClaimsPerHop = topK >= 10 ? 48 : topK <= 3 ? 12 : 28;
667
+ const beamWidthPerHop = topK >= 10 ? 44 : topK <= 3 ? 10 : 24;
668
+ const beamQueryLimitPerTable = topK >= 10 ? 260 : topK <= 3 ? 64 : 140;
669
+ const hopDecayFactor = traversalMaxHops <= 1 ? 1 : 0.78;
670
+ const traversalBaseConfidence = minConfidence > 0 ? Math.max(0.3, Math.min(0.8, minConfidence)) : 0.38;
671
+ const traversalConfidenceThresholds = Array.from({ length: traversalMaxHops }, (_, idx) => computeHopConfidenceThreshold(traversalBaseConfidence, idx + 1));
672
+ let frontier = new Set(seedClaimIds);
673
+ const nativeNeighborIds = await fetchNativeGraphNeighbors(store, {
674
+ seedIds: seedClaimIds,
675
+ limit: Math.max(16, topK * 4)
676
+ });
677
+ for (const neighborId of nativeNeighborIds) {
678
+ frontier.add(neighborId);
679
+ }
680
+ for (let hop = 1; hop <= traversalMaxHops; hop++) {
681
+ if (frontier.size === 0 || allGraphClaims.size >= traversalClaimCap)
682
+ break;
683
+ const frontierIds = Array.from(frontier);
684
+ const frontierSet = new Set(frontierIds);
685
+ const hopConfidenceThreshold = traversalConfidenceThresholds[hop - 1] ?? traversalBaseConfidence;
686
+ const hopDecay = Math.pow(hopDecayFactor, hop - 1);
687
+ const hopCandidates = new Map();
688
+ for (const spec of RELATION_TRAVERSAL_BEAM_SPECS) {
689
+ try {
690
+ const rows = await store.query(`SELECT
691
+ in,
692
+ out,
693
+ in.${claimProjection} AS in_claim,
694
+ out.${claimProjection} AS out_claim,
695
+ strength,
696
+ note
697
+ FROM ${spec.table}
698
+ WHERE (in INSIDE $frontier_ids OR out INSIDE $frontier_ids) AND ${traversalRelationReviewFilter}
699
+ LIMIT ${beamQueryLimitPerTable}`, { frontier_ids: frontierIds });
700
+ if (!rows || !Array.isArray(rows))
701
+ continue;
702
+ const registerBeamCandidate = (params) => {
703
+ const { anchorId, neighbor, strength, edgePrior } = params;
704
+ if (!neighbor)
705
+ return;
706
+ const neighborId = toClaimId(neighbor.id);
707
+ if (!neighborId)
708
+ return;
709
+ const source = resolveSource(neighbor);
710
+ if (!passesTraversalClaimGate(neighbor))
711
+ return;
712
+ if ((neighbor.confidence ?? 0) < hopConfidenceThreshold) {
713
+ addRejectedClaim({
714
+ id: neighborId,
715
+ text: neighbor.text,
716
+ source_title: source.title,
717
+ confidence: neighbor.confidence,
718
+ reason_code: 'confidence_gate',
719
+ considered_in: 'traversal',
720
+ anchor_claim_id: anchorId
721
+ });
722
+ return;
723
+ }
724
+ if (allGraphClaims.has(neighborId)) {
725
+ addRejectedClaim({
726
+ id: neighborId,
727
+ text: neighbor.text,
728
+ source_title: source.title,
729
+ confidence: neighbor.confidence,
730
+ reason_code: 'duplicate_traversal',
731
+ considered_in: 'traversal',
732
+ anchor_claim_id: anchorId
733
+ });
734
+ return;
735
+ }
736
+ const anchor = allGraphClaims.get(anchorId);
737
+ const domainWeight = computeDomainExpansionWeight({
738
+ targetDomain: domain,
739
+ anchorDomain: anchor?.domain,
740
+ neighborDomain: neighbor.domain
741
+ });
742
+ const strengthWeight = parseRelationStrengthWeight(strength);
743
+ const anchorWeight = 0.7 + 0.3 * (anchor?.confidence ?? 0.6);
744
+ const score = Math.max(0.01, neighbor.confidence ?? 0.5) *
745
+ edgePrior *
746
+ hopDecay *
747
+ domainWeight *
748
+ strengthWeight *
749
+ anchorWeight;
750
+ const existing = hopCandidates.get(neighborId);
751
+ if (!existing || score > existing.score) {
752
+ hopCandidates.set(neighborId, {
753
+ claim: neighbor,
754
+ anchorId,
755
+ score
756
+ });
757
+ }
758
+ };
759
+ for (const row of rows) {
760
+ const inId = toClaimId(row.in);
761
+ const outId = toClaimId(row.out);
762
+ if (!inId || !outId)
763
+ continue;
764
+ if (frontierSet.has(inId)) {
765
+ registerBeamCandidate({
766
+ anchorId: inId,
767
+ neighbor: row.out_claim,
768
+ strength: row.strength,
769
+ edgePrior: spec.edgePrior
770
+ });
771
+ }
772
+ if (frontierSet.has(outId)) {
773
+ registerBeamCandidate({
774
+ anchorId: outId,
775
+ neighbor: row.in_claim,
776
+ strength: row.strength,
777
+ edgePrior: spec.edgePrior
778
+ });
779
+ }
780
+ }
781
+ }
782
+ catch (traversalErr) {
783
+ console.warn(`[RETRIEVAL] Beam traversal failed for ${spec.table}:`, traversalErr instanceof Error ? traversalErr.message : traversalErr);
784
+ }
785
+ }
786
+ const candidates = Array.from(hopCandidates.values())
787
+ .sort((a, b) => b.score - a.score)
788
+ .slice(0, beamWidthPerHop);
789
+ const selected = [];
790
+ const seenSources = new Set();
791
+ const hopBudget = Math.min(maxNewClaimsPerHop, Math.max(traversalClaimCap - allGraphClaims.size, 0));
792
+ for (const candidate of candidates) {
793
+ if (selected.length >= hopBudget)
794
+ break;
795
+ const source = resolveSource(candidate.claim);
796
+ if (!(await sourceHasPassageCoverage(source.id))) {
797
+ addRejectedClaim({
798
+ id: typeof candidate.claim.id === 'object'
799
+ ? String(candidate.claim.id)
800
+ : candidate.claim.id,
801
+ text: candidate.claim.text,
802
+ source_title: source.title,
803
+ confidence: candidate.claim.confidence,
804
+ reason_code: 'source_integrity_gate',
805
+ considered_in: 'traversal',
806
+ anchor_claim_id: candidate.anchorId
807
+ });
808
+ continue;
809
+ }
810
+ const sourceTitle = source.title;
811
+ if (seenSources.has(sourceTitle))
812
+ continue;
813
+ seenSources.add(sourceTitle);
814
+ selected.push(candidate);
815
+ }
816
+ for (const candidate of candidates) {
817
+ if (selected.length >= hopBudget)
818
+ break;
819
+ if (selected.includes(candidate))
820
+ continue;
821
+ const source = resolveSource(candidate.claim);
822
+ if (!(await sourceHasPassageCoverage(source.id))) {
823
+ addRejectedClaim({
824
+ id: typeof candidate.claim.id === 'object'
825
+ ? String(candidate.claim.id)
826
+ : candidate.claim.id,
827
+ text: candidate.claim.text,
828
+ source_title: source.title,
829
+ confidence: candidate.claim.confidence,
830
+ reason_code: 'source_integrity_gate',
831
+ considered_in: 'traversal',
832
+ anchor_claim_id: candidate.anchorId
833
+ });
834
+ continue;
835
+ }
836
+ selected.push(candidate);
837
+ }
838
+ const nextFrontier = new Set();
839
+ for (const { claim } of selected) {
840
+ const cId = typeof claim.id === 'object' ? String(claim.id) : claim.id;
841
+ const source = resolveSource(claim);
842
+ allGraphClaims.set(cId, {
843
+ id: cId,
844
+ text: claim.text,
845
+ claim_type: claim.claim_type,
846
+ domain: claim.domain,
847
+ source_title: source.title,
848
+ source_author: source.author,
849
+ confidence: claim.confidence ?? 0.5,
850
+ position_in_source: claim.position_in_source ?? 0
851
+ });
852
+ nextFrontier.add(cId);
853
+ }
854
+ const selectedClaimIds = Array.from(nextFrontier);
855
+ if (selectedClaimIds.length > 0) {
856
+ try {
857
+ const argRefs = await store.query(`SELECT out.id AS arg_id FROM part_of WHERE in INSIDE $claim_ids LIMIT 200`, { claim_ids: selectedClaimIds });
858
+ if (argRefs && Array.isArray(argRefs)) {
859
+ for (const row of argRefs) {
860
+ const aId = toClaimId(row.arg_id);
861
+ if (aId)
862
+ argumentIds.add(aId);
863
+ }
864
+ }
865
+ }
866
+ catch (argRefErr) {
867
+ console.warn('[RETRIEVAL] Beam traversal argument lookup failed:', argRefErr instanceof Error ? argRefErr.message : argRefErr);
868
+ }
869
+ }
870
+ console.log(`[RETRIEVAL] hop ${hop}/${traversalMaxHops}: candidates=${candidates.length} threshold=${hopConfidenceThreshold.toFixed(2)} added=${selected.length} frontier=${nextFrontier.size}`);
871
+ frontier = nextFrontier;
872
+ }
873
+ // Add argument-neighborhood claims so traversal can surface complete
874
+ // argument structures (conclusions + key premises), not only local edges.
875
+ if (argumentIds.size > 0 && allGraphClaims.size < traversalClaimCap) {
876
+ try {
877
+ const memberRows = await store.query(`SELECT
878
+ in.{id, text, claim_type, domain, confidence, position_in_source, review_state, source.{id, title, author}} AS in,
879
+ role
880
+ FROM part_of
881
+ WHERE out INSIDE $arg_ids AND ${argumentClaimReviewFilter}`, { arg_ids: Array.from(argumentIds) });
882
+ if (memberRows && Array.isArray(memberRows)) {
883
+ const roleRank = (role) => {
884
+ if (role === 'conclusion')
885
+ return 0;
886
+ if (role === 'key_premise')
887
+ return 1;
888
+ if (role === 'supporting_premise')
889
+ return 2;
890
+ return 3;
891
+ };
892
+ const sorted = [...memberRows].sort((a, b) => {
893
+ const rankDelta = roleRank(a.role) - roleRank(b.role);
894
+ if (rankDelta !== 0)
895
+ return rankDelta;
896
+ return (b.in?.confidence ?? 0) - (a.in?.confidence ?? 0);
897
+ });
898
+ for (const row of sorted) {
899
+ if (allGraphClaims.size >= traversalClaimCap)
900
+ break;
901
+ if (!row.in)
902
+ continue;
903
+ const claim = row.in;
904
+ if (claim.review_state === 'rejected' || claim.review_state === 'merged')
905
+ continue;
906
+ if (trustedGraphActive && claim.review_state !== 'accepted')
907
+ continue;
908
+ const cId = typeof claim.id === 'object' ? String(claim.id) : claim.id;
909
+ if (allGraphClaims.has(cId))
910
+ continue;
911
+ const source = claim.source && typeof claim.source === 'object' && 'title' in claim.source
912
+ ? {
913
+ id: claim.source.id,
914
+ title: claim.source.title,
915
+ author: claim.source.author ?? []
916
+ }
917
+ : { title: 'Unknown', author: [] };
918
+ if (!(await sourceHasPassageCoverage(source.id))) {
919
+ addRejectedClaim({
920
+ id: cId,
921
+ text: claim.text,
922
+ source_title: source.title,
923
+ confidence: claim.confidence,
924
+ reason_code: 'source_integrity_gate',
925
+ considered_in: 'traversal'
926
+ });
927
+ continue;
928
+ }
929
+ allGraphClaims.set(cId, {
930
+ id: cId,
931
+ text: claim.text,
932
+ claim_type: claim.claim_type,
933
+ domain: claim.domain,
934
+ source_title: source.title,
935
+ source_author: source.author,
936
+ confidence: claim.confidence ?? 0.5,
937
+ position_in_source: claim.position_in_source ?? 0
938
+ });
939
+ }
940
+ }
941
+ }
942
+ catch (argNeighborhoodErr) {
943
+ console.warn('[RETRIEVAL] Failed to expand argument-neighborhood claims:', argNeighborhoodErr instanceof Error ? argNeighborhoodErr.message : argNeighborhoodErr);
944
+ }
945
+ }
946
+ const contradictionNeighborCache = new Map();
947
+ const replyNeighborCache = new Map();
948
+ const majorThesisLimit = Math.max(1, Math.min(3, Math.ceil(topK / 4)));
949
+ const majorThesisIds = selectMajorThesisIds({
950
+ claims: Array.from(allGraphClaims.values()),
951
+ seedClaimIds,
952
+ limit: majorThesisLimit
953
+ });
954
+ let closureClaimsAdded = 0;
955
+ let closureObjectionsAdded = 0;
956
+ let closureRepliesAdded = 0;
957
+ let closureCapLimitedUnits = 0;
958
+ const closureUnits = [];
959
+ const passesClosureReviewGate = (claim) => {
960
+ if (claim.review_state === 'rejected' || claim.review_state === 'merged')
961
+ return false;
962
+ if (trustedGraphActive && claim.review_state !== 'accepted')
963
+ return false;
964
+ return true;
965
+ };
966
+ const hasClosurePassageCoverage = async (claim, anchorClaimId) => {
967
+ const source = resolveSource(claim);
968
+ const covered = await sourceHasPassageCoverage(source.id);
969
+ if (covered)
970
+ return true;
971
+ addRejectedClaim({
972
+ id: typeof claim.id === 'object' ? String(claim.id) : claim.id,
973
+ text: claim.text,
974
+ source_title: source.title,
975
+ confidence: claim.confidence,
976
+ reason_code: 'source_integrity_gate',
977
+ considered_in: 'traversal',
978
+ anchor_claim_id: anchorClaimId
979
+ });
980
+ return false;
981
+ };
982
+ const attachClaimForClosure = async (claim, anchorClaimId) => {
983
+ const claimId = typeof claim.id === 'object' ? String(claim.id) : claim.id;
984
+ if (allGraphClaims.has(claimId))
985
+ return 'present';
986
+ if (allGraphClaims.size >= traversalClaimCap)
987
+ return 'blocked_cap';
988
+ if (!(await hasClosurePassageCoverage(claim, anchorClaimId)))
989
+ return 'blocked_source';
990
+ const source = resolveSource(claim);
991
+ allGraphClaims.set(claimId, {
992
+ id: claimId,
993
+ text: claim.text,
994
+ claim_type: claim.claim_type,
995
+ domain: claim.domain,
996
+ source_title: source.title,
997
+ source_author: source.author,
998
+ confidence: claim.confidence ?? 0.5,
999
+ position_in_source: claim.position_in_source ?? 0
1000
+ });
1001
+ return 'added';
1002
+ };
1003
+ const fetchRelationNeighbors = async (table, claimId, cache) => {
1004
+ const cached = cache.get(claimId);
1005
+ if (cached)
1006
+ return cached;
1007
+ const pending = (async () => {
1008
+ try {
1009
+ const rows = await store.query(`SELECT
1010
+ in.${claimProjection} AS in_claim,
1011
+ out.${claimProjection} AS out_claim
1012
+ FROM ${table}
1013
+ WHERE (in = $claim_id OR out = $claim_id) AND ${relationReviewFilter}
1014
+ LIMIT 24`, { claim_id: claimId });
1015
+ if (!rows || !Array.isArray(rows))
1016
+ return [];
1017
+ const byId = new Map();
1018
+ for (const row of rows) {
1019
+ const inClaim = row.in_claim;
1020
+ const outClaim = row.out_claim;
1021
+ if (!inClaim || !outClaim)
1022
+ continue;
1023
+ const inId = toClaimId(inClaim.id);
1024
+ const outId = toClaimId(outClaim.id);
1025
+ const neighbor = inId === claimId ? outClaim : outId === claimId ? inClaim : undefined;
1026
+ if (!neighbor)
1027
+ continue;
1028
+ const neighborId = toClaimId(neighbor.id);
1029
+ if (!neighborId)
1030
+ continue;
1031
+ const existing = byId.get(neighborId);
1032
+ if (!existing || (neighbor.confidence ?? 0) > (existing.confidence ?? 0)) {
1033
+ byId.set(neighborId, neighbor);
1034
+ }
1035
+ }
1036
+ return Array.from(byId.values()).sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0));
1037
+ }
1038
+ catch (err) {
1039
+ console.warn(`[RETRIEVAL] Closure lookup failed for ${table} on ${claimId}:`, err instanceof Error ? err.message : err);
1040
+ return [];
1041
+ }
1042
+ })();
1043
+ cache.set(claimId, pending);
1044
+ return pending;
1045
+ };
1046
+ const pickClosureCandidate = async (candidates, anchorClaimId, matcher) => {
1047
+ if (candidates.length === 0)
1048
+ return null;
1049
+ for (const requireTypedMatch of [true, false]) {
1050
+ for (const candidate of candidates) {
1051
+ if (!passesClosureReviewGate(candidate))
1052
+ continue;
1053
+ if (requireTypedMatch && !matcher(candidate.claim_type))
1054
+ continue;
1055
+ const candidateId = typeof candidate.id === 'object' ? String(candidate.id) : candidate.id;
1056
+ if (allGraphClaims.has(candidateId))
1057
+ return candidate;
1058
+ if (await hasClosurePassageCoverage(candidate, anchorClaimId)) {
1059
+ return candidate;
1060
+ }
1061
+ }
1062
+ }
1063
+ return null;
1064
+ };
1065
+ for (const thesisId of majorThesisIds) {
1066
+ const unit = {
1067
+ thesis_claim_id: thesisId,
1068
+ objection_found: false,
1069
+ reply_found: false,
1070
+ unit_complete: false
1071
+ };
1072
+ let capLimitedInUnit = false;
1073
+ const thesisExists = allGraphClaims.has(thesisId);
1074
+ if (!thesisExists) {
1075
+ closureUnits.push(unit);
1076
+ continue;
1077
+ }
1078
+ const contradictionNeighbors = await fetchRelationNeighbors('contradicts', thesisId, contradictionNeighborCache);
1079
+ const objectionCandidate = await pickClosureCandidate(contradictionNeighbors, thesisId, isObjectionClaimType);
1080
+ if (objectionCandidate) {
1081
+ const objectionId = typeof objectionCandidate.id === 'object'
1082
+ ? String(objectionCandidate.id)
1083
+ : objectionCandidate.id;
1084
+ const objectionAttach = await attachClaimForClosure(objectionCandidate, thesisId);
1085
+ if (objectionAttach === 'added') {
1086
+ closureClaimsAdded += 1;
1087
+ closureObjectionsAdded += 1;
1088
+ }
1089
+ if (objectionAttach === 'blocked_cap') {
1090
+ capLimitedInUnit = true;
1091
+ }
1092
+ if (objectionAttach === 'added' || objectionAttach === 'present') {
1093
+ unit.objection_found = true;
1094
+ unit.objection_claim_id = objectionId;
1095
+ const replyNeighbors = await fetchRelationNeighbors('responds_to', objectionId, replyNeighborCache);
1096
+ const replyCandidate = await pickClosureCandidate(replyNeighbors, objectionId, isReplyClaimType);
1097
+ if (replyCandidate) {
1098
+ const replyId = typeof replyCandidate.id === 'object'
1099
+ ? String(replyCandidate.id)
1100
+ : replyCandidate.id;
1101
+ const replyAttach = await attachClaimForClosure(replyCandidate, objectionId);
1102
+ if (replyAttach === 'added') {
1103
+ closureClaimsAdded += 1;
1104
+ closureRepliesAdded += 1;
1105
+ }
1106
+ if (replyAttach === 'blocked_cap') {
1107
+ capLimitedInUnit = true;
1108
+ }
1109
+ if (replyAttach === 'added' || replyAttach === 'present') {
1110
+ unit.reply_found = true;
1111
+ unit.reply_claim_id = replyId;
1112
+ }
1113
+ }
1114
+ }
1115
+ }
1116
+ unit.unit_complete = unit.objection_found && unit.reply_found;
1117
+ if (capLimitedInUnit)
1118
+ closureCapLimitedUnits += 1;
1119
+ closureUnits.push(unit);
1120
+ }
1121
+ const closureStats = {
1122
+ major_thesis_count: majorThesisIds.length,
1123
+ units_attempted: majorThesisIds.length,
1124
+ units_completed: closureUnits.filter((unit) => unit.unit_complete).length,
1125
+ claims_added_for_closure: closureClaimsAdded,
1126
+ objections_added: closureObjectionsAdded,
1127
+ replies_added: closureRepliesAdded,
1128
+ cap_limited_units: closureCapLimitedUnits,
1129
+ units: closureUnits
1130
+ };
1131
+ console.log('[RETRIEVAL] Closure enforcement', {
1132
+ major_theses: closureStats.major_thesis_count,
1133
+ units_completed: closureStats.units_completed,
1134
+ claims_added: closureStats.claims_added_for_closure
1135
+ });
1136
+ // ── Step 4: Build deduplicated claims array ──────────────────
1137
+ const claims = Array.from(allGraphClaims.values());
1138
+ const claimIdToIndex = new Map();
1139
+ claims.forEach((c, i) => claimIdToIndex.set(c.id, i));
1140
+ console.log(`[RETRIEVAL] ${claims.length} unique claims after graph traversal`);
1141
+ // ── Step 5: Resolve relations between claims in result set ───
1142
+ const relations = [];
1143
+ const claimIds = claims.map((c) => c.id);
1144
+ let relationCandidateCount = 0;
1145
+ const keptRelationKeys = new Set();
1146
+ if (claimIds.length >= 2) {
1147
+ for (const { table, relationType } of RELATION_FETCH_SPECS) {
1148
+ try {
1149
+ const rels = await store.query(`SELECT in, out, $table AS relation_type, strength, note
1150
+ FROM ${table}
1151
+ WHERE in INSIDE $ids AND out INSIDE $ids AND ${relationReviewFilter}`, { ids: claimIds, table });
1152
+ if (rels && Array.isArray(rels)) {
1153
+ relationCandidateCount += rels.length;
1154
+ for (const rel of rels) {
1155
+ const fromId = typeof rel.in === 'object' ? String(rel.in) : rel.in;
1156
+ const toId = typeof rel.out === 'object' ? String(rel.out) : rel.out;
1157
+ const fromIdx = claimIdToIndex.get(fromId);
1158
+ const toIdx = claimIdToIndex.get(toId);
1159
+ if (fromIdx === undefined || toIdx === undefined) {
1160
+ rejectedRelations.push({
1161
+ from_claim_id: fromId,
1162
+ to_claim_id: toId,
1163
+ relation_type: relationType,
1164
+ reason_code: 'missing_endpoint',
1165
+ strength: rel.strength,
1166
+ note: rel.note
1167
+ });
1168
+ continue;
1169
+ }
1170
+ const relationKey = `${fromIdx}|${toIdx}|${relationType}`;
1171
+ if (keptRelationKeys.has(relationKey)) {
1172
+ rejectedRelations.push({
1173
+ from_claim_id: fromId,
1174
+ to_claim_id: toId,
1175
+ relation_type: relationType,
1176
+ reason_code: 'duplicate_relation',
1177
+ strength: rel.strength,
1178
+ note: rel.note
1179
+ });
1180
+ continue;
1181
+ }
1182
+ keptRelationKeys.add(relationKey);
1183
+ relations.push({
1184
+ from_index: fromIdx,
1185
+ to_index: toIdx,
1186
+ relation_type: relationType,
1187
+ strength: rel.strength,
1188
+ note: rel.note
1189
+ });
1190
+ }
1191
+ }
1192
+ }
1193
+ catch (relErr) {
1194
+ console.warn(`[RETRIEVAL] Failed to query ${table} relations:`, relErr instanceof Error ? relErr.message : relErr);
1195
+ }
1196
+ }
1197
+ }
1198
+ console.log(`[RETRIEVAL] ${relations.length} relations among retrieved claims`);
1199
+ // ── Step 6: Fetch argument structures ────────────────────────
1200
+ const arguments_ = [];
1201
+ for (const argId of argumentIds) {
1202
+ try {
1203
+ const argRows = await store.query(`SELECT
1204
+ *,
1205
+ <-part_of<-claim.{text, role: <-part_of[WHERE out = $arg_id].role} AS member_claims
1206
+ FROM $arg_id`, { arg_id: argId });
1207
+ if (!argRows || argRows.length === 0)
1208
+ continue;
1209
+ const arg = Array.isArray(argRows) ? argRows[0] : argRows;
1210
+ // Try a simpler approach to get member claims with roles
1211
+ let conclusionText = null;
1212
+ const keyPremises = [];
1213
+ const partOfRels = await store.query(`SELECT in, role, in.text AS claim_text
1214
+ FROM part_of
1215
+ WHERE out = $arg_id`, { arg_id: argId });
1216
+ if (partOfRels && Array.isArray(partOfRels)) {
1217
+ for (const po of partOfRels) {
1218
+ if (po.role === 'conclusion' && po.claim_text) {
1219
+ conclusionText = po.claim_text;
1220
+ }
1221
+ else if (po.role === 'key_premise' && po.claim_text) {
1222
+ keyPremises.push(po.claim_text);
1223
+ }
1224
+ }
1225
+ }
1226
+ arguments_.push({
1227
+ id: typeof arg.id === 'object' ? String(arg.id) : arg.id,
1228
+ name: arg.name,
1229
+ tradition: arg.tradition,
1230
+ domain: arg.domain,
1231
+ summary: arg.summary,
1232
+ conclusion_text: conclusionText,
1233
+ key_premises: keyPremises
1234
+ });
1235
+ }
1236
+ catch (argErr) {
1237
+ console.warn(`[RETRIEVAL] Failed to fetch argument ${argId}:`, argErr instanceof Error ? argErr.message : argErr);
1238
+ }
1239
+ }
1240
+ console.log(`[RETRIEVAL] ${arguments_.length} arguments assembled`);
1241
+ let thinkerContext = null;
1242
+ if (enrichWithThinkerContext) {
1243
+ const claimIdsForThinkerContext = claims.map((claim) => claim.id).filter(Boolean);
1244
+ thinkerContext = await fetchThinkerContext(store, claimIdsForThinkerContext);
1245
+ }
1246
+ const traversalEdgePriors = Object.fromEntries(RELATION_TRAVERSAL_BEAM_SPECS.map((spec) => [spec.table, spec.edgePrior]));
1247
+ const pruningSummary = {
1248
+ claims_by_reason: {
1249
+ seed_pool_pruned: 0,
1250
+ duplicate_traversal: 0,
1251
+ confidence_gate: 0,
1252
+ source_integrity_gate: 0
1253
+ },
1254
+ relations_by_reason: {
1255
+ duplicate_relation: 0,
1256
+ missing_endpoint: 0
1257
+ }
1258
+ };
1259
+ for (const rejected of rejectedClaimsByKey.values()) {
1260
+ pruningSummary.claims_by_reason[rejected.reason_code] += 1;
1261
+ }
1262
+ for (const rejected of rejectedRelations) {
1263
+ pruningSummary.relations_by_reason[rejected.reason_code] += 1;
1264
+ }
1265
+ let evidencePassages;
1266
+ if (isRetrievalPassageGroundedEnabled() && passageGroundedClaimIds.length > 0) {
1267
+ try {
1268
+ const passageRows = await store.query(`SELECT passage.id AS id, passage.text AS text, in AS claim_id
1269
+ FROM grounded_in
1270
+ WHERE in INSIDE $claim_ids
1271
+ FETCH passage
1272
+ LIMIT 8`, { claim_ids: claims.map((c) => c.id).slice(0, 24) });
1273
+ evidencePassages = (passageRows ?? []).map((row) => ({
1274
+ passage_id: String(row.id),
1275
+ excerpt: (row.text ?? '').slice(0, 480),
1276
+ claim_ids: row.claim_id ? [String(row.claim_id)] : []
1277
+ }));
1278
+ }
1279
+ catch {
1280
+ evidencePassages = undefined;
1281
+ }
1282
+ }
1283
+ return {
1284
+ claims,
1285
+ relations,
1286
+ arguments: arguments_,
1287
+ seed_claim_ids: seedClaimIds,
1288
+ evidence_passages: evidencePassages,
1289
+ thinker_context: thinkerContext,
1290
+ trace: {
1291
+ seed_pool_count: seedPoolCount,
1292
+ selected_seed_count: seedClaimIds.length,
1293
+ hybrid_mode: hybridMode,
1294
+ dense_seed_count: denseSeedClaims.length,
1295
+ lexical_seed_count: lexicalSeedClaims.length,
1296
+ lexical_terms: lexicalTerms.slice(0, 8),
1297
+ corpus_level_query: corpusLevelQuery,
1298
+ seed_balance_stats: seedSet.stats,
1299
+ traversal_mode: 'beam_trusted_v1',
1300
+ traversal_max_hops: traversalMaxHops,
1301
+ traversal_hop_decay: hopDecayFactor,
1302
+ traversal_base_confidence_threshold: traversalBaseConfidence,
1303
+ traversal_confidence_thresholds: traversalConfidenceThresholds,
1304
+ traversal_domain_aware: true,
1305
+ traversal_trusted_edges_only: true,
1306
+ traversal_edge_priors: traversalEdgePriors,
1307
+ query_decomposition: queryDecomposition,
1308
+ seed_claims: seedTrace,
1309
+ pruning_summary: pruningSummary,
1310
+ traversed_claim_count: Math.max(claims.length - seedClaimIds.length, 0),
1311
+ relation_candidate_count: relationCandidateCount,
1312
+ relation_kept_count: relations.length,
1313
+ argument_candidate_count: argumentIds.size,
1314
+ argument_kept_count: arguments_.length,
1315
+ closure_stats: closureStats,
1316
+ rejected_claims: Array.from(rejectedClaimsByKey.values()).slice(0, 60),
1317
+ rejected_relations: rejectedRelations.slice(0, 80)
1318
+ },
1319
+ degraded: false
1320
+ };
1321
+ }
1322
+ catch (err) {
1323
+ // Top-level catch: SurrealDB unreachable, unexpected errors, etc.
1324
+ console.error('[RETRIEVAL] Fatal retrieval error (returning empty result):', err instanceof Error ? err.message : err);
1325
+ return {
1326
+ ...EMPTY_RESULT,
1327
+ degraded: true,
1328
+ degraded_reason: store.isDatabaseUnavailable(err) ? 'database_unavailable' : 'retrieval_error'
1329
+ };
1330
+ }
1331
+ }
1332
+ // ─── Context block formatter ───────────────────────────────────────────────
1333
/**
 * Format a RetrievalResult into a structured text block for the LLM prompt.
 *
 * Returns a human-readable representation of the retrieved argument graph
 * that the model can use as grounding context for its three-pass analysis.
 *
 * Claims are labelled `c:001`, `c:002`, … by their position in
 * `result.claims`. Relations and evidence passages refer to claims through
 * those same labels so every reference in the block can be resolved by the
 * reader; a passage's claim id that is not part of `result.claims` is printed
 * verbatim.
 *
 * @param {object} result - Retrieval result. Reads `claims`, and optionally
 *   `relations`, `arguments` and `evidence_passages`.
 * @returns {string} The formatted context block, or a fixed one-line notice
 *   when there are no claims (or no result at all).
 */
export function buildContextBlock(result) {
    if (!result?.claims || result.claims.length === 0) {
        return 'No knowledge base context available for this query.';
    }
    const claims = result.claims;
    const labelForIndex = (index) => `c:${String(index + 1).padStart(3, '0')}`;

    // Group relations by their source claim once, instead of re-filtering the
    // whole relation list for every claim. Insertion order is preserved, so
    // relations print in the same order as before.
    const outgoingByClaimIndex = new Map();
    for (const relation of result.relations ?? []) {
        const bucket = outgoingByClaimIndex.get(relation.from_index);
        if (bucket) {
            bucket.push(relation);
        }
        else {
            outgoingByClaimIndex.set(relation.from_index, [relation]);
        }
    }

    // Raw claim id -> display label, used to make passage links resolvable.
    const labelByClaimId = new Map();
    claims.forEach((claim, index) => {
        if (claim.id == null)
            return;
        const key = String(claim.id);
        if (!labelByClaimId.has(key)) {
            labelByClaimId.set(key, labelForIndex(index));
        }
    });

    const lines = [];
    lines.push('=== PHILOSOPHICAL KNOWLEDGE GRAPH CONTEXT ===');
    lines.push('');
    lines.push('The following are structured claims from SOPHIA\'s curated philosophical knowledge graph. ' +
        'Use these as your philosophical foundation, noting their typed logical relations and source attributions.');
    lines.push('');

    // ── Claims with IDs and Relations ──
    // NOTE(review): claims also carry `source_author`, which is not rendered
    // in the header — confirm whether authors should be shown.
    claims.forEach((claim, index) => {
        lines.push(`CLAIM [${labelForIndex(index)}] (${claim.claim_type}, source: "${claim.source_title}")`);
        lines.push(`"${claim.text}"`);
        for (const relation of outgoingByClaimIndex.get(index) ?? []) {
            const relType = relation.relation_type.toUpperCase().replace(/_/g, ' ');
            const strengthStr = relation.strength ? ` (${relation.strength})` : '';
            lines.push(`  ├─ ${relType} [${labelForIndex(relation.to_index)}]${strengthStr}`);
        }
        lines.push('');
    });

    // ── Arguments ──
    const namedArguments = result.arguments ?? [];
    if (namedArguments.length > 0) {
        lines.push('NAMED ARGUMENTS:');
        for (const arg of namedArguments) {
            const traditionStr = arg.tradition ? ` (${arg.tradition})` : '';
            lines.push(`▸ ${arg.name}${traditionStr}`);
            // A missing summary must not leak the text "undefined" into the prompt.
            if (arg.summary) {
                lines.push(`   ${arg.summary}`);
            }
            if (arg.conclusion_text) {
                lines.push(`   Conclusion: "${arg.conclusion_text}"`);
            }
            const keyPremises = arg.key_premises ?? [];
            if (keyPremises.length > 0) {
                lines.push(`   Key premises: ${keyPremises.map((p) => `"${p}"`).join('; ')}`);
            }
            lines.push('');
        }
    }

    // ── Evidence passages ──
    const evidencePassages = result.evidence_passages ?? [];
    if (evidencePassages.length > 0) {
        lines.push('EVIDENCE PASSAGES (source spans linked to retrieved claims):');
        for (const passage of evidencePassages) {
            lines.push(`▸ ${passage.passage_id}`);
            lines.push(`   "${passage.excerpt}"`);
            const linkedLabels = (passage.claim_ids ?? []).map((claimId) => labelByClaimId.get(String(claimId)) ?? claimId);
            if (linkedLabels.length > 0) {
                lines.push(`   Linked claims: ${linkedLabels.join(', ')}`);
            }
            lines.push('');
        }
    }

    lines.push('=== END KNOWLEDGE GRAPH CONTEXT ===');
    lines.push('');
    lines.push('Use Google Search to verify, challenge, or extend these claims with current sources.');
    return lines.join('\n');
}