@restormel/graphrag-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/LICENSE +21 -0
- package/README.md +38 -0
- package/dist/empty-graph.d.ts +4 -0
- package/dist/empty-graph.d.ts.map +1 -0
- package/dist/empty-graph.js +3 -0
- package/dist/hybrid-candidate-generation.d.ts +23 -0
- package/dist/hybrid-candidate-generation.d.ts.map +1 -0
- package/dist/hybrid-candidate-generation.js +147 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +6 -0
- package/dist/kg-balance.d.ts +21 -0
- package/dist/kg-balance.d.ts.map +1 -0
- package/dist/kg-balance.js +43 -0
- package/dist/ports.d.ts +19 -0
- package/dist/ports.d.ts.map +1 -0
- package/dist/ports.js +1 -0
- package/dist/retrieve-context.d.ts +196 -0
- package/dist/retrieve-context.d.ts.map +1 -0
- package/dist/retrieve-context.js +1401 -0
- package/dist/seed-set-constructor.d.ts +53 -0
- package/dist/seed-set-constructor.d.ts.map +1 -0
- package/dist/seed-set-constructor.js +247 -0
- package/dist/surreal-retrieval-enhancements.d.ts +39 -0
- package/dist/surreal-retrieval-enhancements.d.ts.map +1 -0
- package/dist/surreal-retrieval-enhancements.js +139 -0
- package/package.json +44 -0
|
@@ -0,0 +1,1401 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SOPHIA — Argument-Aware Retrieval
|
|
3
|
+
*
|
|
4
|
+
* Core differentiator: not just semantic similarity, but graph traversal
|
|
5
|
+
* that assembles complete argumentative chains from the knowledge graph.
|
|
6
|
+
*
|
|
7
|
+
* Retrieval pipeline:
|
|
8
|
+
* 1. Embed query via Voyage AI
|
|
9
|
+
* 2. Vector search for top-K semantically similar claims
|
|
10
|
+
 * 3. Graph traversal for each seed claim (depends_on, supports, contradicts, responds_to, defines, qualifies, refines, exemplifies, part_of)
|
|
11
|
+
* 4. Deduplicate claims
|
|
12
|
+
* 5. Resolve inter-claim relations
|
|
13
|
+
* 6. Fetch argument structure (conclusion + key premises)
|
|
14
|
+
* 7. Return assembled RetrievalResult
|
|
15
|
+
*
|
|
16
|
+
* Graceful degradation: never throws — returns empty result on any failure
|
|
17
|
+
* so the three-pass engine can still work without the graph.
|
|
18
|
+
*/
|
|
19
|
+
import { detectCorpusLevelQuery, extractLexicalTerms, fuseHybridCandidates } from './hybrid-candidate-generation.js';
|
|
20
|
+
import { IDEAL_RETRIEVAL_ORIGIN_FRACTIONS, isRetrievalKgBalanceEnabled, } from './kg-balance.js';
|
|
21
|
+
import { constructSeedSet } from './seed-set-constructor.js';
|
|
22
|
+
import { fetchBm25ClaimCandidates, fetchNativeGraphNeighbors, fetchPassageGroundedClaimIds, fetchTaxonomySeedClaimIds, isRetrievalPassageGroundedEnabled, isRetrievalTaxonomyRoutingEnabled, isRetrievalBm25Enabled } from './surreal-retrieval-enhancements.js';
|
|
23
|
+
/**
 * Resolve the `ef` argument used in the SurrealDB KNN operator `<|k,ef|>`
 * (ef tunes HNSW/ANN search breadth — see the Surreal vector docs).
 *
 * The value is read from the `RETRIEVAL_KNN_EF` environment variable, parsed
 * as a base-10 integer and clamped to the range 16..512. When the variable is
 * unset, or does not parse to a finite integer, 64 is used.
 *
 * @returns {number} Integer `ef` value between 16 and 512 inclusive.
 */
function retrievalDenseKnnEf() {
    const fallbackEf = 64;
    const configured = process.env.RETRIEVAL_KNN_EF ?? '64';
    const parsed = Number.parseInt(configured.trim(), 10);
    if (!Number.isFinite(parsed)) {
        return fallbackEf;
    }
    // Clamp: cap at 512 first, then raise to the 16 floor.
    const capped = Math.min(512, parsed);
    return Math.max(16, capped);
}
|
|
31
|
+
/**
 * Build the SurrealDB KNN operator text `<|k,ef|>` for a WHERE clause.
 *
 * @param {number} k Requested neighbour count; truncated toward zero and
 *   raised to at least 1.
 * @returns {string} Operator string, with `ef` taken from retrievalDenseKnnEf().
 */
function surrealKnnOperator(k) {
    const neighbourCount = Math.max(1, Math.trunc(k));
    return `<|${neighbourCount},${retrievalDenseKnnEf()}|>`;
}
|
|
36
|
+
/**
 * Build the SQL fragment that filters claims by `verification_state`.
 *
 * Claims whose state is 'flagged' are always excluded (claims with no state
 * pass). When `trustedGraphActive` is truthy AND the environment variable
 * `RETRIEVAL_REQUIRE_VERIFIED` is "1", "true" or "yes" (case-insensitive,
 * surrounding whitespace ignored), the fragment additionally requires
 * `verification_state = 'validated'`.
 *
 * @param {boolean} trustedGraphActive Whether the trusted-graph mode is on.
 * @returns {string} Condition text suitable for joining into a WHERE clause.
 */
function claimVerificationSqlFilter(trustedGraphActive) {
    const notFlagged = `(verification_state = NONE OR verification_state != 'flagged')`;
    if (!trustedGraphActive) {
        return notFlagged;
    }
    const setting = (process.env.RETRIEVAL_REQUIRE_VERIFIED ?? '').trim().toLowerCase();
    const strictModeRequested = ['1', 'true', 'yes'].includes(setting);
    return strictModeRequested
        ? `${notFlagged} AND verification_state = 'validated'`
        : notFlagged;
}
|
|
45
|
+
/**
 * Template for a retrieval result that carries no content.
 *
 * retrieveContext spreads this object when building degraded results and also
 * returns it directly (by reference) when no candidates are found, so the
 * object and its arrays are shared between calls — treat it as read-only.
 */
const EMPTY_RESULT = {
    // Result collections, all empty.
    claims: [],
    relations: [],
    arguments: [],
    seed_claim_ids: [],
    // No thinker enrichment attached.
    thinker_context: null,
    // Degraded results override this flag with `true`.
    degraded: false
};
|
|
53
|
+
/**
 * Relation tables considered by the traversal beam, each paired with an
 * `edgePrior` number.
 * NOTE(review): the code that consumes `edgePrior` is not visible here —
 * presumably it scales a candidate's score by edge type; confirm at the use site.
 */
const RELATION_TRAVERSAL_BEAM_SPECS = [
    ['supports', 1.04],
    ['contradicts', 1.16],
    ['depends_on', 0.92],
    ['responds_to', 1.2],
    ['defines', 0.9],
    ['qualifies', 0.88],
    ['refines', 0.86],
    ['exemplifies', 0.82]
].map(([table, edgePrior]) => ({ table, edgePrior }));
|
|
63
|
+
/**
 * Relation tables paired with the `relationType` label assigned to each.
 *
 * Most tables map to a label of the same name; two are folded into an
 * existing label: `refines` is labelled 'qualifies' and `exemplifies` is
 * labelled 'supports'.
 */
const RELATION_FETCH_SPECS = [
    ['supports', 'supports'],
    ['contradicts', 'contradicts'],
    ['depends_on', 'depends_on'],
    ['responds_to', 'responds_to'],
    ['defines', 'defines'],
    ['qualifies', 'qualifies'],
    ['refines', 'qualifies'],
    ['exemplifies', 'supports']
].map(([table, relationType]) => ({ table, relationType }));
|
|
73
|
+
// Normalized (lower-case, trimmed) claim_type labels, grouped by the role the
// claim plays in an argument. Looked up by the is*ClaimType predicates.

/** Labels treated as a thesis. */
const THESIS_CLAIM_TYPES = new Set([
    'thesis',
    'conclusion'
]);
/** Labels treated as an objection. */
const OBJECTION_CLAIM_TYPES = new Set([
    'objection',
    'counterargument',
    'counter_argument'
]);
/** Labels treated as a reply. */
const REPLY_CLAIM_TYPES = new Set([
    'response',
    'reply',
    'rebuttal'
]);
|
|
76
|
+
/**
 * Canonicalize a claim_type label for set/equality lookups.
 *
 * @param {unknown} claimType Raw claim_type value from a claim row.
 * @returns {string} The label trimmed and lower-cased, or '' when the value
 *   is not a string (e.g. a claim row with no claim_type), so that callers
 *   filtering many claims do not abort on a single untyped row.
 */
function normalizeClaimType(claimType) {
    if (typeof claimType !== 'string') {
        // Missing/non-string types match no known label instead of throwing.
        return '';
    }
    return claimType.trim().toLowerCase();
}
|
|
79
|
+
/**
 * Report whether a claim_type label, once normalized, is one of the
 * THESIS_CLAIM_TYPES labels.
 *
 * @param {string} claimType Raw claim_type label.
 * @returns {boolean}
 */
function isThesisClaimType(claimType) {
    const label = normalizeClaimType(claimType);
    return THESIS_CLAIM_TYPES.has(label);
}
|
|
82
|
+
/**
 * Report whether a claim_type label, once normalized, is one of the
 * OBJECTION_CLAIM_TYPES labels.
 *
 * @param {string} claimType Raw claim_type label.
 * @returns {boolean}
 */
function isObjectionClaimType(claimType) {
    const label = normalizeClaimType(claimType);
    return OBJECTION_CLAIM_TYPES.has(label);
}
|
|
85
|
+
/**
 * Report whether a claim_type label, once normalized, is one of the
 * REPLY_CLAIM_TYPES labels.
 *
 * @param {string} claimType Raw claim_type label.
 * @returns {boolean}
 */
function isReplyClaimType(claimType) {
    const label = normalizeClaimType(claimType);
    return REPLY_CLAIM_TYPES.has(label);
}
|
|
88
|
+
/**
 * Pick the ids of up to `limit` claims to act as thesis anchors.
 *
 * Claims whose type is a thesis label are preferred. If none exist, claims
 * typed 'premise', 'support' or 'methodological' are used instead. Within the
 * chosen group, claims whose id is in `seedClaimIds` come first, then higher
 * `confidence` (a missing confidence counts as 0).
 *
 * @param {{ claims: Array<object>, seedClaimIds: Iterable<string>, limit: number }} params
 * @returns {Array} Claim ids, best first; empty when there are no claims or
 *   `limit` is not positive.
 */
function selectMajorThesisIds(params) {
    const { claims, seedClaimIds, limit } = params;
    if (claims.length === 0 || limit <= 0) {
        return [];
    }
    const seedIds = new Set(seedClaimIds);
    // Seeds before non-seeds; ties broken by descending confidence.
    const bySeedThenConfidence = (left, right) => {
        const leftIsSeed = seedIds.has(left.id) ? 1 : 0;
        const rightIsSeed = seedIds.has(right.id) ? 1 : 0;
        return leftIsSeed !== rightIsSeed
            ? rightIsSeed - leftIsSeed
            : (right.confidence ?? 0) - (left.confidence ?? 0);
    };
    const rank = (predicate) => claims.filter(predicate).sort(bySeedThenConfidence);
    const topIds = (ranked) => ranked.slice(0, limit).map((claim) => claim.id);

    const rankedTheses = rank((claim) => isThesisClaimType(claim.claim_type));
    if (rankedTheses.length > 0) {
        return topIds(rankedTheses);
    }
    // Fallback when claim typing is sparse: treat top seed/supportive claims as thesis anchors.
    const rankedFallback = rank((claim) => {
        const type = normalizeClaimType(claim.claim_type);
        return type === 'premise' || type === 'support' || type === 'methodological';
    });
    return topIds(rankedFallback);
}
|
|
120
|
+
/**
 * Confidence threshold for a given traversal hop.
 *
 * The base threshold is clamped to 0.2..0.85, raised by 0.08 for every hop
 * beyond the first, and the result is clamped to 0.2..0.9.
 *
 * @param {number} baseThreshold Threshold for hop 1, before clamping.
 * @param {number} hop Hop number (1 for the first hop).
 * @returns {number} Threshold between 0.2 and 0.9.
 */
function computeHopConfidenceThreshold(baseThreshold, hop) {
    const clamp = (value, low, high) => Math.max(low, Math.min(high, value));
    const base = clamp(baseThreshold, 0.2, 0.85);
    const raised = base + (hop - 1) * 0.08;
    return clamp(raised, 0.2, 0.9);
}
|
|
124
|
+
/**
 * Weight for expanding to a neighbour claim, based on how its domain compares
 * with the query's target domain and the anchor claim's domain.
 *
 * When a target domain is given and the neighbour has a domain, the target
 * comparison decides: 1.05 on a match, 0.72 on a mismatch. Otherwise the
 * anchor comparison decides: 1.0 on a match, 0.84 on a mismatch. If neither
 * comparison is possible the weight is 0.92.
 *
 * @param {{ targetDomain?: string, anchorDomain?: string, neighborDomain?: string }} params
 * @returns {number} One of 1.05, 0.72, 1.0, 0.84 or 0.92.
 */
function computeDomainExpansionWeight(params) {
    const { targetDomain, anchorDomain, neighborDomain } = params;
    if (targetDomain) {
        if (neighborDomain === targetDomain) {
            return 1.05;
        }
        if (neighborDomain) {
            return 0.72;
        }
    }
    if (anchorDomain && neighborDomain) {
        return neighborDomain === anchorDomain ? 1.0 : 0.84;
    }
    return 0.92;
}
|
|
136
|
+
/**
 * Translate a relation's `strength` label into a numeric weight.
 *
 * @param {string|null|undefined} strength Label, compared case-insensitively
 *   (no trimming is applied).
 * @returns {number} 1.08 for 'strong', 0.86 for 'weak', and 1 for anything
 *   else, including a missing or empty label.
 */
function parseRelationStrengthWeight(strength) {
    if (!strength) {
        return 1;
    }
    switch (strength.toLowerCase()) {
        case 'strong':
            return 1.08;
        case 'weak':
            return 0.86;
        default:
            return 1;
    }
}
|
|
146
|
+
/**
 * Reduce a raw thinker record to the summary shape used in thinker context.
 *
 * @param {unknown} node Raw record; anything that is not a non-null object is
 *   rejected.
 * @returns {{ wikidata_id: string, name: string, birth_year: number|null,
 *   death_year: number|null, traditions: string[] } | null}
 *   The summary, or null when the record has no non-blank string `name`.
 *   Fields of the wrong type fall back to '' (wikidata_id), null (years) or
 *   [] (traditions); blank and non-string tradition entries are dropped.
 */
function toThinkerSummary(node) {
    if (!node || typeof node !== 'object') {
        return null;
    }
    const stringOr = (value, fallback) => (typeof value === 'string' ? value : fallback);
    const numberOrNull = (value) => (typeof value === 'number' ? value : null);

    const wikidataId = stringOr(node.wikidata_id, '');
    const displayName = stringOr(node.name, '').trim();
    if (displayName.length === 0) {
        return null;
    }
    let traditions = [];
    if (Array.isArray(node.traditions)) {
        traditions = node.traditions.filter((value) => typeof value === 'string' && value.trim().length > 0);
    }
    return {
        wikidata_id: wikidataId,
        name: displayName,
        birth_year: numberOrNull(node.birth_year),
        death_year: numberOrNull(node.death_year),
        traditions
    };
}
|
|
164
|
+
/**
 * De-duplicate thinkers across the three context lists and cap their total.
 *
 * Lists are processed in the order direct_authors, influences, teachers. A
 * thinker is identified by `wikidata_id`, or by lower-cased `name` when the
 * id is empty; a thinker already kept in an earlier position is skipped. Once
 * `maxNodes` distinct thinkers have been kept, nothing further is added.
 *
 * @param {{ direct_authors: object[], influences: object[], teachers: object[] }} context
 * @param {number} [maxNodes=10] Maximum thinkers kept across all three lists.
 * @returns {{ direct_authors: object[], influences: object[], teachers: object[] }}
 */
function capThinkerContext(context, maxNodes = 10) {
    const keptKeys = new Set();
    const keepUnseen = (thinkers) => {
        const kept = [];
        for (const thinker of thinkers) {
            const identity = thinker.wikidata_id || thinker.name.toLowerCase();
            if (!keptKeys.has(identity)) {
                if (keptKeys.size >= maxNodes) {
                    // Budget exhausted: stop scanning this list.
                    break;
                }
                keptKeys.add(identity);
                kept.push(thinker);
            }
        }
        return kept;
    };
    const direct_authors = keepUnseen(context.direct_authors);
    const influences = keepUnseen(context.influences);
    const teachers = keepUnseen(context.teachers);
    return { direct_authors, influences, teachers };
}
|
|
185
|
+
/**
 * Look up the thinkers connected to a set of claims: the authors of the
 * claims' sources, plus the thinkers those authors were influenced by and
 * studied under.
 *
 * Best-effort: any query failure is logged via console.debug and reported as
 * null rather than thrown.
 *
 * @param {{ query: Function }} store Store exposing `query(sql, params)`.
 * @param {Array} claimIds Claim ids to look up, bound as `$claim_ids`.
 * @returns {Promise<object|null>} De-duplicated context capped at 10 thinkers,
 *   or null when `claimIds` is empty/not an array, the query yields no first
 *   row, no usable thinker is found, or the query fails.
 */
async function fetchThinkerContext(store, claimIds) {
    if (!Array.isArray(claimIds) || claimIds.length === 0) {
        return null;
    }
    // Convert raw rows to summaries, dropping the ones toThinkerSummary rejects.
    const summarize = (entries) => (entries ?? [])
        .map((entry) => toThinkerSummary(entry))
        .filter((summary) => summary !== null);
    try {
        const response = await store.query(`LET $source_ids = array::distinct((SELECT VALUE source FROM claim WHERE id INSIDE $claim_ids));
LET $author_rows = (SELECT <-authored<-thinker AS thinkers FROM $source_ids FETCH thinkers);
LET $direct_authors = array::flatten($author_rows.thinkers);
LET $influence_rows = (SELECT ->influenced_by->thinker AS thinkers FROM $direct_authors.id FETCH thinkers);
LET $teacher_rows = (SELECT ->student_of->thinker AS thinkers FROM $direct_authors.id FETCH thinkers);
RETURN {
direct_authors: $direct_authors,
influences: array::flatten($influence_rows.thinkers),
teachers: array::flatten($teacher_rows.thinkers)
};`, { claim_ids: claimIds });
        const firstRow = Array.isArray(response) ? response[0] : null;
        if (!firstRow) {
            return null;
        }
        const direct_authors = summarize(firstRow.direct_authors);
        const influences = summarize(firstRow.influences);
        const teachers = summarize(firstRow.teachers);
        const totalThinkers = direct_authors.length + influences.length + teachers.length;
        if (totalThinkers === 0) {
            return null;
        }
        return capThinkerContext({ direct_authors, influences, teachers }, 10);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        const lower = message.toLowerCase();
        // Heuristic: the message names a thinker-graph table AND reads like a
        // missing/invalid schema error. Only the log line differs; both paths
        // return null.
        const namesThinkerGraph = ['authored', 'thinker', 'influenced_by', 'student_of']
            .some((token) => lower.includes(token));
        const readsLikeSchemaError = ['table', 'record', 'not found', 'does not exist', 'invalid']
            .some((token) => lower.includes(token));
        if (namesThinkerGraph && readsLikeSchemaError) {
            console.debug('[RETRIEVAL] Thinker enrichment unavailable (missing thinker graph tables); returning null');
        }
        else {
            console.debug('[RETRIEVAL] Thinker enrichment failed; returning null:', message);
        }
        return null;
    }
}
|
|
239
|
+
/**
 * Render a thinker summary as `Name (birth-death), tradition`.
 *
 * The year part is omitted only when both years are null; otherwise an
 * unknown year is shown as `?`. Only the first tradition is shown, if any.
 *
 * @param {{ name: string, birth_year: number|null, death_year: number|null, traditions: string[] }} thinker
 * @returns {string}
 */
function formatThinkerDisplayName(thinker) {
    const { name, birth_year: born, death_year: died, traditions } = thinker;
    let lifespan = '';
    if (born !== null || died !== null) {
        lifespan = ` (${born ?? '?'}-${died ?? '?'})`;
    }
    const school = traditions.length > 0 ? `, ${traditions[0]}` : '';
    return `${name}${lifespan}${school}`;
}
|
|
246
|
+
/**
 * Render thinker context as a plain-text block.
 *
 * Thinkers with a blank name are ignored and at most five influences are
 * listed. The block is two header lines, a blank line, then one line per
 * non-empty group (authors, influences, teachers), joined with newlines.
 *
 * @param {{ direct_authors: object[], influences: object[], teachers: object[] } | null | undefined} context
 * @returns {string} The formatted block, or '' when `context` is missing or
 *   no named thinker remains.
 */
export function formatThinkerContextBlock(context) {
    if (!context) {
        return '';
    }
    const isNamed = (thinker) => thinker.name.trim().length > 0;
    const renderNames = (thinkers) => thinkers
        .map((thinker) => formatThinkerDisplayName(thinker))
        .join(', ');

    const authors = context.direct_authors.filter(isNamed);
    const influences = context.influences.filter(isNamed).slice(0, 5);
    const teachers = context.teachers.filter(isNamed);
    const nothingToShow = authors.length === 0 && influences.length === 0 && teachers.length === 0;
    if (nothingToShow) {
        return '';
    }
    const lines = [
        'PHILOSOPHICAL LINEAGE CONTEXT (advisory — heuristic data from Wikidata)',
        '(sourced from Wikidata thinker graph — advisory context only)',
        ''
    ];
    if (authors.length > 0) {
        lines.push(`Authors of retrieved sources: ${renderNames(authors)}`);
    }
    if (influences.length > 0) {
        lines.push(`Influences in this lineage: ${renderNames(influences)}`);
    }
    if (teachers.length > 0) {
        lines.push(`Teachers in this lineage: ${renderNames(teachers)}`);
    }
    return lines.join('\n');
}
|
|
270
|
+
// ─── Main retrieval function ───────────────────────────────────────────────
|
|
271
|
+
/**
|
|
272
|
+
* Retrieve structured philosophical context from the argument graph.
|
|
273
|
+
*
|
|
274
|
+
* Assembles complete argumentative chains by:
|
|
275
|
+
* 1. Finding semantically similar claims via vector search
|
|
276
|
+
* 2. Traversing the graph for supporting/contradicting/dependent claims
|
|
277
|
+
* 3. Resolving arguments those claims participate in
|
|
278
|
+
*
|
|
279
|
+
* Never throws — returns empty result on any failure.
|
|
280
|
+
*/
|
|
281
|
+
export async function retrieveContext(userQuery, deps, options = {}) {
|
|
282
|
+
const { store, embedder, resolveOriginBucket } = deps;
|
|
283
|
+
const { topK = 5, domain, minConfidence = 0, maxHops, maxClaims, hybridMode = 'auto', enrichWithThinkerContext = false } = options;
|
|
284
|
+
const traversalMaxHops = Math.max(1, maxHops ?? (topK >= 10 ? 3 : topK <= 3 ? 1 : 2));
|
|
285
|
+
const traversalClaimCap = Math.max(topK, maxClaims ?? (topK >= 10 ? 120 : topK <= 3 ? 32 : 72));
|
|
286
|
+
try {
|
|
287
|
+
// ── Step 1: Embed the query ──────────────────────────────────
|
|
288
|
+
let queryEmbedding;
|
|
289
|
+
try {
|
|
290
|
+
console.log('[RETRIEVAL] Embedding query:', userQuery.substring(0, 50) + '...');
|
|
291
|
+
queryEmbedding = await embedder.embedQuery(userQuery);
|
|
292
|
+
console.log('[RETRIEVAL] ✓ Query embedding received:', queryEmbedding.length, 'dimensions');
|
|
293
|
+
}
|
|
294
|
+
catch (err) {
|
|
295
|
+
console.error('[RETRIEVAL] Embedding API failed:', err instanceof Error ? err.message : err);
|
|
296
|
+
return {
|
|
297
|
+
...EMPTY_RESULT,
|
|
298
|
+
degraded: true,
|
|
299
|
+
degraded_reason: 'embedding_unavailable'
|
|
300
|
+
};
|
|
301
|
+
}
|
|
302
|
+
// ── Step 2: Hybrid candidate generation (dense + lexical) ───
|
|
303
|
+
// Dense path: vector index (HNSW or MTREE) + KNN `<|k,ef|>`.
|
|
304
|
+
// Lexical path: exact-term matching for philosophy-specific phrases.
|
|
305
|
+
// Fusion: reciprocal-rank fusion + lightweight rerank.
|
|
306
|
+
const densePool = domain || minConfidence > 0 ? topK * 4 : topK * 3;
|
|
307
|
+
const lexicalTerms = hybridMode === 'dense_only' ? [] : extractLexicalTerms(userQuery);
|
|
308
|
+
const corpusLevelQuery = hybridMode === 'dense_only' ? false : detectCorpusLevelQuery(userQuery);
|
|
309
|
+
const queryDecomposition = {
|
|
310
|
+
focus_mode: corpusLevelQuery ? 'corpus_overview' : 'focused',
|
|
311
|
+
domain_filter: domain,
|
|
312
|
+
hybrid_mode: hybridMode,
|
|
313
|
+
corpus_level_query: corpusLevelQuery,
|
|
314
|
+
lexical_terms: lexicalTerms.slice(0, 16),
|
|
315
|
+
lexical_term_count: lexicalTerms.length
|
|
316
|
+
};
|
|
317
|
+
const lexicalPool = lexicalTerms.length === 0 ? 0 : corpusLevelQuery ? topK * 8 : topK * 4;
|
|
318
|
+
const acceptedClaimRows = await store.query(`SELECT count() AS count FROM claim WHERE review_state = 'accepted' GROUP ALL`).catch(() => []);
|
|
319
|
+
const trustedGraphActive = (acceptedClaimRows[0]?.count ?? 0) > 0;
|
|
320
|
+
const claimReviewFilter = trustedGraphActive
|
|
321
|
+
? `review_state = 'accepted'`
|
|
322
|
+
: `(review_state = NONE OR review_state IN ['candidate', 'needs_review', 'accepted'])`;
|
|
323
|
+
const relationReviewFilter = trustedGraphActive
|
|
324
|
+
? `review_state = 'accepted'`
|
|
325
|
+
: `(review_state = NONE OR review_state IN ['candidate', 'needs_review', 'accepted'])`;
|
|
326
|
+
// Stage 3.2: traversal beam only follows trusted edges.
|
|
327
|
+
const traversalRelationReviewFilter = `review_state = 'accepted'`;
|
|
328
|
+
const argumentClaimReviewFilter = trustedGraphActive
|
|
329
|
+
? `in.review_state = 'accepted'`
|
|
330
|
+
: `(in.review_state = NONE OR in.review_state IN ['candidate', 'needs_review', 'accepted'])`;
|
|
331
|
+
const postFilters = [];
|
|
332
|
+
if (domain)
|
|
333
|
+
postFilters.push('domain = $domain');
|
|
334
|
+
if (minConfidence > 0)
|
|
335
|
+
postFilters.push('confidence >= $minConfidence');
|
|
336
|
+
postFilters.push(claimReviewFilter);
|
|
337
|
+
postFilters.push(claimVerificationSqlFilter(trustedGraphActive));
|
|
338
|
+
const postWhere = postFilters.length > 0 ? `WHERE ${postFilters.join(' AND ')}` : '';
|
|
339
|
+
const sourcePassageIntegrityCache = new Map();
|
|
340
|
+
const sourceIdPart = (sourceId) => sourceId.includes(':') ? sourceId.split(':').slice(1).join(':') : sourceId;
|
|
341
|
+
const sourceHasPassageCoverage = (sourceId) => {
|
|
342
|
+
if (!sourceId)
|
|
343
|
+
return Promise.resolve(false);
|
|
344
|
+
const existing = sourcePassageIntegrityCache.get(sourceId);
|
|
345
|
+
if (existing)
|
|
346
|
+
return existing;
|
|
347
|
+
const pending = (async () => {
|
|
348
|
+
const sid = sourceIdPart(sourceId);
|
|
349
|
+
const passageRows = await store.query(`SELECT id FROM passage WHERE source = type::record('source', $sid) LIMIT 1`, { sid }).catch(() => []);
|
|
350
|
+
return passageRows.length > 0;
|
|
351
|
+
})();
|
|
352
|
+
sourcePassageIntegrityCache.set(sourceId, pending);
|
|
353
|
+
return pending;
|
|
354
|
+
};
|
|
355
|
+
let seedClaims;
|
|
356
|
+
let seedPoolCount = 0;
|
|
357
|
+
const rejectedClaimsByKey = new Map();
|
|
358
|
+
const rejectedRelations = [];
|
|
359
|
+
const addRejectedClaim = (candidate) => {
|
|
360
|
+
const key = `${candidate.id}|${candidate.reason_code}`;
|
|
361
|
+
if (rejectedClaimsByKey.has(key))
|
|
362
|
+
return;
|
|
363
|
+
rejectedClaimsByKey.set(key, candidate);
|
|
364
|
+
};
|
|
365
|
+
const rowProjection = `SELECT
|
|
366
|
+
id,
|
|
367
|
+
text,
|
|
368
|
+
claim_type,
|
|
369
|
+
domain,
|
|
370
|
+
confidence,
|
|
371
|
+
embedding,
|
|
372
|
+
position_in_source,
|
|
373
|
+
review_state,
|
|
374
|
+
section_context,
|
|
375
|
+
source.id AS source_id,
|
|
376
|
+
source.url AS source_url,
|
|
377
|
+
source.source_type AS source_source_type,
|
|
378
|
+
source.title AS source_title,
|
|
379
|
+
source.author AS source_author`;
|
|
380
|
+
const sharedParams = {
|
|
381
|
+
...(domain ? { domain } : {}),
|
|
382
|
+
...(minConfidence > 0 ? { minConfidence } : {})
|
|
383
|
+
};
|
|
384
|
+
let denseSeedClaims = [];
|
|
385
|
+
let lexicalSeedClaims = [];
|
|
386
|
+
try {
|
|
387
|
+
const knnOp = surrealKnnOperator(densePool);
|
|
388
|
+
console.log('[RETRIEVAL] Dense candidate generation topK=', topK, 'knn=', knnOp);
|
|
389
|
+
const denseSurql = (op) => `${rowProjection}
|
|
390
|
+
FROM (
|
|
391
|
+
SELECT *
|
|
392
|
+
FROM claim
|
|
393
|
+
WHERE embedding ${op} $query_embedding
|
|
394
|
+
)
|
|
395
|
+
${postWhere}
|
|
396
|
+
LIMIT ${densePool}`;
|
|
397
|
+
const denseParams = {
|
|
398
|
+
query_embedding: queryEmbedding,
|
|
399
|
+
...sharedParams
|
|
400
|
+
};
|
|
401
|
+
try {
|
|
402
|
+
denseSeedClaims = await store.query(denseSurql(knnOp), denseParams);
|
|
403
|
+
}
|
|
404
|
+
catch (knnErr) {
|
|
405
|
+
const legacyOp = `<|${Math.max(1, Math.trunc(densePool))}|>`;
|
|
406
|
+
console.warn('[RETRIEVAL] KNN two-arg operator failed; retrying legacy', legacyOp, knnErr instanceof Error ? knnErr.message : knnErr);
|
|
407
|
+
denseSeedClaims = await store.query(denseSurql(legacyOp), denseParams);
|
|
408
|
+
}
|
|
409
|
+
console.log('[RETRIEVAL] ✓ Dense candidates:', denseSeedClaims?.length || 0);
|
|
410
|
+
}
|
|
411
|
+
catch (dbErr) {
|
|
412
|
+
if (store.isDatabaseUnavailable(dbErr)) {
|
|
413
|
+
console.warn('[RETRIEVAL] Database unavailable during dense candidate retrieval');
|
|
414
|
+
return {
|
|
415
|
+
...EMPTY_RESULT,
|
|
416
|
+
degraded: true,
|
|
417
|
+
degraded_reason: 'database_unavailable'
|
|
418
|
+
};
|
|
419
|
+
}
|
|
420
|
+
throw dbErr;
|
|
421
|
+
}
|
|
422
|
+
if (hybridMode !== 'dense_only' && lexicalPool > 0) {
|
|
423
|
+
try {
|
|
424
|
+
const lexicalTermClauses = lexicalTerms.map((_term, idx) => `(text ~ $term_${idx} OR section_context ~ $term_${idx})`);
|
|
425
|
+
const lexicalWhere = `WHERE (${lexicalTermClauses.join(' OR ')}) AND ${postFilters.join(' AND ')}`;
|
|
426
|
+
const lexicalParams = { ...sharedParams };
|
|
427
|
+
for (const [idx, term] of lexicalTerms.entries()) {
|
|
428
|
+
lexicalParams[`term_${idx}`] = term;
|
|
429
|
+
}
|
|
430
|
+
lexicalSeedClaims = await store.query(`${rowProjection}
|
|
431
|
+
FROM claim
|
|
432
|
+
${lexicalWhere}
|
|
433
|
+
ORDER BY confidence DESC
|
|
434
|
+
LIMIT ${lexicalPool}`, lexicalParams);
|
|
435
|
+
if (isRetrievalBm25Enabled() && lexicalTerms.length > 0) {
|
|
436
|
+
const bm25Rows = await fetchBm25ClaimCandidates(store, {
|
|
437
|
+
terms: lexicalTerms,
|
|
438
|
+
limit: lexicalPool,
|
|
439
|
+
reviewFilter: postFilters.join(' AND ')
|
|
440
|
+
});
|
|
441
|
+
if (bm25Rows.length > 0) {
|
|
442
|
+
const existing = new Set(lexicalSeedClaims.map((r) => String(r.id)));
|
|
443
|
+
for (const row of bm25Rows) {
|
|
444
|
+
if (existing.has(String(row.id)))
|
|
445
|
+
continue;
|
|
446
|
+
lexicalSeedClaims.push({
|
|
447
|
+
id: row.id,
|
|
448
|
+
text: row.text,
|
|
449
|
+
claim_type: 'premise',
|
|
450
|
+
domain: 'ethics',
|
|
451
|
+
confidence: row.confidence ?? 0.5,
|
|
452
|
+
position_in_source: 0,
|
|
453
|
+
section_context: null,
|
|
454
|
+
source_id: '',
|
|
455
|
+
source_title: 'Unknown',
|
|
456
|
+
source_author: []
|
|
457
|
+
});
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
console.log('[RETRIEVAL] ✓ Lexical candidates:', lexicalSeedClaims?.length || 0, 'terms=', lexicalTerms.length);
|
|
462
|
+
}
|
|
463
|
+
catch (lexicalErr) {
|
|
464
|
+
console.warn('[RETRIEVAL] Lexical candidate generation failed (continuing with dense only):', lexicalErr instanceof Error ? lexicalErr.message : lexicalErr);
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
if (isRetrievalTaxonomyRoutingEnabled() && lexicalTerms.length > 0) {
|
|
468
|
+
try {
|
|
469
|
+
const taxonomyIds = await fetchTaxonomySeedClaimIds(store, {
|
|
470
|
+
terms: lexicalTerms,
|
|
471
|
+
limit: Math.max(4, topK),
|
|
472
|
+
reviewFilter: postFilters.join(' AND ')
|
|
473
|
+
});
|
|
474
|
+
if (taxonomyIds.length > 0) {
|
|
475
|
+
const taxonomyRows = await store.query(`${rowProjection}
|
|
476
|
+
FROM claim
|
|
477
|
+
WHERE id INSIDE $ids AND ${postFilters.join(' AND ')}
|
|
478
|
+
LIMIT $limit`, { ids: taxonomyIds, limit: taxonomyIds.length });
|
|
479
|
+
const existing = new Set([
|
|
480
|
+
...denseSeedClaims.map((r) => String(r.id)),
|
|
481
|
+
...lexicalSeedClaims.map((r) => String(r.id))
|
|
482
|
+
]);
|
|
483
|
+
for (const row of taxonomyRows ?? []) {
|
|
484
|
+
if (existing.has(String(row.id)))
|
|
485
|
+
continue;
|
|
486
|
+
denseSeedClaims.push(row);
|
|
487
|
+
existing.add(String(row.id));
|
|
488
|
+
}
|
|
489
|
+
console.log('[RETRIEVAL] ✓ Taxonomy-routed seeds:', taxonomyRows?.length ?? 0);
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
catch (taxonomyErr) {
|
|
493
|
+
console.warn('[RETRIEVAL] Taxonomy routing failed (continuing):', taxonomyErr instanceof Error ? taxonomyErr.message : taxonomyErr);
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
let passageGroundedClaimIds = [];
|
|
497
|
+
if (isRetrievalPassageGroundedEnabled()) {
|
|
498
|
+
try {
|
|
499
|
+
passageGroundedClaimIds = await fetchPassageGroundedClaimIds(store, {
|
|
500
|
+
queryEmbedding,
|
|
501
|
+
limit: Math.max(4, topK),
|
|
502
|
+
reviewFilter: postFilters.join(' AND ')
|
|
503
|
+
});
|
|
504
|
+
if (passageGroundedClaimIds.length > 0) {
|
|
505
|
+
const passageRows = await store.query(`${rowProjection}
|
|
506
|
+
FROM claim
|
|
507
|
+
WHERE id INSIDE $ids AND ${postFilters.join(' AND ')}
|
|
508
|
+
LIMIT $limit`, { ids: passageGroundedClaimIds, limit: passageGroundedClaimIds.length });
|
|
509
|
+
const existing = new Set([
|
|
510
|
+
...denseSeedClaims.map((r) => String(r.id)),
|
|
511
|
+
...lexicalSeedClaims.map((r) => String(r.id))
|
|
512
|
+
]);
|
|
513
|
+
for (const row of passageRows ?? []) {
|
|
514
|
+
if (existing.has(String(row.id)))
|
|
515
|
+
continue;
|
|
516
|
+
denseSeedClaims.push(row);
|
|
517
|
+
existing.add(String(row.id));
|
|
518
|
+
}
|
|
519
|
+
console.log('[RETRIEVAL] ✓ Passage-grounded seeds:', passageRows?.length ?? 0);
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
catch (passageErr) {
|
|
523
|
+
console.warn('[RETRIEVAL] Passage-grounded retrieval failed (continuing):', passageErr instanceof Error ? passageErr.message : passageErr);
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
if ((!denseSeedClaims || denseSeedClaims.length === 0) && lexicalSeedClaims.length === 0) {
|
|
527
|
+
console.log('[RETRIEVAL] No candidates found in dense or lexical retrieval');
|
|
528
|
+
return EMPTY_RESULT;
|
|
529
|
+
}
|
|
530
|
+
if (hybridMode === 'dense_only') {
|
|
531
|
+
seedClaims = denseSeedClaims;
|
|
532
|
+
seedPoolCount = denseSeedClaims.length;
|
|
533
|
+
}
|
|
534
|
+
else {
|
|
535
|
+
const fusion = fuseHybridCandidates({
|
|
536
|
+
dense: denseSeedClaims,
|
|
537
|
+
lexical: lexicalSeedClaims,
|
|
538
|
+
lexicalTerms,
|
|
539
|
+
poolSize: Math.max(topK * 4, topK),
|
|
540
|
+
corpusLevelQuery
|
|
541
|
+
});
|
|
542
|
+
seedClaims = fusion.ranked;
|
|
543
|
+
seedPoolCount = fusion.fusedCount;
|
|
544
|
+
}
|
|
545
|
+
if (!seedClaims || seedClaims.length === 0) {
|
|
546
|
+
console.log('[RETRIEVAL] Hybrid fusion returned no candidates');
|
|
547
|
+
return EMPTY_RESULT;
|
|
548
|
+
}
|
|
549
|
+
console.log(`[RETRIEVAL] Candidate generation mode=${hybridMode} dense=${denseSeedClaims.length} lexical=${lexicalSeedClaims.length} fused=${seedPoolCount} corpusLevel=${corpusLevelQuery}`);
|
|
550
|
+
const seedPool = [...seedClaims];
|
|
551
|
+
const vettedSeedPool = [];
|
|
552
|
+
for (const seed of seedPool) {
|
|
553
|
+
const sourceOk = await sourceHasPassageCoverage(seed.source_id);
|
|
554
|
+
if (!sourceOk) {
|
|
555
|
+
addRejectedClaim({
|
|
556
|
+
id: typeof seed.id === 'object' ? String(seed.id) : seed.id,
|
|
557
|
+
text: seed.text,
|
|
558
|
+
source_title: seed.source_title ?? 'Unknown',
|
|
559
|
+
confidence: seed.confidence,
|
|
560
|
+
reason_code: 'source_integrity_gate',
|
|
561
|
+
considered_in: 'seed_pool'
|
|
562
|
+
});
|
|
563
|
+
continue;
|
|
564
|
+
}
|
|
565
|
+
vettedSeedPool.push(seed);
|
|
566
|
+
}
|
|
567
|
+
if (vettedSeedPool.length === 0) {
|
|
568
|
+
return {
|
|
569
|
+
...EMPTY_RESULT,
|
|
570
|
+
degraded: true,
|
|
571
|
+
degraded_reason: 'source_integrity_gate'
|
|
572
|
+
};
|
|
573
|
+
}
|
|
574
|
+
const domainsInPool = new Set();
|
|
575
|
+
for (const s of vettedSeedPool) {
|
|
576
|
+
domainsInPool.add(String(s.domain ?? 'unknown'));
|
|
577
|
+
}
|
|
578
|
+
const seedSet = constructSeedSet({
|
|
579
|
+
candidates: vettedSeedPool,
|
|
580
|
+
topK,
|
|
581
|
+
queryEmbedding,
|
|
582
|
+
...(isRetrievalKgBalanceEnabled()
|
|
583
|
+
? {
|
|
584
|
+
kgBalance: {
|
|
585
|
+
idealOrigin: IDEAL_RETRIEVAL_ORIGIN_FRACTIONS,
|
|
586
|
+
domainsInPool,
|
|
587
|
+
getOrigin: (c) => resolveOriginBucket(c.source_url ?? null, c.source_source_type ?? null),
|
|
588
|
+
getDomainKey: (c) => String(c.domain ?? 'unknown')
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
: {})
|
|
592
|
+
});
|
|
593
|
+
seedClaims = seedSet.seeds;
|
|
594
|
+
console.log(`[RETRIEVAL] Found ${seedClaims.length} seed claims`);
|
|
595
|
+
const seedClaimIds = seedClaims.map((seed) => typeof seed.id === 'object' ? String(seed.id) : seed.id);
|
|
596
|
+
const seedTrace = seedClaims.map((seed) => ({
|
|
597
|
+
id: typeof seed.id === 'object' ? String(seed.id) : seed.id,
|
|
598
|
+
claim_type: seed.claim_type,
|
|
599
|
+
domain: seed.domain,
|
|
600
|
+
source_title: seed.source_title ?? 'Unknown',
|
|
601
|
+
confidence: seed.confidence ?? 0
|
|
602
|
+
}));
|
|
603
|
+
const selectedSeedIds = new Set(seedClaimIds);
|
|
604
|
+
for (const candidate of vettedSeedPool) {
|
|
605
|
+
const candidateId = typeof candidate.id === 'object' ? String(candidate.id) : candidate.id;
|
|
606
|
+
if (selectedSeedIds.has(candidateId))
|
|
607
|
+
continue;
|
|
608
|
+
addRejectedClaim({
|
|
609
|
+
id: candidateId,
|
|
610
|
+
text: candidate.text,
|
|
611
|
+
source_title: candidate.source_title ?? 'Unknown',
|
|
612
|
+
confidence: candidate.confidence,
|
|
613
|
+
reason_code: 'seed_pool_pruned',
|
|
614
|
+
considered_in: 'seed_pool'
|
|
615
|
+
});
|
|
616
|
+
}
|
|
617
|
+
const allGraphClaims = new Map();
|
|
618
|
+
const argumentIds = new Set();
|
|
619
|
+
// Add seed claims to the map first
|
|
620
|
+
for (const seed of seedClaims) {
|
|
621
|
+
const id = typeof seed.id === 'object' ? String(seed.id) : seed.id;
|
|
622
|
+
allGraphClaims.set(id, {
|
|
623
|
+
id,
|
|
624
|
+
text: seed.text,
|
|
625
|
+
claim_type: seed.claim_type,
|
|
626
|
+
domain: seed.domain,
|
|
627
|
+
source_title: seed.source_title ?? 'Unknown',
|
|
628
|
+
source_author: seed.source_author ?? [],
|
|
629
|
+
confidence: seed.confidence,
|
|
630
|
+
position_in_source: seed.position_in_source ?? 0
|
|
631
|
+
});
|
|
632
|
+
}
|
|
633
|
+
/**
 * Normalise the `source` value attached to a claim row into a small summary.
 *
 * @param {object|null|undefined} claim - Claim row. `claim.source` is used only
 *   when it is an object that has a `title` key; anything else (missing,
 *   a bare link value, ...) yields the placeholder summary.
 * @returns {{id?: *, title: *, author: *}} `{id, title, author}` for a usable
 *   source object, otherwise `{ title: 'Unknown', author: [] }` (no `id`).
 */
const resolveSource = (claim) => {
    // Tolerate a nullish claim instead of throwing on the property access.
    const source = claim?.source;
    if (source && typeof source === 'object' && 'title' in source) {
        return {
            id: source.id,
            // The `title` key can be present but null/undefined; use the same
            // placeholder label as the no-source branch below so callers never
            // receive an empty title.
            title: source.title ?? 'Unknown',
            author: source.author ?? []
        };
    }
    return { title: 'Unknown', author: [] };
};
|
|
643
|
+
/**
 * Coerce a record identifier into its string form.
 *
 * @param {*} idValue - Identifier as returned by the store (string, object, ...).
 * @returns {string|null} `null` for any falsy input, otherwise the string form.
 */
const toClaimId = (idValue) => {
    // String() returns an existing string unchanged, so one conversion covers
    // both the "already a string" and the "needs stringifying" cases.
    return idValue ? String(idValue) : null;
};
|
|
650
|
+
const claimProjection = `{id, text, claim_type, domain, confidence, position_in_source, review_state, verification_state, source.{id, title, author}}`;
|
|
651
|
+
/**
 * Decide whether a claim reached during traversal may be considered at all.
 *
 * Rules, in order:
 *  - `review_state` of 'rejected' or 'merged' fails;
 *  - `verification_state` of 'flagged' fails;
 *  - when `trustedGraphActive` is set, only `review_state === 'accepted'` passes;
 *  - when `trustedGraphActive` is set and the RETRIEVAL_REQUIRE_VERIFIED
 *    environment variable is '1', 'true' or 'yes' (trimmed, case-insensitive),
 *    only `verification_state === 'validated'` passes.
 *
 * @param {object} claim - Claim row carrying `review_state` / `verification_state`.
 * @returns {boolean} true when the claim clears every gate.
 */
const passesTraversalClaimGate = (claim) => {
    const reviewState = claim.review_state;
    const verificationState = claim.verification_state;
    const excludedByReview = reviewState === 'rejected' || reviewState === 'merged';
    if (excludedByReview || verificationState === 'flagged') {
        return false;
    }
    if (trustedGraphActive && reviewState !== 'accepted') {
        return false;
    }
    // The environment is read on every call, after the cheaper state checks.
    const envFlag = (process.env.RETRIEVAL_REQUIRE_VERIFIED ?? '').trim().toLowerCase();
    const verificationRequired = trustedGraphActive && ['1', 'true', 'yes'].includes(envFlag);
    if (verificationRequired && verificationState !== 'validated') {
        return false;
    }
    return true;
};
|
|
666
|
+
const maxNewClaimsPerHop = topK >= 10 ? 48 : topK <= 3 ? 12 : 28;
|
|
667
|
+
const beamWidthPerHop = topK >= 10 ? 44 : topK <= 3 ? 10 : 24;
|
|
668
|
+
const beamQueryLimitPerTable = topK >= 10 ? 260 : topK <= 3 ? 64 : 140;
|
|
669
|
+
const hopDecayFactor = traversalMaxHops <= 1 ? 1 : 0.78;
|
|
670
|
+
const traversalBaseConfidence = minConfidence > 0 ? Math.max(0.3, Math.min(0.8, minConfidence)) : 0.38;
|
|
671
|
+
const traversalConfidenceThresholds = Array.from({ length: traversalMaxHops }, (_, idx) => computeHopConfidenceThreshold(traversalBaseConfidence, idx + 1));
|
|
672
|
+
let frontier = new Set(seedClaimIds);
|
|
673
|
+
const nativeNeighborIds = await fetchNativeGraphNeighbors(store, {
|
|
674
|
+
seedIds: seedClaimIds,
|
|
675
|
+
limit: Math.max(16, topK * 4)
|
|
676
|
+
});
|
|
677
|
+
for (const neighborId of nativeNeighborIds) {
|
|
678
|
+
frontier.add(neighborId);
|
|
679
|
+
}
|
|
680
|
+
for (let hop = 1; hop <= traversalMaxHops; hop++) {
|
|
681
|
+
if (frontier.size === 0 || allGraphClaims.size >= traversalClaimCap)
|
|
682
|
+
break;
|
|
683
|
+
const frontierIds = Array.from(frontier);
|
|
684
|
+
const frontierSet = new Set(frontierIds);
|
|
685
|
+
const hopConfidenceThreshold = traversalConfidenceThresholds[hop - 1] ?? traversalBaseConfidence;
|
|
686
|
+
const hopDecay = Math.pow(hopDecayFactor, hop - 1);
|
|
687
|
+
const hopCandidates = new Map();
|
|
688
|
+
for (const spec of RELATION_TRAVERSAL_BEAM_SPECS) {
|
|
689
|
+
try {
|
|
690
|
+
const rows = await store.query(`SELECT
|
|
691
|
+
in,
|
|
692
|
+
out,
|
|
693
|
+
in.${claimProjection} AS in_claim,
|
|
694
|
+
out.${claimProjection} AS out_claim,
|
|
695
|
+
strength,
|
|
696
|
+
note
|
|
697
|
+
FROM ${spec.table}
|
|
698
|
+
WHERE (in INSIDE $frontier_ids OR out INSIDE $frontier_ids) AND ${traversalRelationReviewFilter}
|
|
699
|
+
LIMIT ${beamQueryLimitPerTable}`, { frontier_ids: frontierIds });
|
|
700
|
+
if (!rows || !Array.isArray(rows))
|
|
701
|
+
continue;
|
|
702
|
+
const registerBeamCandidate = (params) => {
|
|
703
|
+
const { anchorId, neighbor, strength, edgePrior } = params;
|
|
704
|
+
if (!neighbor)
|
|
705
|
+
return;
|
|
706
|
+
const neighborId = toClaimId(neighbor.id);
|
|
707
|
+
if (!neighborId)
|
|
708
|
+
return;
|
|
709
|
+
const source = resolveSource(neighbor);
|
|
710
|
+
if (!passesTraversalClaimGate(neighbor))
|
|
711
|
+
return;
|
|
712
|
+
if ((neighbor.confidence ?? 0) < hopConfidenceThreshold) {
|
|
713
|
+
addRejectedClaim({
|
|
714
|
+
id: neighborId,
|
|
715
|
+
text: neighbor.text,
|
|
716
|
+
source_title: source.title,
|
|
717
|
+
confidence: neighbor.confidence,
|
|
718
|
+
reason_code: 'confidence_gate',
|
|
719
|
+
considered_in: 'traversal',
|
|
720
|
+
anchor_claim_id: anchorId
|
|
721
|
+
});
|
|
722
|
+
return;
|
|
723
|
+
}
|
|
724
|
+
if (allGraphClaims.has(neighborId)) {
|
|
725
|
+
addRejectedClaim({
|
|
726
|
+
id: neighborId,
|
|
727
|
+
text: neighbor.text,
|
|
728
|
+
source_title: source.title,
|
|
729
|
+
confidence: neighbor.confidence,
|
|
730
|
+
reason_code: 'duplicate_traversal',
|
|
731
|
+
considered_in: 'traversal',
|
|
732
|
+
anchor_claim_id: anchorId
|
|
733
|
+
});
|
|
734
|
+
return;
|
|
735
|
+
}
|
|
736
|
+
const anchor = allGraphClaims.get(anchorId);
|
|
737
|
+
const domainWeight = computeDomainExpansionWeight({
|
|
738
|
+
targetDomain: domain,
|
|
739
|
+
anchorDomain: anchor?.domain,
|
|
740
|
+
neighborDomain: neighbor.domain
|
|
741
|
+
});
|
|
742
|
+
const strengthWeight = parseRelationStrengthWeight(strength);
|
|
743
|
+
const anchorWeight = 0.7 + 0.3 * (anchor?.confidence ?? 0.6);
|
|
744
|
+
const score = Math.max(0.01, neighbor.confidence ?? 0.5) *
|
|
745
|
+
edgePrior *
|
|
746
|
+
hopDecay *
|
|
747
|
+
domainWeight *
|
|
748
|
+
strengthWeight *
|
|
749
|
+
anchorWeight;
|
|
750
|
+
const existing = hopCandidates.get(neighborId);
|
|
751
|
+
if (!existing || score > existing.score) {
|
|
752
|
+
hopCandidates.set(neighborId, {
|
|
753
|
+
claim: neighbor,
|
|
754
|
+
anchorId,
|
|
755
|
+
score
|
|
756
|
+
});
|
|
757
|
+
}
|
|
758
|
+
};
|
|
759
|
+
for (const row of rows) {
|
|
760
|
+
const inId = toClaimId(row.in);
|
|
761
|
+
const outId = toClaimId(row.out);
|
|
762
|
+
if (!inId || !outId)
|
|
763
|
+
continue;
|
|
764
|
+
if (frontierSet.has(inId)) {
|
|
765
|
+
registerBeamCandidate({
|
|
766
|
+
anchorId: inId,
|
|
767
|
+
neighbor: row.out_claim,
|
|
768
|
+
strength: row.strength,
|
|
769
|
+
edgePrior: spec.edgePrior
|
|
770
|
+
});
|
|
771
|
+
}
|
|
772
|
+
if (frontierSet.has(outId)) {
|
|
773
|
+
registerBeamCandidate({
|
|
774
|
+
anchorId: outId,
|
|
775
|
+
neighbor: row.in_claim,
|
|
776
|
+
strength: row.strength,
|
|
777
|
+
edgePrior: spec.edgePrior
|
|
778
|
+
});
|
|
779
|
+
}
|
|
780
|
+
}
|
|
781
|
+
}
|
|
782
|
+
catch (traversalErr) {
|
|
783
|
+
console.warn(`[RETRIEVAL] Beam traversal failed for ${spec.table}:`, traversalErr instanceof Error ? traversalErr.message : traversalErr);
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
const candidates = Array.from(hopCandidates.values())
|
|
787
|
+
.sort((a, b) => b.score - a.score)
|
|
788
|
+
.slice(0, beamWidthPerHop);
|
|
789
|
+
const selected = [];
|
|
790
|
+
const seenSources = new Set();
|
|
791
|
+
const hopBudget = Math.min(maxNewClaimsPerHop, Math.max(traversalClaimCap - allGraphClaims.size, 0));
|
|
792
|
+
for (const candidate of candidates) {
|
|
793
|
+
if (selected.length >= hopBudget)
|
|
794
|
+
break;
|
|
795
|
+
const source = resolveSource(candidate.claim);
|
|
796
|
+
if (!(await sourceHasPassageCoverage(source.id))) {
|
|
797
|
+
addRejectedClaim({
|
|
798
|
+
id: typeof candidate.claim.id === 'object'
|
|
799
|
+
? String(candidate.claim.id)
|
|
800
|
+
: candidate.claim.id,
|
|
801
|
+
text: candidate.claim.text,
|
|
802
|
+
source_title: source.title,
|
|
803
|
+
confidence: candidate.claim.confidence,
|
|
804
|
+
reason_code: 'source_integrity_gate',
|
|
805
|
+
considered_in: 'traversal',
|
|
806
|
+
anchor_claim_id: candidate.anchorId
|
|
807
|
+
});
|
|
808
|
+
continue;
|
|
809
|
+
}
|
|
810
|
+
const sourceTitle = source.title;
|
|
811
|
+
if (seenSources.has(sourceTitle))
|
|
812
|
+
continue;
|
|
813
|
+
seenSources.add(sourceTitle);
|
|
814
|
+
selected.push(candidate);
|
|
815
|
+
}
|
|
816
|
+
for (const candidate of candidates) {
|
|
817
|
+
if (selected.length >= hopBudget)
|
|
818
|
+
break;
|
|
819
|
+
if (selected.includes(candidate))
|
|
820
|
+
continue;
|
|
821
|
+
const source = resolveSource(candidate.claim);
|
|
822
|
+
if (!(await sourceHasPassageCoverage(source.id))) {
|
|
823
|
+
addRejectedClaim({
|
|
824
|
+
id: typeof candidate.claim.id === 'object'
|
|
825
|
+
? String(candidate.claim.id)
|
|
826
|
+
: candidate.claim.id,
|
|
827
|
+
text: candidate.claim.text,
|
|
828
|
+
source_title: source.title,
|
|
829
|
+
confidence: candidate.claim.confidence,
|
|
830
|
+
reason_code: 'source_integrity_gate',
|
|
831
|
+
considered_in: 'traversal',
|
|
832
|
+
anchor_claim_id: candidate.anchorId
|
|
833
|
+
});
|
|
834
|
+
continue;
|
|
835
|
+
}
|
|
836
|
+
selected.push(candidate);
|
|
837
|
+
}
|
|
838
|
+
const nextFrontier = new Set();
|
|
839
|
+
for (const { claim } of selected) {
|
|
840
|
+
const cId = typeof claim.id === 'object' ? String(claim.id) : claim.id;
|
|
841
|
+
const source = resolveSource(claim);
|
|
842
|
+
allGraphClaims.set(cId, {
|
|
843
|
+
id: cId,
|
|
844
|
+
text: claim.text,
|
|
845
|
+
claim_type: claim.claim_type,
|
|
846
|
+
domain: claim.domain,
|
|
847
|
+
source_title: source.title,
|
|
848
|
+
source_author: source.author,
|
|
849
|
+
confidence: claim.confidence ?? 0.5,
|
|
850
|
+
position_in_source: claim.position_in_source ?? 0
|
|
851
|
+
});
|
|
852
|
+
nextFrontier.add(cId);
|
|
853
|
+
}
|
|
854
|
+
const selectedClaimIds = Array.from(nextFrontier);
|
|
855
|
+
if (selectedClaimIds.length > 0) {
|
|
856
|
+
try {
|
|
857
|
+
const argRefs = await store.query(`SELECT out.id AS arg_id FROM part_of WHERE in INSIDE $claim_ids LIMIT 200`, { claim_ids: selectedClaimIds });
|
|
858
|
+
if (argRefs && Array.isArray(argRefs)) {
|
|
859
|
+
for (const row of argRefs) {
|
|
860
|
+
const aId = toClaimId(row.arg_id);
|
|
861
|
+
if (aId)
|
|
862
|
+
argumentIds.add(aId);
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
catch (argRefErr) {
|
|
867
|
+
console.warn('[RETRIEVAL] Beam traversal argument lookup failed:', argRefErr instanceof Error ? argRefErr.message : argRefErr);
|
|
868
|
+
}
|
|
869
|
+
}
|
|
870
|
+
console.log(`[RETRIEVAL] hop ${hop}/${traversalMaxHops}: candidates=${candidates.length} threshold=${hopConfidenceThreshold.toFixed(2)} added=${selected.length} frontier=${nextFrontier.size}`);
|
|
871
|
+
frontier = nextFrontier;
|
|
872
|
+
}
|
|
873
|
+
// Add argument-neighborhood claims so traversal can surface complete
|
|
874
|
+
// argument structures (conclusions + key premises), not only local edges.
|
|
875
|
+
if (argumentIds.size > 0 && allGraphClaims.size < traversalClaimCap) {
|
|
876
|
+
try {
|
|
877
|
+
const memberRows = await store.query(`SELECT
|
|
878
|
+
in.{id, text, claim_type, domain, confidence, position_in_source, review_state, source.{id, title, author}} AS in,
|
|
879
|
+
role
|
|
880
|
+
FROM part_of
|
|
881
|
+
WHERE out INSIDE $arg_ids AND ${argumentClaimReviewFilter}`, { arg_ids: Array.from(argumentIds) });
|
|
882
|
+
if (memberRows && Array.isArray(memberRows)) {
|
|
883
|
+
/**
 * Sort key for a claim's role inside an argument: lower sorts first.
 *
 * @param {*} role - Role value from a `part_of` row.
 * @returns {number} 0 for 'conclusion', 1 for 'key_premise',
 *   2 for 'supporting_premise', 3 for anything else.
 */
const roleRank = (role) => {
    switch (role) {
        case 'conclusion':
            return 0;
        case 'key_premise':
            return 1;
        case 'supporting_premise':
            return 2;
        default:
            return 3;
    }
};
|
|
892
|
+
const sorted = [...memberRows].sort((a, b) => {
|
|
893
|
+
const rankDelta = roleRank(a.role) - roleRank(b.role);
|
|
894
|
+
if (rankDelta !== 0)
|
|
895
|
+
return rankDelta;
|
|
896
|
+
return (b.in?.confidence ?? 0) - (a.in?.confidence ?? 0);
|
|
897
|
+
});
|
|
898
|
+
for (const row of sorted) {
|
|
899
|
+
if (allGraphClaims.size >= traversalClaimCap)
|
|
900
|
+
break;
|
|
901
|
+
if (!row.in)
|
|
902
|
+
continue;
|
|
903
|
+
const claim = row.in;
|
|
904
|
+
if (claim.review_state === 'rejected' || claim.review_state === 'merged')
|
|
905
|
+
continue;
|
|
906
|
+
if (trustedGraphActive && claim.review_state !== 'accepted')
|
|
907
|
+
continue;
|
|
908
|
+
const cId = typeof claim.id === 'object' ? String(claim.id) : claim.id;
|
|
909
|
+
if (allGraphClaims.has(cId))
|
|
910
|
+
continue;
|
|
911
|
+
const source = claim.source && typeof claim.source === 'object' && 'title' in claim.source
|
|
912
|
+
? {
|
|
913
|
+
id: claim.source.id,
|
|
914
|
+
title: claim.source.title,
|
|
915
|
+
author: claim.source.author ?? []
|
|
916
|
+
}
|
|
917
|
+
: { title: 'Unknown', author: [] };
|
|
918
|
+
if (!(await sourceHasPassageCoverage(source.id))) {
|
|
919
|
+
addRejectedClaim({
|
|
920
|
+
id: cId,
|
|
921
|
+
text: claim.text,
|
|
922
|
+
source_title: source.title,
|
|
923
|
+
confidence: claim.confidence,
|
|
924
|
+
reason_code: 'source_integrity_gate',
|
|
925
|
+
considered_in: 'traversal'
|
|
926
|
+
});
|
|
927
|
+
continue;
|
|
928
|
+
}
|
|
929
|
+
allGraphClaims.set(cId, {
|
|
930
|
+
id: cId,
|
|
931
|
+
text: claim.text,
|
|
932
|
+
claim_type: claim.claim_type,
|
|
933
|
+
domain: claim.domain,
|
|
934
|
+
source_title: source.title,
|
|
935
|
+
source_author: source.author,
|
|
936
|
+
confidence: claim.confidence ?? 0.5,
|
|
937
|
+
position_in_source: claim.position_in_source ?? 0
|
|
938
|
+
});
|
|
939
|
+
}
|
|
940
|
+
}
|
|
941
|
+
}
|
|
942
|
+
catch (argNeighborhoodErr) {
|
|
943
|
+
console.warn('[RETRIEVAL] Failed to expand argument-neighborhood claims:', argNeighborhoodErr instanceof Error ? argNeighborhoodErr.message : argNeighborhoodErr);
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
const contradictionNeighborCache = new Map();
|
|
947
|
+
const replyNeighborCache = new Map();
|
|
948
|
+
const majorThesisLimit = Math.max(1, Math.min(3, Math.ceil(topK / 4)));
|
|
949
|
+
const majorThesisIds = selectMajorThesisIds({
|
|
950
|
+
claims: Array.from(allGraphClaims.values()),
|
|
951
|
+
seedClaimIds,
|
|
952
|
+
limit: majorThesisLimit
|
|
953
|
+
});
|
|
954
|
+
let closureClaimsAdded = 0;
|
|
955
|
+
let closureObjectionsAdded = 0;
|
|
956
|
+
let closureRepliesAdded = 0;
|
|
957
|
+
let closureCapLimitedUnits = 0;
|
|
958
|
+
const closureUnits = [];
|
|
959
|
+
/**
 * Review-state gate applied to objection/reply candidates during closure.
 *
 * A claim fails when its `review_state` is 'rejected' or 'merged', or when
 * `trustedGraphActive` is set and the state is anything other than 'accepted'.
 *
 * NOTE(review): unlike passesTraversalClaimGate this does not inspect
 * `verification_state` — confirm that difference is intentional.
 *
 * @param {object} claim - Candidate claim row.
 * @returns {boolean} true when the claim may be used for closure.
 */
const passesClosureReviewGate = (claim) => {
    const reviewState = claim.review_state;
    const excluded = reviewState === 'rejected' || reviewState === 'merged';
    const notTrusted = trustedGraphActive && reviewState !== 'accepted';
    return !(excluded || notTrusted);
};
|
|
966
|
+
/**
 * Check whether a closure candidate's source passes `sourceHasPassageCoverage`.
 * When it does not, the claim is recorded through `addRejectedClaim` with
 * reason 'source_integrity_gate'.
 *
 * @param {object} claim - Candidate claim row.
 * @param {*} anchorClaimId - Id of the claim the candidate was reached from;
 *   stored on the rejection record.
 * @returns {Promise<boolean>} true when the source is covered, false otherwise.
 */
const hasClosurePassageCoverage = async (claim, anchorClaimId) => {
    const { id: sourceId, title: sourceTitle } = resolveSource(claim);
    if (await sourceHasPassageCoverage(sourceId)) {
        return true;
    }
    const rawClaimId = claim.id;
    addRejectedClaim({
        id: typeof rawClaimId === 'object' ? String(rawClaimId) : rawClaimId,
        text: claim.text,
        source_title: sourceTitle,
        confidence: claim.confidence,
        reason_code: 'source_integrity_gate',
        considered_in: 'traversal',
        anchor_claim_id: anchorClaimId
    });
    return false;
};
|
|
982
|
+
/**
 * Try to add a closure claim (objection or reply) to `allGraphClaims`.
 *
 * @param {object} claim - Claim row to attach.
 * @param {*} anchorClaimId - Id of the claim this one was reached from;
 *   forwarded to the passage-coverage check.
 * @returns {Promise<'present'|'blocked_cap'|'blocked_source'|'added'>}
 *   'present' when the id is already in the map, 'blocked_cap' when the map
 *   has reached `traversalClaimCap`, 'blocked_source' when the coverage check
 *   fails, 'added' once the claim has been stored.
 */
const attachClaimForClosure = async (claim, anchorClaimId) => {
    const key = typeof claim.id === 'object' ? String(claim.id) : claim.id;
    if (allGraphClaims.has(key)) {
        return 'present';
    }
    // The cap is checked before the coverage lookup, so a full map never
    // triggers a coverage query or a rejection record.
    if (allGraphClaims.size >= traversalClaimCap) {
        return 'blocked_cap';
    }
    const covered = await hasClosurePassageCoverage(claim, anchorClaimId);
    if (!covered) {
        return 'blocked_source';
    }
    const { title, author } = resolveSource(claim);
    allGraphClaims.set(key, {
        id: key,
        text: claim.text,
        claim_type: claim.claim_type,
        domain: claim.domain,
        source_title: title,
        source_author: author,
        confidence: claim.confidence ?? 0.5,
        position_in_source: claim.position_in_source ?? 0
    });
    return 'added';
};
|
|
1003
|
+
/**
 * Fetch the claims linked to `claimId` through one relation table.
 *
 * The in-flight promise (not the resolved value) is stored in `cache`, so
 * repeated or overlapping calls for the same claim share a single query.
 * A failed lookup is logged and resolves to `[]`; that empty result stays
 * cached for the lifetime of `cache`.
 *
 * @param {string} table - Relation table name, interpolated into the query.
 * @param {string} claimId - Claim whose neighbours are wanted.
 * @param {Map<string, Promise<object[]>>} cache - Per-table memo keyed by claim id.
 * @returns {Promise<object[]>} Distinct neighbour claims, highest confidence
 *   first. Rows missing either endpoint are ignored, and the anchor claim
 *   itself is never returned.
 */
const fetchRelationNeighbors = async (table, claimId, cache) => {
    const cached = cache.get(claimId);
    if (cached) {
        return cached;
    }
    const pending = (async () => {
        try {
            const rows = await store.query(`SELECT
in.${claimProjection} AS in_claim,
out.${claimProjection} AS out_claim
FROM ${table}
WHERE (in = $claim_id OR out = $claim_id) AND ${relationReviewFilter}
LIMIT 24`, { claim_id: claimId });
            if (!rows || !Array.isArray(rows)) {
                return [];
            }
            const byId = new Map();
            for (const row of rows) {
                const inClaim = row.in_claim;
                const outClaim = row.out_claim;
                if (!inClaim || !outClaim) {
                    continue;
                }
                const inId = toClaimId(inClaim.id);
                const outId = toClaimId(outClaim.id);
                const neighbor = inId === claimId ? outClaim : outId === claimId ? inClaim : undefined;
                if (!neighbor) {
                    continue;
                }
                const neighborId = toClaimId(neighbor.id);
                // A self-referential edge (in == out == claimId) would hand the
                // anchor back as its own neighbour, letting a thesis be picked as
                // its own objection or reply. Skip it.
                if (!neighborId || neighborId === claimId) {
                    continue;
                }
                // Keep one entry per neighbour: the copy with the highest confidence.
                const existing = byId.get(neighborId);
                if (!existing || (neighbor.confidence ?? 0) > (existing.confidence ?? 0)) {
                    byId.set(neighborId, neighbor);
                }
            }
            return Array.from(byId.values()).sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0));
        }
        catch (err) {
            // Best-effort lookup: report the failure and continue with no neighbours.
            console.warn(`[RETRIEVAL] Closure lookup failed for ${table} on ${claimId}:`, err instanceof Error ? err.message : err);
            return [];
        }
    })();
    cache.set(claimId, pending);
    return pending;
};
|
|
1046
|
+
/**
 * Choose the first usable closure candidate from an ordered list.
 *
 * Two passes are made over `candidates` in their given order: the first only
 * accepts candidates whose `claim_type` satisfies `matcher`, the second
 * accepts any type. In both passes a candidate must clear
 * `passesClosureReviewGate`, and must either already be in `allGraphClaims`
 * or pass `hasClosurePassageCoverage`.
 *
 * @param {object[]} candidates - Candidate claim rows, best first.
 * @param {*} anchorClaimId - Claim the candidates were reached from.
 * @param {(claimType: *) => boolean} matcher - Preferred claim-type predicate.
 * @returns {Promise<object|null>} The chosen candidate, or null if none qualifies.
 */
const pickClosureCandidate = async (candidates, anchorClaimId, matcher) => {
    if (candidates.length === 0) {
        return null;
    }
    // One sweep over the list; `typedOnly` restricts it to matcher-approved types.
    const sweep = async (typedOnly) => {
        for (const entry of candidates) {
            const eligible = passesClosureReviewGate(entry) && (!typedOnly || matcher(entry.claim_type));
            if (!eligible) {
                continue;
            }
            const entryId = typeof entry.id === 'object' ? String(entry.id) : entry.id;
            // Claims already in the result set skip the coverage lookup.
            if (allGraphClaims.has(entryId) || (await hasClosurePassageCoverage(entry, anchorClaimId))) {
                return entry;
            }
        }
        return null;
    };
    return (await sweep(true)) ?? (await sweep(false));
};
|
|
1065
|
+
for (const thesisId of majorThesisIds) {
|
|
1066
|
+
const unit = {
|
|
1067
|
+
thesis_claim_id: thesisId,
|
|
1068
|
+
objection_found: false,
|
|
1069
|
+
reply_found: false,
|
|
1070
|
+
unit_complete: false
|
|
1071
|
+
};
|
|
1072
|
+
let capLimitedInUnit = false;
|
|
1073
|
+
const thesisExists = allGraphClaims.has(thesisId);
|
|
1074
|
+
if (!thesisExists) {
|
|
1075
|
+
closureUnits.push(unit);
|
|
1076
|
+
continue;
|
|
1077
|
+
}
|
|
1078
|
+
const contradictionNeighbors = await fetchRelationNeighbors('contradicts', thesisId, contradictionNeighborCache);
|
|
1079
|
+
const objectionCandidate = await pickClosureCandidate(contradictionNeighbors, thesisId, isObjectionClaimType);
|
|
1080
|
+
if (objectionCandidate) {
|
|
1081
|
+
const objectionId = typeof objectionCandidate.id === 'object'
|
|
1082
|
+
? String(objectionCandidate.id)
|
|
1083
|
+
: objectionCandidate.id;
|
|
1084
|
+
const objectionAttach = await attachClaimForClosure(objectionCandidate, thesisId);
|
|
1085
|
+
if (objectionAttach === 'added') {
|
|
1086
|
+
closureClaimsAdded += 1;
|
|
1087
|
+
closureObjectionsAdded += 1;
|
|
1088
|
+
}
|
|
1089
|
+
if (objectionAttach === 'blocked_cap') {
|
|
1090
|
+
capLimitedInUnit = true;
|
|
1091
|
+
}
|
|
1092
|
+
if (objectionAttach === 'added' || objectionAttach === 'present') {
|
|
1093
|
+
unit.objection_found = true;
|
|
1094
|
+
unit.objection_claim_id = objectionId;
|
|
1095
|
+
const replyNeighbors = await fetchRelationNeighbors('responds_to', objectionId, replyNeighborCache);
|
|
1096
|
+
const replyCandidate = await pickClosureCandidate(replyNeighbors, objectionId, isReplyClaimType);
|
|
1097
|
+
if (replyCandidate) {
|
|
1098
|
+
const replyId = typeof replyCandidate.id === 'object'
|
|
1099
|
+
? String(replyCandidate.id)
|
|
1100
|
+
: replyCandidate.id;
|
|
1101
|
+
const replyAttach = await attachClaimForClosure(replyCandidate, objectionId);
|
|
1102
|
+
if (replyAttach === 'added') {
|
|
1103
|
+
closureClaimsAdded += 1;
|
|
1104
|
+
closureRepliesAdded += 1;
|
|
1105
|
+
}
|
|
1106
|
+
if (replyAttach === 'blocked_cap') {
|
|
1107
|
+
capLimitedInUnit = true;
|
|
1108
|
+
}
|
|
1109
|
+
if (replyAttach === 'added' || replyAttach === 'present') {
|
|
1110
|
+
unit.reply_found = true;
|
|
1111
|
+
unit.reply_claim_id = replyId;
|
|
1112
|
+
}
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
}
|
|
1116
|
+
unit.unit_complete = unit.objection_found && unit.reply_found;
|
|
1117
|
+
if (capLimitedInUnit)
|
|
1118
|
+
closureCapLimitedUnits += 1;
|
|
1119
|
+
closureUnits.push(unit);
|
|
1120
|
+
}
|
|
1121
|
+
const closureStats = {
|
|
1122
|
+
major_thesis_count: majorThesisIds.length,
|
|
1123
|
+
units_attempted: majorThesisIds.length,
|
|
1124
|
+
units_completed: closureUnits.filter((unit) => unit.unit_complete).length,
|
|
1125
|
+
claims_added_for_closure: closureClaimsAdded,
|
|
1126
|
+
objections_added: closureObjectionsAdded,
|
|
1127
|
+
replies_added: closureRepliesAdded,
|
|
1128
|
+
cap_limited_units: closureCapLimitedUnits,
|
|
1129
|
+
units: closureUnits
|
|
1130
|
+
};
|
|
1131
|
+
console.log('[RETRIEVAL] Closure enforcement', {
|
|
1132
|
+
major_theses: closureStats.major_thesis_count,
|
|
1133
|
+
units_completed: closureStats.units_completed,
|
|
1134
|
+
claims_added: closureStats.claims_added_for_closure
|
|
1135
|
+
});
|
|
1136
|
+
// ── Step 4: Build deduplicated claims array ──────────────────
|
|
1137
|
+
const claims = Array.from(allGraphClaims.values());
|
|
1138
|
+
const claimIdToIndex = new Map();
|
|
1139
|
+
claims.forEach((c, i) => claimIdToIndex.set(c.id, i));
|
|
1140
|
+
console.log(`[RETRIEVAL] ${claims.length} unique claims after graph traversal`);
|
|
1141
|
+
// ── Step 5: Resolve relations between claims in result set ───
|
|
1142
|
+
const relations = [];
|
|
1143
|
+
const claimIds = claims.map((c) => c.id);
|
|
1144
|
+
let relationCandidateCount = 0;
|
|
1145
|
+
const keptRelationKeys = new Set();
|
|
1146
|
+
if (claimIds.length >= 2) {
|
|
1147
|
+
for (const { table, relationType } of RELATION_FETCH_SPECS) {
|
|
1148
|
+
try {
|
|
1149
|
+
const rels = await store.query(`SELECT in, out, $table AS relation_type, strength, note
|
|
1150
|
+
FROM ${table}
|
|
1151
|
+
WHERE in INSIDE $ids AND out INSIDE $ids AND ${relationReviewFilter}`, { ids: claimIds, table });
|
|
1152
|
+
if (rels && Array.isArray(rels)) {
|
|
1153
|
+
relationCandidateCount += rels.length;
|
|
1154
|
+
for (const rel of rels) {
|
|
1155
|
+
const fromId = typeof rel.in === 'object' ? String(rel.in) : rel.in;
|
|
1156
|
+
const toId = typeof rel.out === 'object' ? String(rel.out) : rel.out;
|
|
1157
|
+
const fromIdx = claimIdToIndex.get(fromId);
|
|
1158
|
+
const toIdx = claimIdToIndex.get(toId);
|
|
1159
|
+
if (fromIdx === undefined || toIdx === undefined) {
|
|
1160
|
+
rejectedRelations.push({
|
|
1161
|
+
from_claim_id: fromId,
|
|
1162
|
+
to_claim_id: toId,
|
|
1163
|
+
relation_type: relationType,
|
|
1164
|
+
reason_code: 'missing_endpoint',
|
|
1165
|
+
strength: rel.strength,
|
|
1166
|
+
note: rel.note
|
|
1167
|
+
});
|
|
1168
|
+
continue;
|
|
1169
|
+
}
|
|
1170
|
+
const relationKey = `${fromIdx}|${toIdx}|${relationType}`;
|
|
1171
|
+
if (keptRelationKeys.has(relationKey)) {
|
|
1172
|
+
rejectedRelations.push({
|
|
1173
|
+
from_claim_id: fromId,
|
|
1174
|
+
to_claim_id: toId,
|
|
1175
|
+
relation_type: relationType,
|
|
1176
|
+
reason_code: 'duplicate_relation',
|
|
1177
|
+
strength: rel.strength,
|
|
1178
|
+
note: rel.note
|
|
1179
|
+
});
|
|
1180
|
+
continue;
|
|
1181
|
+
}
|
|
1182
|
+
keptRelationKeys.add(relationKey);
|
|
1183
|
+
relations.push({
|
|
1184
|
+
from_index: fromIdx,
|
|
1185
|
+
to_index: toIdx,
|
|
1186
|
+
relation_type: relationType,
|
|
1187
|
+
strength: rel.strength,
|
|
1188
|
+
note: rel.note
|
|
1189
|
+
});
|
|
1190
|
+
}
|
|
1191
|
+
}
|
|
1192
|
+
}
|
|
1193
|
+
catch (relErr) {
|
|
1194
|
+
console.warn(`[RETRIEVAL] Failed to query ${table} relations:`, relErr instanceof Error ? relErr.message : relErr);
|
|
1195
|
+
}
|
|
1196
|
+
}
|
|
1197
|
+
}
|
|
1198
|
+
console.log(`[RETRIEVAL] ${relations.length} relations among retrieved claims`);
|
|
1199
|
+
// ── Step 6: Fetch argument structures ────────────────────────
|
|
1200
|
+
const arguments_ = [];
|
|
1201
|
+
for (const argId of argumentIds) {
|
|
1202
|
+
try {
|
|
1203
|
+
const argRows = await store.query(`SELECT
|
|
1204
|
+
*,
|
|
1205
|
+
<-part_of<-claim.{text, role: <-part_of[WHERE out = $arg_id].role} AS member_claims
|
|
1206
|
+
FROM $arg_id`, { arg_id: argId });
|
|
1207
|
+
if (!argRows || argRows.length === 0)
|
|
1208
|
+
continue;
|
|
1209
|
+
const arg = Array.isArray(argRows) ? argRows[0] : argRows;
|
|
1210
|
+
// Try a simpler approach to get member claims with roles
|
|
1211
|
+
let conclusionText = null;
|
|
1212
|
+
const keyPremises = [];
|
|
1213
|
+
const partOfRels = await store.query(`SELECT in, role, in.text AS claim_text
|
|
1214
|
+
FROM part_of
|
|
1215
|
+
WHERE out = $arg_id`, { arg_id: argId });
|
|
1216
|
+
if (partOfRels && Array.isArray(partOfRels)) {
|
|
1217
|
+
for (const po of partOfRels) {
|
|
1218
|
+
if (po.role === 'conclusion' && po.claim_text) {
|
|
1219
|
+
conclusionText = po.claim_text;
|
|
1220
|
+
}
|
|
1221
|
+
else if (po.role === 'key_premise' && po.claim_text) {
|
|
1222
|
+
keyPremises.push(po.claim_text);
|
|
1223
|
+
}
|
|
1224
|
+
}
|
|
1225
|
+
}
|
|
1226
|
+
arguments_.push({
|
|
1227
|
+
id: typeof arg.id === 'object' ? String(arg.id) : arg.id,
|
|
1228
|
+
name: arg.name,
|
|
1229
|
+
tradition: arg.tradition,
|
|
1230
|
+
domain: arg.domain,
|
|
1231
|
+
summary: arg.summary,
|
|
1232
|
+
conclusion_text: conclusionText,
|
|
1233
|
+
key_premises: keyPremises
|
|
1234
|
+
});
|
|
1235
|
+
}
|
|
1236
|
+
catch (argErr) {
|
|
1237
|
+
console.warn(`[RETRIEVAL] Failed to fetch argument ${argId}:`, argErr instanceof Error ? argErr.message : argErr);
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
console.log(`[RETRIEVAL] ${arguments_.length} arguments assembled`);
|
|
1241
|
+
let thinkerContext = null;
|
|
1242
|
+
if (enrichWithThinkerContext) {
|
|
1243
|
+
const claimIdsForThinkerContext = claims.map((claim) => claim.id).filter(Boolean);
|
|
1244
|
+
thinkerContext = await fetchThinkerContext(store, claimIdsForThinkerContext);
|
|
1245
|
+
}
|
|
1246
|
+
const traversalEdgePriors = Object.fromEntries(RELATION_TRAVERSAL_BEAM_SPECS.map((spec) => [spec.table, spec.edgePrior]));
|
|
1247
|
+
const pruningSummary = {
|
|
1248
|
+
claims_by_reason: {
|
|
1249
|
+
seed_pool_pruned: 0,
|
|
1250
|
+
duplicate_traversal: 0,
|
|
1251
|
+
confidence_gate: 0,
|
|
1252
|
+
source_integrity_gate: 0
|
|
1253
|
+
},
|
|
1254
|
+
relations_by_reason: {
|
|
1255
|
+
duplicate_relation: 0,
|
|
1256
|
+
missing_endpoint: 0
|
|
1257
|
+
}
|
|
1258
|
+
};
|
|
1259
|
+
for (const rejected of rejectedClaimsByKey.values()) {
|
|
1260
|
+
pruningSummary.claims_by_reason[rejected.reason_code] += 1;
|
|
1261
|
+
}
|
|
1262
|
+
for (const rejected of rejectedRelations) {
|
|
1263
|
+
pruningSummary.relations_by_reason[rejected.reason_code] += 1;
|
|
1264
|
+
}
|
|
1265
|
+
let evidencePassages;
|
|
1266
|
+
if (isRetrievalPassageGroundedEnabled() && passageGroundedClaimIds.length > 0) {
|
|
1267
|
+
try {
|
|
1268
|
+
const passageRows = await store.query(`SELECT passage.id AS id, passage.text AS text, in AS claim_id
|
|
1269
|
+
FROM grounded_in
|
|
1270
|
+
WHERE in INSIDE $claim_ids
|
|
1271
|
+
FETCH passage
|
|
1272
|
+
LIMIT 8`, { claim_ids: claims.map((c) => c.id).slice(0, 24) });
|
|
1273
|
+
evidencePassages = (passageRows ?? []).map((row) => ({
|
|
1274
|
+
passage_id: String(row.id),
|
|
1275
|
+
excerpt: (row.text ?? '').slice(0, 480),
|
|
1276
|
+
claim_ids: row.claim_id ? [String(row.claim_id)] : []
|
|
1277
|
+
}));
|
|
1278
|
+
}
|
|
1279
|
+
catch {
|
|
1280
|
+
evidencePassages = undefined;
|
|
1281
|
+
}
|
|
1282
|
+
}
|
|
1283
|
+
return {
|
|
1284
|
+
claims,
|
|
1285
|
+
relations,
|
|
1286
|
+
arguments: arguments_,
|
|
1287
|
+
seed_claim_ids: seedClaimIds,
|
|
1288
|
+
evidence_passages: evidencePassages,
|
|
1289
|
+
thinker_context: thinkerContext,
|
|
1290
|
+
trace: {
|
|
1291
|
+
seed_pool_count: seedPoolCount,
|
|
1292
|
+
selected_seed_count: seedClaimIds.length,
|
|
1293
|
+
hybrid_mode: hybridMode,
|
|
1294
|
+
dense_seed_count: denseSeedClaims.length,
|
|
1295
|
+
lexical_seed_count: lexicalSeedClaims.length,
|
|
1296
|
+
lexical_terms: lexicalTerms.slice(0, 8),
|
|
1297
|
+
corpus_level_query: corpusLevelQuery,
|
|
1298
|
+
seed_balance_stats: seedSet.stats,
|
|
1299
|
+
traversal_mode: 'beam_trusted_v1',
|
|
1300
|
+
traversal_max_hops: traversalMaxHops,
|
|
1301
|
+
traversal_hop_decay: hopDecayFactor,
|
|
1302
|
+
traversal_base_confidence_threshold: traversalBaseConfidence,
|
|
1303
|
+
traversal_confidence_thresholds: traversalConfidenceThresholds,
|
|
1304
|
+
traversal_domain_aware: true,
|
|
1305
|
+
traversal_trusted_edges_only: true,
|
|
1306
|
+
traversal_edge_priors: traversalEdgePriors,
|
|
1307
|
+
query_decomposition: queryDecomposition,
|
|
1308
|
+
seed_claims: seedTrace,
|
|
1309
|
+
pruning_summary: pruningSummary,
|
|
1310
|
+
traversed_claim_count: Math.max(claims.length - seedClaimIds.length, 0),
|
|
1311
|
+
relation_candidate_count: relationCandidateCount,
|
|
1312
|
+
relation_kept_count: relations.length,
|
|
1313
|
+
argument_candidate_count: argumentIds.size,
|
|
1314
|
+
argument_kept_count: arguments_.length,
|
|
1315
|
+
closure_stats: closureStats,
|
|
1316
|
+
rejected_claims: Array.from(rejectedClaimsByKey.values()).slice(0, 60),
|
|
1317
|
+
rejected_relations: rejectedRelations.slice(0, 80)
|
|
1318
|
+
},
|
|
1319
|
+
degraded: false
|
|
1320
|
+
};
|
|
1321
|
+
}
|
|
1322
|
+
catch (err) {
|
|
1323
|
+
// Top-level catch: SurrealDB unreachable, unexpected errors, etc.
|
|
1324
|
+
console.error('[RETRIEVAL] Fatal retrieval error (returning empty result):', err instanceof Error ? err.message : err);
|
|
1325
|
+
return {
|
|
1326
|
+
...EMPTY_RESULT,
|
|
1327
|
+
degraded: true,
|
|
1328
|
+
degraded_reason: store.isDatabaseUnavailable(err) ? 'database_unavailable' : 'retrieval_error'
|
|
1329
|
+
};
|
|
1330
|
+
}
|
|
1331
|
+
}
|
|
1332
|
+
// ─── Context block formatter ───────────────────────────────────────────────
|
|
1333
|
+
/**
 * Format a RetrievalResult into a structured text block for the LLM prompt.
 *
 * Layout of the returned text, in order:
 *   1. A fixed header line and instruction paragraph.
 *   2. One `CLAIM [c:NNN]` entry per element of `result.claims`, where NNN is
 *      the claim's 1-based position zero-padded to three digits, followed by
 *      one line per relation whose `from_index` equals that claim's 0-based
 *      position.
 *   3. A `NAMED ARGUMENTS:` section, only when `result.arguments` is non-empty.
 *   4. An `EVIDENCE PASSAGES …` section, only when `result.evidence_passages`
 *      is non-empty.
 *   5. A fixed footer.
 *
 * @param {object} result - Retrieval result. Fields read here:
 *   `claims[]` (`claim_type`, `source_title`, `text`),
 *   `relations[]` (`from_index`, `to_index`, `relation_type`, optional `strength`),
 *   `arguments[]` (`name`, `summary`, optional `tradition`, `conclusion_text`, `key_premises`),
 *   `evidence_passages[]` (`passage_id`, `excerpt`, optional `claim_ids`).
 * @returns {string} The newline-joined context block, or a fixed fallback
 *   sentence when `result.claims` is missing or empty.
 */
export function buildContextBlock(result) {
    if (!result.claims || result.claims.length === 0) {
        return 'No knowledge base context available for this query.';
    }
    // Display label for a claim, derived from its 0-based position in result.claims.
    const formatClaimId = (index) => `c:${String(index + 1).padStart(3, '0')}`;
    // Bucket relations by their source claim once, instead of re-scanning the
    // whole relation list for every claim. Insertion order inside each bucket
    // matches the order in result.relations, so the rendered order is unchanged.
    const relationsByFrom = new Map();
    for (const relation of result.relations ?? []) {
        const bucket = relationsByFrom.get(relation.from_index);
        if (bucket) {
            bucket.push(relation);
        }
        else {
            relationsByFrom.set(relation.from_index, [relation]);
        }
    }
    const lines = [];
    lines.push('=== PHILOSOPHICAL KNOWLEDGE GRAPH CONTEXT ===');
    lines.push('');
    lines.push('The following are structured claims from SOPHIA\'s curated philosophical knowledge graph. ' +
        'Use these as your philosophical foundation, noting their typed logical relations and source attributions.');
    lines.push('');
    // ── Claims with IDs and Relations ──
    result.claims.forEach((claim, index) => {
        lines.push(`CLAIM [${formatClaimId(index)}] (${claim.claim_type}, source: "${claim.source_title}")`);
        lines.push(`"${claim.text}"`);
        // Relations that originate from this claim, if any.
        for (const relation of relationsByFrom.get(index) ?? []) {
            const relType = relation.relation_type.toUpperCase().replace(/_/g, ' ');
            const strengthStr = relation.strength ? ` (${relation.strength})` : '';
            lines.push(` ├─ ${relType} [${formatClaimId(relation.to_index)}]${strengthStr}`);
        }
        lines.push('');
    });
    // ── Arguments ──
    const namedArguments = result.arguments ?? [];
    if (namedArguments.length > 0) {
        lines.push('NAMED ARGUMENTS:');
        for (const arg of namedArguments) {
            const traditionStr = arg.tradition ? ` (${arg.tradition})` : '';
            lines.push(`▸ ${arg.name}${traditionStr}`);
            lines.push(` ${arg.summary}`);
            if (arg.conclusion_text) {
                lines.push(` Conclusion: "${arg.conclusion_text}"`);
            }
            // Tolerate arguments that carry no premise list at all.
            const premises = arg.key_premises ?? [];
            if (premises.length > 0) {
                lines.push(` Key premises: ${premises.map((p) => `"${p}"`).join('; ')}`);
            }
            lines.push('');
        }
    }
    // ── Evidence passages ──
    if (result.evidence_passages && result.evidence_passages.length > 0) {
        lines.push('EVIDENCE PASSAGES (source spans linked to retrieved claims):');
        for (const passage of result.evidence_passages) {
            lines.push(`▸ ${passage.passage_id}`);
            lines.push(` "${passage.excerpt}"`);
            // Tolerate passages that carry no linked-claim list at all.
            const linkedClaimIds = passage.claim_ids ?? [];
            if (linkedClaimIds.length > 0) {
                lines.push(` Linked claims: ${linkedClaimIds.join(', ')}`);
            }
            lines.push('');
        }
    }
    lines.push('=== END KNOWLEDGE GRAPH CONTEXT ===');
    lines.push('');
    lines.push('Use Google Search to verify, challenge, or extend these claims with current sources.');
    return lines.join('\n');
}
|