@johndimm/constellations 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/App.tsx +480 -0
- package/FullPageConstellations.tsx +74 -0
- package/FullPageConstellationsHostShell.tsx +27 -0
- package/README.md +116 -0
- package/components/AppConfirmDialog.tsx +46 -0
- package/components/AppHeader.tsx +73 -0
- package/components/AppNotifications.tsx +21 -0
- package/components/BrowsePeople.tsx +832 -0
- package/components/ControlPanel.tsx +1023 -0
- package/components/Graph.tsx +1525 -0
- package/components/HelpOverlay.tsx +168 -0
- package/components/NodeContextMenu.tsx +160 -0
- package/components/PeopleBrowserSidebar.tsx +690 -0
- package/components/Sidebar.tsx +271 -0
- package/components/TimelineView.tsx +4 -0
- package/hooks/useExpansion.ts +889 -0
- package/hooks/useGraphActions.ts +325 -0
- package/hooks/useGraphState.ts +414 -0
- package/hooks/useKioskMode.ts +47 -0
- package/hooks/useNodeClickHandler.ts +172 -0
- package/hooks/useSearchHandlers.ts +369 -0
- package/host.ts +16 -0
- package/index.css +101 -0
- package/index.tsx +16 -0
- package/kioskDomains.ts +307 -0
- package/package.json +78 -0
- package/services/aiUtils.ts +364 -0
- package/services/cacheService.ts +76 -0
- package/services/crossrefService.ts +107 -0
- package/services/geminiService.ts +1359 -0
- package/services/get-local-graphs.js +5 -0
- package/services/graphUtils.ts +347 -0
- package/services/imageService.ts +39 -0
- package/services/llmClient.ts +194 -0
- package/services/openAlexService.ts +173 -0
- package/services/wikipediaImage.ts +40 -0
- package/services/wikipediaService.ts +1175 -0
- package/sessionHandoff.ts +132 -0
- package/types.ts +99 -0
- package/useFullPageConstellationsHost.ts +116 -0
- package/utils/evidenceUtils.ts +107 -0
- package/utils/graphLogicUtils.ts +32 -0
- package/utils/graphNodeToChannelNotes.ts +71 -0
- package/utils/wikiUtils.ts +34 -0
|
@@ -0,0 +1,889 @@
|
|
|
1
|
+
import React, { useCallback, useRef } from 'react';
|
|
2
|
+
import { GraphNode, GraphLink } from '../types';
|
|
3
|
+
import { fetchConnections, fetchPersonWorks, classifyEntity, fetchOrgKeyPeopleBlockViaSearch, LockedPair } from '../services/geminiService';
|
|
4
|
+
import { fetchWikipediaSummary, fetchWikipediaExtract, fetchWikidataKeyPeopleForTitle, fetchWikidataCastForTitle } from '../services/wikipediaService';
|
|
5
|
+
import {
|
|
6
|
+
searchOpenAlexAuthor,
|
|
7
|
+
getTopWorksForAuthor,
|
|
8
|
+
openAlexWorkToPaperNode,
|
|
9
|
+
makeOpenAlexAuthorshipEvidence,
|
|
10
|
+
getOpenAlexWork,
|
|
11
|
+
openAlexAuthorToAuthorNode,
|
|
12
|
+
searchOpenAlexWork
|
|
13
|
+
} from '../services/openAlexService';
|
|
14
|
+
import { fetchCrossrefWorkByDoi, crossrefAuthors, makeCrossrefAuthorshipEvidence, crossrefWorkToPaperNode } from '../services/crossrefService';
|
|
15
|
+
import { dedupeGraph, mergeExpansionGraph, baseDedupeKey, normalizeForDedup } from '../services/graphUtils';
|
|
16
|
+
import { buildWikiUrl, looksLikeWikipediaTitle } from '../utils/wikiUtils';
|
|
17
|
+
import {
|
|
18
|
+
normalizeForEvidence,
|
|
19
|
+
splitIntoSentences,
|
|
20
|
+
looksLikeSpecificPersonName,
|
|
21
|
+
sanitizeEvidenceAndRole,
|
|
22
|
+
isParenJobTitle,
|
|
23
|
+
sanitizeTitleParen,
|
|
24
|
+
roleLooksLikeJobTitle
|
|
25
|
+
} from '../utils/evidenceUtils';
|
|
26
|
+
import { getLinkKey, looksLikeScreenWork, isBadListPage } from '../utils/graphLogicUtils';
|
|
27
|
+
import { fetchWithTimeout, withTimeout } from '../services/aiUtils';
|
|
28
|
+
import { clipForLlmLog } from '../services/aiUtils';
|
|
29
|
+
|
|
30
|
+
/** Time limit (ms) handed to `withTimeout` around `fetchWikipediaSummary` calls in this hook. */
const WIKI_SUMMARY_TIMEOUT_MS = 15000;
|
|
31
|
+
/** A hung cache backend (PostgreSQL) or a slow disk must not strand node spinners indefinitely, so cache requests are time-bounded. */
|
|
32
|
+
const CACHE_GET_TIMEOUT_MS = 25_000;
|
|
33
|
+
const CACHE_POST_TIMEOUT_MS = 90_000;
|
|
34
|
+
|
|
35
|
+
/**
 * Dependencies injected into `useExpansion`.
 *
 * The hook owns no graph state itself: it reads the current graph through refs
 * and reports results through the setters supplied here.
 */
interface UseExpansionOptions {
  /** Live view of the graph; re-read after awaits so link checks see current edges. */
  graphDataRef: React.MutableRefObject<{ nodes: GraphNode[], links: GraphLink[] }>;
  /** State setter used to merge expansion results and toggle per-node flags. */
  setGraphData: React.Dispatch<React.SetStateAction<{ nodes: GraphNode[], links: GraphLink[] }>>;
  /** Set to `true` when an expansion starts. */
  setIsProcessing: (val: boolean) => void;
  /** Cleared when an expansion starts; set when an initial expansion finds no connections. */
  setError: (val: string | null) => void;
  /** Captured at expansion start and compared later to detect a superseded (stale) expansion. */
  searchIdRef: React.MutableRefObject<number>;
  /** Atomic/composite type pair in effect; the hook falls back to Person/Event when unset. */
  lockedPairRef: React.MutableRefObject<LockedPair>;
  nodesRef: React.MutableRefObject<GraphNode[]>;
  /** Checked before a delayed automatic "more" expansion so it only runs for the still-selected node. */
  selectedNodeRef: React.MutableRefObject<GraphNode | null>;
  /**
   * Ids of nodes that already received an automatic "more" expansion.
   * The hook stores `String(node.id)` keys, so string members must be allowed.
   */
  autoExpandMoreDoneRef: React.MutableRefObject<Set<number | string>>;
  /** When `false`, cache reads and writes are skipped entirely. */
  cacheEnabled: boolean;
  /** Base URL against which the `/expansion` cache endpoint is resolved. */
  cacheBaseUrl: string;
  /** Enables the OpenAlex/Crossref path for author/paper pairs. */
  ENABLE_ACADEMIC_CORPORA: boolean;
  /** Enables the search-grounded key-people lookup when verified context is short. */
  ENABLE_WEB_SEARCH: boolean;
  /** Invoked for cached neighbors that have no image yet (unless text-only mode is on). */
  loadNodeImage: (nodeId: number, title: string, context?: string, fallbackNode?: any, opts?: any) => Promise<void>;
  saveCacheNodeMeta: (nodeId: number | string, meta: any, fallbackNode?: any) => Promise<void>;
  setNewlyExpandedNodeIds: (ids: (number | string)[]) => void;
  /** Marks which node is currently being expanded (or `null` to clear). */
  setExpandingNodeId: (id: number | string | null) => void;
  /** Receives the ids of neighbor nodes to highlight after an expansion. */
  setNewChildNodeIds: (ids: Set<string | number> | ((prev: Set<string | number>) => Set<string | number>)) => void;
  setSelectedNode: (node: GraphNode | null) => void;
  setSelectedLink: (link: GraphLink | null) => void;
  exploreTerm: string;
  /** When `true`, image loading for neighbors is skipped. */
  isTextOnly: boolean;
  graphRef: React.RefObject<any>;
  /** Optional toast-style notifier used when a non-initial expansion yields nothing. */
  setNotification?: (n: { message: string; type: 'success' | 'error' }) => void;
}
|
|
61
|
+
|
|
62
|
+
export function useExpansion(options: UseExpansionOptions) {
|
|
63
|
+
const expansionInflightRef = useRef(0);
|
|
64
|
+
const {
|
|
65
|
+
graphDataRef, setGraphData, setIsProcessing, setError,
|
|
66
|
+
searchIdRef, lockedPairRef, nodesRef, selectedNodeRef,
|
|
67
|
+
autoExpandMoreDoneRef, cacheEnabled, cacheBaseUrl,
|
|
68
|
+
ENABLE_ACADEMIC_CORPORA, ENABLE_WEB_SEARCH, loadNodeImage, saveCacheNodeMeta,
|
|
69
|
+
setNewlyExpandedNodeIds, setExpandingNodeId, setNewChildNodeIds,
|
|
70
|
+
setSelectedNode, setSelectedLink, exploreTerm, isTextOnly, graphRef,
|
|
71
|
+
setNotification,
|
|
72
|
+
} = options;
|
|
73
|
+
|
|
74
|
+
/**
 * Looks up a stored expansion for `sourceId` in the cache service.
 *
 * Sends a GET to `/expansion?sourceId=…` resolved against `cacheBaseUrl`, with
 * `CACHE_GET_TIMEOUT_MS` passed to `fetchWithTimeout`. This is best-effort: it
 * resolves to `null` when caching is disabled, when the response is not OK, or
 * when any step throws or rejects (including URL construction and body parsing).
 *
 * @param sourceId id of the node whose cached neighbors are requested
 * @returns the parsed JSON body of the response, or `null`
 */
const fetchCacheExpansion = useCallback(async (sourceId: number) => {
  if (!cacheEnabled) return null;
  try {
    // Built inside the try so a malformed cacheBaseUrl degrades to a cache miss
    // instead of throwing out of the hook (matches saveCacheExpansion).
    const url = new URL("/expansion", cacheBaseUrl);
    url.searchParams.set("sourceId", String(sourceId));
    const res = await fetchWithTimeout(url.toString(), {}, CACHE_GET_TIMEOUT_MS);
    if (!res.ok) return null;
    // Await here: without it a rejected body parse would bypass the catch below
    // and surface in the caller as an unexpected rejection.
    return await res.json();
  } catch (e) {
    // console.warn("Cache fetch failed", e);
    return null;
  }
}, [cacheEnabled, cacheBaseUrl]);
|
|
87
|
+
|
|
88
|
+
/**
 * Persists an expansion (a source node's neighbor list) to the cache service.
 *
 * POSTs `{ sourceId, nodes }` as JSON to `/expansion` resolved against
 * `cacheBaseUrl`, with `CACHE_POST_TIMEOUT_MS` passed to `fetchWithTimeout`.
 * Failures are swallowed on purpose; the write is best-effort.
 *
 * @param sourceId id of the expanded node
 * @param nodes neighbor nodes to store for that source
 * @returns the `idMap` field of the response body on success (may be
 *          `undefined`), or `null` when caching is disabled, the response is
 *          not OK, or any step throws
 */
const saveCacheExpansion = useCallback(async (sourceId: number | string, nodes: any[]) => {
  if (!cacheEnabled) return null;
  try {
    const endpoint = new URL("/expansion", cacheBaseUrl).toString();
    const requestInit = {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ sourceId, nodes }),
    };
    const response = await fetchWithTimeout(endpoint, requestInit, CACHE_POST_TIMEOUT_MS);
    if (!response.ok) return null;
    const payload = await response.json();
    return payload.idMap as Record<string, number> | undefined;
  } catch (_err) {
    // console.warn("Cache save failed", _err);
    return null;
  }
}, [cacheEnabled, cacheBaseUrl]);
|
|
109
|
+
|
|
110
|
+
const fetchAndExpandNode = useCallback(async (
|
|
111
|
+
node: GraphNode,
|
|
112
|
+
isInitial = false,
|
|
113
|
+
forceMore = false,
|
|
114
|
+
nodesOverride?: GraphNode[],
|
|
115
|
+
linksOverride?: GraphLink[],
|
|
116
|
+
skipSelection = false,
|
|
117
|
+
skipExpandingHighlight = false
|
|
118
|
+
) => {
|
|
119
|
+
const currentNodes = nodesOverride || graphDataRef.current.nodes;
|
|
120
|
+
const currentLinks = linksOverride || graphDataRef.current.links;
|
|
121
|
+
const guardId = searchIdRef.current;
|
|
122
|
+
const isStale = () => searchIdRef.current !== guardId;
|
|
123
|
+
|
|
124
|
+
/** Avoid infinite spinner when isStale() returns mid-flight or setTimeout never completes. */
|
|
125
|
+
const clearThisNodeLoading = () => {
|
|
126
|
+
setGraphData((prev) => ({
|
|
127
|
+
...prev,
|
|
128
|
+
nodes: prev.nodes.map((n) =>
|
|
129
|
+
String(n.id) === String(node.id) ? { ...n, isLoading: false } : n,
|
|
130
|
+
),
|
|
131
|
+
}));
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
// Do not block on isLoading: a crashed/stale expansion would strand the node forever.
|
|
135
|
+
if (!forceMore && node.expanded) {
|
|
136
|
+
console.info("[Expansion] skip (already expanded)", { title: node.title, id: node.id });
|
|
137
|
+
return;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
if (isStale()) return;
|
|
144
|
+
setGraphData(prev => ({
|
|
145
|
+
...prev,
|
|
146
|
+
nodes: prev.nodes.map(n => (String(n.id) === String(node.id) ? { ...n, isLoading: true } : n))
|
|
147
|
+
}));
|
|
148
|
+
|
|
149
|
+
const loadingGuard = setTimeout(() => {
|
|
150
|
+
if (isStale()) return;
|
|
151
|
+
setGraphData(prev => ({
|
|
152
|
+
...prev,
|
|
153
|
+
nodes: prev.nodes.map(n => (String(n.id) === String(node.id) ? { ...n, isLoading: true } : n))
|
|
154
|
+
}));
|
|
155
|
+
}, 0);
|
|
156
|
+
|
|
157
|
+
expansionInflightRef.current += 1;
|
|
158
|
+
setIsProcessing(true);
|
|
159
|
+
setError(null);
|
|
160
|
+
|
|
161
|
+
try {
|
|
162
|
+
console.info("[Expansion] start", {
|
|
163
|
+
title: node.title,
|
|
164
|
+
id: node.id,
|
|
165
|
+
expanded: !!node.expanded,
|
|
166
|
+
isLoading: !!node.isLoading,
|
|
167
|
+
cacheEnabled,
|
|
168
|
+
forceMore,
|
|
169
|
+
isInitial,
|
|
170
|
+
});
|
|
171
|
+
const nodeKey = String(node.id);
|
|
172
|
+
const nodeUpdates = new Map<string, Partial<GraphNode>>();
|
|
173
|
+
const maybeAutoExpandMore = (neighborCount: number) => {
|
|
174
|
+
if (forceMore) return;
|
|
175
|
+
if (neighborCount > 3) return;
|
|
176
|
+
if (autoExpandMoreDoneRef.current.has(nodeKey)) return;
|
|
177
|
+
autoExpandMoreDoneRef.current.add(nodeKey);
|
|
178
|
+
setTimeout(() => {
|
|
179
|
+
if (String(selectedNodeRef.current?.id) !== nodeKey) return;
|
|
180
|
+
|
|
181
|
+
fetchAndExpandNode(node, false, true);
|
|
182
|
+
}, 900);
|
|
183
|
+
};
|
|
184
|
+
|
|
185
|
+
const getLinkIdEarly = (thing: any) => {
|
|
186
|
+
if (typeof thing === 'object' && thing !== null) return String((thing as any).id);
|
|
187
|
+
return String(thing);
|
|
188
|
+
};
|
|
189
|
+
/** Must read links after awaits (cache fetch); stale `currentLinks` from expand start misses edges and falsely skips duplicate detection. */
|
|
190
|
+
const edgeExistsBetweenFresh = (a: string, b: string) => {
|
|
191
|
+
const links = graphDataRef.current.links;
|
|
192
|
+
return links.some((l) => {
|
|
193
|
+
const s = getLinkIdEarly(l.source);
|
|
194
|
+
const t = getLinkIdEarly(l.target);
|
|
195
|
+
return (s === a && t === b) || (s === b && t === a);
|
|
196
|
+
});
|
|
197
|
+
};
|
|
198
|
+
|
|
199
|
+
if (cacheEnabled && !forceMore) {
|
|
200
|
+
const cacheHit = await fetchCacheExpansion(node.id);
|
|
201
|
+
const cacheCount = cacheHit?.nodes?.length ?? 0;
|
|
202
|
+
console.info("[Expansion] cache GET", {
|
|
203
|
+
title: node.title,
|
|
204
|
+
hit: cacheHit?.hit,
|
|
205
|
+
neighbors: cacheCount,
|
|
206
|
+
});
|
|
207
|
+
if (cacheHit && cacheHit.hit === "exact" && cacheHit.nodes) {
|
|
208
|
+
let validCached: any[] = cacheHit.nodes.filter((cn: any) => String(cn.id) !== String(node.id));
|
|
209
|
+
// Concurrent upgrade of Wikipedia summaries if needed
|
|
210
|
+
const upgraded = await Promise.all(validCached.map(async (cn: any) => {
|
|
211
|
+
const meta = cn.meta || {};
|
|
212
|
+
if (!cn.wikiChecked && (String(meta.wikiSummary || cn.description || '').toLowerCase().includes(' is a song') || String(meta.wikiSummary || cn.description || '').toLowerCase().includes(' song written') || String(meta.wikiSummary || cn.description || '').toLowerCase().includes(' song by'))) {
|
|
213
|
+
setGraphData(prev => ({
|
|
214
|
+
...prev,
|
|
215
|
+
nodes: prev.nodes.map(n => String(n.id) === String(cn.id) ? { ...n, wikiChecked: true } : n)
|
|
216
|
+
}));
|
|
217
|
+
const wiki = await withTimeout(
|
|
218
|
+
fetchWikipediaSummary(cn.title),
|
|
219
|
+
WIKI_SUMMARY_TIMEOUT_MS,
|
|
220
|
+
'Wikipedia summary timeout',
|
|
221
|
+
).catch(() => ({ extract: null, pageid: null, title: null } as const));
|
|
222
|
+
if (!wiki.extract && !wiki.pageid) return cn; // Return original if no new wiki data
|
|
223
|
+
setGraphData(prev => ({
|
|
224
|
+
...prev,
|
|
225
|
+
nodes: prev.nodes.map(n => String(n.id) === String(cn.id) ? {
|
|
226
|
+
...n,
|
|
227
|
+
wikiSummary: wiki.extract || n.wikiSummary,
|
|
228
|
+
wikipedia_id: wiki.pageid ? wiki.pageid.toString() : n.wikipedia_id
|
|
229
|
+
} : n)
|
|
230
|
+
}));
|
|
231
|
+
return {
|
|
232
|
+
...cn,
|
|
233
|
+
wikipedia_id: wiki.pageid ? String(wiki.pageid) : cn.wikipedia_id,
|
|
234
|
+
description: wiki.extract,
|
|
235
|
+
meta: { ...meta, wikiSummary: wiki.extract },
|
|
236
|
+
wikiChecked: true
|
|
237
|
+
};
|
|
238
|
+
}
|
|
239
|
+
return cn;
|
|
240
|
+
}));
|
|
241
|
+
// The edge position (atomic_id vs composite_id) is the ground truth for
|
|
242
|
+
// bipartite membership. The is_atomic column in the DB can be stale/wrong.
|
|
243
|
+
// Infer the correct value from the parent: neighbors of a composite are atomic
|
|
244
|
+
// and vice versa.
|
|
245
|
+
const parentIsAtomic = !!(node.is_atomic ?? (node as any).is_person ?? (node.type || '').toLowerCase() === 'person');
|
|
246
|
+
const expectedChildIsAtomic = !parentIsAtomic;
|
|
247
|
+
validCached = upgraded.map((cn: any) => ({ ...cn, is_atomic: expectedChildIsAtomic }));
|
|
248
|
+
|
|
249
|
+
// Any cached neighbors are usable (old threshold >=5 skipped most DB rows and forced LLM every time).
|
|
250
|
+
if (validCached.length >= 1) {
|
|
251
|
+
const existingNodeIdsBefore = new Set(graphDataRef.current.nodes.map(n => String(n.id)));
|
|
252
|
+
const parentIdStr = nodeKey;
|
|
253
|
+
|
|
254
|
+
/** If DB only echoes edges already drawn (typical leaf actor → one film that's on screen), a cache-only merge adds no new bubbles — skip to AI so expansion actually shows filmography nodes. */
|
|
255
|
+
const cacheDuplicatesVisibleGraph =
|
|
256
|
+
validCached.every(
|
|
257
|
+
(cn: any) =>
|
|
258
|
+
existingNodeIdsBefore.has(String(cn.id)) &&
|
|
259
|
+
edgeExistsBetweenFresh(parentIdStr, String(cn.id)),
|
|
260
|
+
);
|
|
261
|
+
|
|
262
|
+
if (cacheDuplicatesVisibleGraph) {
|
|
263
|
+
console.info("[Expansion] exact cache overlaps graph — skipping shortcut, fetching AI expansion", {
|
|
264
|
+
title: node.title,
|
|
265
|
+
neighborsInCache: validCached.length,
|
|
266
|
+
});
|
|
267
|
+
} else {
|
|
268
|
+
const newChildIds: (string | number)[] = validCached.filter(cn => !existingNodeIdsBefore.has(String(cn.id))).map(cn => cn.id);
|
|
269
|
+
// Include ALL connected nodes for highlighting, not just new ones
|
|
270
|
+
const allConnectedNodeIds = validCached.map(cn => cn.id);
|
|
271
|
+
|
|
272
|
+
if (isStale()) {
|
|
273
|
+
clearThisNodeLoading();
|
|
274
|
+
return;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
setGraphData(prev => mergeExpansionGraph({
|
|
278
|
+
nodes: prev.nodes,
|
|
279
|
+
links: prev.links,
|
|
280
|
+
parent: node,
|
|
281
|
+
targets: validCached,
|
|
282
|
+
seedFromParent: true
|
|
283
|
+
}));
|
|
284
|
+
|
|
285
|
+
maybeAutoExpandMore(validCached.length);
|
|
286
|
+
if (!skipSelection) setSelectedNode(node);
|
|
287
|
+
if (!skipExpandingHighlight) {
|
|
288
|
+
|
|
289
|
+
setExpandingNodeId(node.id);
|
|
290
|
+
// Highlight ALL connected nodes, not just new ones
|
|
291
|
+
setNewChildNodeIds(new Set(allConnectedNodeIds.map(id => String(id))));
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
validCached.forEach((cn, idx) => {
|
|
295
|
+
if (!cn.imageUrl && !cn.imageChecked && !isTextOnly) {
|
|
296
|
+
setTimeout(() => loadNodeImage(cn.id, cn.title), 50 * idx);
|
|
297
|
+
}
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
setGraphData(prev => ({
|
|
301
|
+
...prev,
|
|
302
|
+
nodes: prev.nodes.map(n => (String(n.id) === nodeKey ? { ...n, expanded: true, isLoading: false } : n))
|
|
303
|
+
}));
|
|
304
|
+
return;
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
const getLinkId = (thing: any) => {
|
|
312
|
+
if (typeof thing === 'object' && thing !== null) return String(thing.id);
|
|
313
|
+
return String(thing);
|
|
314
|
+
};
|
|
315
|
+
|
|
316
|
+
const neighborLinks = currentLinks.filter(l =>
|
|
317
|
+
getLinkId(l.source) === String(node.id) ||
|
|
318
|
+
getLinkId(l.target) === String(node.id)
|
|
319
|
+
);
|
|
320
|
+
|
|
321
|
+
const neighborNodes = neighborLinks.map(l => {
|
|
322
|
+
const sid = getLinkId(l.source);
|
|
323
|
+
const tid = getLinkId(l.target);
|
|
324
|
+
const neighborId = sid === String(node.id) ? tid : sid;
|
|
325
|
+
return currentNodes.find(n => String(n.id) === String(neighborId));
|
|
326
|
+
}).filter((n): n is GraphNode => !!n);
|
|
327
|
+
|
|
328
|
+
const neighborNames = neighborNodes.map(n => n.title || '').filter(Boolean);
|
|
329
|
+
|
|
330
|
+
/** Only composite-side titles (films, orgs, works). Do not pass fellow cast members — the works prompt asks for NEW films, and listing other actors as "excludes" often yields an empty model response. */
|
|
331
|
+
const worksExcludeTitles = neighborNodes
|
|
332
|
+
.filter((n) => {
|
|
333
|
+
if (n.is_atomic === true || (n as any).is_person === true) return false;
|
|
334
|
+
if (n.is_atomic === false) return true;
|
|
335
|
+
const t = (n.type || '').toLowerCase();
|
|
336
|
+
if (/\b(actor|person|author|character|composer|scientist|philosopher)\b/.test(t)) return false;
|
|
337
|
+
if (/\b(movie|film|novel|book|album|series|event|organization|museum|institution|battle|war|movement)\b/.test(t)) return true;
|
|
338
|
+
return false;
|
|
339
|
+
})
|
|
340
|
+
.map(n => n.title || '')
|
|
341
|
+
.filter(Boolean);
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
let wiki: any = {
|
|
345
|
+
extract: node.wikiSummary || null,
|
|
346
|
+
pageid: node.wikipedia_id ? Number(node.wikipedia_id) : null,
|
|
347
|
+
mentioningPageTitles: node.mentioningPageTitles || null
|
|
348
|
+
};
|
|
349
|
+
if ((!wiki.extract && !wiki.pageid) || (wiki.extract && !wiki.pageid && !wiki.mentioningPageTitles)) {
|
|
350
|
+
wiki = await withTimeout(
|
|
351
|
+
fetchWikipediaSummary(node.title, neighborNames.join(' ')),
|
|
352
|
+
WIKI_SUMMARY_TIMEOUT_MS,
|
|
353
|
+
'Wikipedia summary timeout',
|
|
354
|
+
).catch(() => ({ extract: null, pageid: null, title: null, mentioningPageTitles: null }));
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
if (wiki.extract) {
|
|
358
|
+
const isPerson = node.is_atomic === true || node.is_person === true || node.type?.toLowerCase() === 'person';
|
|
359
|
+
nodeUpdates.set(nodeKey, {
|
|
360
|
+
wikiSummary: wiki.extract,
|
|
361
|
+
wikipedia_id: wiki.pageid?.toString(),
|
|
362
|
+
mentioningPageTitles: wiki.mentioningPageTitles || undefined,
|
|
363
|
+
// Only use extracted year if node is an event (not a person) and currently missing a year
|
|
364
|
+
...(!isPerson && !node.year && wiki.year ? { year: wiki.year } : {})
|
|
365
|
+
});
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
let currentIsAtomic = node.is_atomic ?? (node as any).is_person;
|
|
370
|
+
let currentType = node.type;
|
|
371
|
+
const pair = lockedPairRef.current || { atomicType: "Person", compositeType: "Event" };
|
|
372
|
+
const currentAtomicType = pair.atomicType;
|
|
373
|
+
const currentCompositeType = pair.compositeType;
|
|
374
|
+
const isAcademicPair = ENABLE_ACADEMIC_CORPORA && (pair.atomicType.toLowerCase() === 'author' || pair.compositeType.toLowerCase() === 'paper');
|
|
375
|
+
|
|
376
|
+
if (!node.classification_reasoning) {
|
|
377
|
+
nodeUpdates.set(nodeKey, {
|
|
378
|
+
classification_reasoning: `Locked pair: ${pair.atomicType} ↔ ${pair.compositeType}.`,
|
|
379
|
+
atomic_type: pair.atomicType,
|
|
380
|
+
composite_type: pair.compositeType
|
|
381
|
+
});
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
if (currentIsAtomic === undefined) {
|
|
386
|
+
|
|
387
|
+
const inferred = (node.type || '').toLowerCase() === pair.atomicType.toLowerCase() ? true
|
|
388
|
+
: (node.type || '').toLowerCase() === pair.compositeType.toLowerCase() ? false
|
|
389
|
+
: undefined;
|
|
390
|
+
|
|
391
|
+
if (typeof inferred === 'boolean') {
|
|
392
|
+
currentIsAtomic = inferred;
|
|
393
|
+
nodeUpdates.set(nodeKey, { is_atomic: inferred });
|
|
394
|
+
} else {
|
|
395
|
+
const classification = await classifyEntity(node.title);
|
|
396
|
+
currentIsAtomic = classification.isAtomic;
|
|
397
|
+
nodeUpdates.set(nodeKey, {
|
|
398
|
+
...(typeof (node.is_atomic ?? (node as any).is_person) === 'boolean' ? {} : { is_atomic: classification.isAtomic }),
|
|
399
|
+
type: classification.type
|
|
400
|
+
});
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
const extractResult = await fetchWikipediaExtract(node.title, 12000);
|
|
406
|
+
const sourceLong = extractResult.extract || wiki.extract || '';
|
|
407
|
+
|
|
408
|
+
const hasReliableWikipediaForThisTitle = !!(sourceLong && String(sourceLong).trim().length > 0);
|
|
409
|
+
|
|
410
|
+
let verifiedContext = sourceLong;
|
|
411
|
+
try {
|
|
412
|
+
const expandingComposite = !(currentIsAtomic ?? currentType.toLowerCase() === 'person');
|
|
413
|
+
|
|
414
|
+
if (!isAcademicPair && pair.atomicType.toLowerCase() === 'person' && expandingComposite) {
|
|
415
|
+
|
|
416
|
+
const wd = await fetchWikidataKeyPeopleForTitle(node.title);
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
if (wd) {
|
|
420
|
+
const lines: string[] = [];
|
|
421
|
+
if (wd.founders.length) lines.push(`Founders: ${wd.founders.join(', ')}`);
|
|
422
|
+
if (wd.directors.length) lines.push(`Directors/Managers: ${wd.directors.join(', ')}`);
|
|
423
|
+
if (wd.ceos.length) lines.push(`Chief Executive Officers: ${wd.ceos.join(', ')}`);
|
|
424
|
+
if (wd.keyPeople.length) lines.push(`Key People: ${wd.keyPeople.join(', ')}`);
|
|
425
|
+
if (lines.length) verifiedContext = `${verifiedContext}\n\nWIKIDATA (structured properties for "${node.title}", ${wd.wikidataId}):\n${lines.map(l => `- ${l}`).join('\n')}\n`;
|
|
426
|
+
} else if (ENABLE_WEB_SEARCH && (verifiedContext || '').trim().length < 400) {
|
|
427
|
+
const grounded = await fetchOrgKeyPeopleBlockViaSearch(node.title);
|
|
428
|
+
if (grounded) verifiedContext = `${verifiedContext}\n\n${grounded}\n`;
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
} catch (e) { }
|
|
432
|
+
|
|
433
|
+
let results: any[] = [];
|
|
434
|
+
const isPerson = currentIsAtomic ?? currentType.toLowerCase() === 'person';
|
|
435
|
+
|
|
436
|
+
if (isAcademicPair) {
|
|
437
|
+
const meta = (node as any).meta || {};
|
|
438
|
+
const parentAuthorId = String(meta.openAlexAuthorId || '').trim();
|
|
439
|
+
const parentWorkId = String(meta.openAlexWorkId || '').trim();
|
|
440
|
+
if (isPerson) {
|
|
441
|
+
const author = parentAuthorId ? { id: parentAuthorId, display_name: node.title } : await searchOpenAlexAuthor(node.title);
|
|
442
|
+
if (author?.id) {
|
|
443
|
+
const works = await getTopWorksForAuthor(author.id, 10);
|
|
444
|
+
results = works.map(w => ({
|
|
445
|
+
...openAlexWorkToPaperNode(w),
|
|
446
|
+
edge_label: 'Authored',
|
|
447
|
+
edge_meta: { evidence: makeOpenAlexAuthorshipEvidence(w, node.title) }
|
|
448
|
+
}));
|
|
449
|
+
if (!meta.openAlexAuthorId && author.id) nodeUpdates.set(nodeKey, { meta: { ...meta, openAlexAuthorId: author.id, openAlexUrl: author.id, source: 'openalex' } });
|
|
450
|
+
}
|
|
451
|
+
} else {
|
|
452
|
+
// Check if this is "Work (Author)" pattern - if so, skip OpenAlex (it returns modern editions/translators)
|
|
453
|
+
// E.g., "Republic (Plato)" should use LLM, not OpenAlex database
|
|
454
|
+
const hasAuthorInParens = /^[^(]+\([A-Z][a-z]+(\s+[A-Z][a-z]+)*\)$/.test(node.title.trim());
|
|
455
|
+
|
|
456
|
+
const work = (!hasAuthorInParens && parentWorkId)
|
|
457
|
+
? await getOpenAlexWork(parentWorkId)
|
|
458
|
+
: (!hasAuthorInParens ? await searchOpenAlexWork(node.title) : null);
|
|
459
|
+
|
|
460
|
+
if (work?.id) {
|
|
461
|
+
const authors = (work.authorships || []).map(a => a.author).filter(Boolean).map(a => ({ id: String(a!.id), display_name: String(a!.display_name) })).filter(a => a.id && a.display_name);
|
|
462
|
+
results = authors.slice(0, 12).map(a => ({
|
|
463
|
+
...openAlexAuthorToAuthorNode({ id: a.id, display_name: a.display_name }),
|
|
464
|
+
edge_label: 'Author',
|
|
465
|
+
edge_meta: { evidence: makeOpenAlexAuthorshipEvidence(work, a.display_name) }
|
|
466
|
+
}));
|
|
467
|
+
if (!meta.openAlexWorkId && work.id) {
|
|
468
|
+
const paperNode = openAlexWorkToPaperNode(work);
|
|
469
|
+
nodeUpdates.set(nodeKey, {
|
|
470
|
+
meta: { ...meta, openAlexWorkId: work.id, doi: work.doi || undefined, openAlexUrl: work.id, source: 'openalex' },
|
|
471
|
+
...((node.description || '').trim() ? {} : { description: paperNode.description, year: paperNode.year })
|
|
472
|
+
});
|
|
473
|
+
}
|
|
474
|
+
} else {
|
|
475
|
+
const doiMatch = (String(meta.doi || '') || String(node.title || '')).match(/\b10\.\d{4,9}\/\S+\b/i);
|
|
476
|
+
const doi = doiMatch ? doiMatch[0] : "";
|
|
477
|
+
if (doi) {
|
|
478
|
+
const cw = await fetchCrossrefWorkByDoi(doi);
|
|
479
|
+
if (cw) {
|
|
480
|
+
const authors = crossrefAuthors(cw);
|
|
481
|
+
results = authors.slice(0, 12).map(name => ({
|
|
482
|
+
title: name, type: "Author", description: "", is_atomic: true, edge_label: "Author",
|
|
483
|
+
edge_meta: { evidence: makeCrossrefAuthorshipEvidence(cw, name) }
|
|
484
|
+
}));
|
|
485
|
+
const paperNode = crossrefWorkToPaperNode(cw);
|
|
486
|
+
nodeUpdates.set(nodeKey, {
|
|
487
|
+
meta: { ...meta, doi: cw.DOI || doi, crossrefUrl: paperNode.meta?.crossrefUrl, source: 'crossref' },
|
|
488
|
+
...((node.description || '').trim() ? {} : { description: paperNode.description, year: paperNode.year })
|
|
489
|
+
});
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
// Fallback: If academic results were empty, proceed to standard expansion
|
|
497
|
+
if (results.length === 0) {
|
|
498
|
+
if (isPerson) {
|
|
499
|
+
let data = await fetchPersonWorks(node.title, worksExcludeTitles, verifiedContext || undefined, node.wikipedia_id, currentAtomicType, currentCompositeType, wiki.mentioningPageTitles || undefined);
|
|
500
|
+
if ((!data.works || data.works.length === 0) && worksExcludeTitles.length > 0) {
|
|
501
|
+
data = await fetchPersonWorks(node.title, [], verifiedContext || undefined, node.wikipedia_id, currentAtomicType, currentCompositeType, wiki.mentioningPageTitles || undefined);
|
|
502
|
+
}
|
|
503
|
+
console.info("[Expansion] works raw", {
|
|
504
|
+
title: node.title,
|
|
505
|
+
worksExcludeTitles: worksExcludeTitles.length,
|
|
506
|
+
works: (data as any)?.works?.length ?? 0,
|
|
507
|
+
sample: clipForLlmLog(
|
|
508
|
+
JSON.stringify(((data as any)?.works || []).slice(0, 4).map((w: any) => ({
|
|
509
|
+
entity: w.entity,
|
|
510
|
+
wikipediaTitle: w.wikipediaTitle,
|
|
511
|
+
type: w.type,
|
|
512
|
+
year: w.year,
|
|
513
|
+
})))
|
|
514
|
+
),
|
|
515
|
+
});
|
|
516
|
+
results = (data.works || []).filter(w => typeof (w as any).entity === 'string' && (w as any).entity.trim().length > 0).map(w => ({
|
|
517
|
+
title: (w as any).wikipediaTitle || w.entity,
|
|
518
|
+
type: (w as any).type || currentCompositeType,
|
|
519
|
+
description: w.description,
|
|
520
|
+
year: w.year ?? undefined,
|
|
521
|
+
role: w.role ?? undefined,
|
|
522
|
+
is_atomic: (w as any).isAtomic !== undefined ? (w as any).isAtomic : false,
|
|
523
|
+
edge_meta: {
|
|
524
|
+
evidence: {
|
|
525
|
+
kind: 'ai', pageTitle: (w as any).evidencePageTitle || node.title, snippet: (w as any).evidenceSnippet || '',
|
|
526
|
+
url: looksLikeWikipediaTitle((w as any).evidencePageTitle || node.title) ? (
|
|
527
|
+
((String((w as any).evidencePageTitle || node.title) === node.title) && !hasReliableWikipediaForThisTitle)
|
|
528
|
+
? undefined
|
|
529
|
+
: buildWikiUrl((w as any).evidencePageTitle || node.title, (String((w as any).evidencePageTitle || node.title) === node.title) ? node.wikipedia_id : undefined)
|
|
530
|
+
) : undefined
|
|
531
|
+
}
|
|
532
|
+
},
|
|
533
|
+
edge_label: w.role || null
|
|
534
|
+
}));
|
|
535
|
+
} else {
|
|
536
|
+
let data = await fetchConnections(node.title, undefined, neighborNames, verifiedContext || undefined, node.wikipedia_id, currentAtomicType, currentCompositeType, wiki.mentioningPageTitles || undefined);
|
|
537
|
+
if ((!data.people || data.people.length === 0) && neighborNames.length > 0) {
|
|
538
|
+
data = await fetchConnections(node.title, undefined, [], verifiedContext || undefined, node.wikipedia_id, currentAtomicType, currentCompositeType, wiki.mentioningPageTitles || undefined);
|
|
539
|
+
}
|
|
540
|
+
if (data.sourceYear) nodeUpdates.set(nodeKey, { year: data.sourceYear });
|
|
541
|
+
const atomicTypeToUse = currentAtomicType || 'Person';
|
|
542
|
+
results = (data.people || []).map(p => ({
|
|
543
|
+
title: (p as any).wikipediaTitle || p.name,
|
|
544
|
+
type: atomicTypeToUse,
|
|
545
|
+
description: p.description,
|
|
546
|
+
role: p.role,
|
|
547
|
+
is_atomic: (p as any).isAtomic !== undefined ? (p as any).isAtomic : true,
|
|
548
|
+
edge_meta: {
|
|
549
|
+
evidence: {
|
|
550
|
+
kind: 'ai', pageTitle: (p as any).evidencePageTitle || node.title, snippet: (p as any).evidenceSnippet || '',
|
|
551
|
+
url: looksLikeWikipediaTitle((p as any).evidencePageTitle || node.title) ? (
|
|
552
|
+
((String((p as any).evidencePageTitle || node.title) === node.title) && !hasReliableWikipediaForThisTitle)
|
|
553
|
+
? undefined
|
|
554
|
+
: buildWikiUrl((p as any).evidencePageTitle || node.title, (String((p as any).evidencePageTitle || node.title) === node.title) ? node.wikipedia_id : undefined)
|
|
555
|
+
) : undefined
|
|
556
|
+
}
|
|
557
|
+
},
|
|
558
|
+
edge_label: p.role || null
|
|
559
|
+
}));
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
if (results.length === 0 && sourceLong) {
|
|
565
|
+
const sentences = splitIntoSentences(sourceLong);
|
|
566
|
+
const patterns = [
|
|
567
|
+
{ role: 'Author', re: /\bis (?:an?|the)\s+(?:nonfiction\s+)?(?:book|novel|memoir|biography|essay)\s+by\s+([^.;]+)/i },
|
|
568
|
+
{ role: 'Author', re: /\bwritten by\s+([^.;]+)/i },
|
|
569
|
+
{ role: 'Director', re: /\b(?:film|movie)\s+directed by\s+([^.;]+)/i },
|
|
570
|
+
{ role: 'Creator', re: /\bcreated by\s+([^.;]+)/i },
|
|
571
|
+
];
|
|
572
|
+
for (const sent of sentences.slice(0, 4)) {
|
|
573
|
+
for (const ptn of patterns) {
|
|
574
|
+
const m = sent.match(ptn.re);
|
|
575
|
+
if (m) {
|
|
576
|
+
const name = String(m[1] || '').split(/,| and | who | which /i)[0].trim();
|
|
577
|
+
if (name && name.split(/\s+/).length >= 2) {
|
|
578
|
+
const atomicTypeToUse = currentAtomicType || 'Person';
|
|
579
|
+
results = [{ title: name, type: atomicTypeToUse, description: `${ptn.role} associated with ${node.title}.`, role: ptn.role, is_atomic: true, edge_meta: { evidence: { kind: 'wikipedia', pageTitle: node.title, snippet: sent, url: looksLikeWikipediaTitle(node.title) ? buildWikiUrl(node.title) : undefined } }, edge_label: ptn.role }];
|
|
580
|
+
break;
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
if (results.length) break;
|
|
585
|
+
}
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
if (looksLikeScreenWork(node.title, node.description || sourceLong)) {
|
|
589
|
+
try {
|
|
590
|
+
const castLabels = await fetchWikidataCastForTitle(node.title);
|
|
591
|
+
if (castLabels.length) {
|
|
592
|
+
const existingNames = new Set(results.map(r => normalizeForDedup(r.title)));
|
|
593
|
+
const atomicTypeToUse = currentAtomicType || 'Person';
|
|
594
|
+
castLabels.forEach(name => {
|
|
595
|
+
const key = normalizeForDedup(name);
|
|
596
|
+
if (!key || existingNames.has(key)) return;
|
|
597
|
+
existingNames.add(key);
|
|
598
|
+
results.push({ title: name, type: atomicTypeToUse, description: `Cast member in ${node.title}.`, role: 'Cast', is_atomic: true, edge_meta: { evidence: { kind: 'wikipedia', pageTitle: node.title, snippet: `${name} is a cast member in ${node.title}.`, url: looksLikeWikipediaTitle(node.title) ? buildWikiUrl(node.title) : undefined } }, edge_label: 'Cast' });
|
|
599
|
+
});
|
|
600
|
+
}
|
|
601
|
+
} catch (e) { }
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
if (!skipSelection) setSelectedNode(node);
|
|
606
|
+
if (!skipExpandingHighlight) setExpandingNodeId(node.id);
|
|
607
|
+
|
|
608
|
+
if (results.length === 0) {
|
|
609
|
+
if (isInitial) {
|
|
610
|
+
setError(`No connections found for "${node.title}".`);
|
|
611
|
+
setGraphData({ nodes: [], links: [] });
|
|
612
|
+
setSelectedNode(null);
|
|
613
|
+
setSelectedLink(null);
|
|
614
|
+
setExpandingNodeId(null);
|
|
615
|
+
setNewChildNodeIds(new Set());
|
|
616
|
+
} else {
|
|
617
|
+
setNotification?.({
|
|
618
|
+
message:
|
|
619
|
+
cacheEnabled
|
|
620
|
+
? `No new connections for "${node.title}". Often the AI returned none or only links you already had. If failures repeat, check the cache terminal for quota/API errors.`
|
|
621
|
+
: `No new connections for "${node.title}". The model returned nothing usable—check API keys in .env.local.`,
|
|
622
|
+
type: 'error',
|
|
623
|
+
});
|
|
624
|
+
setGraphData(prev => ({ ...prev, nodes: prev.nodes.map(n => String(n.id) === String(node.id) ? { ...n, expanded: false, isLoading: false } : n) }));
|
|
625
|
+
setExpandingNodeId(null);
|
|
626
|
+
setNewChildNodeIds(new Set());
|
|
627
|
+
}
|
|
628
|
+
} else {
|
|
629
|
+
const resultsWithWiki = await Promise.all(results.map(async r => {
|
|
630
|
+
const contextHint = [node.title, r.type, r.edge_label || r.role, r.description, r.edge_meta?.evidence?.snippet].filter(Boolean).join(' · ').slice(0, 280);
|
|
631
|
+
const skipWiki = isAcademicPair || String(r.edge_meta?.evidence?.kind || '') === 'openalex';
|
|
632
|
+
const rWiki = skipWiki
|
|
633
|
+
? ({ title: r.title, extract: '', pageid: undefined } as any)
|
|
634
|
+
: await withTimeout(
|
|
635
|
+
fetchWikipediaSummary(r.title, contextHint),
|
|
636
|
+
WIKI_SUMMARY_TIMEOUT_MS,
|
|
637
|
+
'Wikipedia summary timeout',
|
|
638
|
+
).catch(() => ({ title: r.title, extract: '', pageid: undefined } as any));
|
|
639
|
+
let evidence: any = r.edge_meta?.evidence || { kind: 'none' as const };
|
|
640
|
+
const pageTitle = String(evidence?.pageTitle || '');
|
|
641
|
+
const snippet = String(evidence?.snippet || '');
|
|
642
|
+
const pageLooksNonWiki = pageTitle.includes(' - ') || /^https?:\/\//i.test(pageTitle) || !looksLikeWikipediaTitle(pageTitle);
|
|
643
|
+
|
|
644
|
+
if (evidence && evidence.kind === 'ai' && snippet && pageTitle && !pageLooksNonWiki) {
|
|
645
|
+
} else if (pageLooksNonWiki) {
|
|
646
|
+
evidence = { kind: 'none' as const };
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
return {
|
|
650
|
+
...r, title: rWiki.title || r.title, wikipedia_id: rWiki.pageid?.toString(),
|
|
651
|
+
description: rWiki.extract || r.description,
|
|
652
|
+
meta: { ...(r.meta || {}), wikiSummary: rWiki.extract || undefined },
|
|
653
|
+
edge_meta: { evidence },
|
|
654
|
+
edge_label: (() => {
|
|
655
|
+
const lbl = r.edge_label || r.role || null;
|
|
656
|
+
if ((!evidence || evidence.kind === 'none') && roleLooksLikeJobTitle(lbl)) return null;
|
|
657
|
+
return lbl;
|
|
658
|
+
})(),
|
|
659
|
+
...(typeof (rWiki.title || r.title) === 'string' && isParenJobTitle(rWiki.title || r.title) && (!evidence || evidence.kind === 'none')
|
|
660
|
+
? { title: sanitizeTitleParen(rWiki.title || r.title) } : {})
|
|
661
|
+
};
|
|
662
|
+
}));
|
|
663
|
+
|
|
664
|
+
let nodesToUse = resultsWithWiki;
|
|
665
|
+
if (!exploreTerm.toLowerCase().startsWith('list of ')) nodesToUse = nodesToUse.filter((n: any) => !isBadListPage(n.title));
|
|
666
|
+
console.info("[Expansion] post-wiki", {
|
|
667
|
+
title: node.title,
|
|
668
|
+
results: results.length,
|
|
669
|
+
resultsWithWiki: resultsWithWiki.length,
|
|
670
|
+
nodesToUseAfterListFilter: nodesToUse.length,
|
|
671
|
+
});
|
|
672
|
+
|
|
673
|
+
let finalIDMap: Record<string, number> | undefined;
|
|
674
|
+
if (cacheEnabled) {
|
|
675
|
+
let combinedNodes = [...resultsWithWiki];
|
|
676
|
+
const existingCache = await fetchCacheExpansion(node.id);
|
|
677
|
+
if (existingCache && existingCache.nodes) {
|
|
678
|
+
const byTitle = new Map<string, any>();
|
|
679
|
+
existingCache.nodes.forEach((n: any) => { if (n?.title) byTitle.set(String(n.title).toLowerCase(), n); });
|
|
680
|
+
resultsWithWiki.forEach((n: any) => {
|
|
681
|
+
const key = String(n.title || '').toLowerCase();
|
|
682
|
+
if (!key) return;
|
|
683
|
+
const existing = byTitle.get(key);
|
|
684
|
+
if (!existing) { byTitle.set(key, n); return; }
|
|
685
|
+
byTitle.set(key, { ...existing, ...n, id: existing.id ?? n.id, wikipedia_id: n.wikipedia_id || existing.wikipedia_id, description: (n.description && n.description.length >= (existing.description || '').length) ? n.description : existing.description, meta: { ...(existing.meta || {}), ...(n.meta || {}) }, edge_meta: n.edge_meta || existing.edge_meta, edge_label: n.edge_label || existing.edge_label });
|
|
686
|
+
});
|
|
687
|
+
combinedNodes = Array.from(byTitle.values());
|
|
688
|
+
}
|
|
689
|
+
finalIDMap = await saveCacheExpansion(node.id, combinedNodes);
|
|
690
|
+
const cacheHit = await fetchCacheExpansion(node.id);
|
|
691
|
+
if (cacheHit && Array.isArray(cacheHit.nodes) && cacheHit.nodes.length > 0) {
|
|
692
|
+
nodesToUse = cacheHit.nodes;
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
const currentNodesForDedupe = graphDataRef.current.nodes;
|
|
697
|
+
const existingByNorm = new Map<string, GraphNode>(currentNodesForDedupe.map(n => [baseDedupeKey(n as any), n]));
|
|
698
|
+
const verifiedNorm = normalizeForEvidence(sourceLong);
|
|
699
|
+
nodesToUse = nodesToUse.map(cn => sanitizeEvidenceAndRole(cn, verifiedNorm));
|
|
700
|
+
// Only filter person nodes for specific names; allow all other types through
|
|
701
|
+
nodesToUse = nodesToUse.filter((cn: any) => {
|
|
702
|
+
const nodeType = String(cn?.type || '').toLowerCase();
|
|
703
|
+
if (nodeType === 'person' || nodeType === 'actor' || nodeType === 'author') {
|
|
704
|
+
return looksLikeSpecificPersonName(cn?.title);
|
|
705
|
+
}
|
|
706
|
+
return true; // Allow all non-person nodes
|
|
707
|
+
});
|
|
708
|
+
console.info("[Expansion] nodesToUse after sanitize/type-filter", {
|
|
709
|
+
title: node.title,
|
|
710
|
+
nodesToUse: nodesToUse.length,
|
|
711
|
+
existingByNorm: existingByNorm.size,
|
|
712
|
+
});
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
const processedNodes = nodesToUse.map(cn => {
|
|
716
|
+
const norm = baseDedupeKey(cn as any);
|
|
717
|
+
const existing = existingByNorm.get(norm);
|
|
718
|
+
let idToUse = existing ? existing.id : (cn.id ?? Math.floor(Math.random() * 1000000));
|
|
719
|
+
|
|
720
|
+
// SYNC WITH DATABASE ID IF AVAILABLE
|
|
721
|
+
if (finalIDMap) {
|
|
722
|
+
const wikiId = (cn.wikipedia_id || cn.wikipediaId || "").toString().trim();
|
|
723
|
+
const key = `${cn.title}|${cn.type}|${wikiId || ''}`;
|
|
724
|
+
if (finalIDMap[key]) {
|
|
725
|
+
idToUse = finalIDMap[key];
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
if (!existing) existingByNorm.set(norm, { id: idToUse, title: cn.title, type: cn.type } as GraphNode);
|
|
730
|
+
return { ...cn, id: idToUse };
|
|
731
|
+
});
|
|
732
|
+
|
|
733
|
+
const currentNodesForNewIds = graphDataRef.current.nodes;
|
|
734
|
+
const existingNodeIdsBefore = new Set(currentNodesForNewIds.map(n => String(n.id)));
|
|
735
|
+
const newChildIds = processedNodes.filter(cn => !existingNodeIdsBefore.has(String(cn.id))).map(cn => cn.id);
|
|
736
|
+
|
|
737
|
+
// Include ALL connected nodes for highlighting, not just new ones
|
|
738
|
+
const allConnectedNodeIds = processedNodes.map(cn => cn.id);
|
|
739
|
+
|
|
740
|
+
if (processedNodes.length > 0 && newChildIds.length === 0) {
|
|
741
|
+
console.info("[Expansion] all processed nodes already present (by id)", {
|
|
742
|
+
title: node.title,
|
|
743
|
+
processedNodes: processedNodes.length,
|
|
744
|
+
existingNodeIdsBefore: existingNodeIdsBefore.size,
|
|
745
|
+
sampleTitles: processedNodes.slice(0, 8).map(n => n.title),
|
|
746
|
+
});
|
|
747
|
+
} else {
|
|
748
|
+
console.info("[Expansion] new child ids", {
|
|
749
|
+
title: node.title,
|
|
750
|
+
processedNodes: processedNodes.length,
|
|
751
|
+
newChildIds: newChildIds.length,
|
|
752
|
+
});
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
if (isStale()) {
|
|
758
|
+
clearThisNodeLoading();
|
|
759
|
+
return;
|
|
760
|
+
}
|
|
761
|
+
setGraphData(prev => {
|
|
762
|
+
const nodeMap = new Map<string, GraphNode>(prev.nodes.map(n => [String(n.id), n]));
|
|
763
|
+
const existingNodeIds = new Set(prev.nodes.map(n => String(n.id)));
|
|
764
|
+
const expectedChildIsAtomic = !currentIsAtomic;
|
|
765
|
+
processedNodes.forEach(cn => {
|
|
766
|
+
const meta = cn.meta || {};
|
|
767
|
+
const existing = nodeMap.get(String(cn.id));
|
|
768
|
+
nodeMap.set(String(cn.id), {
|
|
769
|
+
id: cn.id, title: cn.title, type: cn.type,
|
|
770
|
+
is_atomic: (existing?.is_atomic ?? (existing as any)?.is_person ?? (typeof (cn as any).is_atomic === 'boolean' ? (cn as any).is_atomic : expectedChildIsAtomic)),
|
|
771
|
+
wikipedia_id: cn.wikipedia_id, description: cn.description || existing?.description || "",
|
|
772
|
+
year: cn.year ?? existing?.year, imageUrl: meta.imageUrl ?? existing?.imageUrl,
|
|
773
|
+
imageChecked: !!(meta.imageUrl ?? existing?.imageUrl) || existing?.imageChecked,
|
|
774
|
+
wikiSummary: meta.wikiSummary ?? (existing as any)?.wikiSummary,
|
|
775
|
+
x: existing?.x ?? (node.x ? node.x + (Math.random() - 0.5) * 100 : undefined),
|
|
776
|
+
y: existing?.y ?? (node.y ? node.y + (Math.random() - 0.5) * 100 : undefined),
|
|
777
|
+
expanded: existing?.expanded || false, isLoading: false
|
|
778
|
+
});
|
|
779
|
+
});
|
|
780
|
+
if (nodeMap.has(String(node.id))) {
|
|
781
|
+
nodeMap.set(String(node.id), {
|
|
782
|
+
...nodeMap.get(String(node.id))!,
|
|
783
|
+
expanded: true,
|
|
784
|
+
isLoading: false,
|
|
785
|
+
...nodeUpdates.get(nodeKey),
|
|
786
|
+
});
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
// Normalizes a link endpoint to a string id. The endpoint may be a bare id
// or an object exposing `id`; non-objects are stringified as-is, objects
// (including null, via optional chaining) are stringified from their `id`.
const getLinkId = (thing: any) => {
  if (typeof thing !== 'object') {
    return String(thing);
  }
  return String(thing?.id);
};
|
|
790
|
+
const linkMap = new Map<string, GraphLink>(prev.links.map(l => [`${getLinkId(l.source)}↔${getLinkId(l.target)}`, l]));
|
|
791
|
+
const candidateLinks: GraphLink[] = processedNodes.map(cn => {
|
|
792
|
+
// Find the original result to get the raw evidence before any wiki/dedupe processing
|
|
793
|
+
const sanitizedResults = nodesToUse; // nodesToUse already contains the sanitized results
|
|
794
|
+
const rawEvidence = (sanitizedResults.find(r => baseDedupeKey(r) === baseDedupeKey(cn))?.edge_meta?.evidence) || { kind: 'none' as const };
|
|
795
|
+
const sourceNodeId = node.id;
|
|
796
|
+
const targetNodeId = cn.id;
|
|
797
|
+
const sid = getLinkId(sourceNodeId);
|
|
798
|
+
const tid = getLinkId(targetNodeId);
|
|
799
|
+
const lid1 = `${sid}↔${tid}`;
|
|
800
|
+
const lid2 = `${tid}↔${sid}`;
|
|
801
|
+
const existingl = linkMap.get(lid1) || linkMap.get(lid2);
|
|
802
|
+
return {
|
|
803
|
+
id: existingl?.id ?? Math.floor(Math.random() * 1000000), source: sourceNodeId, target: targetNodeId,
|
|
804
|
+
label: cn.edge_label || existingl?.label || "",
|
|
805
|
+
evidence: rawEvidence
|
|
806
|
+
};
|
|
807
|
+
});
|
|
808
|
+
const bipartiteSafeCandidates = candidateLinks.filter(l => {
|
|
809
|
+
const sid = getLinkId(l.source);
|
|
810
|
+
const tid = getLinkId(l.target);
|
|
811
|
+
|
|
812
|
+
// RELAXATION: If this is a direct link from the node we are currently expanding,
|
|
813
|
+
// we trust the AI and permit it regardless of bipartite classification
|
|
814
|
+
// to avoid "empty expansions" when classification is fuzzy.
|
|
815
|
+
if (sid === String(node.id)) return true;
|
|
816
|
+
|
|
817
|
+
const sourceNode = nodeMap.get(sid);
|
|
818
|
+
const targetNode = nodeMap.get(tid);
|
|
819
|
+
if (!sourceNode || !targetNode) return false; // Should not happen if nodes are in nodeMap
|
|
820
|
+
const sAtomic = sourceNode.is_atomic ?? false;
|
|
821
|
+
const tAtomic = targetNode.is_atomic ?? false;
|
|
822
|
+
if (sAtomic === tAtomic) { return false; }
|
|
823
|
+
return true;
|
|
824
|
+
});
|
|
825
|
+
|
|
826
|
+
const updatedExistingLinks = prev.links.map(l => {
|
|
827
|
+
const cand = bipartiteSafeCandidates.find(c => c.id === l.id);
|
|
828
|
+
return cand ? { ...l, label: l.label || cand.label, evidence: (!l.evidence || l.evidence.kind === 'none') ? cand.evidence : l.evidence } : l;
|
|
829
|
+
});
|
|
830
|
+
const combinedLinks = [...updatedExistingLinks, ...bipartiteSafeCandidates.filter(l => !prev.links.some(ex => ex.id === l.id))];
|
|
831
|
+
const degree = new Map<string, number>();
|
|
832
|
+
combinedLinks.forEach(l => {
|
|
833
|
+
const s = getLinkId(l.source);
|
|
834
|
+
const t = getLinkId(l.target);
|
|
835
|
+
degree.set(s, (degree.get(s) || 0) + 1);
|
|
836
|
+
degree.set(t, (degree.get(t) || 0) + 1);
|
|
837
|
+
});
|
|
838
|
+
const finalNodes = Array.from(nodeMap.values()).filter(n => {
|
|
839
|
+
const isOriginal = String(n.id) === String(node.id);
|
|
840
|
+
const isExisting = existingNodeIds.has(String(n.id));
|
|
841
|
+
const hasDegree = (degree.get(String(n.id)) || 0) > 0;
|
|
842
|
+
const ok = isOriginal || isExisting || hasDegree;
|
|
843
|
+
if (!ok) { /* removed log */ }
|
|
844
|
+
return ok;
|
|
845
|
+
});
|
|
846
|
+
|
|
847
|
+
return dedupeGraph(finalNodes, combinedLinks);
|
|
848
|
+
});
|
|
849
|
+
|
|
850
|
+
maybeAutoExpandMore(processedNodes.length);
|
|
851
|
+
|
|
852
|
+
// Highlight ALL connected nodes, not just new ones
|
|
853
|
+
if (!skipExpandingHighlight) setNewChildNodeIds(new Set(allConnectedNodeIds.map(id => String(id))));
|
|
854
|
+
processedNodes.forEach((cn, idx) => { if (!cn.imageUrl && !cn.imageChecked && !isTextOnly) setTimeout(() => loadNodeImage(cn.id, cn.title), 300 * (idx + 1)); });
|
|
855
|
+
|
|
856
|
+
setTimeout(() => {
|
|
857
|
+
if (isStale()) {
|
|
858
|
+
clearThisNodeLoading();
|
|
859
|
+
return;
|
|
860
|
+
}
|
|
861
|
+
setGraphData(prev => ({ ...prev, nodes: prev.nodes.map(n => String(n.id) === String(node.id) ? { ...n, expanded: true, isLoading: false, ...nodeUpdates.get(nodeKey) } : n) }));
|
|
862
|
+
const updates = nodeUpdates.get(nodeKey);
|
|
863
|
+
if (updates) saveCacheNodeMeta(node.id, updates, node);
|
|
864
|
+
setTimeout(() => {
|
|
865
|
+
graphRef.current?.centerOnNode(node.id);
|
|
866
|
+
if (!skipExpandingHighlight) {
|
|
867
|
+
setExpandingNodeId(null);
|
|
868
|
+
// Keep newChildNodeIds so they remain highlighted
|
|
869
|
+
}
|
|
870
|
+
}, 200);
|
|
871
|
+
}, 500);
|
|
872
|
+
}
|
|
873
|
+
} catch (error) {
|
|
874
|
+
console.error("Failed to expand node", error);
|
|
875
|
+
if (!isStale()) {
|
|
876
|
+
setError(`Failed to fetch connections: ${(error as any)?.message || 'unknown error'}`);
|
|
877
|
+
setGraphData(prev => ({ ...prev, nodes: prev.nodes.map(n => String(n.id) === String(node.id) ? { ...n, isLoading: false } : n) }));
|
|
878
|
+
}
|
|
879
|
+
setSelectedNode(null); setSelectedLink(null); setExpandingNodeId(null); setNewChildNodeIds(new Set());
|
|
880
|
+
} finally {
|
|
881
|
+
clearTimeout(loadingGuard);
|
|
882
|
+
expansionInflightRef.current = Math.max(0, expansionInflightRef.current - 1);
|
|
883
|
+
if (expansionInflightRef.current === 0) setIsProcessing(false);
|
|
884
|
+
clearThisNodeLoading();
|
|
885
|
+
}
|
|
886
|
+
}, [loadNodeImage, cacheEnabled, fetchCacheExpansion, saveCacheExpansion, cacheBaseUrl, saveCacheNodeMeta, setGraphData, setIsProcessing, setError, searchIdRef, lockedPairRef, nodesRef, selectedNodeRef, autoExpandMoreDoneRef, ENABLE_ACADEMIC_CORPORA, ENABLE_WEB_SEARCH, setNewlyExpandedNodeIds, setExpandingNodeId, setNewChildNodeIds, setSelectedNode, setSelectedLink, exploreTerm, isTextOnly, graphRef, setNotification]);
|
|
887
|
+
|
|
888
|
+
return { fetchAndExpandNode, fetchCacheExpansion, saveCacheExpansion };
|
|
889
|
+
}
|