rnsr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,767 @@
+ """
+ RNSR Cross-Document Navigator
+
+ Orchestrates multi-document queries by leveraging the knowledge graph
+ to find and link entities across documents.
+
+ This navigator handles queries like:
+ - "What happens to Person X mentioned in Document A in Document B?"
+ - "Compare the terms in Contract 1 and Contract 2"
+ - "Trace the timeline of events across all documents"
+ """
+
+ from __future__ import annotations
+
+ import json
+ import re
+ from dataclasses import dataclass, field
+ from datetime import datetime, timezone
+ from typing import Any, Callable
+
+ import structlog
+
+ from rnsr.extraction.models import Entity, EntityType, Relationship, RelationType
+ from rnsr.indexing.knowledge_graph import KnowledgeGraph
+ from rnsr.indexing.kv_store import KVStore
+ from rnsr.models import SkeletonNode
+
+ logger = structlog.get_logger(__name__)
+
+
+ # =============================================================================
+ # Cross-Document Query Models
+ # =============================================================================
+
+
+ @dataclass
+ class CrossDocQuery:
+     """A decomposed cross-document query."""
+
+     original_query: str
+     entities_mentioned: list[str] = field(default_factory=list)
+     documents_mentioned: list[str] = field(default_factory=list)
+     query_type: str = "general"  # general, comparison, timeline, entity_tracking
+     sub_queries: list[dict[str, Any]] = field(default_factory=list)
+
+
+ @dataclass
+ class DocumentResult:
+     """Result from querying a single document."""
+
+     doc_id: str
+     doc_title: str
+     answer: str
+     evidence: list[str] = field(default_factory=list)
+     entities_found: list[Entity] = field(default_factory=list)
+     confidence: float = 0.0
+
+
+ @dataclass
+ class CrossDocAnswer:
+     """Final synthesized answer from cross-document query."""
+
+     query: str
+     answer: str
+     document_results: list[DocumentResult] = field(default_factory=list)
+     entities_involved: list[Entity] = field(default_factory=list)
+     relationships_used: list[Relationship] = field(default_factory=list)
+     confidence: float = 0.0
+     trace: list[dict[str, Any]] = field(default_factory=list)
+
+
+ # =============================================================================
+ # Entity Extraction from Query
+ # =============================================================================
+
+
+ QUERY_ENTITY_EXTRACTION_PROMPT = """Analyze this query and extract entities that need to be tracked across documents.
+
+ Query: {query}
+
+ Extract:
+ 1. People mentioned (names, roles)
+ 2. Organizations mentioned
+ 3. Documents or sections referenced
+ 4. Key legal concepts or events
+ 5. Dates or time periods
+
+ OUTPUT FORMAT (JSON):
+ ```json
+ {{
+   "entities": [
+     {{"name": "John Smith", "type": "PERSON", "role": "defendant"}},
+     {{"name": "Contract A", "type": "DOCUMENT"}}
+   ],
+   "query_type": "entity_tracking|comparison|timeline|general",
+   "documents_referenced": ["Document A", "Document B"]
+ }}
+ ```
+
+ Respond with JSON only:"""
+
+
+ # =============================================================================
+ # Cross-Document Navigator
+ # =============================================================================
+
+
+ class CrossDocNavigator:
+     """
+     Orchestrates multi-document queries using the knowledge graph.
+
+     Workflow:
+     1. Extract entities from the query
+     2. Resolve entities to documents via knowledge graph
+     3. Plan retrieval across documents
+     4. Execute per-document navigation
+     5. Synthesize cross-document answer
+     """
+
+     def __init__(
+         self,
+         knowledge_graph: KnowledgeGraph,
+         document_navigators: dict[str, Any] | None = None,
+         llm_fn: Callable[[str], str] | None = None,
+     ):
+         """
+         Initialize the cross-document navigator.
+
+         Args:
+             knowledge_graph: Knowledge graph with entities and relationships.
+             document_navigators: Dict mapping doc_id to navigator instances.
+             llm_fn: LLM function for synthesis.
+         """
+         self.kg = knowledge_graph
+         self.navigators = document_navigators or {}
+         self._llm_fn = llm_fn
+
+         # Cache for document content stores
+         self._kv_stores: dict[str, KVStore] = {}
+         self._skeletons: dict[str, dict[str, SkeletonNode]] = {}
+
+     def set_llm_function(self, llm_fn: Callable[[str], str]) -> None:
+         """Set the LLM function."""
+         self._llm_fn = llm_fn
+
+     def register_document(
+         self,
+         doc_id: str,
+         skeleton: dict[str, SkeletonNode],
+         kv_store: KVStore,
+         navigator: Any = None,
+     ) -> None:
+         """
+         Register a document's resources for cross-document queries.
+
+         Args:
+             doc_id: Document ID.
+             skeleton: Skeleton index for the document.
+             kv_store: KV store with document content.
+             navigator: Optional pre-configured navigator.
+         """
+         self._skeletons[doc_id] = skeleton
+         self._kv_stores[doc_id] = kv_store
+
+         if navigator:
+             self.navigators[doc_id] = navigator
+
+         logger.info("document_registered", doc_id=doc_id)
+
+     def query(self, question: str) -> CrossDocAnswer:
+         """
+         Execute a cross-document query.
+
+         Args:
+             question: The user's question.
+
+         Returns:
+             CrossDocAnswer with synthesized result.
+         """
+         trace = []
+
+         # Step 1: Extract entities from query
+         trace.append({
+             "step": "extract_entities",
+             "timestamp": datetime.now(timezone.utc).isoformat(),
+         })
+
+         query_analysis = self._analyze_query(question)
+
+         trace.append({
+             "step": "query_analyzed",
+             "entities": query_analysis.entities_mentioned,
+             "type": query_analysis.query_type,
+         })
+
+         # Step 2: Resolve entities to documents
+         trace.append({
+             "step": "resolve_entities",
+             "timestamp": datetime.now(timezone.utc).isoformat(),
+         })
+
+         doc_entities = self._resolve_entities_to_documents(query_analysis)
+
+         trace.append({
+             "step": "entities_resolved",
+             "doc_count": len(doc_entities),
+             "documents": list(doc_entities.keys()),
+         })
+
+         # Step 3: Plan retrieval
+         trace.append({
+             "step": "plan_retrieval",
+             "timestamp": datetime.now(timezone.utc).isoformat(),
+         })
+
+         retrieval_plan = self._plan_retrieval(question, query_analysis, doc_entities)
+
+         # Step 4: Execute per-document navigation
+         trace.append({
+             "step": "execute_navigation",
+             "timestamp": datetime.now(timezone.utc).isoformat(),
+         })
+
+         document_results = self._execute_navigation(retrieval_plan)
+
+         trace.append({
+             "step": "navigation_complete",
+             "results_count": len(document_results),
+         })
+
+         # Step 5: Synthesize cross-document answer
+         trace.append({
+             "step": "synthesize",
+             "timestamp": datetime.now(timezone.utc).isoformat(),
+         })
+
+         answer = self._synthesize_answer(
+             question,
+             query_analysis,
+             document_results,
+             doc_entities,
+         )
+
+         answer.trace = trace
+
+         logger.info(
+             "cross_doc_query_complete",
+             query=question[:100],
+             documents=len(document_results),
+             confidence=answer.confidence,
+         )
+
+         return answer
+
+     def _analyze_query(self, question: str) -> CrossDocQuery:
+         """
+         Analyze the query to extract entities and determine query type.
+
+         Args:
+             question: The user's question.
+
+         Returns:
+             CrossDocQuery with extracted information.
+         """
+         result = CrossDocQuery(original_query=question)
+
+         if not self._llm_fn:
+             # Basic extraction without LLM
+             result.query_type = "general"
+             return result
+
+         try:
+             prompt = QUERY_ENTITY_EXTRACTION_PROMPT.format(query=question)
+             response = self._llm_fn(prompt)
+
+             # Parse JSON response
+             json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', response)
+             if json_match:
+                 json_str = json_match.group(1)
+             else:
+                 json_match = re.search(r'\{[\s\S]*\}', response)
+                 json_str = json_match.group(0) if json_match else "{}"
+
+             parsed = json.loads(json_str)
+
+             # Extract entity names
+             entities = parsed.get("entities", [])
+             result.entities_mentioned = [e.get("name", "") for e in entities if e.get("name")]
+             result.query_type = parsed.get("query_type", "general")
+             result.documents_mentioned = parsed.get("documents_referenced", [])
+
+             logger.debug(
+                 "query_analyzed",
+                 entities=result.entities_mentioned,
+                 type=result.query_type,
+             )
+
+         except Exception as e:
+             logger.warning("query_analysis_failed", error=str(e))
+
+         return result
+
+     def _resolve_entities_to_documents(
+         self,
+         query: CrossDocQuery,
+     ) -> dict[str, list[Entity]]:
+         """
+         Resolve mentioned entities to their appearances in documents.
+
+         Args:
+             query: Analyzed query with entity mentions.
+
+         Returns:
+             Dict mapping doc_id to list of entities found.
+         """
+         doc_entities: dict[str, list[Entity]] = {}
+
+         for entity_name in query.entities_mentioned:
+             # Search knowledge graph for this entity
+             entities = self.kg.find_entities_by_name(entity_name, fuzzy=True)
+
+             for entity in entities:
+                 # Get all documents where this entity appears
+                 for doc_id in entity.document_ids:
+                     if doc_id not in doc_entities:
+                         doc_entities[doc_id] = []
+                     if entity not in doc_entities[doc_id]:
+                         doc_entities[doc_id].append(entity)
+
+                 # Also check linked entities across documents
+                 linked = self.kg.find_entity_across_documents(entity.id)
+                 for linked_entity in linked:
+                     for doc_id in linked_entity.document_ids:
+                         if doc_id not in doc_entities:
+                             doc_entities[doc_id] = []
+                         if linked_entity not in doc_entities[doc_id]:
+                             doc_entities[doc_id].append(linked_entity)
+
+         return doc_entities
+
+     def _plan_retrieval(
+         self,
+         question: str,
+         query: CrossDocQuery,
+         doc_entities: dict[str, list[Entity]],
+     ) -> list[dict[str, Any]]:
+         """
+         Plan the retrieval strategy for each document.
+
+         Args:
+             question: Original question.
+             query: Analyzed query.
+             doc_entities: Entities by document.
+
+         Returns:
+             List of retrieval tasks.
+         """
+         tasks = []
+
+         for doc_id, entities in doc_entities.items():
+             # Get entity names for this document
+             entity_names = [e.canonical_name for e in entities]
+
+             # Create sub-query focused on this document's entities
+             if query.query_type == "entity_tracking":
+                 sub_query = f"What information is there about {', '.join(entity_names[:3])}?"
+             elif query.query_type == "comparison":
+                 sub_query = f"Extract the relevant details for comparison: {question}"
+             elif query.query_type == "timeline":
+                 sub_query = f"What events involving {', '.join(entity_names[:3])} and when did they occur?"
+             else:
+                 sub_query = question
+
+             # Get node IDs where entities are mentioned
+             target_nodes = set()
+             for entity in entities:
+                 target_nodes.update(entity.node_ids)
+
+             tasks.append({
+                 "doc_id": doc_id,
+                 "sub_query": sub_query,
+                 "entities": entities,
+                 "target_nodes": list(target_nodes),
+             })
+
+         return tasks
+
+     def _execute_navigation(
+         self,
+         tasks: list[dict[str, Any]],
+     ) -> list[DocumentResult]:
+         """
+         Execute navigation for each document task.
+
+         Args:
+             tasks: List of retrieval tasks.
+
+         Returns:
+             List of per-document results.
+         """
+         results = []
+
+         for task in tasks:
+             doc_id = task["doc_id"]
+
+             # Check if we have a navigator for this document
+             if doc_id in self.navigators:
+                 navigator = self.navigators[doc_id]
+                 result = self._navigate_with_navigator(task, navigator)
+             elif doc_id in self._kv_stores:
+                 # Direct content retrieval from target nodes
+                 result = self._direct_content_retrieval(task)
+             else:
+                 logger.warning("no_navigator_for_doc", doc_id=doc_id)
+                 result = DocumentResult(
+                     doc_id=doc_id,
+                     doc_title=doc_id,
+                     answer="Document not accessible",
+                     confidence=0.0,
+                 )
+
+             results.append(result)
+
+         return results
+
+     def _navigate_with_navigator(
+         self,
+         task: dict[str, Any],
+         navigator: Any,
+     ) -> DocumentResult:
+         """
+         Execute navigation using a document navigator.
+
+         Args:
+             task: Retrieval task.
+             navigator: Document navigator instance.
+
+         Returns:
+             DocumentResult.
+         """
+         doc_id = task["doc_id"]
+
+         try:
+             nav_result = navigator.navigate(task["sub_query"])
+
+             return DocumentResult(
+                 doc_id=doc_id,
+                 doc_title=doc_id,
+                 answer=nav_result.get("answer", ""),
+                 evidence=nav_result.get("variables", []),
+                 entities_found=task["entities"],
+                 confidence=nav_result.get("confidence", 0.5),
+             )
+
+         except Exception as e:
+             logger.error("navigation_failed", doc_id=doc_id, error=str(e))
+             return DocumentResult(
+                 doc_id=doc_id,
+                 doc_title=doc_id,
+                 answer=f"Error: {str(e)}",
+                 confidence=0.0,
+             )
+
+     def _direct_content_retrieval(
+         self,
+         task: dict[str, Any],
+     ) -> DocumentResult:
+         """
+         Retrieve content directly from target nodes.
+
+         Args:
+             task: Retrieval task.
+
+         Returns:
+             DocumentResult.
+         """
+         doc_id = task["doc_id"]
+         kv_store = self._kv_stores.get(doc_id)
+
+         if not kv_store:
+             return DocumentResult(
+                 doc_id=doc_id,
+                 doc_title=doc_id,
+                 answer="Content not available",
+                 confidence=0.0,
+             )
+
+         # Retrieve content from target nodes
+         evidence = []
+         for node_id in task["target_nodes"]:
+             content = kv_store.get(node_id)
+             if content:
+                 evidence.append(content)
+
+         if not evidence:
+             return DocumentResult(
+                 doc_id=doc_id,
+                 doc_title=doc_id,
+                 answer="No relevant content found",
+                 confidence=0.0,
+             )
+
+         # Synthesize answer from evidence if we have LLM
+         if self._llm_fn:
+             entity_names = [e.canonical_name for e in task["entities"]]
+
+             synthesis_prompt = f"""Based on the following content, answer the question.
+
+ Question: {task['sub_query']}
+
+ Focus on: {', '.join(entity_names)}
+
+ Content:
+ {chr(10).join(f'--- Section ---{chr(10)}{e}' for e in evidence[:5])}
+
+ Answer:"""
+
+             try:
+                 answer = self._llm_fn(synthesis_prompt)
+             except Exception as e:
+                 answer = f"Error synthesizing: {str(e)}"
+         else:
+             answer = "\n\n".join(evidence[:3])
+
+         return DocumentResult(
+             doc_id=doc_id,
+             doc_title=doc_id,
+             answer=answer,
+             evidence=evidence,
+             entities_found=task["entities"],
+             confidence=0.7 if evidence else 0.0,
+         )
+
+     def _synthesize_answer(
+         self,
+         question: str,
+         query: CrossDocQuery,
+         results: list[DocumentResult],
+         doc_entities: dict[str, list[Entity]],
+     ) -> CrossDocAnswer:
+         """
+         Synthesize the final cross-document answer.
+
+         Args:
+             question: Original question.
+             query: Analyzed query.
+             results: Per-document results.
+             doc_entities: Entities by document.
+
+         Returns:
+             Final CrossDocAnswer.
+         """
+         if not results:
+             return CrossDocAnswer(
+                 query=question,
+                 answer="No relevant documents found for this query.",
+                 confidence=0.0,
+             )
+
+         # Collect all entities involved
+         all_entities = []
+         for entities in doc_entities.values():
+             all_entities.extend(entities)
+
+         # Get relationships between entities
+         relationships = []
+         entity_ids = {e.id for e in all_entities}
+         for entity_id in entity_ids:
+             rels = self.kg.get_entity_relationships(entity_id)
+             for rel in rels:
+                 if rel.target_id in entity_ids or rel.source_id in entity_ids:
+                     if rel not in relationships:
+                         relationships.append(rel)
+
+         # Calculate confidence
+         avg_confidence = sum(r.confidence for r in results) / len(results) if results else 0.0
+
+         # Synthesize based on query type
+         if not self._llm_fn:
+             # Simple concatenation without LLM
+             answer = self._simple_synthesis(question, results)
+         elif query.query_type == "comparison":
+             answer = self._synthesize_comparison(question, results)
+         elif query.query_type == "timeline":
+             answer = self._synthesize_timeline(question, results, all_entities)
+         elif query.query_type == "entity_tracking":
+             answer = self._synthesize_entity_tracking(question, results, all_entities)
+         else:
+             answer = self._synthesize_general(question, results)
+
+         return CrossDocAnswer(
+             query=question,
+             answer=answer,
+             document_results=results,
+             entities_involved=list({e.id: e for e in all_entities}.values()),
+             relationships_used=relationships,
+             confidence=avg_confidence,
+         )
+
+     def _simple_synthesis(
+         self,
+         question: str,
+         results: list[DocumentResult],
+     ) -> str:
+         """Simple synthesis without LLM."""
+         parts = []
+         for result in results:
+             if result.answer:
+                 parts.append(f"**{result.doc_title}**:\n{result.answer}")
+         return "\n\n".join(parts) if parts else "No answers found."
+
+     def _synthesize_comparison(
+         self,
+         question: str,
+         results: list[DocumentResult],
+     ) -> str:
+         """Synthesize a comparison answer."""
+         if not self._llm_fn:
+             return self._simple_synthesis(question, results)
+
+         results_text = "\n\n".join([
+             f"Document: {r.doc_title}\nFindings: {r.answer}"
+             for r in results
+         ])
+
+         prompt = f"""Compare the following information from multiple documents.
+
+ Question: {question}
+
+ Document findings:
+ {results_text}
+
+ Provide a structured comparison highlighting:
+ 1. Key similarities
+ 2. Key differences
+ 3. Summary
+
+ Comparison:"""
+
+         try:
+             return self._llm_fn(prompt)
+         except Exception as e:
+             return f"Error: {str(e)}\n\n{self._simple_synthesis(question, results)}"
+
+     def _synthesize_timeline(
+         self,
+         question: str,
+         results: list[DocumentResult],
+         entities: list[Entity],
+     ) -> str:
+         """Synthesize a timeline answer."""
+         if not self._llm_fn:
+             return self._simple_synthesis(question, results)
+
+         results_text = "\n\n".join([
+             f"Document: {r.doc_title}\nEvents: {r.answer}"
+             for r in results
+         ])
+
+         entity_names = ", ".join([e.canonical_name for e in entities[:5]])
+
+         prompt = f"""Construct a timeline of events from multiple documents.
+
+ Question: {question}
+
+ Key entities: {entity_names}
+
+ Document findings:
+ {results_text}
+
+ Provide a chronological timeline of events:"""
+
+         try:
+             return self._llm_fn(prompt)
+         except Exception as e:
+             return f"Error: {str(e)}\n\n{self._simple_synthesis(question, results)}"
+
+     def _synthesize_entity_tracking(
+         self,
+         question: str,
+         results: list[DocumentResult],
+         entities: list[Entity],
+     ) -> str:
+         """Synthesize an entity tracking answer."""
+         if not self._llm_fn:
+             return self._simple_synthesis(question, results)
+
+         results_text = "\n\n".join([
+             f"Document: {r.doc_title}\nMentions: {r.answer}"
+             for r in results
+         ])
+
+         entity_names = ", ".join([e.canonical_name for e in entities[:5]])
+
+         prompt = f"""Track the following entities across multiple documents.
+
+ Question: {question}
+
+ Entities being tracked: {entity_names}
+
+ Document findings:
+ {results_text}
+
+ Provide a comprehensive view of what happens to these entities across all documents:"""
+
+         try:
+             return self._llm_fn(prompt)
+         except Exception as e:
+             return f"Error: {str(e)}\n\n{self._simple_synthesis(question, results)}"
+
+     def _synthesize_general(
+         self,
+         question: str,
+         results: list[DocumentResult],
+     ) -> str:
+         """Synthesize a general cross-document answer."""
+         if not self._llm_fn:
+             return self._simple_synthesis(question, results)
+
+         results_text = "\n\n".join([
+             f"Document: {r.doc_title}\nContent: {r.answer}"
+             for r in results
+         ])
+
+         prompt = f"""Answer the question based on information from multiple documents.
+
+ Question: {question}
+
+ Document findings:
+ {results_text}
+
+ Synthesized answer:"""
+
+         try:
+             return self._llm_fn(prompt)
+         except Exception as e:
+             return f"Error: {str(e)}\n\n{self._simple_synthesis(question, results)}"
+
+
+ # =============================================================================
+ # Factory Functions
+ # =============================================================================
+
+
+ def create_cross_doc_navigator(
+     knowledge_graph: KnowledgeGraph,
+ ) -> CrossDocNavigator:
+     """
+     Create a cross-document navigator.
+
+     Args:
+         knowledge_graph: Knowledge graph with entities.
+
+     Returns:
+         Configured CrossDocNavigator.
+     """
+     navigator = CrossDocNavigator(knowledge_graph)
+
+     # Configure LLM
+     try:
+         from rnsr.llm import get_llm
+         llm = get_llm()
+         navigator.set_llm_function(lambda p: str(llm.complete(p)))
+     except Exception as e:
+         logger.warning("llm_config_failed", error=str(e))
+
+     return navigator
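
For context, here is a minimal smoke-test sketch of the class above (not part of the package diff). It drives CrossDocNavigator.query() end to end. Because the navigator's dependencies are duck-typed, the graph only needs find_entities_by_name, find_entity_across_documents, and get_entity_relationships, and the KV store only needs get(); StubEntity, FakeGraph, FakeKV, and stub_llm are hypothetical stand-ins, not rnsr APIs.

```python
from dataclasses import dataclass, field

from rnsr.agent.cross_doc_navigator import CrossDocNavigator


@dataclass
class StubEntity:
    # Only the fields CrossDocNavigator actually reads.
    id: str
    canonical_name: str
    document_ids: list = field(default_factory=list)
    node_ids: list = field(default_factory=list)


class FakeGraph:
    """Duck-typed stand-in for KnowledgeGraph."""

    def __init__(self, entities):
        self.entities = entities

    def find_entities_by_name(self, name, fuzzy=True):
        return [e for e in self.entities if name.lower() in e.canonical_name.lower()]

    def find_entity_across_documents(self, entity_id):
        return []  # no cross-document links in this toy graph

    def get_entity_relationships(self, entity_id):
        return []


class FakeKV:
    """Duck-typed stand-in for KVStore: only get() is called."""

    def __init__(self, data):
        self.data = data

    def get(self, node_id):
        return self.data.get(node_id)


def stub_llm(prompt: str) -> str:
    # The entity-extraction prompt ends with "Respond with JSON only:";
    # every other prompt is a synthesis prompt expecting free text.
    if "Respond with JSON only" in prompt:
        return ('{"entities": [{"name": "Acme Corp", "type": "ORGANIZATION"}], '
                '"query_type": "entity_tracking", "documents_referenced": []}')
    return "Acme Corp appears as the counterparty in both agreements."


acme = StubEntity("e1", "Acme Corp", document_ids=["doc_a"], node_ids=["n1"])
nav = CrossDocNavigator(FakeGraph([acme]), llm_fn=stub_llm)
nav.register_document("doc_a", skeleton={}, kv_store=FakeKV({"n1": "Acme Corp signs..."}))

answer = nav.query("What happens to Acme Corp across these documents?")
print(answer.answer, answer.confidence)
```

In real use, create_cross_doc_navigator() wires the LLM via rnsr.llm.get_llm(), and documents would be registered with the skeleton indexes and KV stores produced by the rnsr ingestion and indexing pipeline rather than these stubs.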
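
The per-document navigate() contract in _navigate_with_navigator is likewise duck-typed: any object whose navigate(sub_query) returns a dict-like result is accepted, with "answer", "variables" (mapped to DocumentResult.evidence), and "confidence" read via .get() and confidence defaulting to 0.5 when absent. A toy implementation satisfying that contract, assuming nothing beyond what the method reads (EchoNavigator is hypothetical; the diff's concrete navigators live elsewhere, e.g. rnsr/agent/rlm_navigator.py):

```python
from typing import Any


class EchoNavigator:
    """Toy navigator satisfying the duck-typed interface."""

    def navigate(self, sub_query: str) -> dict[str, Any]:
        return {
            "answer": f"(no retrieval performed) {sub_query}",
            "variables": [],    # becomes DocumentResult.evidence
            "confidence": 0.5,  # same value the caller would default to
        }


# Registering it routes that document through _navigate_with_navigator
# instead of the direct KV-store path:
# nav.register_document("doc_b", skeleton={}, kv_store=FakeKV({}), navigator=EchoNavigator())
```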