rnsr-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
rnsr/benchmarks/finance_bench.py
@@ -0,0 +1,147 @@
+ """
+ FinanceBench Dataset Loader for RNSR Evaluation
+
+ FinanceBench is a benchmark for financial question answering (QA) using large language models (LLMs).
+ It assesses the ability of LLMs to answer questions about financial documents, requiring retrieval
+ from complex PDFs (tables, charts, etc.).
+
+ Repository: https://huggingface.co/datasets/PatronusAI/financebench
+ """
+
+ import os
+ import requests
+ import hashlib
+ from pathlib import Path
+ from dataclasses import dataclass
+ from typing import List, Dict, Any, Optional
+
+ import structlog
+ from datasets import load_dataset  # type: ignore
+
+ from rnsr.benchmarks.standard_benchmarks import BenchmarkDataset, BenchmarkQuestion
+
+ logger = structlog.get_logger(__name__)
+
+ CACHE_DIR = Path("rnsr/benchmarks/data/financebench")
+
+ class FinanceBenchLoader:
+     """Loader for the FinanceBench dataset."""
+
+     @staticmethod
+     def _download_pdf(url: str, doc_name: str) -> Optional[Path]:
+         """
+         Download a PDF from a URL and cache it locally.
+         Returns the path to the cached PDF.
+         """
+         if not url:
+             return None
+
+         # Create a safe filename (hash + sanitized original name)
+         url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
+         safe_name = "".join(c for c in doc_name if c.isalnum() or c in (' ', '.', '_', '-')).strip()
+         safe_name = safe_name.replace(" ", "_")
+         if not safe_name.lower().endswith(".pdf"):
+             safe_name += ".pdf"
+
+         file_path = CACHE_DIR / f"{url_hash}_{safe_name}"
+
+         if file_path.exists():
+             return file_path
+
+         try:
+             CACHE_DIR.mkdir(parents=True, exist_ok=True)
+             logger.info("Downloading PDF", url=url, path=str(file_path))
+             response = requests.get(url, timeout=30)
+             response.raise_for_status()
+
+             with open(file_path, "wb") as f:
+                 f.write(response.content)
+
+             return file_path
+         except Exception as e:
+             logger.error("Failed to download PDF", url=url, error=str(e))
+             return None
+
+     @staticmethod
+     def load(
+         split: str = "train",
+         max_samples: Optional[int] = None,
+         download_pdfs: bool = True
+     ) -> BenchmarkDataset:
+         """
+         Load the FinanceBench dataset.
+
+         Args:
+             split: Dataset split to load (usually 'train'; the test split is hidden or identical)
+             max_samples: Max number of questions to load
+             download_pdfs: Whether to download the referenced PDFs
+
+         Returns:
+             BenchmarkDataset containing FinanceBench questions
+         """
+         try:
+             dataset = load_dataset("PatronusAI/financebench", split=split)
+         except Exception as e:
+             logger.error("Failed to load FinanceBench dataset", error=str(e))
+             return BenchmarkDataset(
+                 name="FinanceBench",
+                 description="Financial QA (Failed to load)",
+                 questions=[],
+                 metrics=[],
+                 source_url=""
+             )
+
+         questions: List[BenchmarkQuestion] = []
+
+         # FinanceBench record structure:
+         # question, answer, evidence_text, doc_name, doc_link, id, etc.
+
+         count = 0
+         for item in dataset:
+             if not isinstance(item, dict):
+                 continue
+
+             if max_samples and count >= max_samples:
+                 break
+
+             doc_link = item.get("doc_link")
+             doc_name = item.get("doc_name", "unknown_doc")
+
+             pdf_path = None
+             if download_pdfs and doc_link:
+                 # Some links may be missing or broken; handle gracefully
+                 pdf_path = FinanceBenchLoader._download_pdf(doc_link, doc_name)
+
+             # If the PDF could not be fetched, keep the question anyway
+             # and record the missing file in its metadata.
+
+             # Extract answer - typically a string in FinanceBench
+             answer = item.get("answer", "")
+
+             # Create question object
+             q = BenchmarkQuestion(
+                 id=f"fb_{count}",  # FinanceBench doesn't have stable IDs in some versions
+                 question=item["question"],
+                 answer=str(answer),
+                 supporting_facts=[item.get("evidence_text", "")],
+                 context=[],  # Context is the PDF file, not text chunks
+                 reasoning_type="financial-retrieval",
+                 metadata={
+                     "doc_name": doc_name,
+                     "doc_link": doc_link,
+                     "pdf_path": str(pdf_path) if pdf_path else None,
+                     "page_index": item.get("page_index"),  # Sometimes available
+                     "dataset": "financebench"
+                 }
+             )
+
+             questions.append(q)
+             count += 1
+
+         return BenchmarkDataset(
+             name="FinanceBench",
+             description="Financial Question Answering on Complex PDFs",
+             questions=questions,
+             metrics=["answer_evaluation_llm"],  # Requires LLM-based evaluation
+             source_url="https://huggingface.co/datasets/PatronusAI/financebench"
+         )
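Usage sketch (not part of the wheel): how the loader above might be driven from a script, assuming the `datasets` and `requests` dependencies are installed and network access is available for the PDF downloads.

    from rnsr.benchmarks.finance_bench import FinanceBenchLoader

    # Load a small slice of FinanceBench and cache the referenced PDFs locally.
    dataset = FinanceBenchLoader.load(split="train", max_samples=5, download_pdfs=True)

    for q in dataset.questions:
        # pdf_path is None when the download failed or doc_link was missing.
        print(q.id, q.metadata.get("pdf_path"), q.question[:60])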
rnsr/benchmarks/pdf_merger.py
@@ -0,0 +1,178 @@
+ """
+ PDF Merger Utility for "Chaos Mode" Benchmarking.
+
+ This tool merges multiple random PDFs into a single "Frankenstein" document
+ to test retrieving information from a specific sub-document within a larger,
+ noisy context. This simulates searching through a "Binder" or a merged scan package.
+ """
+
+ import random
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple
+ import structlog
+
+ # Try to import fitz (PyMuPDF)
+ try:
+     import fitz  # type: ignore
+ except ImportError:
+     fitz = None
+
+ logger = structlog.get_logger(__name__)
+
+ class PDFMerger:
+     """Helper to merge PDFs for chaos testing."""
+
+     @staticmethod
+     def merge_pdfs(
+         target_pdf_path: Path,
+         distractor_pdf_paths: List[Path],
+         output_path: Path,
+         insert_position: str = "random"
+     ) -> Dict[str, Any]:
+         """
+         Merge the target PDF with a list of distractor PDFs.
+
+         Args:
+             target_pdf_path: The PDF containing the answer.
+             distractor_pdf_paths: List of irrelevant PDFs.
+             output_path: Where to save the merged file.
+             insert_position: 'start', 'end', or 'random'.
+
+         Returns:
+             Metadata about the merge (page ranges).
+         """
+         if not fitz:
+             raise ImportError("PyMuPDF (fitz) is required for PDF merging. Install 'pymupdf'.")
+
+         merged_doc = fitz.open()
+
+         # Prepare list of docs to merge:
+         # (doc_object, is_target, label)
+         docs_to_merge = []
+
+         # Load distractors
+         for p in distractor_pdf_paths:
+             try:
+                 doc = fitz.open(p)
+                 docs_to_merge.append((doc, False, p.name))
+             except Exception as e:
+                 logger.warning(f"Could not open distractor {p}: {e}")
+
+         # Load target
+         try:
+             target_doc = fitz.open(target_pdf_path)
+             target_entry = (target_doc, True, target_pdf_path.name)
+         except Exception as e:
+             logger.error(f"Could not open target PDF {target_pdf_path}: {e}")
+             return {}
+
+         # Insert target
+         if insert_position == "start":
+             docs_to_merge.insert(0, target_entry)
+         elif insert_position == "end":
+             docs_to_merge.append(target_entry)
+         else:  # random
+             idx = random.randint(0, len(docs_to_merge))
+             docs_to_merge.insert(idx, target_entry)
+
+         # Perform merge
+         current_page = 0
+         target_page_range = (0, 0)
+
+         for doc, is_target, label in docs_to_merge:
+             page_count = doc.page_count
+             merged_doc.insert_pdf(doc)
+
+             start = current_page
+             end = current_page + page_count - 1
+
+             if is_target:
+                 target_page_range = (start, end)
+
+             current_page += page_count
+             doc.close()
+
+         # Save
+         merged_doc.save(output_path)
+         merged_doc.close()
+
+         return {
+             "merged_file": str(output_path),
+             "target_filename": target_pdf_path.name,
+             "target_page_range": target_page_range,  # 0-indexed, inclusive
+             "total_pages": current_page
+         }
+
+     @staticmethod
+     def create_chaos_dataset(
+         base_dataset_questions,
+         pool_of_pdfs: List[Path],
+         output_dir: Path,
+         num_distractors: int = 3
+     ):
+         """
+         Take a list of BenchmarkQuestions (e.g. from FinanceBench) and create
+         a new version in which each source PDF is merged with random distractors.
+         """
+         output_dir.mkdir(parents=True, exist_ok=True)
+         chaos_questions = []
+
+         for q in base_dataset_questions:
+             try:
+                 metadata = q.metadata or {}
+                 original_pdf = metadata.get("pdf_path")
+
+                 if not original_pdf:
+                     continue
+
+                 target_path = Path(original_pdf)
+                 if not target_path.exists():
+                     logger.warning(f"Original PDF not found: {target_path}")
+                     continue
+
+                 # Select random distractors (excluding the target itself)
+                 candidates = [p for p in pool_of_pdfs if p.name != target_path.name]
+                 if len(candidates) < num_distractors:
+                     distractors = candidates
+                 else:
+                     distractors = random.sample(candidates, num_distractors)
+
+                 # Merge
+                 merged_filename = f"chaos_{q.id}.pdf"
+                 merged_path = output_dir / merged_filename
+
+                 merge_info = PDFMerger.merge_pdfs(
+                     target_path, distractors, merged_path
+                 )
+
+                 # Clone question and update metadata
+                 new_meta = metadata.copy()
+                 new_meta.update({
+                     "original_pdf_path": str(target_path),
+                     "pdf_path": str(merged_path),  # Point to the merged (chaotic) file
+                     "chaos_mode": True,
+                     "target_page_range": merge_info["target_page_range"],
+                     "distractors": [d.name for d in distractors]
+                 })
+
+                 # The question text is left unchanged: no hint is added that the
+                 # answer document is buried inside a merged binder.
+
+                 # Build a new question object (BenchmarkQuestion instances may be frozen)
+                 from rnsr.benchmarks.standard_benchmarks import BenchmarkQuestion
+                 new_q = BenchmarkQuestion(
+                     id=f"{q.id}_chaos",
+                     question=q.question,
+                     answer=q.answer,
+                     supporting_facts=q.supporting_facts,
+                     context=q.context,  # Usually empty for PDF benchmarks
+                     reasoning_type=q.reasoning_type,
+                     metadata=new_meta
+                 )
+                 chaos_questions.append(new_q)
+
+             except Exception as e:
+                 logger.error(f"Failed to process chaos for question {q.id}: {e}")
+
+         return chaos_questions
+
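Usage sketch (not part of the wheel): combining the two modules above to build a "chaos mode" variant of FinanceBench, assuming the PDFs cached by the loader double as the distractor pool and that an output directory of your choosing is writable.

    from pathlib import Path
    from rnsr.benchmarks.finance_bench import FinanceBenchLoader, CACHE_DIR
    from rnsr.benchmarks.pdf_merger import PDFMerger

    dataset = FinanceBenchLoader.load(max_samples=10)
    pool = list(CACHE_DIR.glob("*.pdf"))  # cached FinanceBench PDFs act as distractors

    chaos_questions = PDFMerger.create_chaos_dataset(
        base_dataset_questions=dataset.questions,
        pool_of_pdfs=pool,
        output_dir=Path("rnsr/benchmarks/data/chaos"),  # placeholder output directory
        num_distractors=3,
    )
    # Each chaos question points at a merged PDF and records where the original
    # document landed via metadata["target_page_range"].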
rnsr/benchmarks/performance.py
@@ -0,0 +1,321 @@
+ """
+ Performance Benchmarks
+
+ Measures:
+ - Ingestion time (PDF → Tree)
+ - Indexing time (Tree → Skeleton + KV)
+ - Query latency (Question → Answer)
+ - Memory usage
+ - Throughput (nodes/sec, queries/sec)
+ """
+
+ from __future__ import annotations
+
+ import gc
+ import time
+ import tracemalloc
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Any, Callable
+
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+
+
+ @dataclass
+ class BenchmarkResult:
+     """Result from a single benchmark run."""
+
+     name: str
+     duration_seconds: float
+     memory_peak_mb: float
+     memory_current_mb: float
+     throughput: float | None = None
+     throughput_unit: str = ""
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+     @property
+     def duration_ms(self) -> float:
+         return self.duration_seconds * 1000
+
+     def __str__(self) -> str:
+         parts = [
+             f"{self.name}:",
+             f" Time: {self.duration_ms:.2f}ms",
+             f" Memory Peak: {self.memory_peak_mb:.2f}MB",
+         ]
+         if self.throughput:
+             parts.append(f" Throughput: {self.throughput:.2f} {self.throughput_unit}")
+         return "\n".join(parts)
+
+
+ @dataclass
+ class PerformanceBenchmark:
+     """Collection of performance benchmark results."""
+
+     ingestion: BenchmarkResult | None = None
+     indexing: BenchmarkResult | None = None
+     query_cold: BenchmarkResult | None = None
+     query_warm: BenchmarkResult | None = None
+     total_time_seconds: float = 0.0
+
+     def summary(self) -> dict[str, Any]:
+         """Get summary statistics."""
+         return {
+             "ingestion_ms": self.ingestion.duration_ms if self.ingestion else None,
+             "indexing_ms": self.indexing.duration_ms if self.indexing else None,
+             "query_cold_ms": self.query_cold.duration_ms if self.query_cold else None,
+             "query_warm_ms": self.query_warm.duration_ms if self.query_warm else None,
+             "total_time_seconds": self.total_time_seconds,
+             "peak_memory_mb": max(
+                 r.memory_peak_mb for r in [self.ingestion, self.indexing, self.query_cold]
+                 if r is not None
+             ) if any([self.ingestion, self.indexing, self.query_cold]) else 0,
+         }
+
+     def __str__(self) -> str:
+         lines = ["=== Performance Benchmark Results ==="]
+         for result in [self.ingestion, self.indexing, self.query_cold, self.query_warm]:
+             if result:
+                 lines.append(str(result))
+         lines.append(f"\nTotal Time: {self.total_time_seconds:.2f}s")
+         return "\n".join(lines)
+
+
+ def _measure_execution(
+     func: Callable[[], Any],
+     name: str,
+     warmup_runs: int = 0,
+ ) -> tuple[Any, BenchmarkResult]:
+     """
+     Execute a function and measure time and memory.
+
+     Args:
+         func: Function to execute
+         name: Name for the benchmark
+         warmup_runs: Number of warmup runs before measurement
+
+     Returns:
+         Tuple of (function result, BenchmarkResult)
+     """
+     # Warmup
+     for _ in range(warmup_runs):
+         func()
+         gc.collect()
+
+     # Force garbage collection before measurement
+     gc.collect()
+
+     # Start memory tracking
+     tracemalloc.start()
+
+     # Time the execution
+     start_time = time.perf_counter()
+     result = func()
+     end_time = time.perf_counter()
+
+     # Get memory stats
+     current, peak = tracemalloc.get_traced_memory()
+     tracemalloc.stop()
+
+     benchmark_result = BenchmarkResult(
+         name=name,
+         duration_seconds=end_time - start_time,
+         memory_peak_mb=peak / 1024 / 1024,
+         memory_current_mb=current / 1024 / 1024,
+     )
+
+     return result, benchmark_result
+
+
+ def run_ingestion_benchmark(
+     pdf_path: Path | str,
+     iterations: int = 1,
+ ) -> BenchmarkResult:
+     """
+     Benchmark PDF ingestion.
+
+     Args:
+         pdf_path: Path to PDF file
+         iterations: Number of iterations to average
+
+     Returns:
+         BenchmarkResult with timing and memory stats
+     """
+     from rnsr.ingestion import ingest_document
+
+     pdf_path = Path(pdf_path)
+
+     if not pdf_path.exists():
+         raise FileNotFoundError(f"PDF not found: {pdf_path}")
+
+     # Get file size
+     file_size_mb = pdf_path.stat().st_size / 1024 / 1024
+
+     results: list[BenchmarkResult] = []
+     final_result = None
+
+     for i in range(iterations):
+         logger.info("ingestion_benchmark_iteration", iteration=i + 1, total=iterations)
+
+         def run_ingestion():
+             return ingest_document(pdf_path)
+
+         ingestion_result, benchmark = _measure_execution(
+             run_ingestion,
+             f"Ingestion (iter {i + 1})",
+         )
+
+         # Calculate throughput (nodes/sec)
+         if hasattr(ingestion_result, 'tree') and ingestion_result.tree:
+             page_count = ingestion_result.tree.total_nodes
+             benchmark.throughput = page_count / benchmark.duration_seconds
+             benchmark.throughput_unit = "nodes/sec"
+
+         benchmark.metadata = {
+             "file": pdf_path.name,
+             "file_size_mb": file_size_mb,
+             "tier_used": ingestion_result.tier_used if hasattr(ingestion_result, 'tier_used') else None,
+         }
+
+         results.append(benchmark)
+         final_result = ingestion_result
+
+     # Average results
+     avg_duration = sum(r.duration_seconds for r in results) / len(results)
+     max_memory = max(r.memory_peak_mb for r in results)
+     avg_throughput = sum(r.throughput or 0 for r in results) / len(results)
+
+     return BenchmarkResult(
+         name="Ingestion",
+         duration_seconds=avg_duration,
+         memory_peak_mb=max_memory,
+         memory_current_mb=results[-1].memory_current_mb,
+         throughput=avg_throughput if avg_throughput > 0 else None,
+         throughput_unit="nodes/sec",
+         metadata={
+             "iterations": iterations,
+             "file": pdf_path.name,
+             "file_size_mb": file_size_mb,
+         },
+     )
+
+
+ def run_query_benchmark(
+     questions: list[str],
+     skeleton: dict,
+     kv_store: Any,
+     warmup: bool = True,
+ ) -> tuple[BenchmarkResult, BenchmarkResult]:
+     """
+     Benchmark query execution.
+
+     Args:
+         questions: List of questions to benchmark
+         skeleton: Skeleton index
+         kv_store: KV store with content
+         warmup: Whether to do a warmup run
+
+     Returns:
+         Tuple of (cold_start_result, warm_result)
+     """
+     from rnsr.agent import run_navigator
+
+     if not questions:
+         raise ValueError("At least one question required")
+
+     # Cold start benchmark (first query)
+     def run_cold_query():
+         return run_navigator(
+             question=questions[0],
+             skeleton=skeleton,
+             kv_store=kv_store,
+             max_iterations=10,
+         )
+
+     _, cold_result = _measure_execution(run_cold_query, "Query (Cold Start)")
+     cold_result.metadata = {"question": questions[0][:50]}
+
+     # Warm benchmark (average of remaining queries)
+     warm_times: list[float] = []
+     warm_memories: list[float] = []
+
+     for q in questions[1:] if len(questions) > 1 else questions:
+         def run_warm_query():
+             return run_navigator(
+                 question=q,
+                 skeleton=skeleton,
+                 kv_store=kv_store,
+                 max_iterations=10,
+             )
+
+         _, warm_bench = _measure_execution(run_warm_query, "Query (Warm)")
+         warm_times.append(warm_bench.duration_seconds)
+         warm_memories.append(warm_bench.memory_peak_mb)
+
+     warm_result = BenchmarkResult(
+         name="Query (Warm)",
+         duration_seconds=sum(warm_times) / len(warm_times) if warm_times else 0,
+         memory_peak_mb=max(warm_memories) if warm_memories else 0,
+         memory_current_mb=warm_memories[-1] if warm_memories else 0,
+         throughput=len(warm_times) / sum(warm_times) if sum(warm_times) > 0 else None,
+         throughput_unit="queries/sec",
+         metadata={"query_count": len(warm_times)},
+     )
+
+     return cold_result, warm_result
+
+
+ def run_end_to_end_benchmark(
+     pdf_path: Path | str,
+     questions: list[str],
+ ) -> PerformanceBenchmark:
+     """
+     Run a complete end-to-end benchmark.
+
+     Args:
+         pdf_path: Path to PDF file
+         questions: List of test questions
+
+     Returns:
+         PerformanceBenchmark with all results
+     """
+     from rnsr.indexing import build_skeleton_index
+     from rnsr.ingestion import ingest_document
+
+     pdf_path = Path(pdf_path)
+     total_start = time.perf_counter()
+
+     benchmark = PerformanceBenchmark()
+
+     # 1. Ingestion benchmark
+     logger.info("benchmark_ingestion_start", file=pdf_path.name)
+
+     def do_ingest():
+         return ingest_document(pdf_path)
+
+     ingestion_result, benchmark.ingestion = _measure_execution(do_ingest, "Ingestion")
+     benchmark.ingestion.metadata = {"file": pdf_path.name}
+
+     # 2. Indexing benchmark
+     logger.info("benchmark_indexing_start")
+
+     def do_index():
+         return build_skeleton_index(ingestion_result.tree)
+
+     (skeleton, kv_store), benchmark.indexing = _measure_execution(do_index, "Indexing")
+     benchmark.indexing.metadata = {"node_count": len(skeleton)}
+
+     # 3. Query benchmarks
+     if questions:
+         logger.info("benchmark_query_start", question_count=len(questions))
+         benchmark.query_cold, benchmark.query_warm = run_query_benchmark(
+             questions, skeleton, kv_store
+         )
+
+     benchmark.total_time_seconds = time.perf_counter() - total_start
+
+     logger.info("benchmark_complete", total_seconds=benchmark.total_time_seconds)
+
+     return benchmark
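Usage sketch (not part of the wheel): running the end-to-end benchmark above against a single document; the PDF path and questions are placeholders.

    from rnsr.benchmarks.performance import run_end_to_end_benchmark

    result = run_end_to_end_benchmark(
        pdf_path="reports/example_10k.pdf",       # placeholder path
        questions=[
            "What was total revenue in FY2023?",  # placeholder questions
            "How many employees are reported?",
        ],
    )
    print(result)            # formatted via PerformanceBenchmark.__str__
    print(result.summary())  # dict of per-stage timings and peak memory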