rnsr-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rnsr/__init__.py +118 -0
- rnsr/__main__.py +242 -0
- rnsr/agent/__init__.py +218 -0
- rnsr/agent/cross_doc_navigator.py +767 -0
- rnsr/agent/graph.py +1557 -0
- rnsr/agent/llm_cache.py +575 -0
- rnsr/agent/navigator_api.py +497 -0
- rnsr/agent/provenance.py +772 -0
- rnsr/agent/query_clarifier.py +617 -0
- rnsr/agent/reasoning_memory.py +736 -0
- rnsr/agent/repl_env.py +709 -0
- rnsr/agent/rlm_navigator.py +2108 -0
- rnsr/agent/self_reflection.py +602 -0
- rnsr/agent/variable_store.py +308 -0
- rnsr/benchmarks/__init__.py +118 -0
- rnsr/benchmarks/comprehensive_benchmark.py +733 -0
- rnsr/benchmarks/evaluation_suite.py +1210 -0
- rnsr/benchmarks/finance_bench.py +147 -0
- rnsr/benchmarks/pdf_merger.py +178 -0
- rnsr/benchmarks/performance.py +321 -0
- rnsr/benchmarks/quality.py +321 -0
- rnsr/benchmarks/runner.py +298 -0
- rnsr/benchmarks/standard_benchmarks.py +995 -0
- rnsr/client.py +560 -0
- rnsr/document_store.py +394 -0
- rnsr/exceptions.py +74 -0
- rnsr/extraction/__init__.py +172 -0
- rnsr/extraction/candidate_extractor.py +357 -0
- rnsr/extraction/entity_extractor.py +581 -0
- rnsr/extraction/entity_linker.py +825 -0
- rnsr/extraction/grounded_extractor.py +722 -0
- rnsr/extraction/learned_types.py +599 -0
- rnsr/extraction/models.py +232 -0
- rnsr/extraction/relationship_extractor.py +600 -0
- rnsr/extraction/relationship_patterns.py +511 -0
- rnsr/extraction/relationship_validator.py +392 -0
- rnsr/extraction/rlm_extractor.py +589 -0
- rnsr/extraction/rlm_unified_extractor.py +990 -0
- rnsr/extraction/tot_validator.py +610 -0
- rnsr/extraction/unified_extractor.py +342 -0
- rnsr/indexing/__init__.py +60 -0
- rnsr/indexing/knowledge_graph.py +1128 -0
- rnsr/indexing/kv_store.py +313 -0
- rnsr/indexing/persistence.py +323 -0
- rnsr/indexing/semantic_retriever.py +237 -0
- rnsr/indexing/semantic_search.py +320 -0
- rnsr/indexing/skeleton_index.py +395 -0
- rnsr/ingestion/__init__.py +161 -0
- rnsr/ingestion/chart_parser.py +569 -0
- rnsr/ingestion/document_boundary.py +662 -0
- rnsr/ingestion/font_histogram.py +334 -0
- rnsr/ingestion/header_classifier.py +595 -0
- rnsr/ingestion/hierarchical_cluster.py +515 -0
- rnsr/ingestion/layout_detector.py +356 -0
- rnsr/ingestion/layout_model.py +379 -0
- rnsr/ingestion/ocr_fallback.py +177 -0
- rnsr/ingestion/pipeline.py +936 -0
- rnsr/ingestion/semantic_fallback.py +417 -0
- rnsr/ingestion/table_parser.py +799 -0
- rnsr/ingestion/text_builder.py +460 -0
- rnsr/ingestion/tree_builder.py +402 -0
- rnsr/ingestion/vision_retrieval.py +965 -0
- rnsr/ingestion/xy_cut.py +555 -0
- rnsr/llm.py +733 -0
- rnsr/models.py +167 -0
- rnsr/py.typed +2 -0
- rnsr-0.1.0.dist-info/METADATA +592 -0
- rnsr-0.1.0.dist-info/RECORD +72 -0
- rnsr-0.1.0.dist-info/WHEEL +5 -0
- rnsr-0.1.0.dist-info/entry_points.txt +2 -0
- rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
- rnsr-0.1.0.dist-info/top_level.txt +1 -0
rnsr/benchmarks/finance_bench.py
@@ -0,0 +1,147 @@
"""
FinanceBench Dataset Loader for RNSR Evaluation

FinanceBench is a benchmark for financial question answering (QA) using large language models (LLMs).
It assesses the ability of LLMs to answer questions about financial documents, requiring retrieval
from complex PDFs (tables, charts, etc.).

Repository: https://huggingface.co/datasets/PatronusAI/financebench
"""

import os
import requests
import hashlib
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Any, Optional

import structlog
from datasets import load_dataset  # type: ignore

from rnsr.benchmarks.standard_benchmarks import BenchmarkDataset, BenchmarkQuestion

logger = structlog.get_logger(__name__)

CACHE_DIR = Path("rnsr/benchmarks/data/financebench")

class FinanceBenchLoader:
    """Loader for the FinanceBench dataset."""

    @staticmethod
    def _download_pdf(url: str, doc_name: str) -> Optional[Path]:
        """
        Download PDF from URL and cache it locally.
        Returns the path to the cached PDF.
        """
        if not url:
            return None

        # Create a safe filename (hash + original name sanitized)
        url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
        safe_name = "".join(c for c in doc_name if c.isalnum() or c in (' ', '.', '_', '-')).strip()
        safe_name = safe_name.replace(" ", "_")
        if not safe_name.lower().endswith(".pdf"):
            safe_name += ".pdf"

        file_path = CACHE_DIR / f"{url_hash}_{safe_name}"

        if file_path.exists():
            return file_path

        try:
            CACHE_DIR.mkdir(parents=True, exist_ok=True)
            logger.info("Downloading PDF", url=url, path=str(file_path))
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            with open(file_path, "wb") as f:
                f.write(response.content)

            return file_path
        except Exception as e:
            logger.error("Failed to download PDF", url=url, error=str(e))
            return None

    @staticmethod
    def load(
        split: str = "train",
        max_samples: Optional[int] = None,
        download_pdfs: bool = True
    ) -> BenchmarkDataset:
        """
        Load the FinanceBench dataset.

        Args:
            split: Dataset split to load (usually 'train' as test is hidden or same)
            max_samples: Max number of questions to load
            download_pdfs: Whether to download the referenced PDFs

        Returns:
            BenchmarkDataset containing FinanceBench questions
        """
        try:
            dataset = load_dataset("PatronusAI/financebench", split=split)
        except Exception as e:
            logger.error("Failed to load FinanceBench dataset", error=str(e))
            return BenchmarkDataset(
                name="FinanceBench",
                description="Financial QA (Failed to load)",
                questions=[],
                metrics=[],
                source_url=""
            )

        questions: List[BenchmarkQuestion] = []

        # FinanceBench structure:
        # question, answer, evidence_text, doc_name, doc_link, id, etc.

        count = 0
        for item in dataset:
            if not isinstance(item, dict):
                continue

            if max_samples and count >= max_samples:
                break

            doc_link = item.get("doc_link")
            doc_name = item.get("doc_name", "unknown_doc")

            pdf_path = None
            if download_pdfs and doc_link:
                # Some links might be missing or broken, handle gracefully
                pdf_path = FinanceBenchLoader._download_pdf(doc_link, doc_name)

            # If we couldn't get the PDF, we might skip or mark it
            # But we'll include it with metadata indicating missing file for now

            # Extract answer - typically a string in FinanceBench
            answer = item.get("answer", "")

            # Create question object
            q = BenchmarkQuestion(
                id=f"fb_{count}",  # FinanceBench doesn't have stable IDs in some versions
                question=item["question"],
                answer=str(answer),
                supporting_facts=[item.get("evidence_text", "")],
                context=[],  # Context is the PDF file, not text chunks
                reasoning_type="financial-retrieval",
                metadata={
                    "doc_name": doc_name,
                    "doc_link": doc_link,
                    "pdf_path": str(pdf_path) if pdf_path else None,
                    "page_index": item.get("page_index"),  # Sometimes available
                    "dataset": "financebench"
                }
            )

            questions.append(q)
            count += 1

        return BenchmarkDataset(
            name="FinanceBench",
            description="Financial Question Answering on Complex PDFs",
            questions=questions,
            metrics=["answer_evaluation_llm"],  # Will need LLM-based eval
            source_url="https://huggingface.co/datasets/PatronusAI/financebench"
        )
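A minimal usage sketch for this loader (not part of the package contents above); it assumes the `datasets` and `requests` dependencies are installed, and the sample size and printed fields are illustrative:

# Illustrative only: load a small FinanceBench slice and inspect the cached PDFs.
from rnsr.benchmarks.finance_bench import FinanceBenchLoader

dataset = FinanceBenchLoader.load(split="train", max_samples=5, download_pdfs=True)
print(dataset.name, len(dataset.questions))
for q in dataset.questions:
    # metadata["pdf_path"] is None when the download failed or was skipped
    print(q.id, q.metadata.get("pdf_path"), q.question[:60])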
rnsr/benchmarks/pdf_merger.py
@@ -0,0 +1,178 @@
"""
PDF Merger Utility for "Chaos Mode" Benchmarking.

This tool merges multiple random PDFs into a single "Frankenstein" document
to test retrieving information from a specific sub-document within a larger,
noisy context. This simulates searching through a "Binder" or a merged scan package.
"""

import random
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import structlog

# Try to import fitz (PyMuPDF)
try:
    import fitz  # type: ignore
except ImportError:
    fitz = None

logger = structlog.get_logger(__name__)

class PDFMerger:
    """Helper to merge PDFs for chaos testing."""

    @staticmethod
    def merge_pdfs(
        target_pdf_path: Path,
        distractor_pdf_paths: List[Path],
        output_path: Path,
        insert_position: str = "random"
    ) -> Dict[str, Any]:
        """
        Merge target_pdf into a list of distractor_pdfs.

        Args:
            target_pdf_path: The PDF containing the answer.
            distractor_pdf_paths: List of irrelevant PDFs.
            output_path: Where to save the merged file.
            insert_position: 'start', 'end', or 'random'.

        Returns:
            Metadata about the merge (page ranges).
        """
        if not fitz:
            raise ImportError("PyMuPDF (fitz) is required for PDF merging. Install 'pymupdf'.")

        merged_doc = fitz.open()

        # Prepare list of docs to merge
        # (doc_object, is_target, label)
        docs_to_merge = []

        # Load distractors
        for p in distractor_pdf_paths:
            try:
                doc = fitz.open(p)
                docs_to_merge.append((doc, False, p.name))
            except Exception as e:
                logger.warning(f"Could not open distractor {p}: {e}")

        # Load target
        try:
            target_doc = fitz.open(target_pdf_path)
            target_entry = (target_doc, True, target_pdf_path.name)
        except Exception as e:
            logger.error(f"Could not open target PDF {target_pdf_path}: {e}")
            return {}

        # Insert target
        if insert_position == "start":
            docs_to_merge.insert(0, target_entry)
        elif insert_position == "end":
            docs_to_merge.append(target_entry)
        else:  # random
            idx = random.randint(0, len(docs_to_merge))
            docs_to_merge.insert(idx, target_entry)

        # Perform merge
        current_page = 0
        target_page_range = (0, 0)

        for doc, is_target, label in docs_to_merge:
            page_count = doc.page_count
            merged_doc.insert_pdf(doc)

            start = current_page
            end = current_page + page_count - 1

            if is_target:
                target_page_range = (start, end)

            current_page += page_count
            doc.close()

        # Save
        merged_doc.save(output_path)
        merged_doc.close()

        return {
            "merged_file": str(output_path),
            "target_filename": target_pdf_path.name,
            "target_page_range": target_page_range,  # 0-indexed, inclusive
            "total_pages": current_page
        }

    @staticmethod
    def create_chaos_dataset(
        base_dataset_questions,
        pool_of_pdfs: List[Path],
        output_dir: Path,
        num_distractors: int = 3
    ):
        """
        Takes a list of BenchmarkQuestions (e.g. from FinanceBench) and creates
        a new version where each source PDF is merged with random distractors.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        chaos_questions = []

        for q in base_dataset_questions:
            try:
                metadata = q.metadata or {}
                original_pdf = metadata.get("pdf_path")

                if not original_pdf:
                    continue

                target_path = Path(original_pdf)
                if not target_path.exists():
                    logger.warning(f"Original PDF not found: {target_path}")
                    continue

                # Select random distractors (excluding self)
                candidates = [p for p in pool_of_pdfs if p.name != target_path.name]
                if len(candidates) < num_distractors:
                    distractors = candidates
                else:
                    distractors = random.sample(candidates, num_distractors)

                # Merge
                merged_filename = f"chaos_{q.id}.pdf"
                merged_path = output_dir / merged_filename

                merge_info = PDFMerger.merge_pdfs(
                    target_path, distractors, merged_path
                )

                # Clone question and update metadata
                new_meta = metadata.copy()
                new_meta.update({
                    "original_pdf_path": str(target_path),
                    "pdf_path": str(merged_path),  # Point to the chaotic file
                    "chaos_mode": True,
                    "target_page_range": merge_info["target_page_range"],
                    "distractors": [d.name for d in distractors]
                })

                # Update question text slightly to hint? No, let's keep it hard.
                # q.question = q.question  # Unchanged

                # Update question object (requires creating new instance as it might be immutable-ish)
                from rnsr.benchmarks.standard_benchmarks import BenchmarkQuestion
                new_q = BenchmarkQuestion(
                    id=f"{q.id}_chaos",
                    question=q.question,
                    answer=q.answer,
                    supporting_facts=q.supporting_facts,
                    context=q.context,  # Usually empty for PDF benchmarks
                    reasoning_type=q.reasoning_type,
                    metadata=new_meta
                )
                chaos_questions.append(new_q)

            except Exception as e:
                logger.error(f"Failed to process chaos for question {q.id}: {e}")

        return chaos_questions
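A minimal "chaos mode" sketch combining the loader above with PDFMerger (not part of the package contents); the distractor pool reuses the FinanceBench cache directory, and the output directory shown is hypothetical:

# Illustrative only: merge each target PDF with random distractors from the cached pool.
from pathlib import Path

from rnsr.benchmarks.finance_bench import FinanceBenchLoader
from rnsr.benchmarks.pdf_merger import PDFMerger

base = FinanceBenchLoader.load(max_samples=10)
pool = list(Path("rnsr/benchmarks/data/financebench").glob("*.pdf"))

chaos_questions = PDFMerger.create_chaos_dataset(
    base.questions,
    pool_of_pdfs=pool,
    output_dir=Path("rnsr/benchmarks/data/chaos"),  # hypothetical output location
    num_distractors=3,
)
for q in chaos_questions:
    # target_page_range records where the answer-bearing pages landed in the merged file
    print(q.id, q.metadata["target_page_range"], q.metadata["pdf_path"])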
rnsr/benchmarks/performance.py
@@ -0,0 +1,321 @@
"""
Performance Benchmarks

Measures:
- Ingestion time (PDF → Tree)
- Indexing time (Tree → Skeleton + KV)
- Query latency (Question → Answer)
- Memory usage
- Throughput (pages/sec, queries/sec)
"""

from __future__ import annotations

import gc
import time
import tracemalloc
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable

import structlog

logger = structlog.get_logger(__name__)


@dataclass
class BenchmarkResult:
    """Result from a single benchmark run."""

    name: str
    duration_seconds: float
    memory_peak_mb: float
    memory_current_mb: float
    throughput: float | None = None
    throughput_unit: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def duration_ms(self) -> float:
        return self.duration_seconds * 1000

    def __str__(self) -> str:
        parts = [
            f"{self.name}:",
            f" Time: {self.duration_ms:.2f}ms",
            f" Memory Peak: {self.memory_peak_mb:.2f}MB",
        ]
        if self.throughput:
            parts.append(f" Throughput: {self.throughput:.2f} {self.throughput_unit}")
        return "\n".join(parts)


@dataclass
class PerformanceBenchmark:
    """Collection of performance benchmark results."""

    ingestion: BenchmarkResult | None = None
    indexing: BenchmarkResult | None = None
    query_cold: BenchmarkResult | None = None
    query_warm: BenchmarkResult | None = None
    total_time_seconds: float = 0.0

    def summary(self) -> dict[str, Any]:
        """Get summary statistics."""
        return {
            "ingestion_ms": self.ingestion.duration_ms if self.ingestion else None,
            "indexing_ms": self.indexing.duration_ms if self.indexing else None,
            "query_cold_ms": self.query_cold.duration_ms if self.query_cold else None,
            "query_warm_ms": self.query_warm.duration_ms if self.query_warm else None,
            "total_time_seconds": self.total_time_seconds,
            "peak_memory_mb": max(
                r.memory_peak_mb for r in [self.ingestion, self.indexing, self.query_cold]
                if r is not None
            ) if any([self.ingestion, self.indexing, self.query_cold]) else 0,
        }

    def __str__(self) -> str:
        lines = ["=== Performance Benchmark Results ==="]
        for result in [self.ingestion, self.indexing, self.query_cold, self.query_warm]:
            if result:
                lines.append(str(result))
        lines.append(f"\nTotal Time: {self.total_time_seconds:.2f}s")
        return "\n".join(lines)


def _measure_execution(
    func: Callable[[], Any],
    name: str,
    warmup_runs: int = 0,
) -> tuple[Any, BenchmarkResult]:
    """
    Execute a function and measure time and memory.

    Args:
        func: Function to execute
        name: Name for the benchmark
        warmup_runs: Number of warmup runs before measurement

    Returns:
        Tuple of (function result, BenchmarkResult)
    """
    # Warmup
    for _ in range(warmup_runs):
        func()
        gc.collect()

    # Force garbage collection before measurement
    gc.collect()

    # Start memory tracking
    tracemalloc.start()

    # Time the execution
    start_time = time.perf_counter()
    result = func()
    end_time = time.perf_counter()

    # Get memory stats
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    benchmark_result = BenchmarkResult(
        name=name,
        duration_seconds=end_time - start_time,
        memory_peak_mb=peak / 1024 / 1024,
        memory_current_mb=current / 1024 / 1024,
    )

    return result, benchmark_result


def run_ingestion_benchmark(
    pdf_path: Path | str,
    iterations: int = 1,
) -> BenchmarkResult:
    """
    Benchmark PDF ingestion.

    Args:
        pdf_path: Path to PDF file
        iterations: Number of iterations to average

    Returns:
        BenchmarkResult with timing and memory stats
    """
    from rnsr.ingestion import ingest_document

    pdf_path = Path(pdf_path)

    if not pdf_path.exists():
        raise FileNotFoundError(f"PDF not found: {pdf_path}")

    # Get file size
    file_size_mb = pdf_path.stat().st_size / 1024 / 1024

    results: list[BenchmarkResult] = []
    final_result = None

    for i in range(iterations):
        logger.info("ingestion_benchmark_iteration", iteration=i + 1, total=iterations)

        def run_ingestion():
            return ingest_document(pdf_path)

        ingestion_result, benchmark = _measure_execution(
            run_ingestion,
            f"Ingestion (iter {i + 1})",
        )

        # Calculate throughput (pages/sec)
        if hasattr(ingestion_result, 'tree') and ingestion_result.tree:
            page_count = ingestion_result.tree.total_nodes
            benchmark.throughput = page_count / benchmark.duration_seconds
            benchmark.throughput_unit = "nodes/sec"

        benchmark.metadata = {
            "file": pdf_path.name,
            "file_size_mb": file_size_mb,
            "tier_used": ingestion_result.tier_used if hasattr(ingestion_result, 'tier_used') else None,
        }

        results.append(benchmark)
        final_result = ingestion_result

    # Average results
    avg_duration = sum(r.duration_seconds for r in results) / len(results)
    max_memory = max(r.memory_peak_mb for r in results)
    avg_throughput = sum(r.throughput or 0 for r in results) / len(results)

    return BenchmarkResult(
        name="Ingestion",
        duration_seconds=avg_duration,
        memory_peak_mb=max_memory,
        memory_current_mb=results[-1].memory_current_mb,
        throughput=avg_throughput if avg_throughput > 0 else None,
        throughput_unit="nodes/sec",
        metadata={
            "iterations": iterations,
            "file": pdf_path.name,
            "file_size_mb": file_size_mb,
        },
    )


def run_query_benchmark(
    questions: list[str],
    skeleton: dict,
    kv_store: Any,
    warmup: bool = True,
) -> tuple[BenchmarkResult, BenchmarkResult]:
    """
    Benchmark query execution.

    Args:
        questions: List of questions to benchmark
        skeleton: Skeleton index
        kv_store: KV store with content
        warmup: Whether to do warmup run

    Returns:
        Tuple of (cold_start_result, warm_result)
    """
    from rnsr.agent import run_navigator

    if not questions:
        raise ValueError("At least one question required")

    # Cold start benchmark (first query)
    def run_cold_query():
        return run_navigator(
            question=questions[0],
            skeleton=skeleton,
            kv_store=kv_store,
            max_iterations=10,
        )

    _, cold_result = _measure_execution(run_cold_query, "Query (Cold Start)")
    cold_result.metadata = {"question": questions[0][:50]}

    # Warm benchmark (average of remaining queries)
    warm_times: list[float] = []
    warm_memories: list[float] = []

    for q in questions[1:] if len(questions) > 1 else questions:
        def run_warm_query():
            return run_navigator(
                question=q,
                skeleton=skeleton,
                kv_store=kv_store,
                max_iterations=10,
            )

        _, warm_bench = _measure_execution(run_warm_query, "Query (Warm)")
        warm_times.append(warm_bench.duration_seconds)
        warm_memories.append(warm_bench.memory_peak_mb)

    warm_result = BenchmarkResult(
        name="Query (Warm)",
        duration_seconds=sum(warm_times) / len(warm_times) if warm_times else 0,
        memory_peak_mb=max(warm_memories) if warm_memories else 0,
        memory_current_mb=warm_memories[-1] if warm_memories else 0,
        throughput=len(warm_times) / sum(warm_times) if sum(warm_times) > 0 else None,
        throughput_unit="queries/sec",
        metadata={"query_count": len(warm_times)},
    )

    return cold_result, warm_result


def run_end_to_end_benchmark(
    pdf_path: Path | str,
    questions: list[str],
) -> PerformanceBenchmark:
    """
    Run complete end-to-end benchmark.

    Args:
        pdf_path: Path to PDF file
        questions: List of test questions

    Returns:
        PerformanceBenchmark with all results
    """
    from rnsr.indexing import build_skeleton_index
    from rnsr.ingestion import ingest_document

    pdf_path = Path(pdf_path)
    total_start = time.perf_counter()

    benchmark = PerformanceBenchmark()

    # 1. Ingestion benchmark
    logger.info("benchmark_ingestion_start", file=pdf_path.name)

    def do_ingest():
        return ingest_document(pdf_path)

    ingestion_result, benchmark.ingestion = _measure_execution(do_ingest, "Ingestion")
    benchmark.ingestion.metadata = {"file": pdf_path.name}

    # 2. Indexing benchmark
    logger.info("benchmark_indexing_start")

    def do_index():
        return build_skeleton_index(ingestion_result.tree)

    (skeleton, kv_store), benchmark.indexing = _measure_execution(do_index, "Indexing")
    benchmark.indexing.metadata = {"node_count": len(skeleton)}

    # 3. Query benchmarks
    if questions:
        logger.info("benchmark_query_start", question_count=len(questions))
        benchmark.query_cold, benchmark.query_warm = run_query_benchmark(
            questions, skeleton, kv_store
        )

    benchmark.total_time_seconds = time.perf_counter() - total_start

    logger.info("benchmark_complete", total_seconds=benchmark.total_time_seconds)

    return benchmark
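A minimal end-to-end benchmarking sketch (not part of the package contents); the PDF path and questions are placeholders:

# Illustrative only: time ingestion, indexing, and queries for one document.
from rnsr.benchmarks.performance import run_end_to_end_benchmark

results = run_end_to_end_benchmark(
    pdf_path="sample_10k.pdf",  # placeholder document
    questions=[
        "What was total revenue for the fiscal year?",
        "How many employees does the company report?",
    ],
)
print(results)            # per-stage timings and peak memory
print(results.summary())  # dict suitable for logging or JSON export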