docsgraph 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cairn/__init__.py +5 -0
- cairn/bench/__init__.py +37 -0
- cairn/bench/baseline.py +236 -0
- cairn/bench/dataset.py +109 -0
- cairn/bench/judge.py +126 -0
- cairn/bench/metrics.py +32 -0
- cairn/bench/report.py +143 -0
- cairn/bench/runner.py +219 -0
- cairn/cli/__init__.py +5 -0
- cairn/cli/app.py +776 -0
- cairn/cli/config.py +105 -0
- cairn/core/__init__.py +41 -0
- cairn/core/errors.py +68 -0
- cairn/core/types.py +147 -0
- cairn/embed/__init__.py +17 -0
- cairn/embed/base.py +31 -0
- cairn/embed/doubao.py +167 -0
- cairn/embed/fake.py +36 -0
- cairn/embed/openai_compatible.py +155 -0
- cairn/engine/__init__.py +18 -0
- cairn/engine/indexer.py +298 -0
- cairn/engine/manifest.py +83 -0
- cairn/entity/__init__.py +21 -0
- cairn/entity/base.py +52 -0
- cairn/entity/fake.py +34 -0
- cairn/entity/heuristic.py +148 -0
- cairn/index/__init__.py +39 -0
- cairn/index/entities.py +244 -0
- cairn/index/summaries.py +269 -0
- cairn/index/tree.py +274 -0
- cairn/index/vectors.py +287 -0
- cairn/index/xrefs.py +195 -0
- cairn/ingest/__init__.py +36 -0
- cairn/ingest/base.py +46 -0
- cairn/ingest/markdown.py +244 -0
- cairn/ingest/markitdown.py +145 -0
- cairn/ingest/pdf.py +357 -0
- cairn/inspection.py +971 -0
- cairn/mcp/__init__.py +12 -0
- cairn/mcp/schemas.py +547 -0
- cairn/mcp/server.py +363 -0
- cairn/providers.py +50 -0
- cairn/py.typed +0 -0
- cairn/repo.py +1486 -0
- cairn/repo_search.py +1505 -0
- cairn/summarize/__init__.py +18 -0
- cairn/summarize/base.py +56 -0
- cairn/summarize/cache.py +66 -0
- cairn/summarize/fake.py +43 -0
- cairn/summarize/openai_compatible.py +148 -0
- cairn/summarize/prompts.py +73 -0
- cairn/tools/__init__.py +31 -0
- cairn/tools/base.py +126 -0
- cairn/tools/find_mentions.py +93 -0
- cairn/tools/get_related.py +140 -0
- cairn/tools/get_section.py +130 -0
- cairn/tools/outline.py +75 -0
- cairn/tools/read_range.py +94 -0
- cairn/tools/search_keyword.py +94 -0
- cairn/tools/search_semantic.py +181 -0
- cairn/xref/__init__.py +24 -0
- cairn/xref/base.py +50 -0
- cairn/xref/fake.py +40 -0
- cairn/xref/heuristic.py +217 -0
- docsgraph-0.1.0a2.dist-info/METADATA +688 -0
- docsgraph-0.1.0a2.dist-info/RECORD +69 -0
- docsgraph-0.1.0a2.dist-info/WHEEL +4 -0
- docsgraph-0.1.0a2.dist-info/entry_points.txt +3 -0
- docsgraph-0.1.0a2.dist-info/licenses/LICENSE +201 -0
cairn/bench/runner.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""Bench orchestrator — runs a suite end-to-end against Cairn and the baseline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
9
|
+
|
|
10
|
+
from cairn.bench.baseline import NaiveHit, NaiveRAG
|
|
11
|
+
from cairn.bench.dataset import BenchDocument, BenchQuestion, BenchSuite
|
|
12
|
+
from cairn.bench.judge import LLMJudge
|
|
13
|
+
from cairn.bench.metrics import recall_at_k
|
|
14
|
+
from cairn.bench.report import BenchSummary, QuestionResult, SystemResult
|
|
15
|
+
from cairn.embed.base import Embedder
|
|
16
|
+
from cairn.engine.indexer import Indexer
|
|
17
|
+
from cairn.entity.heuristic import HeuristicExtractor
|
|
18
|
+
from cairn.ingest.markdown import MarkdownParser
|
|
19
|
+
from cairn.summarize.base import Summarizer
|
|
20
|
+
from cairn.tools.base import DocumentIndex, estimate_tokens
|
|
21
|
+
from cairn.tools.search_semantic import search_semantic
|
|
22
|
+
from cairn.xref.heuristic import HeuristicXRefExtractor
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class BenchOptions(BaseModel):
    """Immutable tuning knobs for a benchmark run.

    Instances are frozen and unknown keyword arguments are rejected, so a
    misspelled option name fails at construction time.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Number of hits requested from each system and the cut-off passed to
    # ``recall_at_k``; constrained to 1..32.
    k: int = Field(default=8, ge=1, le=32)
    # Chunk size, in words, handed to the naive baseline (at least 1).
    naive_chunk_size_words: int = Field(default=512, ge=1)
    # Forwarded to the Cairn indexer as ``summary_concurrency`` (at least 1).
    summary_concurrency: int = Field(default=4, ge=1)
    # Embedding batch size used by both the Cairn indexer and the naive
    # baseline (at least 1).
    embed_batch_size: int = Field(default=32, ge=1)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BenchRunner:
    """Runs a :class:`BenchSuite` against Cairn and a naive baseline.

    For every document in the suite the runner builds a Cairn index and a
    naive-RAG index in separate sub-directories, retrieves the top ``k`` hits
    from both systems for each question and scores them with ``recall_at_k``.
    When a judge is configured and the question carries a reference answer,
    the judge answers from each system's retrieved context and grades the
    answer against the reference.
    """

    def __init__(
        self,
        *,
        summarizer: Summarizer,
        embedder: Embedder,
        judge: LLMJudge | None = None,
        options: BenchOptions | None = None,
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Store the collaborators reused for every document in a run.

        Args:
            summarizer: Handed to the Cairn ``Indexer``.
            embedder: Shared by the Cairn indexer, Cairn semantic search and
                the naive baseline.
            judge: Optional answerer/grader; when ``None`` no QA judging is
                performed.
            options: Tunables; a default ``BenchOptions()`` is used when
                omitted.
            progress: Optional callback that receives status messages.
        """
        self.summarizer = summarizer
        self.embedder = embedder
        self.judge = judge
        self.options = options or BenchOptions()
        self.progress = progress

    async def run(self, suite: BenchSuite, *, work_dir: Path) -> BenchSummary:
        """Benchmark every document in *suite* and collect the results.

        Args:
            suite: Documents and their questions.
            work_dir: Root directory for index artifacts; created if missing.
                Each document uses the sub-directory ``work_dir / document.id``.

        Returns:
            A ``BenchSummary`` carrying the suite name, the configured ``k``
            and one ``QuestionResult`` per question, in suite order.
        """
        work_dir.mkdir(parents=True, exist_ok=True)
        results: list[QuestionResult] = []
        for document in suite.documents:
            self._emit(f"document {document.id}: starting")
            results.extend(await self._run_document(document, work_dir / document.id))
            self._emit(f"document {document.id}: done")
        return BenchSummary(
            suite_name=suite.name,
            k=self.options.k,
            questions=tuple(results),
        )

    async def _run_document(
        self,
        document: BenchDocument,
        doc_dir: Path,
    ) -> list[QuestionResult]:
        """Index one document with both systems and evaluate its questions.

        Args:
            document: The document and its questions.
            doc_dir: Working directory for this document; the Cairn index is
                written to ``doc_dir / "cairn"`` and the naive index to
                ``doc_dir / "naive"``.

        Returns:
            One ``QuestionResult`` per question, in document order.
        """
        doc_dir.mkdir(parents=True, exist_ok=True)
        source_text = document.source.read_text(encoding="utf-8")

        cairn_dir = doc_dir / "cairn"
        naive_dir = doc_dir / "naive"
        cairn_dir.mkdir(parents=True, exist_ok=True)
        naive_dir.mkdir(parents=True, exist_ok=True)

        parser = MarkdownParser()
        parsed = parser.parse(document.source, doc_id=document.id)

        indexer = Indexer(
            parser=parser,
            summarizer=self.summarizer,
            embedder=self.embedder,
            entity_extractor=HeuristicExtractor(),
            xref_extractor=HeuristicXRefExtractor(),
            summary_concurrency=self.options.summary_concurrency,
            embed_batch_size=self.options.embed_batch_size,
            # Prefix indexer messages so they are distinguishable from the
            # runner's own progress lines.
            progress=lambda message: self._emit(f"cairn index: {message}"),
        )
        self._emit("cairn index: starting")
        await indexer.index_document(parsed, out_dir=cairn_dir)
        cairn_index = DocumentIndex.load(cairn_dir)
        # Report "loaded" only once the load has actually succeeded; emitting
        # it beforehand would mislead anyone diagnosing a failed load.
        self._emit("cairn index: loaded")

        naive = NaiveRAG(
            self.embedder,
            chunk_size_words=self.options.naive_chunk_size_words,
            batch_size=self.options.embed_batch_size,
        )
        self._emit("naive index: starting")
        await naive.index(parsed, source_text, out_dir=naive_dir)
        self._emit("naive index: done")

        results: list[QuestionResult] = []
        total_questions = len(document.questions)
        for question_no, question in enumerate(document.questions, start=1):
            self._emit(
                f"question {question_no}/{total_questions} {question.id}: retrieving"
            )
            cairn_result, cairn_context = await self._run_cairn(
                cairn_index, question
            )
            naive_result, naive_context = await self._run_naive(
                naive, question, naive_dir
            )

            # Judging needs both a judge and a reference answer to grade
            # against; otherwise the retrieval-only results are kept.
            if self.judge is not None and question.reference is not None:
                cairn_result = await self._judge_result(
                    cairn_result, question, cairn_context
                )
                naive_result = await self._judge_result(
                    naive_result, question, naive_context
                )

            results.append(
                QuestionResult(
                    document_id=document.id,
                    question_id=question.id,
                    question=question.question,
                    expected_anchors=question.expected_anchors,
                    tags=question.tags,
                    cairn=cairn_result,
                    naive=naive_result,
                )
            )
        return results

    def _emit(self, message: str) -> None:
        """Forward *message* to the progress callback, if one was supplied."""
        if self.progress is not None:
            self.progress(message)

    async def _judge_result(
        self,
        result: SystemResult,
        question: BenchQuestion,
        context: str,
    ) -> SystemResult:
        """Attach a QA answer and verdict to *result*.

        The judge first answers the question from *context*, then grades that
        answer against the question's reference.

        Returns:
            *result* unchanged when no judge is configured or the question
            has no reference; otherwise a copy with ``qa_correct`` and
            ``qa_answer`` updated.
        """
        if self.judge is None or question.reference is None:
            return result
        answer = await self.judge.answer(question.question, context)
        # The second element of the judge's return value is not used here.
        is_correct, _ = await self.judge.judge(
            question.question, question.reference, answer
        )
        return result.model_copy(
            update={"qa_correct": is_correct, "qa_answer": answer}
        )

    async def _run_cairn(
        self,
        index: DocumentIndex,
        question: BenchQuestion,
    ) -> tuple[SystemResult, str]:
        """Retrieve with Cairn semantic search and score the hits.

        Returns:
            The ``SystemResult`` for system ``"cairn"`` (hit ids, recall@k and
            the token count reported by the search response) together with
            the formatted context string built from the hits.
        """
        response = await search_semantic(
            index,
            embedder=self.embedder,
            query=question.question,
            k=self.options.k,
        )
        hits = response.data["hits"]
        section_ids = [hit["id"] for hit in hits]
        recall = recall_at_k(
            section_ids, question.expected_anchors, k=self.options.k
        )
        context = _format_cairn_context(hits)
        result = SystemResult(
            system="cairn",
            section_ids=tuple(section_ids),
            recall_at_k=recall,
            tokens_returned=response.tokens_returned,
        )
        return result, context

    async def _run_naive(
        self,
        naive: NaiveRAG,
        question: BenchQuestion,
        naive_dir: Path,
    ) -> tuple[SystemResult, str]:
        """Retrieve with the naive baseline and score the hits.

        Returns:
            The ``SystemResult`` for system ``"naive"`` together with the
            formatted context string built from the hit texts. The token
            count is the sum of ``estimate_tokens`` over the hit texts.
        """
        hits = await naive.retrieve(
            question.question, out_dir=naive_dir, k=self.options.k
        )
        # A hit with no section id is recorded as "" so that ``section_ids``
        # keeps one entry per hit.
        section_ids = [hit.section_id or "" for hit in hits]
        recall = recall_at_k(
            section_ids, question.expected_anchors, k=self.options.k
        )
        tokens = sum(estimate_tokens(hit.text) for hit in hits)
        context = _format_naive_context(hits)
        result = SystemResult(
            system="naive",
            section_ids=tuple(section_ids),
            recall_at_k=recall,
            tokens_returned=tokens,
        )
        return result, context
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _format_cairn_context(hits: list[dict[str, object]]) -> str:
    """Render Cairn search hits as a single markdown context string.

    Each hit becomes a ``## <title>`` heading followed by its ``synopsis``,
    or by its ``head`` when the synopsis is missing or empty. Hits are
    separated by a horizontal rule.

    Args:
        hits: Hit dictionaries; the keys ``title``, ``synopsis`` and ``head``
            are all optional.

    Returns:
        The joined context, or ``""`` when *hits* is empty.
    """
    parts: list[str] = []
    for hit in hits:
        # Treat a present-but-``None`` value like a missing key: ``str(None)``
        # would otherwise put the literal text "None" into the context and,
        # for ``synopsis``, block the fallback to ``head``.
        title, synopsis, head = (
            "" if value is None else str(value)
            for value in (hit.get("title"), hit.get("synopsis"), hit.get("head"))
        )
        body = synopsis or head
        parts.append(f"## {title}\n\n{body}".strip())
    return "\n\n---\n\n".join(parts)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _format_naive_context(hits: list[NaiveHit]) -> str:
    """Concatenate the text of naive-baseline hits into one context string.

    Hit texts are kept in the order given and separated by a markdown
    horizontal rule; an empty hit list yields ``""``.
    """
    separator = "\n\n---\n\n"
    chunk_texts = [hit.text for hit in hits]
    return separator.join(chunk_texts)
|