docsgraph 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cairn/__init__.py +5 -0
- cairn/bench/__init__.py +37 -0
- cairn/bench/baseline.py +236 -0
- cairn/bench/dataset.py +109 -0
- cairn/bench/judge.py +126 -0
- cairn/bench/metrics.py +32 -0
- cairn/bench/report.py +143 -0
- cairn/bench/runner.py +219 -0
- cairn/cli/__init__.py +5 -0
- cairn/cli/app.py +776 -0
- cairn/cli/config.py +105 -0
- cairn/core/__init__.py +41 -0
- cairn/core/errors.py +68 -0
- cairn/core/types.py +147 -0
- cairn/embed/__init__.py +17 -0
- cairn/embed/base.py +31 -0
- cairn/embed/doubao.py +167 -0
- cairn/embed/fake.py +36 -0
- cairn/embed/openai_compatible.py +155 -0
- cairn/engine/__init__.py +18 -0
- cairn/engine/indexer.py +298 -0
- cairn/engine/manifest.py +83 -0
- cairn/entity/__init__.py +21 -0
- cairn/entity/base.py +52 -0
- cairn/entity/fake.py +34 -0
- cairn/entity/heuristic.py +148 -0
- cairn/index/__init__.py +39 -0
- cairn/index/entities.py +244 -0
- cairn/index/summaries.py +269 -0
- cairn/index/tree.py +274 -0
- cairn/index/vectors.py +287 -0
- cairn/index/xrefs.py +195 -0
- cairn/ingest/__init__.py +36 -0
- cairn/ingest/base.py +46 -0
- cairn/ingest/markdown.py +244 -0
- cairn/ingest/markitdown.py +145 -0
- cairn/ingest/pdf.py +357 -0
- cairn/inspection.py +971 -0
- cairn/mcp/__init__.py +12 -0
- cairn/mcp/schemas.py +547 -0
- cairn/mcp/server.py +363 -0
- cairn/providers.py +50 -0
- cairn/py.typed +0 -0
- cairn/repo.py +1486 -0
- cairn/repo_search.py +1505 -0
- cairn/summarize/__init__.py +18 -0
- cairn/summarize/base.py +56 -0
- cairn/summarize/cache.py +66 -0
- cairn/summarize/fake.py +43 -0
- cairn/summarize/openai_compatible.py +148 -0
- cairn/summarize/prompts.py +73 -0
- cairn/tools/__init__.py +31 -0
- cairn/tools/base.py +126 -0
- cairn/tools/find_mentions.py +93 -0
- cairn/tools/get_related.py +140 -0
- cairn/tools/get_section.py +130 -0
- cairn/tools/outline.py +75 -0
- cairn/tools/read_range.py +94 -0
- cairn/tools/search_keyword.py +94 -0
- cairn/tools/search_semantic.py +181 -0
- cairn/xref/__init__.py +24 -0
- cairn/xref/base.py +50 -0
- cairn/xref/fake.py +40 -0
- cairn/xref/heuristic.py +217 -0
- docsgraph-0.1.0a2.dist-info/METADATA +688 -0
- docsgraph-0.1.0a2.dist-info/RECORD +69 -0
- docsgraph-0.1.0a2.dist-info/WHEEL +4 -0
- docsgraph-0.1.0a2.dist-info/entry_points.txt +3 -0
- docsgraph-0.1.0a2.dist-info/licenses/LICENSE +201 -0
cairn/cli/app.py
ADDED
|
@@ -0,0 +1,776 @@
|
|
|
1
|
+
"""Typer-based command-line interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Annotated, Literal
|
|
9
|
+
|
|
10
|
+
import typer
|
|
11
|
+
|
|
12
|
+
from cairn import __version__
|
|
13
|
+
from cairn.cli.config import load_embed_config, load_index_config, load_llm_config
|
|
14
|
+
from cairn.embed.base import Embedder
|
|
15
|
+
from cairn.engine.indexer import Indexer
|
|
16
|
+
from cairn.entity.heuristic import HeuristicExtractor
|
|
17
|
+
from cairn.ingest import parser_for_path
|
|
18
|
+
from cairn.inspection import write_inspector
|
|
19
|
+
from cairn.providers import make_embedder, make_summarizer
|
|
20
|
+
from cairn.repo import (
|
|
21
|
+
RepoStatus,
|
|
22
|
+
find_repo_root,
|
|
23
|
+
load_repo_document_index,
|
|
24
|
+
repo_status,
|
|
25
|
+
search_repo_documents,
|
|
26
|
+
sync_repo,
|
|
27
|
+
write_default_config,
|
|
28
|
+
)
|
|
29
|
+
from cairn.summarize.base import Summarizer
|
|
30
|
+
from cairn.tools.base import DocumentIndex
|
|
31
|
+
from cairn.tools.find_mentions import find_mentions as find_mentions_tool
|
|
32
|
+
from cairn.tools.get_related import get_related as get_related_tool
|
|
33
|
+
from cairn.tools.outline import outline as outline_tool
|
|
34
|
+
from cairn.tools.search_keyword import Mode
|
|
35
|
+
from cairn.tools.search_keyword import search_keyword as search_keyword_tool
|
|
36
|
+
from cairn.tools.search_semantic import search_semantic as search_semantic_tool
|
|
37
|
+
from cairn.xref.heuristic import HeuristicXRefExtractor
|
|
38
|
+
|
|
39
|
+
# Root CLI application; `no_args_is_help=True` makes a bare invocation print help.
app = typer.Typer(
    name="docsgraph",
    help="Local-first documentation graph for AI agents.",
    no_args_is_help=True,
)
# Sub-applications, mounted below as the `query` and `mcp` command groups.
query_app = typer.Typer(help="Run a single retrieval tool from the command line.")
mcp_app = typer.Typer(help="Generate MCP client configuration snippets.")
app.add_typer(query_app, name="query")
app.add_typer(mcp_app, name="mcp")

# Client names accepted by the `--client` option of `mcp config`.
McpClient = Literal["claude", "cursor", "codex", "goose"]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Plugin construction
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _make_summarizer(use_fake: bool) -> Summarizer:
    """Return the summarizer that ``make_summarizer`` builds for ``use_fake``."""
    return make_summarizer(use_fake)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _make_embedder(use_fake: bool) -> Embedder:
    """Return the embedder that ``make_embedder`` builds for ``use_fake``."""
    return make_embedder(use_fake)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
# Commands
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@app.command()
def init(
    yes: Annotated[
        bool,
        typer.Option(
            "-y",
            "--yes",
            help="Create .cairn/config.toml without prompting.",
        ),
    ] = False,
    force: Annotated[
        bool,
        typer.Option("--force", help="Overwrite an existing .cairn/config.toml."),
    ] = False,
    markitdown: Annotated[
        bool,
        typer.Option(
            "--markitdown",
            help="Include MarkItDown-backed Office/data/web globs in config.",
        ),
    ] = False,
) -> None:
    """Initialize Cairn for repository documentation indexing."""
    # The config always lives under the current working directory.
    repo_dir = Path.cwd()
    target = repo_dir / ".cairn" / "config.toml"

    # An existing config is reported and left untouched unless --force is set.
    if target.exists() and not force:
        typer.echo(f"already initialized: {target}")
        return

    if not yes:
        # Interactive path: declining the prompt exits with status 1.
        if not typer.confirm(f"Create {target}?"):
            raise typer.Exit(code=1)

    created = write_default_config(
        repo_dir,
        force=force,
        enable_markitdown=markitdown,
    )
    typer.echo(f"initialized: {created}")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@app.command()
def sync(
    fake: Annotated[
        bool,
        typer.Option(
            "--fake",
            help="Use deterministic FakeSummarizer + FakeEmbedder (no network).",
        ),
    ] = False,
    force: Annotated[
        bool,
        typer.Option("--force", help="Rebuild every configured document."),
    ] = False,
) -> None:
    """Index every configured repository document under .cairn/documents/."""
    # The command itself is synchronous; the async worker runs to completion here.
    job = _run_sync(use_fake=fake, force=force)
    asyncio.run(job)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
async def _run_sync(use_fake: bool, force: bool) -> None:
    """Sync the repository found from the current location and print a tally.

    Progress messages go to stderr and the one-line summary to stdout.

    Raises:
        typer.Exit: with code 1 when at least one document failed to sync.
    """

    def _report(message):
        # Keep progress chatter off stdout so the summary line stays clean.
        typer.echo(message, err=True)

    repo_dir = find_repo_root()
    outcomes = await sync_repo(
        repo_dir,
        summarizer=_make_summarizer(use_fake),
        embedder=_make_embedder(use_fake),
        index_config=load_index_config(),
        force=force,
        progress=_report,
    )

    # One pass over the outcomes: a document either failed, was rebuilt, or
    # was already up to date.
    failed = 0
    rebuilt = 0
    for outcome in outcomes:
        if not outcome.ok:
            failed += 1
        elif outcome.rebuilt:
            rebuilt += 1
    total = len(outcomes)
    successful = total - failed
    skipped = successful - rebuilt

    summary = (
        "synced: "
        f"{successful}/{total} documents "
        f"({rebuilt} rebuilt, {skipped} up to date, {failed} failed)"
    )
    typer.echo(summary)
    if failed:
        raise typer.Exit(code=1)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@app.command()
def status(
    json_output: Annotated[
        bool,
        typer.Option("--json", help="Print machine-readable JSON."),
    ] = False,
) -> None:
    """Show repository documentation index status."""
    report = repo_status(find_repo_root())
    # --json emits the status model's JSON dump; otherwise a text table is printed.
    if json_output:
        rendered = report.model_dump_json(indent=2)
    else:
        rendered = _format_repo_status(report)
    typer.echo(rendered)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@app.command()
def doctor(
    json_output: Annotated[
        bool,
        typer.Option("--json", help="Print machine-readable JSON."),
    ] = False,
) -> None:
    """Check repo setup, index freshness, and model configuration."""
    results = _doctor_checks()
    # Overall health requires every individual check to pass.
    healthy = all(entry["ok"] for entry in results)
    report = {"ok": healthy, "version": __version__, "checks": results}

    if json_output:
        rendered = json.dumps(report, ensure_ascii=False, indent=2)
    else:
        rendered = _format_doctor(report)
    typer.echo(rendered)

    # The report is printed before the non-zero exit.
    if not healthy:
        raise typer.Exit(code=1)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@app.command()
def version() -> None:
    """Print the Cairn version."""
    # __version__ is imported from the top-level `cairn` package.
    typer.echo(__version__)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@app.command()
def index(
    source: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    doc_id: Annotated[
        str | None,
        typer.Option(help="Override the document id (defaults to filename stem)."),
    ] = None,
    out: Annotated[
        Path | None,
        typer.Option(help="Output directory. Defaults to .cairn/documents/<doc_id>/."),
    ] = None,
    fake: Annotated[
        bool,
        typer.Option(
            "--fake",
            help="Use deterministic FakeSummarizer + FakeEmbedder (no network).",
        ),
    ] = False,
    force: Annotated[
        bool,
        typer.Option(
            "--force",
            help="Rebuild even if the source file is unchanged since last index.",
        ),
    ] = False,
) -> None:
    """Index a source document — build Tree + Summaries + Vectors."""
    # Hand the parsed options to the async worker and block until it finishes.
    job = _run_index(
        source=source,
        doc_id=doc_id,
        out=out,
        use_fake=fake,
        force=force,
    )
    asyncio.run(job)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
async def _run_index(
    source: Path,
    doc_id: str | None,
    out: Path | None,
    use_fake: bool,
    force: bool,
) -> None:
    """Index one source file and report whether it was rebuilt.

    Args:
        source: File to index; its parser is chosen by ``parser_for_path``.
        doc_id: Optional id override; an empty or missing value falls back to
            the file stem when choosing the default output directory.
        out: Output directory; defaults to ``.cairn/documents/<doc_id>`` under
            the current working directory.
        use_fake: Passed to the summarizer and embedder factories.
        force: Forwarded to ``Indexer.index_path``.
    """

    def _report(message):
        # Progress goes to stderr so stdout carries only the final result line.
        typer.echo(message, err=True)

    parser = parser_for_path(source)
    resolved_doc_id = doc_id or source.stem
    if out:
        out_dir = out
    else:
        out_dir = Path.cwd() / ".cairn" / "documents" / resolved_doc_id
    out_dir.mkdir(parents=True, exist_ok=True)
    index_cfg = load_index_config()

    indexer = Indexer(
        parser=parser,
        summarizer=_make_summarizer(use_fake),
        embedder=_make_embedder(use_fake),
        entity_extractor=HeuristicExtractor(),
        xref_extractor=HeuristicXRefExtractor(),
        summary_concurrency=index_cfg.summary_concurrency,
        embed_batch_size=index_cfg.embed_batch_size,
        progress=_report,
    )
    # NOTE(review): the raw `doc_id` (possibly None) is forwarded, not
    # `resolved_doc_id`; presumably the indexer applies the same stem
    # default. Confirm against Indexer.index_path.
    result = await indexer.index_path(
        source,
        out_dir=out_dir,
        doc_id=doc_id,
        force=force,
    )

    if not result.rebuilt:
        typer.echo(f"already up to date: {result.manifest_path}")
        return
    typer.echo(f"indexed: {result.manifest_path}")
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
@app.command()
def serve(
    doc_dir: Annotated[
        Path | None,
        typer.Argument(
            file_okay=False,
            dir_okay=True,
            readable=True,
            help=(
                "Built document directory. Omit to serve the current repo's "
                ".cairn documents."
            ),
        ),
    ] = None,
    fake: Annotated[
        bool,
        typer.Option(
            "--fake",
            help="Use FakeEmbedder for query embedding (no network at query time).",
        ),
    ] = False,
    repo: Annotated[
        Path | None,
        typer.Option(
            "--repo",
            file_okay=False,
            dir_okay=True,
            readable=True,
            help="Repository root or child path containing .cairn/config.toml.",
        ),
    ] = None,
) -> None:
    """Start the MCP stdio server against a document or repo index."""
    # Imported inside the command rather than at module import time.
    from cairn.mcp.server import serve_repo_stdio, serve_stdio

    # The positional directory and --repo are mutually exclusive.
    if repo is not None and doc_dir is not None:
        typer.echo("error: pass either a document directory or --repo, not both", err=True)
        raise typer.Exit(code=2)

    # Repo mode: either --repo was given or no document directory was passed.
    if repo is not None or doc_dir is None:
        root = find_repo_root(repo) if repo is not None else find_repo_root()
        asyncio.run(serve_repo_stdio(root, embedder=_make_embedder(fake)))
        return

    # Single-document mode: the directory must exist before serving.
    if not (doc_dir.exists() and doc_dir.is_dir()):
        typer.echo(f"error: document directory not found: {doc_dir}", err=True)
        raise typer.Exit(code=2)
    asyncio.run(serve_stdio(doc_dir, embedder=_make_embedder(fake)))
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
@mcp_app.command("config")
def mcp_config(
    client: Annotated[
        McpClient,
        typer.Option(
            "--client",
            case_sensitive=False,
            help="Client snippet to print: claude, cursor, codex, or goose.",
        ),
    ] = "claude",
    repo: Annotated[
        Path | None,
        typer.Option(
            "--repo",
            file_okay=False,
            dir_okay=True,
            readable=True,
            help="Repository root. Defaults to the nearest .cairn/config.toml.",
        ),
    ] = None,
    command: Annotated[
        str,
        typer.Option(
            "--command",
            help="Executable path clients should run.",
        ),
    ] = "docsgraph",
    fake: Annotated[
        bool,
        typer.Option(
            "--fake",
            help="Include --fake for deterministic local smoke tests.",
        ),
    ] = False,
) -> None:
    """Print a copy-pasteable MCP stdio configuration snippet."""
    repo_dir = find_repo_root(repo)
    # The snippet launches `serve --repo <root>`, plus --fake when requested.
    serve_args = ["serve", "--repo", str(repo_dir), *(["--fake"] if fake else [])]
    snippet = _format_mcp_config(client, command=command, args=serve_args)
    typer.echo(snippet)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
@app.command()
def outline(
    doc_dir: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=False, dir_okay=True, readable=True),
    ],
    depth: Annotated[int, typer.Option(min=1, max=6)] = 2,
    focus: Annotated[
        str | None, typer.Option(help="Restrict to a subtree.")
    ] = None,
) -> None:
    """Print the document outline as JSON."""
    # Delegate to the async worker and wait for it to finish.
    job = _run_outline(doc_dir=doc_dir, depth=depth, focus=focus)
    asyncio.run(job)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
async def _run_outline(doc_dir: Path, depth: int, focus: str | None) -> None:
    """Load the index in ``doc_dir``, run the outline tool, and print its data as JSON."""
    document = DocumentIndex.load(doc_dir)
    # Only the "gist" field is requested from the outline tool.
    response = await outline_tool(
        document,
        depth=depth,
        focus=focus,
        include=("gist",),
    )
    rendered = json.dumps(response.data, ensure_ascii=False, indent=2)
    typer.echo(rendered)
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
@query_app.command("semantic")
def query_semantic(
    doc_dir: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=False, dir_okay=True, readable=True),
    ],
    query: Annotated[str, typer.Argument(help="Query string.")],
    k: Annotated[int, typer.Option(min=1, max=32)] = 8,
    fake: Annotated[bool, typer.Option("--fake")] = False,
) -> None:
    """Run a semantic search and print results as JSON."""
    # Delegate to the async worker and wait for it to finish.
    job = _run_search_semantic(doc_dir=doc_dir, query=query, k=k, use_fake=fake)
    asyncio.run(job)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
async def _run_search_semantic(
    doc_dir: Path, query: str, k: int, use_fake: bool
) -> None:
    """Run the semantic search tool on the index in ``doc_dir`` and print its data as JSON."""
    document = DocumentIndex.load(doc_dir)
    query_embedder = _make_embedder(use_fake)
    response = await search_semantic_tool(
        document,
        embedder=query_embedder,
        query=query,
        k=k,
    )
    rendered = json.dumps(response.data, ensure_ascii=False, indent=2)
    typer.echo(rendered)
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
@query_app.command("repo")
def query_repo(
    query: Annotated[
        str,
        typer.Argument(help="Conceptual query across indexed repo docs."),
    ],
    k: Annotated[int, typer.Option(min=1, max=32)] = 8,
    sections_per_doc: Annotated[
        int | None,
        # Enforce the documented 1-8 range at the CLI boundary so that an
        # out-of-range value is rejected as a usage error, the same way `k`
        # is bounded above. Omitting the option still yields None.
        typer.Option(
            min=1,
            max=8,
            help="Maximum section hits per document (1-8).",
        ),
    ] = None,
    repo: Annotated[
        Path | None,
        typer.Option(
            "--repo",
            file_okay=False,
            dir_okay=True,
            readable=True,
            help="Repository root or child path containing .cairn/config.toml.",
        ),
    ] = None,
    fake: Annotated[bool, typer.Option("--fake")] = False,
) -> None:
    """Run repo-scoped hybrid search and print results as JSON."""
    # Delegate to the async worker and wait for it to finish.
    asyncio.run(_run_search_repo(query, k, sections_per_doc, repo, fake))
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
async def _run_search_repo(
    query: str,
    k: int,
    sections_per_doc: int | None,
    repo: Path | None,
    use_fake: bool,
) -> None:
    """Search the repo index located via ``repo`` and print the "data" entry as JSON."""
    repo_dir = find_repo_root(repo)
    query_embedder = _make_embedder(use_fake)
    response = await search_repo_documents(
        repo_dir,
        embedder=query_embedder,
        query=query,
        k=k,
        sections_per_doc=sections_per_doc,
    )
    # This response is indexed by key, unlike the per-document tools whose
    # responses expose a `.data` attribute.
    rendered = json.dumps(response["data"], ensure_ascii=False, indent=2)
    typer.echo(rendered)
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
@query_app.command("keyword")
def query_keyword(
    doc_dir: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=False, dir_okay=True, readable=True),
    ],
    terms: Annotated[list[str], typer.Argument(help="One or more search terms.")],
    k: Annotated[int, typer.Option(min=1, max=32)] = 12,
    mode: Annotated[str, typer.Option(help="any | all")] = "any",
) -> None:
    """Run a keyword search and print results as JSON."""
    # `mode` is validated inside the async worker, not here.
    job = _run_search_keyword(doc_dir=doc_dir, terms=terms, k=k, mode=mode)
    asyncio.run(job)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
async def _run_search_keyword(
    doc_dir: Path, terms: list[str], k: int, mode: str
) -> None:
    """Validate ``mode``, run the keyword search tool, and print its data as JSON.

    Raises:
        typer.Exit: with code 2 when ``mode`` is neither "any" nor "all".
    """
    allowed_modes = ("any", "all")
    if mode not in allowed_modes:
        typer.echo(f"error: mode must be 'any' or 'all'; got {mode!r}", err=True)
        raise typer.Exit(code=2)

    # Validated above; the annotation only narrows the type for the checker.
    cast_mode: Mode = mode  # type: ignore[assignment]
    document = DocumentIndex.load(doc_dir)
    response = await search_keyword_tool(
        document,
        terms=terms,
        k=k,
        mode=cast_mode,
    )
    rendered = json.dumps(response.data, ensure_ascii=False, indent=2)
    typer.echo(rendered)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
@query_app.command("mentions")
def query_mentions(
    doc_dir: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=False, dir_okay=True, readable=True),
    ],
    entity: Annotated[str, typer.Argument(help="Entity name (canonical or surface form).")],
    scope: Annotated[
        str | None,
        typer.Option(help="Restrict to a section-id prefix."),
    ] = None,
) -> None:
    """Locate every section that mentions an entity."""
    # Delegate to the async worker and wait for it to finish.
    job = _run_find_mentions(doc_dir=doc_dir, entity=entity, scope=scope)
    asyncio.run(job)
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
async def _run_find_mentions(
    doc_dir: Path, entity: str, scope: str | None
) -> None:
    """Run the find-mentions tool on the index in ``doc_dir`` and print its data as JSON."""
    document = DocumentIndex.load(doc_dir)
    response = await find_mentions_tool(document, entity=entity, scope=scope)
    rendered = json.dumps(response.data, ensure_ascii=False, indent=2)
    typer.echo(rendered)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
@query_app.command("related")
def query_related(
    doc_dir: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=False, dir_okay=True, readable=True),
    ],
    section_id: Annotated[str, typer.Argument(help="Section id to find neighbors of.")],
    kinds: Annotated[
        str,
        typer.Option(help="Comma-separated channels: xref,sibling,parent,child"),
    ] = "xref",
    k: Annotated[int, typer.Option(min=1, max=32)] = 8,
) -> None:
    """Return neighbors of a section across the xref graph and tree."""
    # Split the comma-separated option, trimming whitespace and dropping
    # empty entries (e.g. from a trailing comma).
    channels = []
    for piece in kinds.split(","):
        name = piece.strip()
        if name:
            channels.append(name)
    job = _run_get_related(
        doc_dir=doc_dir,
        section_id=section_id,
        kinds=tuple(channels),
        k=k,
    )
    asyncio.run(job)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
async def _run_get_related(
    doc_dir: Path, section_id: str, kinds: tuple[str, ...], k: int
) -> None:
    """Run the get-related tool for ``section_id`` and print its data as JSON."""
    document = DocumentIndex.load(doc_dir)
    # `kinds` arrives as plain strings from the CLI, hence the type-check waiver.
    response = await get_related_tool(document, id=section_id, kinds=kinds, k=k)  # type: ignore[arg-type]
    rendered = json.dumps(response.data, ensure_ascii=False, indent=2)
    typer.echo(rendered)
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
@app.command()
def inspect(
    doc_dir: Annotated[
        Path | None,
        typer.Argument(
            file_okay=False,
            dir_okay=True,
            readable=True,
            help=(
                "Built document directory. Omit to inspect the current repo's "
                "primary Cairn document."
            ),
        ),
    ] = None,
    out: Annotated[
        Path | None,
        typer.Option(
            help=(
                "HTML output path. Defaults to <doc_dir>/inspector.html for "
                "single-doc mode or .cairn/inspector.html for repo mode."
            )
        ),
    ] = None,
    doc: Annotated[
        str | None,
        typer.Option(help="Repo document id to inspect when doc_dir is omitted."),
    ] = None,
) -> None:
    """Generate a standalone HTML inspector for a document index."""
    if doc_dir is not None:
        # Single-document mode: the directory must exist; `doc` is not consulted.
        if not (doc_dir.exists() and doc_dir.is_dir()):
            typer.echo(f"error: document directory not found: {doc_dir}", err=True)
            raise typer.Exit(code=2)
        document = DocumentIndex.load(doc_dir)
        default_out = doc_dir / "inspector.html"
    else:
        # Repo mode: load the document selected by `doc` from the repo index.
        repo_dir = find_repo_root()
        document = load_repo_document_index(repo_dir, doc_id=doc)
        default_out = repo_dir / ".cairn" / "inspector.html"

    destination = out if out else default_out
    written = write_inspector(document, out=destination)
    typer.echo(f"inspector: {written}")
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
@app.command()
def bench(
    suite: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    k: Annotated[int, typer.Option(min=1, max=32)] = 8,
    out: Annotated[
        Path | None,
        typer.Option(help="Where to write the JSON report."),
    ] = None,
    fake: Annotated[
        bool,
        typer.Option("--fake", help="Use FakeSummarizer + FakeEmbedder (deterministic, offline)."),
    ] = False,
    judge: Annotated[
        bool,
        typer.Option(
            "--judge",
            help="Run LLM-as-judge for QA accuracy (uses CAIRN_LLM_* settings).",
        ),
    ] = False,
) -> None:
    """Run a benchmark suite comparing Cairn against a naive vector-RAG baseline."""
    # Delegate to the async worker and wait for it to finish.
    job = _run_bench(
        suite_path=suite,
        k=k,
        out=out,
        use_fake=fake,
        use_judge=judge,
    )
    asyncio.run(job)
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
async def _run_bench(
    suite_path: Path,
    k: int,
    out: Path | None,
    use_fake: bool,
    use_judge: bool,
) -> None:
    """Run a benchmark suite, print the Markdown report, and write a JSON report.

    Args:
        suite_path: Suite definition file, loaded with ``load_suite``.
        k: Passed to ``BenchOptions`` as ``k``.
        out: Destination of the JSON report. When omitted, the report goes to
            ``<system temp dir>/cairn-bench/<suite stem>.json``.
        use_fake: Passed to the summarizer and embedder factories.
        use_judge: When true, an ``LLMJudge`` is built from ``load_llm_config()``
            and handed to the runner; otherwise the runner gets ``judge=None``.
    """
    import tempfile

    # Bench modules are imported here rather than at module import time.
    from cairn.bench.dataset import load_suite
    from cairn.bench.judge import LLMJudge
    from cairn.bench.report import format_markdown_report, write_json_report
    from cairn.bench.runner import BenchOptions, BenchRunner

    suite = load_suite(suite_path)

    judge_client: LLMJudge | None = None
    if use_judge:
        cfg = load_llm_config()
        judge_client = LLMJudge(
            base_url=cfg.base_url,
            model=cfg.model,
            api_key=cfg.api_key,
        )

    index_cfg = load_index_config()
    runner = BenchRunner(
        summarizer=_make_summarizer(use_fake),
        embedder=_make_embedder(use_fake),
        judge=judge_client,
        options=BenchOptions(
            k=k,
            summary_concurrency=index_cfg.summary_concurrency,
            embed_batch_size=index_cfg.embed_batch_size,
        ),
        progress=lambda message: typer.echo(message, err=True),
    )

    # The runner works in a throwaway directory that is removed on exit.
    with tempfile.TemporaryDirectory(prefix="cairn-bench-") as work_str:
        work_dir = Path(work_str)
        summary = await runner.run(suite, work_dir=work_dir)

    typer.echo(format_markdown_report(summary))

    # Use the platform temp directory instead of a literal "/tmp": it honours
    # TMPDIR and also resolves on systems that have no /tmp.
    if out is not None:
        out_path = out
    else:
        out_path = Path(tempfile.gettempdir()) / "cairn-bench" / f"{suite_path.stem}.json"
    # write_json_report is not visible here, so create the parent directory
    # defensively; this is a no-op when it already exists.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    write_json_report(summary, out_path)
    typer.echo(f"\njson report written → {out_path}", err=True)
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
def _format_repo_status(status_obj: RepoStatus) -> str:
    """Render a repo status as header lines followed by a Markdown-style table.

    The output lists the repo root, the config path, and the document counts,
    then one table row per entry in ``status_obj.documents``. A document with
    no section count gets an empty "sections" cell.
    """

    def _row(entry) -> str:
        cell = "" if entry.section_count is None else str(entry.section_count)
        return f"| {entry.state} | `{entry.id}` | {cell} | {entry.source} |"

    counts = (
        "documents: "
        f"{status_obj.indexed_count} indexed, "
        f"{status_obj.stale_count} stale, "
        f"{status_obj.missing_count} missing, "
        f"{status_obj.error_count} errors"
    )
    header = [
        f"Cairn repo: {status_obj.root}",
        f"config: {status_obj.config_path}",
        counts,
        "",
        "| state | doc | sections | source |",
        "|---|---|---:|---|",
    ]
    rows = [_row(entry) for entry in status_obj.documents]
    return "\n".join(header + rows)
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def _doctor_checks() -> list[dict[str, object]]:
    """Collect the health checks reported by the ``doctor`` command.

    Each check is a dict with "name", "ok" and "message" keys. If the repo
    root cannot be located, a single failing "repo_config" check is returned
    and nothing else is examined.
    """

    def _check(name: str, ok: object, message: object) -> dict[str, object]:
        # Key order matters for the JSON output: name, ok, message.
        return {"name": name, "ok": ok, "message": message}

    try:
        root = find_repo_root()
    except Exception as exc:
        return [_check("repo_config", False, f"{exc}. Run `docsgraph init -y` first.")]

    results: list[dict[str, object]] = [
        _check("repo_config", True, f"found {root / '.cairn' / 'config.toml'}"),
    ]

    try:
        status_obj = repo_status(root)
        unhealthy = (
            status_obj.missing_count
            + status_obj.stale_count
            + status_obj.error_count
        )
        # Healthy means nothing is missing, stale or errored, and at least
        # one document is indexed.
        results.append(
            _check(
                "repo_index",
                unhealthy == 0 and status_obj.indexed_count > 0,
                (
                    f"{status_obj.indexed_count} indexed, "
                    f"{status_obj.stale_count} stale, "
                    f"{status_obj.missing_count} missing, "
                    f"{status_obj.error_count} errors"
                ),
            )
        )
        primary = status_obj.primary_doc
        if primary:
            # The primary document passes only if it is listed as "indexed".
            primary_ok = any(
                entry.id == primary and entry.state == "indexed"
                for entry in status_obj.documents
            )
            results.append(_check("primary_doc", primary_ok, primary))
    except Exception as exc:
        # Any failure while reading the index is reported as a failed check
        # instead of aborting the whole doctor run.
        results.append(
            _check(
                "repo_index",
                False,
                f"{exc}. Run `docsgraph sync --fake` to build locally.",
            )
        )

    llm = load_llm_config()
    embed = load_embed_config()
    results.append(
        _check(
            "summarizer",
            bool(llm.model and llm.base_url),
            f"{llm.model} at {llm.base_url}",
        )
    )
    results.append(
        _check(
            "embedder",
            bool(embed.model and embed.base_url and embed.dim > 0),
            f"{embed.provider}:{embed.model} dim={embed.dim}",
        )
    )
    return results
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def _format_doctor(payload: dict[str, object]) -> str:
    """Render the doctor payload as plain text.

    The first line gives the overall verdict, followed by one ``[ok]`` or
    ``[!!]`` line per check. When the payload is not ok, a blank line and a
    "Next steps" hint are appended.
    """
    checks = payload["checks"]
    assert isinstance(checks, list)

    healthy = payload["ok"]
    verdict = "ok" if healthy else "needs attention"
    report = [f"Cairn doctor: {verdict}"]
    for entry in checks:
        assert isinstance(entry, dict)
        flag = "ok" if entry["ok"] else "!!"
        report.append(f"[{flag}] {entry['name']}: {entry['message']}")

    if healthy:
        return "\n".join(report)

    report.extend(
        [
            "",
            "Next steps: run `docsgraph init -y`, then `docsgraph sync --fake` "
            "(or use the compatible `cairn` alias).",
        ]
    )
    return "\n".join(report)
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
def _format_mcp_config(client: McpClient, *, command: str, args: list[str]) -> str:
    """Build the MCP server configuration snippet for ``client``.

    Args:
        client: Target client name.
        command: Executable the client should launch.
        args: Arguments passed to ``command``.

    Returns:
        JSON with an ``mcpServers`` entry for "claude" and "cursor", a
        ``[mcp_servers.cairn]`` TOML table for "codex", and a YAML
        ``extensions`` mapping for any other value (i.e. "goose").
    """
    server = {"command": command, "args": args}
    if client in {"claude", "cursor"}:
        return json.dumps({"mcpServers": {"cairn": server}}, indent=2)
    if client == "codex":
        # JSON string escaping is used to quote the TOML string values.
        quoted_args = ", ".join(json.dumps(arg) for arg in args)
        return "\n".join(
            [
                "[mcp_servers.cairn]",
                f"command = {json.dumps(command)}",
                f"args = [{quoted_args}]",
            ]
        )
    # YAML nesting is carried by indentation: `cairn` sits under
    # `extensions`, its settings under `cairn`, and list items under `args`.
    # A uniform one-space indent would flatten all of them into siblings.
    yaml_args = "\n".join(f"      - {json.dumps(arg)}" for arg in args)
    return "\n".join(
        [
            "extensions:",
            "  cairn:",
            "    type: stdio",
            f"    command: {json.dumps(command)}",
            "    args:",
            yaml_args,
        ]
    )
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
# Run the Typer app when this module is executed as a script.
if __name__ == "__main__":
    app()
|