retrieval-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+
5
+ # Virtual environments
6
+ .venv/
7
+ .ropeproject/
8
+
9
+ # IDE / Claude Code
10
+ .claude/
11
+ .omc/
12
+ .omx/
13
+ .pytest_cache/
14
+
15
+ # Data (150GB+ corpus, DB, indices)
16
+ data/
17
+ snapshots/
18
+
19
+ # Code KB local Qdrant + eval scratch / ACE project-local playbook (per-project state)
20
+ .code_kb/
21
+ .ace/
22
+
23
+ # Qdrant binary
24
+ tools/qdrant/
25
+
26
+ # Frontend build artifacts
27
+ frontend/node_modules/
28
+ frontend/dist/
29
+
30
+ # Logs
31
+ *.log
32
+ myeasylog.log
33
+
34
+ # Temp files
35
+ tmp_*
36
+ nul
37
+ .qdrant-initialized
38
+
39
+ # Secrets
40
+ .env
41
+ *.key
42
+ *.pem
43
+
44
+ # Gateway (app-tier) — secrets + local ledger, never commit
45
+ gateway.env
46
+ deploy/bwh/gateway.env
47
+ gateway-data/
48
+ gateway.db
49
+ gateway.db-shm
50
+ gateway.db-wal
51
+
52
+ # wrangler / analysis artifacts (not source)
53
+ .wrangler/
54
+ frontend/public/landscape.json
55
+
56
+ # OS
57
+ Thumbs.db
58
+ .DS_Store
59
+ desktop.ini
60
+ .playwright-mcp/
@@ -0,0 +1,11 @@
1
+ {
2
+ "mcpServers": {
3
+ "retrieval": {
4
+ "command": "uvx",
5
+ "args": ["retrieval-mcp"],
6
+ "env": {
7
+ "RETRIEVAL_API_URL": "http://10.100.100.111:8000"
8
+ }
9
+ }
10
+ }
11
+ }
@@ -0,0 +1,82 @@
1
+ Metadata-Version: 2.4
2
+ Name: retrieval-mcp
3
+ Version: 0.1.0
4
+ Summary: Installable MCP server for the Retrieval academic-paper search + ACE journal + index inventory API
5
+ Project-URL: Homepage, https://retrieval.rnarket.com
6
+ License: MIT
7
+ Keywords: llm,mcp,model-context-protocol,papers,retrieval,search
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: httpx>=0.27
10
+ Requires-Dist: mcp>=1.2.0
11
+ Description-Content-Type: text/markdown
12
+
13
+ # retrieval-mcp
14
+
15
+ An **MCP server** for the Retrieval academic-paper API — semantic paper search,
16
+ document matching, ACE journal memory, and index inventory. Self-contained: it talks
17
+ to the backend over HTTP only (just `mcp` + `httpx`), so it installs anywhere with
18
+ `uvx` / `pip` — no repo checkout, no GPU, no models.
19
+
20
+ By default it targets the compute box on the lab LAN (`http://10.100.100.111:8000`),
21
+ which trusts LAN callers so **no key is needed**. Off-LAN, point `RETRIEVAL_API_URL` at
22
+ the public gateway (`https://retrieval.rnarket.com`) and set `RETRIEVAL_API_KEY` (`sk-...`).
23
+
24
+ ## Tools
25
+
26
+ | Tool | What it does |
27
+ |------|--------------|
28
+ | `search_papers` | Semantic hybrid search over 95k+ top-venue CS papers |
29
+ | `search_within_paper` | Every matching passage inside one paper |
30
+ | `match_document` / `match_paper` | Content-nearest papers to a passage / a paper |
31
+ | `list_conferences` / `corpus_stats` | Venue registry / corpus size |
32
+ | `journal_record` / `journal_search` / `journal_recent` / `journal_index_dir` | ACE journal work-memory (cwd-scoped); `journal_index_dir` bulk-indexes a local directory |
33
+ | `index_inventory` | Your indexed-file tree: user → host → project → dir → file |
34
+
35
+ ## Install
36
+
37
+ ### Claude Code
38
+
39
+ ```bash
40
+ # LAN (no key):
41
+ claude mcp add retrieval -- uvx retrieval-mcp
42
+ # Off-LAN (public gateway + key):
43
+ claude mcp add retrieval \
44
+ --env RETRIEVAL_API_URL=https://retrieval.rnarket.com \
45
+ --env RETRIEVAL_API_KEY=sk-... \
46
+ -- uvx retrieval-mcp
47
+ ```
48
+
49
+ ### Claude Desktop / any MCP client
50
+
51
+ `claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/`,
52
+ Windows: `%APPDATA%\Claude\`):
53
+
54
+ ```json
55
+ {
56
+ "mcpServers": {
57
+ "retrieval": {
58
+ "command": "uvx",
59
+ "args": ["retrieval-mcp"],
60
+ "env": {
61
+ "RETRIEVAL_API_URL": "https://retrieval.rnarket.com",
62
+ "RETRIEVAL_API_KEY": "sk-...",
63
+ "JOURNAL_PROJECT_SLUG": "my-project"
64
+ }
65
+ }
66
+ }
67
+ }
68
+ ```
69
+
70
+ No `uv`? `pip install retrieval-mcp` then use `"command": "retrieval-mcp"`.
71
+
72
+ ## Config (env)
73
+
74
+ | Var | Default | Notes |
75
+ |-----|---------|-------|
76
+ | `RETRIEVAL_API_URL` | `http://10.100.100.111:8000` | LAN compute box (no key). Off-LAN, set to `https://retrieval.rnarket.com`. |
77
+ | `RETRIEVAL_API_KEY` | — | `sk-...` key for the gateway (create under `/auth/keys`). Required off-LAN. |
78
+ | `JOURNAL_PROJECT_SLUG` | current dir name | Journal namespace; scopes journal reads/writes so projects don't leak into each other. |
79
+
80
+ ## License
81
+
82
+ MIT
@@ -0,0 +1,70 @@
1
+ # retrieval-mcp
2
+
3
+ An **MCP server** for the Retrieval academic-paper API — semantic paper search,
4
+ document matching, ACE journal memory, and index inventory. Self-contained: it talks
5
+ to the backend over HTTP only (just `mcp` + `httpx`), so it installs anywhere with
6
+ `uvx` / `pip` — no repo checkout, no GPU, no models.
7
+
8
+ By default it targets the compute box on the lab LAN (`http://10.100.100.111:8000`),
9
+ which trusts LAN callers so **no key is needed**. Off-LAN, point `RETRIEVAL_API_URL` at
10
+ the public gateway (`https://retrieval.rnarket.com`) and set `RETRIEVAL_API_KEY` (`sk-...`).
11
+
12
+ ## Tools
13
+
14
+ | Tool | What it does |
15
+ |------|--------------|
16
+ | `search_papers` | Semantic hybrid search over 95k+ top-venue CS papers |
17
+ | `search_within_paper` | Every matching passage inside one paper |
18
+ | `match_document` / `match_paper` | Content-nearest papers to a passage / a paper |
19
+ | `list_conferences` / `corpus_stats` | Venue registry / corpus size |
20
+ | `journal_record` / `journal_search` / `journal_recent` / `journal_index_dir` | ACE journal work-memory (cwd-scoped); `journal_index_dir` bulk-indexes a local directory |
21
+ | `index_inventory` | Your indexed-file tree: user → host → project → dir → file |
22
+
23
+ ## Install
24
+
25
+ ### Claude Code
26
+
27
+ ```bash
28
+ # LAN (no key):
29
+ claude mcp add retrieval -- uvx retrieval-mcp
30
+ # Off-LAN (public gateway + key):
31
+ claude mcp add retrieval \
32
+ --env RETRIEVAL_API_URL=https://retrieval.rnarket.com \
33
+ --env RETRIEVAL_API_KEY=sk-... \
34
+ -- uvx retrieval-mcp
35
+ ```
36
+
37
+ ### Claude Desktop / any MCP client
38
+
39
+ `claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/`,
40
+ Windows: `%APPDATA%\Claude\`):
41
+
42
+ ```json
43
+ {
44
+ "mcpServers": {
45
+ "retrieval": {
46
+ "command": "uvx",
47
+ "args": ["retrieval-mcp"],
48
+ "env": {
49
+ "RETRIEVAL_API_URL": "https://retrieval.rnarket.com",
50
+ "RETRIEVAL_API_KEY": "sk-...",
51
+ "JOURNAL_PROJECT_SLUG": "my-project"
52
+ }
53
+ }
54
+ }
55
+ }
56
+ ```
57
+
58
+ No `uv`? `pip install retrieval-mcp` then use `"command": "retrieval-mcp"`.
59
+
60
+ ## Config (env)
61
+
62
+ | Var | Default | Notes |
63
+ |-----|---------|-------|
64
+ | `RETRIEVAL_API_URL` | `http://10.100.100.111:8000` | LAN compute box (no key). Off-LAN, set to `https://retrieval.rnarket.com`. |
65
+ | `RETRIEVAL_API_KEY` | — | `sk-...` key for the gateway (create under `/auth/keys`). Required off-LAN. |
66
+ | `JOURNAL_PROJECT_SLUG` | current dir name | Journal namespace; scopes journal reads/writes so projects don't leak into each other. |
67
+
68
+ ## License
69
+
70
+ MIT
@@ -0,0 +1,28 @@
1
+ [project]
2
+ name = "retrieval-mcp"
3
+ version = "0.1.0"
4
+ description = "Installable MCP server for the Retrieval academic-paper search + ACE journal + index inventory API"
5
+ readme = "README.md"
6
+ license = { text = "MIT" }
7
+ requires-python = ">=3.10"
8
+ keywords = ["mcp", "model-context-protocol", "retrieval", "papers", "search", "llm"]
9
+ dependencies = [
10
+ "mcp>=1.2.0",
11
+ "httpx>=0.27",
12
+ ]
13
+
14
+ [project.urls]
15
+ Homepage = "https://retrieval.rnarket.com"
16
+
17
+ [project.scripts]
18
+ retrieval-mcp = "retrieval_mcp:main"
19
+
20
+ [build-system]
21
+ requires = ["hatchling"]
22
+ build-backend = "hatchling.build"
23
+
24
+ [tool.hatch.build.targets.wheel]
25
+ only-include = ["retrieval_mcp.py"]
26
+
27
+ [tool.hatch.build.targets.wheel.sources]
28
+ "retrieval_mcp.py" = "retrieval_mcp.py"
@@ -0,0 +1,354 @@
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.10"
4
+ # dependencies = ["mcp>=1.2.0", "httpx>=0.27"]
5
+ # ///
6
+ """Retrieval MCP — a self-contained, installable MCP server.
7
+
8
+ Unlike the in-repo ``mcp_server/`` (which also hosts the local code KB and needs the
9
+ project's Python modules), THIS file is standalone: it only talks to the Retrieval
10
+ REST API over HTTP, so it installs anywhere with just ``mcp`` + ``httpx`` and needs
11
+ no checkout, no GPU, no models.
12
+
13
+ Default target is the compute box directly: http://10.100.100.111:8000
14
+ On the lab LAN that is trusted as an internal admin, so no key is needed. Off-LAN,
15
+ point RETRIEVAL_API_URL at the public gateway and set RETRIEVAL_API_KEY (sk-...).
16
+
17
+ Config (env):
18
+ RETRIEVAL_API_URL backend base URL (default http://10.100.100.111:8000)
19
+ RETRIEVAL_API_KEY sk-key for the paid gateway (optional; unset on LAN)
20
+ JOURNAL_PROJECT_SLUG override the journal project namespace (default: cwd dir name)
21
+
22
+ (The misspelled legacy names RETRIVAL_API_URL / RETRIVAL_API_KEY are still honored
23
+ as a fallback so existing configs keep working.)
24
+
25
+ Run:
26
+ uvx retrieval-mcp # or: uv run retrieval_mcp.py
27
+ python retrieval_mcp.py # if mcp + httpx are already installed
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import glob
32
+ import os
33
+ import socket
34
+ from typing import Any
35
+
36
+ import httpx
37
+ from mcp.server.fastmcp import FastMCP
38
+
39
+
40
# NOTE: intentionally duplicated in mcp_server/server.py — this file ships as a
# repo-independent standalone package (pyproject bundles ONLY this file; its only
# runtime dependencies are mcp + httpx), so it must not import from the repo.
# Keep the two copies identical.
def _env(*names: str, default: str | None = None) -> str | None:
    """First set value among `names` (new spelling first, legacy fallback), else default.

    Names are tried in the order given. A variable that is unset or set to the
    empty string counts as "not set", so the next name is tried.
    """
    for name in names:
        value = os.environ.get(name)
        if value:
            return value
    return default
50
+
51
+
52
# Backend base URL: new env spelling first, then the legacy misspelling, then the
# LAN compute box. Trailing slashes are stripped so request paths can start with "/".
BASE_URL = (
    _env("RETRIEVAL_API_URL", "RETRIVAL_API_URL", default="http://10.100.100.111:8000")
    or ""
).rstrip("/")
# Optional gateway key; when absent no Authorization header is sent at all.
API_KEY = _env("RETRIEVAL_API_KEY", "RETRIVAL_API_KEY")
_HEADERS = {} if not API_KEY else {"Authorization": f"Bearer {API_KEY}"}
# Per-request timeout, in seconds, handed to httpx.
_TIMEOUT = 180.0
57
+
58
+
59
+ def _default_project() -> str:
60
+ """Journal project scope, defaulting to the current working directory's name.
61
+
62
+ Precedence: explicit ``JOURNAL_PROJECT_SLUG`` env > cwd basename > ``"default"``.
63
+ Scoping journal reads/writes to the cwd project by default keeps one project's
64
+ memory out of another's results; pass ``project_slug=...`` to a tool to override.
65
+ """
66
+ env = os.environ.get("JOURNAL_PROJECT_SLUG")
67
+ if env:
68
+ return env
69
+ try:
70
+ base = os.path.basename(os.getcwd()).strip().lower()
71
+ return base or "default"
72
+ except OSError:
73
+ return "default"
74
+
75
# Server instance named "retrieval"; each @mcp.tool() function below registers on it.
mcp = FastMCP("retrieval")
76
+
77
+
78
async def _post(path: str, payload: dict) -> dict:
    """POST ``payload`` as a JSON body to ``path`` on the backend; return the decoded JSON.

    A fresh ``httpx.AsyncClient`` is opened per call (base URL ``BASE_URL``, timeout
    ``_TIMEOUT``) and the module-level auth headers are attached. Whatever
    ``raise_for_status()`` raises for an unsuccessful response propagates to the caller.
    """
    async with httpx.AsyncClient(base_url=BASE_URL, timeout=_TIMEOUT) as client:
        response = await client.post(path, json=payload, headers=_HEADERS)
        response.raise_for_status()
        return response.json()
83
+
84
+
85
async def _get(path: str, params: dict | None = None) -> dict:
    """GET ``path`` on the backend with optional query ``params``; return the decoded JSON.

    ``None`` params are sent as an empty mapping. A fresh ``httpx.AsyncClient`` is
    opened per call and the module-level auth headers are attached. Whatever
    ``raise_for_status()`` raises for an unsuccessful response propagates to the caller.
    """
    query = params or {}
    async with httpx.AsyncClient(base_url=BASE_URL, timeout=_TIMEOUT) as client:
        response = await client.get(path, params=query, headers=_HEADERS)
        response.raise_for_status()
        return response.json()
90
+
91
+
92
+ def _snippet(text: str, limit: int = 280) -> str:
93
+ text = " ".join((text or "").split())
94
+ return text if len(text) <= limit else text[:limit].rstrip() + "…"
95
+
96
+
97
+ # ------------------------------- paper retrieval ------------------------------- #
98
+
99
@mcp.tool()
async def search_papers(query: str, top_k: int = 10, venue: str | None = None,
                        year: int | None = None, title_only: bool = False) -> str:
    """Semantic hybrid search over 94k+ top-venue CS papers. Phrase the query as a
    full, specific description (not a bare keyword). Returns title + venue/year +
    paper_id + snippet. Pass paper_id to search_within_paper / match_paper."""
    # Optional filters are included in the request only when they are truthy.
    optional_filters = {"venue": venue, "year": year}
    payload: dict[str, Any] = {
        "query": query,
        "top_k": top_k,
        "method": "hybrid",
        "title_only": title_only,
        "dedup_by_paper": True,
        **{key: value for key, value in optional_filters.items() if value},
    }
    response = await _post("/api/search", payload)
    return _fmt_papers(response)
112
+
113
+
114
@mcp.tool()
async def search_within_paper(paper_id: str, query: str, top_k: int = 20) -> str:
    """Search inside ONE paper; returns every matching passage with its score."""
    # Restrict the search to a single paper and turn per-paper dedup off so each
    # matching passage comes back as its own result.
    request_body = dict(
        query=query,
        top_k=top_k,
        paper_ids=[paper_id],
        dedup_by_paper=False,
        method="hybrid",
    )
    response = await _post("/api/search", request_body)
    return _fmt_papers(response)
120
+
121
+
122
@mcp.tool()
async def match_document(text: str, top_k: int = 10) -> str:
    """Find corpus papers whose CONTENT is most similar to a passage (related-work
    discovery). Aggregated per paper by coverage."""
    # The match endpoint is shared with match_paper; "input_type" selects raw text here.
    request_body = {"input_type": "text", "text": text, "top_k": top_k}
    response = await _post("/api/match-doc", request_body)
    return _fmt_match(response)
128
+
129
+
130
@mcp.tool()
async def match_paper(paper_id: str, top_k: int = 10) -> str:
    """Content-nearest neighbours of an already-indexed paper (self excluded)."""
    # The match endpoint is shared with match_document; "input_type" selects a paper id here.
    request_body = {"input_type": "paper_id", "paper_id": paper_id, "top_k": top_k}
    response = await _post("/api/match-doc", request_body)
    return _fmt_match(response)
135
+
136
+
137
@mcp.tool()
async def list_conferences() -> str:
    """List venues in the corpus with CCF tier + years available."""
    payload = await _get("/api/conferences")
    venues = payload.get("conferences") or []

    def _describe(venue: dict) -> str:
        # One bullet per venue: display name, tier, and the min-max year span
        # ("?" when no years are listed).
        years = venue.get("years") or []
        span = f"{min(years)}-{max(years)}" if years else "?"
        name = venue.get("title", venue.get("sub", "?"))
        tier = venue.get("venue_tier", "?")
        return f"- {name} [{tier}] {span}"

    return "\n".join([f"{len(venues)} venues:", *map(_describe, venues)])
148
+
149
+
150
@mcp.tool()
async def corpus_stats() -> str:
    """Corpus size: papers, chunks, vectors, embedding model."""
    stats = await _get("/api/stats")
    # Any field missing from the response is shown as "?".
    papers = stats.get("total_papers", "?")
    chunks = stats.get("total_chunks", "?")
    vectors = stats.get("vector_index_size", "?")
    model = stats.get("embedding_model", "?")
    return f"Corpus: {papers} papers, {chunks} chunks, {vectors} vectors ({model})."
156
+
157
+
158
+ # ------------------------------ journal memory -------------------------------- #
159
+
160
@mcp.tool()
async def journal_record(body: str, project_slug: str | None = None, title: str | None = None,
                         source_kind: str = "memory", tags: list[str] | None = None) -> str:
    """Record this session's detailed work memory into the authenticated journal.

    Scoped to the current project by default (cwd name); pass project_slug to override.
    Example: journal_record(body="Fixed the RRF fusion off-by-one in pipeline.py; root
    cause was 0-indexed rank. Verified with test_pipeline_rrf.", title="RRF rank fix",
    tags=["bugfix","retrieval"]).
    """
    payload: dict[str, Any] = {
        "project_slug": project_slug or _default_project(),
        "client": "mcp",
        "source_kind": source_kind,
        "body": body,
        "tags": tags or [],
    }
    # The title is optional and omitted from the request when not given.
    if title:
        payload["title"] = title

    response = await _post("/api/journal/entries", payload)
    entry = response.get("entry") or {}
    entry_id = entry.get("entry_id", "?")
    chunk_count = response.get("chunk_count", 0)
    indexed = response.get("indexed", False)
    return f"Recorded {entry_id} (chunks={chunk_count}, indexed={indexed})."
178
+
179
+
180
@mcp.tool()
async def journal_search(query: str, project_slug: str | None = None, top_k: int = 5,
                         mode: str = "keyword", dedup_by_entry: bool = True) -> str:
    """Search journal memory, scoped to the current project by default (cwd name) so
    other projects' notes don't leak into results — pass project_slug to widen/switch.

    mode="keyword" (default, FTS5, needs no embedding — always works) or
    "hybrid"/"dense"/"sparse" (semantic; needs the embed endpoint up).
    Example: journal_search(query="qdrant upsert wait flag", mode="keyword").
    """
    resp = await _post("/api/journal/search", {"project_slug": project_slug or _default_project(),
                                               "query": query, "top_k": top_k, "mode": mode,
                                               "dedup_by_entry": dedup_by_entry,
                                               "include_chunks": True})
    hits = resp.get("hits") or []
    if not hits:
        return "No matching journal entries found."
    lines = [f"Found {len(hits)} journal entries ({resp.get('mode', '?')}):", ""]
    for i, hit in enumerate(hits, 1):
        e = hit.get("entry") or {}
        chunk = hit.get("best_chunk") or {}
        # A JSON null score would make the ":.3f" format raise TypeError; treat it
        # like a missing score, the same way null entry/best_chunk are tolerated above.
        score = hit.get("score")
        if score is None:
            score = 0.0
        lines.append(f"{i}. {e.get('title') or e.get('entry_id')} "
                     f"[{e.get('source_kind', '?')}] score={score:.3f}")
        lines.append(f"   entry_id: {e.get('entry_id', '')}")
        if chunk.get("content"):
            lines.append(f"   ▸ {_snippet(chunk.get('content', ''))}")
    return "\n".join(lines)
207
+
208
+
209
@mcp.tool()
async def journal_recent(project_slug: str | None = None, limit: int = 10) -> str:
    """List recent journal entries for the current project (cwd name by default;
    pass project_slug to override). Example: journal_recent(limit=5)."""
    scope = project_slug or _default_project()
    response = await _get("/api/journal/entries", {"project_slug": scope, "limit": limit})
    entries = response.get("entries") or []
    if not entries:
        return "No recent journal entries found."

    # "total" falls back to the number of entries returned when the backend omits it.
    total = response.get("total", len(entries))
    header = f"Recent journal entries ({len(entries)} of {total}):"
    rows = [
        f"{position}. {entry.get('title') or entry.get('entry_id')} "
        f"[{entry.get('source_kind', '?')}] {entry.get('entry_id', '')}"
        for position, entry in enumerate(entries, 1)
    ]
    return "\n".join([header, "", *rows])
223
+
224
+
225
@mcp.tool()
async def journal_index_dir(
    dir: str = ".",
    project_slug: str | None = None,
    recursive: bool = False,
    pattern: str = "*.md",
) -> str:
    """Batch-index every matching text file in a LOCAL directory into the journal,
    then register them in your index inventory — the bulk counterpart to journal_record.

    Runs where THIS server runs (your machine): reads each file locally, sends its
    CONTENT to the backend as a journal entry (so the full text is searchable via
    journal_search), and reports the file list to your inventory (so index_inventory
    shows them). Non-recursive by default. Safe to re-run (incremental upsert).

    Empty / whitespace-only files are skipped. A file that cannot be read or posted
    does not abort the batch: it is counted as failed and named in the result.
    The host label reported to the inventory comes from the RETRIEVAL_HOST env var
    (legacy RETRIVAL_HOST) when set, otherwise this machine's hostname.

    Example: journal_index_dir(dir="~/Prd/retrival", pattern="*.md") — index the repo's
    root Markdown files into the cwd-named project.

    Args:
        dir: Directory to index (default: current directory).
        project_slug: Journal project namespace. Default = cwd dir name.
        recursive: Recurse into subdirectories (default: false — root files only).
            Note: dot-dirs (.git/.venv) are skipped by glob, but non-dot junk dirs
            (node_modules, etc.) are not — point `dir` at a clean docs tree.
        pattern: Glob for files to index (default: "*.md").
    """
    max_failures_shown = 5  # keep the result message bounded on large batches

    project = project_slug or _default_project()
    root = os.path.abspath(os.path.expanduser(dir))
    # Escape the directory part so glob metacharacters in the path itself are
    # taken literally; only `pattern` is interpreted as a glob.
    parts = [glob.escape(root), "**", pattern] if recursive else [glob.escape(root), pattern]
    globpat = os.path.join(*parts)
    files = sorted(p for p in glob.glob(globpat, recursive=recursive) if os.path.isfile(p))
    if not files:
        return f"No files matching {pattern!r} in {root} (recursive={recursive})."

    indexed = 0
    failures: list[str] = []
    records: list[dict[str, Any]] = []
    for path in files:
        rel_path = os.path.relpath(path, root)
        try:
            with open(path, encoding="utf-8", errors="replace") as fh:
                content = fh.read()
            if not content.strip():
                continue
            resp = await _post("/api/journal/entries", {
                "project_slug": project, "client": "mcp", "source_kind": "doc",
                "title": os.path.basename(path), "body": content, "tags": [],
            })
            record = {
                "rel_path": rel_path, "kind": "doc",
                "collection": "journal_chunks", "chunk_count": resp.get("chunk_count", 0),
            }
        except Exception as exc:  # noqa: BLE001 — keep going; one bad file shouldn't abort the batch
            # Remember which file failed and why instead of only counting it.
            failures.append(f"{rel_path} ({type(exc).__name__})")
            continue
        # Count the file only once its inventory record was built, so a file is
        # never reported as both indexed and failed.
        indexed += 1
        records.append(record)

    failed = len(failures)
    failure_note = ""
    if failures:
        shown = ", ".join(failures[:max_failures_shown])
        hidden = failed - max_failures_shown
        more = f", +{hidden} more" if hidden > 0 else ""
        failure_note = f" Failed: {shown}{more}."

    host = _env("RETRIEVAL_HOST", "RETRIVAL_HOST", default=socket.gethostname())
    summary = f"Indexed {indexed} file(s) into journal '{project}' ({failed} failed); "
    try:
        rep = await _post("/api/index/report", {
            "host": host, "project": project, "root_dir": root, "files": records, "replace": False,
        })
        reported = rep.get("total_files", len(records))
    except Exception as exc:  # noqa: BLE001 — inventory reporting is best-effort
        return f"{summary}inventory report skipped: {exc}{failure_note}"
    return f"{summary}reported {reported} to inventory (host={host}).{failure_note}"
290
+
291
+
292
@mcp.tool()
async def index_inventory(host: str | None = None, project: str | None = None) -> str:
    """Show YOUR indexed-file inventory: user → host → project → dir → file."""
    # Only filters that were actually supplied are forwarded as query parameters.
    params = {}
    if host:
        params["host"] = host
    if project:
        params["project"] = project

    response = await _get("/api/index/inventory", params)
    users = response.get("users") or []
    if not users:
        return "No indexed files reported yet."

    totals = response.get("totals") or {}
    out = [
        f"Inventory: {totals.get('hosts', 0)} host(s), {totals.get('projects', 0)} project(s), "
        f"{totals.get('files', 0)} file(s), {totals.get('chunks', 0)} chunk(s).",
        "",
    ]
    # Render the tree three levels deep: user, then host, then project.
    for user in users:
        out.append(f"● {user.get('owner_user_id', '?')} ({user.get('file_count', 0)} files)")
        for host_node in user.get("hosts") or []:
            out.append(f"  ▸ {host_node.get('host', '?')}")
            for proj in host_node.get("projects") or []:
                out.append(
                    f"    · {proj.get('project', '?')} [{proj.get('root_dir') or '?'}] "
                    f"({proj.get('file_count', 0)} files)"
                )
    return "\n".join(out)
311
+
312
+
313
+ # ------------------------------ shared formatters ----------------------------- #
314
+
315
+ def _fmt_papers(resp: dict) -> str:
316
+ papers = resp.get("papers") or []
317
+ if not papers:
318
+ return "No matching papers found."
319
+ lines = [f"Found {len(papers)} papers:", ""]
320
+ for i, p in enumerate(papers, 1):
321
+ meta = p.get("metadata") or {}
322
+ venue = meta.get("venue") or p.get("venue") or "?"
323
+ year = meta.get("year") or p.get("year") or "?"
324
+ title = p.get("doc_title") or p.get("title") or "(untitled)"
325
+ score = p.get("max_score", p.get("score", 0.0))
326
+ lines.append(f"{i}. {title} [{venue} {year}] score={score:.3f}")
327
+ lines.append(f" paper_id: {p.get('paper_id') or ''}")
328
+ chunks = p.get("chunks") or []
329
+ if chunks:
330
+ lines.append(f" ▸ {_snippet(chunks[0].get('content', ''))}")
331
+ lines.append("")
332
+ return "\n".join(lines).rstrip()
333
+
334
+
335
+ def _fmt_match(resp: dict) -> str:
336
+ papers = resp.get("papers") or []
337
+ if not papers:
338
+ return "No similar papers found."
339
+ lines = [f"Top {len(papers)} content-nearest papers:", ""]
340
+ for i, p in enumerate(papers, 1):
341
+ lines.append(f"{i}. {p.get('title') or '(untitled)'} [{p.get('venue', '?')} "
342
+ f"{p.get('year', '?')}] score={p.get('score', 0.0):.3f} "
343
+ f"coverage={p.get('coverage', 0)}")
344
+ lines.append(f" paper_id: {p.get('paper_id') or ''}")
345
+ lines.append("")
346
+ return "\n".join(lines).rstrip()
347
+
348
+
349
def main() -> None:
    """Entry point for the ``retrieval-mcp`` console script: run the MCP server."""
    mcp.run()


# Also runnable directly as a script (python retrieval_mcp.py / uv run retrieval_mcp.py).
if __name__ == "__main__":
    main()