sin-code-bundle 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. sin_code_bundle/__init__.py +6 -0
  2. sin_code_bundle/agents_md.py +245 -0
  3. sin_code_bundle/ast_edit.py +323 -0
  4. sin_code_bundle/bench.py +506 -0
  5. sin_code_bundle/budget.py +51 -0
  6. sin_code_bundle/cache.py +131 -0
  7. sin_code_bundle/checkpoint.py +230 -0
  8. sin_code_bundle/cli.py +1943 -0
  9. sin_code_bundle/codocs.py +328 -0
  10. sin_code_bundle/dap_bridge.py +135 -0
  11. sin_code_bundle/data/codocs/SKILL.md +280 -0
  12. sin_code_bundle/gitnexus.py +368 -0
  13. sin_code_bundle/hashline.py +216 -0
  14. sin_code_bundle/hooks.py +249 -0
  15. sin_code_bundle/immortal_commit.py +288 -0
  16. sin_code_bundle/interceptor.py +119 -0
  17. sin_code_bundle/lsp_backend.py +303 -0
  18. sin_code_bundle/lsp_bootstrap.py +85 -0
  19. sin_code_bundle/markitdown.py +254 -0
  20. sin_code_bundle/mcp_config.py +455 -0
  21. sin_code_bundle/mcp_server.py +963 -0
  22. sin_code_bundle/memory.py +208 -0
  23. sin_code_bundle/merge_safety.py +313 -0
  24. sin_code_bundle/orchestration_worktrees.py +102 -0
  25. sin_code_bundle/policy.py +224 -0
  26. sin_code_bundle/preflight.py +152 -0
  27. sin_code_bundle/programming_workflow.py +541 -0
  28. sin_code_bundle/rtk.py +154 -0
  29. sin_code_bundle/safety.py +52 -0
  30. sin_code_bundle/session_warmup.py +247 -0
  31. sin_code_bundle/skills.py +188 -0
  32. sin_code_bundle/symbol_resolve.py +166 -0
  33. sin_code_bundle/tools/__init__.py +4 -0
  34. sin_code_bundle/tools/pypi_setup.py +289 -0
  35. sin_code_bundle/vfs.py +264 -0
  36. sin_code_bundle-0.9.2.dist-info/METADATA +470 -0
  37. sin_code_bundle-0.9.2.dist-info/RECORD +41 -0
  38. sin_code_bundle-0.9.2.dist-info/WHEEL +5 -0
  39. sin_code_bundle-0.9.2.dist-info/entry_points.txt +4 -0
  40. sin_code_bundle-0.9.2.dist-info/licenses/LICENSE +21 -0
  41. sin_code_bundle-0.9.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,303 @@
1
+ # SPDX-License-Identifier: MIT
2
+ """LSP-backed symbol resolution for the SCKG.
3
+
4
+ This makes `impact()` structural and type-accurate instead of textual:
5
+ - "what calls this symbol?" -> LSP references
6
+ - "where is it defined?" -> LSP definition
7
+ - blast-radius scoring -> ranked caller set + fan-in
8
+
9
+ Primary backend: multilspy (drives real language servers: pyright, gopls,
10
+ typescript-language-server, rust-analyzer, jdtls, …).
11
+ Fallback backend: line-by-line textual symbol scan, reported as `source="treesitter"` (cheap, language-agnostic, no server).
12
+
13
+ The module degrades gracefully: if no LSP is available it returns tree-sitter
14
+ results and flags `source="treesitter"`, so the agent still gets a useful signal
15
+ and the bundle keeps working (consistent with `sin status`).
16
+
17
+ Docs: lsp_backend.doc.md
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import asyncio
23
+ from dataclasses import dataclass, field
24
+ from pathlib import Path
25
+ from typing import Literal, Optional
26
+
27
# Label for the backend that produced an impact result.
Source = Literal["lsp", "treesitter", "none"]
28
+
29
+ _LANG_BY_EXT = {
30
+ ".py": "python",
31
+ ".ts": "typescript",
32
+ ".tsx": "typescript",
33
+ ".js": "javascript",
34
+ ".jsx": "javascript",
35
+ ".go": "go",
36
+ ".rs": "rust",
37
+ ".java": "java",
38
+ ".rb": "ruby",
39
+ ".php": "php",
40
+ ".cs": "csharp",
41
+ ".c": "c",
42
+ ".cpp": "cpp",
43
+ ".h": "cpp",
44
+ }
45
+
46
+
47
@dataclass(frozen=True)
class Location:
    """Immutable (hashable) pointer to one position in a source file.

    The producers in this module fill ``line`` and ``column`` with 1-based
    values; ``snippet`` optionally carries a short excerpt of that line.
    """

    file: str  # path of the file the position refers to
    line: int
    column: int
    snippet: str = ""  # empty when no excerpt was captured
55
+
56
+
57
+ # ── Impact Result: Blast-Radius Payload ────────────────────────────────
58
@dataclass
class ImpactResult:
    """Blast-radius summary for one symbol, as handed to the agent.

    ``callers`` may be a truncated view; ``fan_in`` carries the total count
    the producer computed. ``source`` records which backend produced the data.
    """

    symbol: str
    defined_at: Optional[Location]
    callers: list[Location] = field(default_factory=list)
    fan_in: int = 0
    touches_tests: bool = False
    touches_public_api: bool = False
    risk: Literal["low", "medium", "high"] = "low"
    source: Source = "none"
    notes: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain-dict view of this result.

        Every ``Location`` is flattened to ``{file, line, column, snippet}``
        so the payload contains only built-in types. This method only builds
        the dict; storing it in a cache is the caller's job.
        """
        payload: dict = {"symbol": self.symbol}
        payload["defined_at"] = _loc_to_dict(self.defined_at)
        payload["callers"] = list(map(_loc_to_dict, self.callers))
        # Remaining fields are already JSON-friendly and are copied as-is.
        for name in ("fan_in", "touches_tests", "touches_public_api", "risk", "source", "notes"):
            payload[name] = getattr(self, name)
        return payload
90
+
91
+
92
+ def _loc_to_dict(loc: Optional[Location]) -> Optional[dict]:
93
+ if loc is None:
94
+ return None
95
+ return {"file": loc.file, "line": loc.line, "column": loc.column, "snippet": loc.snippet}
96
+
97
+
98
def _lang_for(path: Path) -> Optional[str]:
    """Return the language id for ``path``'s suffix (case-insensitive), or ``None`` if unmapped."""
    extension = path.suffix.lower()
    return _LANG_BY_EXT.get(extension)
100
+
101
+
102
+ def _score_risk(
103
+ callers: int, touches_tests: bool, touches_api: bool
104
+ ) -> Literal["low", "medium", "high"]:
105
+ # Thresholds are intentionally simple and conservative. >10 callers = broad
106
+ # blast radius (high). >3 = significant surface area (medium). Tests + API
107
+ # each escalate one tier (e.g. a 4-caller non-test/non-api function is
108
+ # "low" but a 4-caller test-touching one is "medium").
109
+ if touches_api or callers > 10:
110
+ return "high"
111
+ if touches_tests or callers > 3:
112
+ return "medium"
113
+ return "low"
114
+
115
+
116
+ def _is_test_path(p: str) -> bool:
117
+ pl = p.lower()
118
+ return "test" in Path(pl).name or "/tests/" in pl or pl.endswith("_test.py")
119
+
120
+
121
+ def _is_public_api_path(p: str) -> bool:
122
+ name = Path(p).name.lower()
123
+ return name in {"__init__.py", "api.py", "index.ts", "index.js", "mod.rs", "lib.rs"}
124
+
125
+
126
+ # ── LSP Backend: References and Definition via multilspy ───────────────
127
+ # --------------------------------------------------------------------------- #
128
+ # LSP backend (multilspy)
129
+ # --------------------------------------------------------------------------- #
130
+ async def _lsp_impact(
131
+ root: Path, file: Path, symbol: str, line: int, column: int
132
+ ) -> Optional[ImpactResult]:
133
+ try:
134
+ from multilspy import LanguageServer # type: ignore
135
+ from multilspy.multilspy_config import MultilspyConfig # type: ignore
136
+ from multilspy.multilspy_logger import MultilspyLogger # type: ignore
137
+ except ImportError:
138
+ return None
139
+
140
+ lang = _lang_for(file)
141
+ if not lang:
142
+ return None
143
+
144
+ config = MultilspyConfig.from_dict({"code_language": lang})
145
+ logger = MultilspyLogger()
146
+ server = LanguageServer.create(config, logger, str(root))
147
+
148
+ rel = str(file.relative_to(root)) if file.is_absolute() else str(file)
149
+ async with server.start_server():
150
+ definition = await server.request_definition(rel, line - 1, column - 1)
151
+ references = await server.request_references(rel, line - 1, column - 1)
152
+
153
+ def_loc: Optional[Location] = None
154
+ if definition:
155
+ d = definition[0]
156
+ def_loc = Location(
157
+ file=d.get("relativePath", d.get("uri", "")),
158
+ line=d["range"]["start"]["line"] + 1,
159
+ column=d["range"]["start"]["character"] + 1,
160
+ )
161
+
162
+ callers: list[Location] = []
163
+ for ref in references or []:
164
+ rp = ref.get("relativePath", ref.get("uri", ""))
165
+ callers.append(
166
+ Location(
167
+ file=rp,
168
+ line=ref["range"]["start"]["line"] + 1,
169
+ column=ref["range"]["start"]["character"] + 1,
170
+ )
171
+ )
172
+
173
+ touches_tests = any(_is_test_path(c.file) for c in callers)
174
+ touches_api = any(_is_public_api_path(c.file) for c in callers)
175
+ fan_in = len(callers)
176
+ # Cap caller list at 25 — fits an LLM prompt-friendly blast-radius view
177
+ # without dropping high-fan-in signals. Anything larger reports the
178
+ # truncated count in `notes` so the agent can ask for more if needed.
179
+ return ImpactResult(
180
+ symbol=symbol,
181
+ defined_at=def_loc,
182
+ callers=callers[:25],
183
+ fan_in=fan_in,
184
+ touches_tests=touches_tests,
185
+ touches_public_api=touches_api,
186
+ risk=_score_risk(fan_in, touches_tests, touches_api),
187
+ source="lsp",
188
+ notes=[] if fan_in <= 25 else [f"{fan_in} callers total; showing first 25"],
189
+ )
190
+
191
+
192
+ # --------------------------------------------------------------------------- #
193
+ # tree-sitter fallback (textual but symbol-aware)
194
+ # --------------------------------------------------------------------------- #
195
def _treesitter_impact(root: Path, symbol: str) -> ImpactResult:
    """Approximate the blast radius of ``symbol`` with a textual scan.

    Every file under ``root`` whose suffix maps to a known language is read
    line by line. The last segment of ``symbol`` (after ``.`` / ``::``) is
    matched as a whole word, so searching ``foo`` no longer counts ``foobar``.
    The first line that starts with a definition keyword and names the symbol
    before its first ``(`` becomes ``defined_at``; every other matching line
    is recorded as a caller.

    Args:
        root: Directory to scan recursively.
        symbol: Possibly qualified symbol name.

    Returns:
        An ``ImpactResult`` with ``source="treesitter"``; ``callers`` is capped
        at 25 entries while ``fan_in`` holds the full count.
    """
    import re  # local import: only this fallback path needs it

    fallback_note = "LSP unavailable — textual approximation. Install 'sin[lsp]' for accuracy."
    bare = symbol.split(".")[-1].split("::")[-1]
    if not bare:
        # An empty needle would match every line of every file.
        return ImpactResult(
            symbol=symbol,
            defined_at=None,
            source="treesitter",
            notes=[fallback_note, "Empty symbol name; nothing to scan for."],
        )

    # Whole-word match: no identifier character directly before or after.
    word = re.compile(rf"(?<!\w){re.escape(bare)}(?!\w)")
    skip_dirs = {".git", "node_modules", ".venv", "__pycache__"}
    def_prefixes = ("def ", "class ", "func ", "fn ", "function ")

    callers: list[Location] = []
    defined_at: Optional[Location] = None

    for path in root.rglob("*"):
        # Cheap, syscall-free filters first.
        if _lang_for(path) is None:
            continue
        rel = path.relative_to(root)
        # Only components below `root` are checked, so a root that itself
        # sits inside e.g. ".venv" is still scanned.
        if any(part in skip_dirs for part in rel.parts):
            continue
        if not path.is_file():
            continue
        try:
            text = path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        for lineno, raw in enumerate(text.splitlines(), start=1):
            hit = word.search(raw)
            if hit is None:
                continue
            loc = Location(
                file=str(rel),
                line=lineno,
                column=hit.start() + 1,
                snippet=raw.strip()[:120],
            )
            stripped = raw.lstrip()
            is_definition = (
                stripped.startswith(def_prefixes)
                and word.search(stripped.split("(")[0]) is not None
            )
            if defined_at is None and is_definition:
                defined_at = loc
            else:
                callers.append(loc)

    touches_tests = any(_is_test_path(c.file) for c in callers)
    touches_api = any(_is_public_api_path(c.file) for c in callers)
    fan_in = len(callers)
    return ImpactResult(
        symbol=symbol,
        defined_at=defined_at,
        callers=callers[:25],
        fan_in=fan_in,
        touches_tests=touches_tests,
        touches_public_api=touches_api,
        risk=_score_risk(fan_in, touches_tests, touches_api),
        source="treesitter",
        notes=[fallback_note],
    )
244
+
245
+
246
+ # ── Impact Computation: Cache, LSP, Textual Fallback ──────────────────
247
+ # --------------------------------------------------------------------------- #
248
+ # Public entry point
249
+ # --------------------------------------------------------------------------- #
250
def _impact_from_cache(cached: dict) -> Optional[ImpactResult]:
    """Rebuild an ``ImpactResult`` from a cached dict, or return ``None`` if it is malformed."""
    try:
        defined = cached.get("defined_at")
        return ImpactResult(
            symbol=cached["symbol"],
            defined_at=Location(**defined) if defined else None,
            callers=[Location(**c) for c in cached.get("callers", [])],
            fan_in=cached.get("fan_in", 0),
            touches_tests=cached.get("touches_tests", False),
            touches_public_api=cached.get("touches_public_api", False),
            risk=cached.get("risk", "low"),
            source=cached.get("source", "none"),
            notes=list(cached.get("notes", [])),
        )
    except (AttributeError, KeyError, TypeError):
        # Wrong shape (not a dict, missing "symbol", unexpected Location
        # fields, ...): treat the entry as a cache miss.
        return None


def compute_impact(
    root: str | Path,
    symbol: str,
    file: Optional[str | Path] = None,
    line: Optional[int] = None,
    column: Optional[int] = None,
) -> ImpactResult:
    """Resolve the blast radius of `symbol`.

    If (file, line, column) are all given and the LSP backend yields a result,
    that result is returned. Otherwise, or when the LSP path raises, the
    textual fallback scan is used; in the error case the exception text is
    appended to the result's notes.

    Results are stored in and read from ``GraphCache`` under a key built from
    (symbol, file, line, column). A cached entry that cannot be turned back
    into an ``ImpactResult`` is ignored and recomputed.

    Args:
        root: Repository root; resolved to an absolute path.
        symbol: Symbol name to analyse.
        file: File containing the symbol (relative paths are joined to ``root``).
        line: 1-based line of the symbol.
        column: 1-based column of the symbol.

    Returns:
        The computed or cached ``ImpactResult``.
    """
    root_path = Path(root).resolve()

    # Cache layer
    from sin_code_bundle.cache import GraphCache

    cache = GraphCache(root_path)
    cache_key = f"impact:{symbol}:{file}:{line}:{column}"
    cached = cache.get(cache_key)
    if cached is not None:
        hit = _impact_from_cache(cached)
        if hit is not None:
            return hit

    if file and line and column:
        file_path = (
            (root_path / file) if not Path(file).is_absolute() else Path(file)  # type: ignore[arg-type]
        )
        try:
            result = asyncio.run(_lsp_impact(root_path, file_path, symbol, line, column))
            if result is not None:
                cache.set(cache_key, result.to_dict())
                return result
        except Exception as exc:  # noqa: BLE001
            # Best-effort: any LSP failure degrades to the textual scan.
            ts = _treesitter_impact(root_path, symbol)
            ts.notes.append(f"LSP error, used fallback: {exc}")
            cache.set(cache_key, ts.to_dict())
            return ts

    # No position given, or the LSP backend was unavailable (returned None).
    result = _treesitter_impact(root_path, symbol)
    cache.set(cache_key, result.to_dict())
    return result
@@ -0,0 +1,85 @@
1
+ """Detect repo languages and ensure the matching language servers are present.
2
+
3
+ `sin doctor` uses this to tell users exactly what to install for accurate
4
+ impact analysis. We never silently install global tooling; we report and offer
5
+ the exact install command.
6
+
7
+ Docs: lsp_bootstrap.doc.md
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import shutil
13
+ from collections import Counter
14
+ from pathlib import Path
15
+
16
# Language id -> (executable looked up on PATH, human-readable install hint).
SERVERS: dict[str, tuple[str, str]] = {
    "python": ("pyright-langserver", "npm i -g pyright (or: pip install pyright)"),
    # TypeScript and JavaScript are served by the same binary.
    "typescript": ("typescript-language-server", "npm i -g typescript typescript-language-server"),
    "javascript": ("typescript-language-server", "npm i -g typescript typescript-language-server"),
    "go": ("gopls", "go install golang.org/x/tools/gopls@latest"),
    "rust": ("rust-analyzer", "rustup component add rust-analyzer"),
    "java": ("jdtls", "see: https://github.com/eclipse-jdtls/eclipse.jdt.ls"),
}
43
+
44
+ _EXT_LANG: dict[str, str] = {
45
+ ".py": "python",
46
+ ".ts": "typescript",
47
+ ".tsx": "typescript",
48
+ ".js": "javascript",
49
+ ".jsx": "javascript",
50
+ ".go": "go",
51
+ ".rs": "rust",
52
+ ".java": "java",
53
+ }
54
+ _IGNORE = {".git", "node_modules", ".venv", "__pycache__", ".sin"}
55
+
56
+
57
def detect_languages(root: Path) -> list[tuple[str, int]]:
    """Return (language, file_count) pairs, most frequent first.

    Walks ``root`` recursively and counts regular files whose lower-cased
    suffix appears in ``_EXT_LANG``. A file is skipped when any path
    component *below* ``root`` is listed in ``_IGNORE``; components of
    ``root`` itself are not considered, so a repository that happens to live
    under e.g. ``node_modules`` is still analysed.
    """
    counter: Counter[str] = Counter()
    for p in root.rglob("*"):
        # Suffix lookup and name filtering need no syscalls; stat last.
        lang = _EXT_LANG.get(p.suffix.lower())
        if lang is None:
            continue
        if any(part in _IGNORE for part in p.relative_to(root).parts):
            continue
        if p.is_file():
            counter[lang] += 1
    return counter.most_common()
67
+
68
+
69
def server_status(root: Path) -> list[dict]:
    """Describe language-server availability for every language detected under ``root``.

    Each row has the keys ``language``, ``files``, ``server``, ``installed``
    and ``install_hint``. Languages without an entry in ``SERVERS`` get
    ``server=None`` and ``installed=False``.
    """

    def _row(lang: str, count: int) -> dict:
        binary, hint = SERVERS.get(lang) or (None, "no LSP integration yet")
        # "Installed" means the binary resolves on PATH.
        on_path = bool(binary and shutil.which(binary))
        return {
            "language": lang,
            "files": count,
            "server": binary,
            "installed": on_path,
            "install_hint": hint,
        }

    return [_row(lang, count) for lang, count in detect_languages(root)]
@@ -0,0 +1,254 @@
1
+ # SPDX-License-Identifier: MIT
2
+ """MarkItDown bridge.
3
+
4
+ MarkItDown (https://github.com/microsoft/markitdown) is an *upstream* tool by
5
+ Microsoft, distributed as the MIT-licensed PyPI packages ``markitdown`` (CLI /
6
+ library) and ``markitdown-mcp`` (an MCP server). We never vendor or copy its
7
+ source; we only invoke the published packages. This keeps the bundle
8
+ MIT-licensed while giving coder agents a first-class way to turn binary and
9
+ office documents (PDF, DOCX, PPTX, XLSX, images, audio, HTML, ...) into
10
+ LLM-friendly Markdown.
11
+
12
+ The bridge provides:
13
+ * discovery / health checks for the ``markitdown-mcp`` runner and the
14
+ ``markitdown`` CLI,
15
+ * a thin ``convert`` wrapper over the ``markitdown`` CLI,
16
+ * MCP wiring so OpenCode / Codex / Hermes each get the MarkItDown MCP server,
17
+ mirroring upstream's recommended ``uvx markitdown-mcp`` invocation.
18
+
19
+ Docs: markitdown.doc.md
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import json
25
+ import shutil
26
+ import subprocess
27
+ from dataclasses import dataclass
28
+ from pathlib import Path
29
+ from typing import Any
30
+
31
+ # ── MarkItDown Bridge: Document → Markdown ────────────────────────────
32
+ # Microsoft MarkItDown is the upstream package. We never vendor it; the
33
+ # bridge only discovers the published `markitdown-mcp` server and the
34
+ # `markitdown` CLI and shells out to them. This keeps the bundle MIT and
35
+ # lets us pick up upstream format support (PDF, DOCX, PPTX, XLSX, images
36
+ # with OCR, audio transcription, HTML, CSV/JSON/XML, ZIP, EPUB, etc.)
37
+ # without re-implementing any of it.
38
+
39
+ # MarkItDown exposes its MCP server through the ``markitdown-mcp`` package.
40
+ # Upstream recommends running it via ``uvx`` so it is fetched/cached on demand;
41
+ # we fall back to a directly-installed ``markitdown-mcp`` executable.
42
# Name of the upstream MCP server package; also used as the executable name.
MARKITDOWN_MCP_PACKAGE = "markitdown-mcp"
# Executable name of the upstream one-shot converter CLI.
MARKITDOWN_CLI = "markitdown"
44
+
45
+
46
class MarkItDownError(RuntimeError):
    """Error type for this bridge: a MarkItDown tool is missing or an invocation failed."""
48
+
49
+
50
+ @dataclass
51
+ class MarkItDownEnv:
52
+ """Resolved runtime environment for invoking MarkItDown."""
53
+
54
+ uvx: str | None
55
+ mcp_exe: str | None
56
+ cli: str | None
57
+
58
+ @property
59
+ def mcp_available(self) -> bool:
60
+ """True iff either ``uvx`` or a directly-installed ``markitdown-mcp`` binary is on PATH."""
61
+ return bool(self.uvx or self.mcp_exe)
62
+
63
+ @property
64
+ def cli_available(self) -> bool:
65
+ """True iff the ``markitdown`` CLI (the converter) is on PATH."""
66
+ return bool(self.cli)
67
+
68
+ def mcp_command(self) -> dict[str, Any]:
69
+ """Return the MCP launch command, preferring ``uvx``."""
70
+ if self.uvx:
71
+ return {"command": "uvx", "args": [MARKITDOWN_MCP_PACKAGE]}
72
+ if self.mcp_exe:
73
+ return {"command": MARKITDOWN_MCP_PACKAGE, "args": []}
74
+ raise MarkItDownError(
75
+ "MarkItDown MCP server not found. Install it with "
76
+ "`pip install markitdown-mcp` (or `uv tool install markitdown-mcp`). "
77
+ "The bundle does not vendor MarkItDown."
78
+ )
79
+
80
+ def cli_cmd(self) -> str:
81
+ """Return the absolute path of the ``markitdown`` CLI, or raise.
82
+
83
+ Used by ``convert()`` to shell out for one-shot document→markdown
84
+ conversion without spinning up the long-lived MCP server.
85
+ """
86
+ if not self.cli:
87
+ raise MarkItDownError(
88
+ "`markitdown` CLI not found. Install with `pip install 'markitdown[all]'`."
89
+ )
90
+ return self.cli
91
+
92
+
93
def detect_env() -> MarkItDownEnv:
    """Look up ``uvx``, ``markitdown-mcp`` and ``markitdown`` on PATH and record the results."""
    locate = shutil.which
    uvx_path = locate("uvx")
    mcp_path = locate(MARKITDOWN_MCP_PACKAGE)
    cli_path = locate(MARKITDOWN_CLI)
    return MarkItDownEnv(uvx=uvx_path, mcp_exe=mcp_path, cli=cli_path)
100
+
101
+
102
def mcp_server_command(env: MarkItDownEnv | None = None) -> dict[str, Any]:
    """Return the MCP server launch spec for ``env``, probing PATH when ``env`` is omitted."""
    if not env:
        env = detect_env()
    launch_spec = env.mcp_command()
    return launch_spec
106
+
107
+
108
+ # ── Local-Only Safety: File Access Guard ─────────────────────────────
109
+ # `convert()` is the only public surface that touches a file path. It
110
+ # deliberately refuses anything that is not a regular file on the local
111
+ # filesystem — we never want an MCP client (potentially remote / hostile)
112
+ # to coerce us into passing an http:// or pipe:// URL into MarkItDown's
113
+ # CLI, which would expand the attack surface considerably.
114
+
115
+
116
def convert(
    path: str, env: MarkItDownEnv | None = None, timeout: int = 300
) -> str:  # 300s = 5min; large PDFs / pptx with embedded media can be slow on first pass
    """Convert a document to Markdown using the upstream ``markitdown`` CLI.

    Args:
        path: Local file to convert. Must be an existing regular file.
        env: Pre-resolved environment; PATH is probed when omitted.
        timeout: Seconds to wait for the CLI before giving up.

    Returns:
        The CLI's standard output (the Markdown text).

    Raises:
        MarkItDownError: if the CLI is not installed, ``path`` is not a regular
            file, the CLI cannot be started, it times out, or it exits non-zero.
    """
    env = env or detect_env()
    cli = env.cli_cmd()
    src = Path(path)
    # `is_file()` (not `exists()`) rejects directories and broken symlinks.
    if not src.is_file():
        raise MarkItDownError(f"File not found: {path}")
    # Pass an absolute path: it cannot begin with "-", so a file named like an
    # option (e.g. "-o") can never be parsed by the CLI as a flag.
    target = str(src.absolute())
    try:
        proc = subprocess.run(
            [cli, target],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:  # pragma: no cover - timing dependent
        raise MarkItDownError(f"markitdown timed out after {timeout}s") from exc
    except OSError as exc:
        # E.g. the located executable vanished or is not runnable.
        raise MarkItDownError(f"markitdown could not be executed: {exc}") from exc
    if proc.returncode != 0:
        raise MarkItDownError(f"markitdown failed ({proc.returncode}): {proc.stderr.strip()}")
    return proc.stdout
139
+
140
+
141
def doctor() -> dict[str, Any]:
    """Summarise MarkItDown availability as a plain dict for diagnostics.

    ``runner`` is ``"uvx"`` when uvx was found, otherwise the MCP package name
    when its executable was found, otherwise ``None``.
    """
    env = detect_env()
    if env.uvx:
        runner = "uvx"
    elif env.mcp_exe:
        runner = MARKITDOWN_MCP_PACKAGE
    else:
        runner = None
    report: dict[str, Any] = {
        "mcp_available": env.mcp_available,
        "cli_available": env.cli_available,
        "runner": runner,
        "mcp_package": MARKITDOWN_MCP_PACKAGE,
    }
    return report
150
+
151
+
152
+ # ── Agent Integration: Per-Agent MCP Config Writers ──────────────────
153
+ # Below: per-agent MCP config writers. These mutate well-known files
154
+ # under the user's home directory:
155
+ # * OpenCode: ~/.config/opencode/opencode.json (JSON, mcp.<name>)
156
+ # * Codex: ~/.codex/config.toml (TOML, [mcp_servers.<name>])
157
+ # * Hermes: ~/.hermes/mcp.json (JSON, mcpServers.<name>)
158
+ # We DO NOT touch plugin/hook files for the agents — MarkItDown integrates
159
+ # through MCP, the same surface as GitNexus, so behaviour is uniform.
160
+
161
+
162
+ # ── MCP Wiring (mirrors the GitNexus bridge) ──────────────────────────────
163
+ def _opencode_config_path() -> Path:
164
+ return Path.home() / ".config" / "opencode" / "opencode.json"
165
+
166
+
167
+ def _codex_config_path() -> Path:
168
+ return Path.home() / ".codex" / "config.toml"
169
+
170
+
171
+ def _hermes_config_path() -> Path:
172
+ return Path.home() / ".hermes" / "mcp.json"
173
+
174
+
175
# Agent names accepted by `setup_agents`, in default wiring order.
AGENTS = (
    "opencode",
    "codex",
    "hermes",
)
176
+
177
+
178
def _launch(env: MarkItDownEnv | None) -> tuple[str, list[str]]:
    """Resolve the MCP launch spec and return it as a ``(command, args)`` pair."""
    spec = mcp_server_command(env)
    executable = spec["command"]
    arguments = spec["args"]
    return executable, arguments
181
+
182
+
183
def _wire_opencode(env: MarkItDownEnv | None) -> str:
    """Register the MarkItDown MCP server in OpenCode's JSON config.

    Sets ``mcp.markitdown`` and leaves all other keys untouched. If the
    existing file cannot be parsed as a JSON object, a byte-exact copy is saved
    next to it with a ``.bak`` suffix before a fresh config is written, so the
    user's previous content is never lost.

    Returns:
        The path of the config file that was written.

    Raises:
        MarkItDownError: if no MCP launcher is available.
    """
    command, args = _launch(env)
    path = _opencode_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    data: dict[str, Any] = {}
    if path.is_file():
        try:
            loaded = json.loads(path.read_text(encoding="utf-8") or "{}")
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            loaded = None
        if isinstance(loaded, dict):
            data = loaded
        else:
            # Unparseable or not an object: keep the original before replacing it.
            shutil.copyfile(path, path.with_name(path.name + ".bak"))
    mcp = data.get("mcp")
    if not isinstance(mcp, dict):
        mcp = data["mcp"] = {}
    mcp["markitdown"] = {
        "type": "local",
        "command": [command, *args],
        "enabled": True,
    }
    path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
    return str(path)
201
+
202
+
203
def _wire_codex(env: MarkItDownEnv | None) -> str:
    """Append a ``[mcp_servers.markitdown]`` table to Codex's TOML config.

    If the file already contains that table header, nothing is written so any
    user edits stay intact. The file is read and written as UTF-8.

    Returns:
        The path of the config file.

    Raises:
        MarkItDownError: if no MCP launcher is available.
    """
    command, args = _launch(env)
    path = _codex_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    existing = path.read_text(encoding="utf-8") if path.is_file() else ""
    if "[mcp_servers.markitdown]" in existing:
        return str(path)  # already wired; leave user edits intact
    args_repr = ", ".join(f'"{a}"' for a in args)
    block = f'\n[mcp_servers.markitdown]\ncommand = "{command}"\nargs = [{args_repr}]\n'
    path.write_text(existing + block, encoding="utf-8")
    return str(path)
214
+
215
+
216
def _wire_hermes(env: MarkItDownEnv | None) -> str:
    """Register the MarkItDown MCP server in Hermes' JSON config.

    Sets ``mcpServers.markitdown`` and leaves all other keys untouched. If the
    existing file cannot be parsed as a JSON object, a byte-exact copy is saved
    next to it with a ``.bak`` suffix before a fresh config is written, so the
    user's previous content is never lost.

    Returns:
        The path of the config file that was written.

    Raises:
        MarkItDownError: if no MCP launcher is available.
    """
    command, args = _launch(env)
    path = _hermes_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    data: dict[str, Any] = {}
    if path.is_file():
        try:
            loaded = json.loads(path.read_text(encoding="utf-8") or "{}")
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            loaded = None
        if isinstance(loaded, dict):
            data = loaded
        else:
            # Unparseable or not an object: keep the original before replacing it.
            shutil.copyfile(path, path.with_name(path.name + ".bak"))
    servers = data.get("mcpServers")
    if not isinstance(servers, dict):
        servers = data["mcpServers"] = {}
    servers["markitdown"] = {"command": command, "args": args}
    path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
    return str(path)
230
+
231
+
232
# Agent name -> function that writes that agent's MCP config.
_WIRERS = dict(
    opencode=_wire_opencode,
    codex=_wire_codex,
    hermes=_wire_hermes,
)
237
+
238
+
239
def setup_agents(
    agents: list[str] | None = None,
    env: MarkItDownEnv | None = None,
) -> dict[str, str]:
    """Wire the MarkItDown MCP server into each agent's config.

    Args:
        agents: Agent names to wire; ``None`` or an empty list means all of
            ``AGENTS``.
        env: Pre-resolved environment passed through to each writer.

    Returns:
        A mapping of agent -> config file written.

    Raises:
        MarkItDownError: if any requested agent is unknown. All names are
            validated before the first config is touched, so a typo never
            leaves some agents wired and others not.
    """
    chosen = agents or list(AGENTS)
    unknown = [agent for agent in chosen if agent not in _WIRERS]
    if unknown:
        raise MarkItDownError(f"Unknown agent: {unknown[0]!r}. Known: {', '.join(AGENTS)}")
    return {agent: _WIRERS[agent](env) for agent in chosen}