refs-mcp 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- refs_mcp/__init__.py +16 -0
- refs_mcp/__main__.py +23 -0
- refs_mcp/_arch.py +92 -0
- refs_mcp/_build.py +135 -0
- refs_mcp/_exec_safety.py +88 -0
- refs_mcp/_link.py +195 -0
- refs_mcp/api/__init__.py +39 -0
- refs_mcp/api/_slug.py +96 -0
- refs_mcp/api/inventory.py +402 -0
- refs_mcp/api/observability.py +109 -0
- refs_mcp/api/repo.py +579 -0
- refs_mcp/auto_clone.py +247 -0
- refs_mcp/bench/README.md +93 -0
- refs_mcp/bench/__init__.py +8 -0
- refs_mcp/bench/backends/__init__.py +1 -0
- refs_mcp/bench/backends/baseline.py +255 -0
- refs_mcp/bench/backends/refs_mcp_backend.py +131 -0
- refs_mcp/bench/corpus.json +1663 -0
- refs_mcp/bench/corpus.py +127 -0
- refs_mcp/bench/measure_perf.py +64 -0
- refs_mcp/bench/metrics.py +185 -0
- refs_mcp/bench/models.py +166 -0
- refs_mcp/bench/run_offline_ir.py +385 -0
- refs_mcp/bench/tools.py +56 -0
- refs_mcp/bootstrap.py +290 -0
- refs_mcp/cli.py +517 -0
- refs_mcp/config.py +95 -0
- refs_mcp/contracts/__init__.py +56 -0
- refs_mcp/contracts/drift.py +713 -0
- refs_mcp/contracts/models.py +349 -0
- refs_mcp/contracts/passthrough.py +279 -0
- refs_mcp/contracts/runtime.py +255 -0
- refs_mcp/contracts/scrape_gh.py +225 -0
- refs_mcp/contracts/scrape_git.py +489 -0
- refs_mcp/contracts/scrape_rg.py +319 -0
- refs_mcp/correlation.py +63 -0
- refs_mcp/discovery.py +437 -0
- refs_mcp/doctor.py +426 -0
- refs_mcp/events.py +164 -0
- refs_mcp/file_log.py +167 -0
- refs_mcp/gh_models.py +92 -0
- refs_mcp/gh_runner.py +256 -0
- refs_mcp/git_ops.py +326 -0
- refs_mcp/git_runner.py +485 -0
- refs_mcp/git_url.py +74 -0
- refs_mcp/github_schema.py +65 -0
- refs_mcp/help_doc.py +676 -0
- refs_mcp/host_tools.py +1086 -0
- refs_mcp/index_render.py +151 -0
- refs_mcp/init/__init__.py +10 -0
- refs_mcp/init/_common.py +251 -0
- refs_mcp/init/_vscode_shared.py +166 -0
- refs_mcp/init/cmd_claude.py +111 -0
- refs_mcp/init/cmd_codex.py +201 -0
- refs_mcp/init/cmd_copilot.py +117 -0
- refs_mcp/init/cmd_copilot_agent.py +137 -0
- refs_mcp/init/cmd_gemini.py +109 -0
- refs_mcp/init/cmd_opencode.py +122 -0
- refs_mcp/init/cmd_vscode.py +65 -0
- refs_mcp/init/cmd_vscode_insiders.py +56 -0
- refs_mcp/models.py +1119 -0
- refs_mcp/operations.py +677 -0
- refs_mcp/path_safety.py +183 -0
- refs_mcp/preseed.py +191 -0
- refs_mcp/ranking.py +141 -0
- refs_mcp/remote_discover.py +511 -0
- refs_mcp/reorg.py +503 -0
- refs_mcp/replay.py +75 -0
- refs_mcp/run_metadata.py +410 -0
- refs_mcp/runner.py +312 -0
- refs_mcp/search.py +1586 -0
- refs_mcp/selftest.py +410 -0
- refs_mcp/server.py +813 -0
- refs_mcp/symbol_extraction.py +674 -0
- refs_mcp/symbols.py +557 -0
- refs_mcp/trace_export.py +110 -0
- refs_mcp/user_config.py +329 -0
- refs_mcp-0.2.0.dist-info/METADATA +524 -0
- refs_mcp-0.2.0.dist-info/RECORD +81 -0
- refs_mcp-0.2.0.dist-info/WHEEL +4 -0
- refs_mcp-0.2.0.dist-info/entry_points.txt +2 -0
refs_mcp/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""FastMCP server for managing the refs/ reference-repo tree."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version as _pkg_version
|
|
4
|
+
|
|
5
|
+
# Resolve at import time from the installed distribution so the value
|
|
6
|
+
# tracks pyproject.toml automatically instead of going stale on every
|
|
7
|
+
# version bump. The hardcoded "0.1.0" string that previously lived here
|
|
8
|
+
# stayed at 0.1.0 from PR #2 through 0.1.2's release, contradicting
|
|
9
|
+
# every other version source. The PyInstaller bundle's refs.spec ships
|
|
10
|
+
# the ``refs-mcp`` dist-info via ``copy_metadata("refs-mcp")`` so this
|
|
11
|
+
# resolves in frozen mode too. The dist name is ``refs-mcp`` (PyPI);
|
|
12
|
+
# the console-script entry stays ``refs`` (project.scripts).
|
|
13
|
+
# Fallback for a source checkout that was never installed; overwritten
# below whenever the ``refs-mcp`` distribution metadata can be located.
__version__ = "unknown"
try:
    __version__ = _pkg_version("refs-mcp")
except PackageNotFoundError:  # pragma: no cover - dev tree without install
    # Keep the "unknown" default assigned above.
    pass
|
refs_mcp/__main__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Entry-point shim for ``python -m refs_mcp`` and PyInstaller.
|
|
2
|
+
|
|
3
|
+
PyInstaller's ``Analysis`` takes a script path, not a console-script
|
|
4
|
+
target, so we route ``__main__`` through ``cli.main`` here. The
|
|
5
|
+
``[project.scripts]`` ``refs`` entry in pyproject.toml points at
|
|
6
|
+
``refs_mcp.cli:main`` for the pip-installed wheel path.
|
|
7
|
+
|
|
8
|
+
Absolute import (``from refs_mcp.cli import main``) rather than the
|
|
9
|
+
package-relative form because PyInstaller bundles ``__main__.py`` as a
|
|
10
|
+
standalone script with no package context; relative imports raise
|
|
11
|
+
``ImportError`` at frozen runtime. The wheel install path still
|
|
12
|
+
resolves the same module via the absolute name.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from refs_mcp.cli import main
|
|
18
|
+
|
|
19
|
+
if __name__ == "__main__":
    # Script entry point (``python -m refs_mcp`` / frozen binary).
    # ``main`` is a click.Command; calling it parses ``sys.argv`` and exits
    # via ``SystemExit`` from inside click. No explicit return code wiring
    # needed here.
    main()
|
refs_mcp/_arch.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Assert a PyInstaller binary's machine code matches the labeled npm target.
|
|
2
|
+
|
|
3
|
+
Called from CI's build job right after PyInstaller writes ``dist/``, before
|
|
4
|
+
the artifact is uploaded. Catches "arm64-labeled x86_64" and similar
|
|
5
|
+
mis-labeled-artifact failure modes that a GitHub runner-alias shift
|
|
6
|
+
(e.g. ``macos-latest`` flipping between Intel and Apple Silicon) would
|
|
7
|
+
otherwise let through.
|
|
8
|
+
|
|
9
|
+
Pure stdlib + magic-byte parsing — no ``file`` dependency, no third-party
|
|
10
|
+
binary inspection lib. Anchored to:
|
|
11
|
+
|
|
12
|
+
* ELF ``e_machine`` constants — ``/usr/include/elf.h`` (``EM_X86_64 = 0x3E``)
|
|
13
|
+
* PE/COFF machine constants —
|
|
14
|
+
https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
|
|
15
|
+
(``IMAGE_FILE_MACHINE_AMD64 = 0x8664``)
|
|
16
|
+
* Mach-O cputype — ``/usr/include/mach/machine.h``
|
|
17
|
+
(``CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64 = 0x0100000C``,
|
|
18
|
+
little-endian 64-bit magic ``0xCFFAEDFE``)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations

from pathlib import Path
import struct
import sys
from typing import NoReturn
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _fail(msg: str) -> None:
|
|
29
|
+
print(f"::error::{msg}", file=sys.stderr)
|
|
30
|
+
sys.exit(1)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _check_linux_x64(head: bytes) -> None:
|
|
34
|
+
if head[:4] != b"\x7fELF":
|
|
35
|
+
_fail("not an ELF binary")
|
|
36
|
+
# e_machine: 2-byte LE at offset 18.
|
|
37
|
+
machine = struct.unpack_from("<H", head, 18)[0]
|
|
38
|
+
if machine != 0x3E:
|
|
39
|
+
_fail(f"ELF e_machine={machine:#06x}, expected 0x003e (EM_X86_64)")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _check_win32_x64(head: bytes) -> None:
|
|
43
|
+
if head[:2] != b"MZ":
|
|
44
|
+
_fail("not a PE/MZ binary")
|
|
45
|
+
# PE header offset lives at 0x3C as a 4-byte LE.
|
|
46
|
+
pe_offset = struct.unpack_from("<I", head, 0x3C)[0]
|
|
47
|
+
if head[pe_offset : pe_offset + 4] != b"PE\0\0":
|
|
48
|
+
_fail(f"PE signature missing at offset {pe_offset:#x}")
|
|
49
|
+
# IMAGE_FILE_HEADER.Machine is the first 2-byte LE after "PE\0\0".
|
|
50
|
+
machine = struct.unpack_from("<H", head, pe_offset + 4)[0]
|
|
51
|
+
if machine != 0x8664:
|
|
52
|
+
_fail(f"PE Machine={machine:#06x}, expected 0x8664 (AMD64)")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _check_darwin_arm64(head: bytes) -> None:
|
|
56
|
+
# 64-bit Mach-O little-endian magic.
|
|
57
|
+
if head[:4] != b"\xcf\xfa\xed\xfe":
|
|
58
|
+
_fail(f"unexpected Mach-O magic {head[:4]!r}; expected MH_MAGIC_64 LE")
|
|
59
|
+
# cputype: 4-byte LE at offset 4.
|
|
60
|
+
cpu_type = struct.unpack_from("<I", head, 4)[0]
|
|
61
|
+
if cpu_type != 0x0100000C:
|
|
62
|
+
_fail(f"Mach-O cputype={cpu_type:#010x}, expected 0x0100000c (arm64)")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Dispatch table: target label given on the command line -> the header
# check that validates a binary built for that target.
_CHECKS = {
    "linux-x64": _check_linux_x64,
    "win32-x64": _check_win32_x64,
    "darwin-arm64": _check_darwin_arm64,
}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def main(argv: list[str]) -> int:
    """Check that a built binary's header matches the given target label.

    Args:
        argv: Full argument vector, ``[program, binary_path, target]``.

    Returns:
        ``0`` when the header check passes, ``2`` on a usage error. Failed
        checks, an unknown target and an unreadable file are reported via
        ``_fail``, which exits the process with status 1.
    """
    if len(argv) != 3:
        print("usage: assert_binary_arch.py <binary> <npm-target>", file=sys.stderr)
        return 2

    binary = Path(argv[1])
    target = argv[2]

    check = _CHECKS.get(target)
    if check is None:
        _fail(f"unknown target {target!r}; expected one of {sorted(_CHECKS)}")
        return 1  # _fail exits; keeps pyright happy on the path below

    # Only the first 4096 bytes are inspected, so read just those instead of
    # loading the whole executable into memory. An unreadable path is routed
    # through ``_fail`` so CI shows a ``::error::`` line, not a traceback.
    try:
        with binary.open("rb") as stream:
            head = stream.read(4096)
    except OSError as exc:
        _fail(f"cannot read {binary}: {exc}")
        return 1  # _fail exits; kept for the same reason as above

    check(head)
    print(f"{binary}: {target} OK ({len(head)} byte header inspected)")
    return 0
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
if __name__ == "__main__":
    # Propagate ``main``'s integer result as the process exit status.
    raise SystemExit(main(sys.argv))
|
refs_mcp/_build.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Build refs.exe via PyInstaller with warnings-as-errors.
|
|
2
|
+
|
|
3
|
+
PyInstaller has NO ``--werror`` / ``--strict`` CLI flag. Verified via
|
|
4
|
+
full reads of:
|
|
5
|
+
|
|
6
|
+
* ``../refs/pyinstaller/pyinstaller/PyInstaller/log.py``
|
|
7
|
+
— only ``--log-level`` / ``PYI_LOG_LEVEL``; the level only silences,
|
|
8
|
+
it does not escalate.
|
|
9
|
+
* ``../refs/pyinstaller/pyinstaller/PyInstaller/__main__.py``
|
|
10
|
+
— assembles the parser from three ``__add_options`` callables and
|
|
11
|
+
exposes a programmatic entrypoint ``PyInstaller.__main__.run()``.
|
|
12
|
+
* ``../refs/pyinstaller/pyinstaller/PyInstaller/building/makespec.py:__add_options``
|
|
13
|
+
— every spec-generation flag; none related to warnings-as-errors.
|
|
14
|
+
* ``../refs/pyinstaller/pyinstaller/PyInstaller/building/build_main.py:__add_options``
|
|
15
|
+
— only ``--distpath``, ``--workpath``, ``--noconfirm``, ``--upx-dir``,
|
|
16
|
+
``--clean``. ``_write_warnings`` (line 1064) just writes the
|
|
17
|
+
``warn-*.txt`` report; it does not exit.
|
|
18
|
+
|
|
19
|
+
UV also has no equivalent flag (``uv sync --help`` / ``uv lock --help``
|
|
20
|
+
yield no warning-escalation options).
|
|
21
|
+
|
|
22
|
+
What PyInstaller DOES expose: standard Python ``logging``. At
|
|
23
|
+
``log.py:42`` it does ``logger = getLogger('PyInstaller')``. Every
|
|
24
|
+
``logger.warning(...)`` call inside PyInstaller — including the
|
|
25
|
+
``"Hidden import X not found!"`` site at
|
|
26
|
+
``../refs/pyinstaller/pyinstaller/PyInstaller/depend/imphook.py:551``
|
|
27
|
+
— routes through that logger. So the legitimate config option is to
|
|
28
|
+
install a ``logging.Handler`` on the ``PyInstaller`` logger that
|
|
29
|
+
raises on records at WARNING level or above.
|
|
30
|
+
|
|
31
|
+
This script does exactly that. No regex on stdout; the handler sees
|
|
32
|
+
the actual ``LogRecord`` at emit time and converts it into a real
|
|
33
|
+
exception, surfacing the level, module, and message via the logging
|
|
34
|
+
API the upstream itself defines.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import argparse
|
|
40
|
+
import logging
|
|
41
|
+
from pathlib import Path
|
|
42
|
+
import sys
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class WarningsAsErrorsHandler(logging.Handler):
    """Logging handler that aborts the process on WARNING-or-higher records.

    Intended to be attached to the ``PyInstaller`` logger. Child loggers
    such as ``PyInstaller.depend.imphook`` propagate to it, so messages
    like ``Hidden import "X" not found!`` reach this handler as real
    ``LogRecord`` objects; no parsing of console output is involved.

    The abort is done with ``sys.exit`` rather than an ordinary exception.
    ``SystemExit`` derives from ``BaseException``, not ``Exception``, so an
    ``except Exception:`` guard between the logging call and the build
    wrapper cannot swallow it; it travels up to whoever called into the
    build, carrying the diagnostic text as its ``code``.

    Any record whose ``levelno`` is at least ``logging.WARNING`` triggers
    the abort; per the upstream notes this also covers PyInstaller's
    custom DEPRECATION level, which sits above WARN.
    """

    def emit(self, record: logging.LogRecord) -> None:
        """Exit with a formatted diagnostic when ``record`` is a warning or worse.

        Records below ``logging.WARNING`` are ignored and the method
        returns ``None``.

        Raises:
            SystemExit: with the diagnostic string as its code, for any
                record at WARNING level or above.
        """
        if record.levelno >= logging.WARNING:
            text = record.getMessage()
            details = (
                f"FAIL: PyInstaller warning treated as error\n"
                f" level : {record.levelname}\n"
                f" logger : {record.name}\n"
                f" message : {text}\n\n"
            )
            advice = (
                'Fix the underlying cause; never ignore. For \'Hidden import "X" not\n'
                "found!' install X as a runtime dep or add a hook; for deprecated\n"
                "spec args, update the spec to the supported form."
            )
            sys.exit(details + advice)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def main(argv: list[str] | None = None) -> int:
|
|
87
|
+
parser = argparse.ArgumentParser(description=__doc__)
|
|
88
|
+
parser.add_argument(
|
|
89
|
+
"--spec",
|
|
90
|
+
type=Path,
|
|
91
|
+
default=Path("refs.spec"),
|
|
92
|
+
help="Path to the PyInstaller .spec file.",
|
|
93
|
+
)
|
|
94
|
+
args = parser.parse_args(argv)
|
|
95
|
+
|
|
96
|
+
if not args.spec.is_file():
|
|
97
|
+
sys.stderr.write(f"FAIL: spec file not found: {args.spec}\n")
|
|
98
|
+
return 1
|
|
99
|
+
|
|
100
|
+
# Install the handler BEFORE importing PyInstaller's runtime
|
|
101
|
+
# machinery so any warning emitted during option parsing or
|
|
102
|
+
# analysis is caught at the source.
|
|
103
|
+
pyi_logger = logging.getLogger("PyInstaller")
|
|
104
|
+
handler = WarningsAsErrorsHandler()
|
|
105
|
+
handler.setLevel(logging.WARNING)
|
|
106
|
+
pyi_logger.addHandler(handler)
|
|
107
|
+
|
|
108
|
+
# PyInstaller imported AFTER the handler is in place. The
|
|
109
|
+
# programmatic entrypoint is at ``PyInstaller/__main__.py:160``
|
|
110
|
+
# (``def run(pyi_args=None, pyi_config=None)``).
|
|
111
|
+
import PyInstaller.__main__ as pyi_main
|
|
112
|
+
|
|
113
|
+
pyi_args = [str(args.spec), "--noconfirm", "--clean", "--log-level=INFO"]
|
|
114
|
+
|
|
115
|
+
print(f"[build_binary] PyInstaller.run({pyi_args!r})", flush=True)
|
|
116
|
+
try:
|
|
117
|
+
pyi_main.run(pyi_args=pyi_args)
|
|
118
|
+
except SystemExit as exc:
|
|
119
|
+
# Two sources of ``SystemExit`` here, both wanted:
|
|
120
|
+
# 1. ``WarningsAsErrorsHandler.emit`` calls ``sys.exit(msg)`` when
|
|
121
|
+
# PyInstaller emits a WARNING/DEPRECATION/ERROR log record.
|
|
122
|
+
# ``exc.code`` is the FAIL message string.
|
|
123
|
+
# 2. PyInstaller itself raises ``SystemExit`` on internal errors.
|
|
124
|
+
# Either way we surface the code (or 1 for string codes) to CI.
|
|
125
|
+
if isinstance(exc.code, str):
|
|
126
|
+
sys.stderr.write(exc.code + "\n")
|
|
127
|
+
return 1
|
|
128
|
+
return int(exc.code or 0)
|
|
129
|
+
|
|
130
|
+
print("[build_binary] OK: build clean (no warnings).", flush=True)
|
|
131
|
+
return 0
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
if __name__ == "__main__":
    # Propagate ``main``'s integer result as the process exit status.
    raise SystemExit(main())
|
refs_mcp/_exec_safety.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Subprocess argv[0] hardening — single source of truth.
|
|
2
|
+
|
|
3
|
+
Microsoft's canonical guidance for ``CreateProcess`` (which Python's
|
|
4
|
+
``subprocess`` invokes on Windows when ``shell=False``) explicitly says
|
|
5
|
+
"specify a fully qualified path":
|
|
6
|
+
|
|
7
|
+
* Dynamic-Link Library Security:
|
|
8
|
+
https://learn.microsoft.com/windows/win32/dlls/dynamic-link-library-security
|
|
9
|
+
"Wherever possible, specify a fully qualified path when using the
|
|
10
|
+
LoadLibrary, LoadLibraryEx, CreateProcess, or ShellExecute functions."
|
|
11
|
+
* Security Considerations: Microsoft Windows Shell:
|
|
12
|
+
https://learn.microsoft.com/windows/win32/shell/sec-shell
|
|
13
|
+
"Provide the fully qualified path. Do not depend on the Shell to
|
|
14
|
+
locate the file."
|
|
15
|
+
* Warning C6277: NULL application name with an unquoted path is a
|
|
16
|
+
security vulnerability because CreateProcess will search the parent
|
|
17
|
+
process's CWD (step 2 of its documented search order) before PATH —
|
|
18
|
+
a malicious binary in the CWD would win.
|
|
19
|
+
|
|
20
|
+
``shutil.which`` honors PATHEXT on Windows so a bare ``"rg"`` resolves
|
|
21
|
+
to ``rg.exe``. When ``shutil.which`` returns a path that still has a
|
|
22
|
+
directory component (e.g. ``"./rg"`` for an explicit relative override),
|
|
23
|
+
this module promotes via ``os.path.abspath``. ``os.path.isabs`` is the
|
|
24
|
+
canonical absolute-path check — it correctly handles UNC paths, rooted
|
|
25
|
+
Windows paths, and rejects drive-relative ``"C:tool.exe"`` forms that a
|
|
26
|
+
string-startswith heuristic would mis-classify.
|
|
27
|
+
|
|
28
|
+
This module exists separately from ``refs_mcp.runner`` so the helpers
|
|
29
|
+
can be imported by any subprocess site (git_runner, gh_runner, operations,
|
|
30
|
+
remote_discover, run_metadata, init scripts) without pulling in the
|
|
31
|
+
OpenTelemetry import graph that ``runner`` carries.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
from collections.abc import Sequence
|
|
37
|
+
import os
|
|
38
|
+
import shutil
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def resolved_executable(name: str) -> str:
    """Return an absolute path for the executable ``name`` when one can be found.

    The lookup avoids handing a relative ``argv[0]`` to a subprocess call
    (untrusted search path, CWE-426): a bare or relative name is resolved
    once, here, instead of being left to the OS search order at spawn time.

    Args:
        name: Executable name or path as the caller wrote it.

    Returns:
        * ``name`` unchanged when ``os.path.isabs(name)`` is already true;
        * ``name`` unchanged when ``shutil.which(name)`` finds nothing, so
          the later spawn attempt fails in the usual way and the caller can
          map that failure to its own error type;
        * otherwise the ``shutil.which`` result, passed through
          ``os.path.abspath`` if it still is not absolute (which can happen
          when ``name`` carried a directory part such as ``"./rg"``).
    """
    if not os.path.isabs(name):
        located = shutil.which(name)
        if located is not None:
            if os.path.isabs(located):
                return located
            return os.path.abspath(located)
    return name
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def hardened_argv(argv: Sequence[str]) -> list[str]:
    """Copy ``argv`` into a list whose first element is an absolute executable path.

    Only ``argv[0]`` is rewritten (through ``resolved_executable``); the
    remaining arguments are carried over untouched. An empty ``argv``
    produces ``[]`` — no minimum length is enforced here, that is left to
    the caller. The input sequence itself is never modified.
    """
    items = list(argv)
    if not items:
        return items
    return [resolved_executable(items[0]), *items[1:]]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Public names of this module: the two argv[0]-hardening helpers.
__all__ = ("hardened_argv", "resolved_executable")
|
refs_mcp/_link.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""Cross-platform unprivileged directory aliasing.
|
|
2
|
+
|
|
3
|
+
POSIX → ``os.symlink(target, link, target_is_directory=True)``
|
|
4
|
+
Unprivileged on every supported POSIX.
|
|
5
|
+
Windows → ``mklink /J`` (junction; an NTFS reparse point with the
|
|
6
|
+
``IO_REPARSE_TAG_MOUNT_POINT`` tag). Junctions DO NOT
|
|
7
|
+
require ``SeCreateSymbolicLinkPrivilege`` — distinct from
|
|
8
|
+
symbolic links (Microsoft Learn: ``Hard links and
|
|
9
|
+
junctions``). They are the canonical Windows-native
|
|
10
|
+
unprivileged way to alias a directory across drives on the
|
|
11
|
+
same machine. Limitations: local volumes only (no UNC),
|
|
12
|
+
directory-only (no file junctions), reported by Python as
|
|
13
|
+
``Path.is_symlink() == False`` because their reparse tag is
|
|
14
|
+
``IO_REPARSE_TAG_MOUNT_POINT`` not ``IO_REPARSE_TAG_SYMLINK``.
|
|
15
|
+
|
|
16
|
+
``link_directory`` raises by default when neither symlink nor
|
|
17
|
+
junction is available, so callers don't silently absorb a multi-GB
|
|
18
|
+
``copytree`` they didn't ask for. Pass ``copy_fallback=True`` to
|
|
19
|
+
opt in to the copy semantics (used by ``safe_directory_move`` where
|
|
20
|
+
copy-on-cross-FS is the existing ``shutil.move`` contract).
|
|
21
|
+
|
|
22
|
+
``safe_directory_move`` patches the one ``shutil.move`` correctness
|
|
23
|
+
hole that bites Windows unprivileged callers: when ``src`` is a
|
|
24
|
+
symlink and the move crosses a filesystem boundary, ``shutil`` falls
|
|
25
|
+
back to ``copytree`` + ``rmtree`` and recreates the link via
|
|
26
|
+
``os.symlink``, which requires ``SeCreateSymbolicLinkPrivilege``.
|
|
27
|
+
Routing the recreation through ``link_directory`` (junction-capable)
|
|
28
|
+
keeps the move unprivileged.
|
|
29
|
+
|
|
30
|
+
AGENTS.md Rule #1 (subprocess argv[0] hardened via ``hardened_argv``),
|
|
31
|
+
Rule #11 (production runs unprivileged).
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
from dataclasses import dataclass
|
|
37
|
+
import os
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
import shutil
|
|
40
|
+
import subprocess
|
|
41
|
+
import sys
|
|
42
|
+
from typing import Literal
|
|
43
|
+
|
|
44
|
+
from ._exec_safety import hardened_argv
|
|
45
|
+
|
|
46
|
+
# Closed set of values reported in ``LinkResult.mechanism``.
LinkMechanism = Literal["symlink", "junction", "copy", "move"]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(frozen=True)
class LinkResult:
    """Outcome of a directory-link or directory-move operation.

    ``mechanism`` reports which OS primitive actually took effect:

    * ``"symlink"``  POSIX ``os.symlink`` or Windows symlink (rare)
    * ``"junction"`` Windows NTFS junction via ``mklink /J``
    * ``"copy"``     ``shutil.copytree`` — link is a full tree copy,
                     NOT a reference. Downstream rm / mutation
                     semantics differ.
    * ``"move"``     non-link directory moved by ``shutil.move``
                     (used by ``safe_directory_move``)

    Audit trails carry this so operators can tell whether downstream
    behavior will affect just the link or the whole tree.
    """

    # Which primitive produced the result (one of the ``LinkMechanism`` values).
    mechanism: LinkMechanism
    # Path of the created alias, or the destination path after a move.
    link: Path
    # Directory the alias refers to; for a plain ``"move"`` this is the
    # original source path.
    target: Path
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Public API
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def link_directory(target: Path, link: Path, *, copy_fallback: bool = False) -> LinkResult:
    """Alias the existing directory ``target`` at the new path ``link``.

    No elevated privilege is needed on any platform. Strategy:

    * non-Windows: ``os.symlink`` with ``target_is_directory=True``;
    * Windows: a junction created through ``_create_junction``;
    * Windows, junction failed, ``copy_fallback=True``:
      ``shutil.copytree(symlinks=True)`` — ``link`` then holds an
      independent copy of the tree, not a reference;
    * Windows, junction failed, ``copy_fallback=False``: ``OSError``.

    Intermediate directories are not created; ``link``'s parent has to
    exist already.

    Args:
        target: Directory to be aliased; must exist.
        link: Path to create; must not exist (a dangling symlink counts
            as existing).
        copy_fallback: Permit the tree copy when no junction can be made.

    Returns:
        A ``LinkResult`` whose ``mechanism`` names the primitive used.

    Raises:
        NotADirectoryError: ``target`` is not a directory.
        FileExistsError: something already occupies ``link``.
        OSError: junction creation failed and ``copy_fallback`` is false.
    """
    if not target.is_dir():
        raise NotADirectoryError(f"link target is not a directory: {target!r}")
    if link.exists() or link.is_symlink():
        raise FileExistsError(f"link path already exists: {link!r}")

    mechanism: LinkMechanism
    if sys.platform != "win32":
        # Symlinks are the unprivileged route here; Windows takes the
        # junction route in the next branch, so nothing in this function
        # depends on ``SeCreateSymbolicLinkPrivilege``. The trailing marker
        # on the call records that this ``os.symlink`` use is intentional.
        os.symlink(
            str(target), str(link), target_is_directory=True
        )  # privilege-allow: POSIX unprivileged path; Windows uses junctions
        mechanism = "symlink"
    elif _create_junction(link=link, target=target):
        mechanism = "junction"
    elif copy_fallback:
        shutil.copytree(str(target), str(link), symlinks=True)
        mechanism = "copy"
    else:
        raise OSError(
            f"could not create junction at {link!r} -> {target!r}; "
            f"pass copy_fallback=True to copy the directory tree instead"
        )
    return LinkResult(mechanism=mechanism, link=link, target=target)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def safe_directory_move(src: Path, dst: Path) -> LinkResult:
    """Move ``src`` to ``dst`` without needing symlink privilege for link sources.

    * ``src`` is not a symlink: plain ``shutil.move``; the result has
      mechanism ``"move"`` and ``target`` set to the old ``src`` path.
    * ``src`` is a symlink and ``os.rename`` succeeds: the link itself is
      renamed; mechanism ``"symlink"``.
    * ``src`` is a symlink and ``os.rename`` raises ``OSError`` (the
      cross-filesystem case): the link is rebuilt at ``dst`` through
      ``link_directory(..., copy_fallback=True)`` — a junction on Windows,
      a symlink elsewhere, a tree copy as last resort — and the old link
      is then removed. The returned mechanism is whatever
      ``link_directory`` reports.

    For symlink sources the reported ``target`` is the stored link text
    joined onto ``src.parent`` and made absolute.
    """
    if not src.is_symlink():
        shutil.move(str(src), str(dst))
        return LinkResult(mechanism="move", link=dst, target=src)

    # ``os.readlink`` returns the link text exactly as stored. A relative
    # text is relative to the link's own directory, not to the process cwd,
    # so it is joined onto ``src.parent``; joining an absolute text simply
    # yields that absolute path, so both forms are handled by one line.
    stored = os.readlink(src)
    link_target = (src.parent / stored).absolute()

    try:
        os.rename(src, dst)
    except OSError:
        # Rename refused: recreate the alias at the destination first and
        # only then drop the old link.
        recreated = link_directory(link_target, dst, copy_fallback=True)
        src.unlink()
        return recreated
    return LinkResult(mechanism="symlink", link=dst, target=link_target)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
# Private — Windows junction creation via ``cmd /c mklink /J``
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _create_junction(*, link: Path, target: Path) -> bool:
    """Create a Windows directory junction at ``link`` pointing to ``target``.

    ``mklink`` is a ``cmd.exe`` builtin rather than a standalone program,
    so it is run as ``cmd /c mklink /J <link> <target>``; ``hardened_argv``
    resolves ``cmd`` to an absolute path where it can.

    Returns:
        ``True`` when the command exits with status 0. ``False`` when it
        exits non-zero, cannot be started (e.g. ``cmd`` not found), or
        does not finish within the 10-second timeout.

    This function does not raise for those failures — the caller decides
    whether to fall back to a copy or surface an error.
    """
    try:
        argv = hardened_argv(["cmd", "/c", "mklink", "/J", str(link), str(target)])
        # The spawn itself is inside the ``try``: a missing ``cmd`` raises
        # ``FileNotFoundError`` (an ``OSError``) and an expired timeout raises
        # ``subprocess.TimeoutExpired`` (a ``SubprocessError``). Both must
        # become ``False`` to honor the contract above.
        # Output is captured only to keep it off the console and is never
        # read, so it stays as bytes: decoding it (``text=True``) could raise
        # ``UnicodeDecodeError`` after the junction was already created.
        proc = subprocess.run(  # noqa: S603 - argv-list, hardened, no shell
            argv,
            capture_output=True,
            timeout=10,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        return False
    return proc.returncode == 0
|
refs_mcp/api/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Noun-namespace MCP tool surface for refs.
|
|
2
|
+
|
|
3
|
+
Three tools instead of 27. Each is a thin dispatcher around the existing
|
|
4
|
+
engine functions in ``refs_mcp.search`` / ``refs_mcp.operations`` /
|
|
5
|
+
``refs_mcp.symbols`` / etc. — the engines stay, only the public MCP
|
|
6
|
+
surface collapses.
|
|
7
|
+
|
|
8
|
+
Tools:
|
|
9
|
+
|
|
10
|
+
* ``refs_repo(slug, action, ...)`` — every operation scoped to a single
|
|
11
|
+
repo. ``action`` is a ``Literal`` over: find / clone / status /
|
|
12
|
+
update / search / symbols / sparse / export.
|
|
13
|
+
* ``refs_inventory(action, ...)`` — every cross-repo / corpus operation.
|
|
14
|
+
Local sweeps, remote discovery, reorg, index, preseed.
|
|
15
|
+
* ``refs_observability(action, ...)`` — server-side diagnostic. Host
|
|
16
|
+
tools probe, contract drift, events tail, journal, help.
|
|
17
|
+
|
|
18
|
+
Internalized (no longer exposed as separate tools):
|
|
19
|
+
|
|
20
|
+
* ``refs_prove_absence`` → every ``refs_repo(action=search)`` returns
|
|
21
|
+
``receipt.verdict``. ``VALIDATED_EMPTY`` is the proof; clients check
|
|
22
|
+
the verdict, no separate tool.
|
|
23
|
+
* ``refs_inspect_terms`` → ``refs_repo(action=search, target=[list])``
|
|
24
|
+
auto-fans-out across the term list.
|
|
25
|
+
* ``refs_search_files`` → ``refs_repo(action=search, mode='files_with')``.
|
|
26
|
+
* ``refs_count_matches`` → ``refs_repo(action=search, mode='count')``.
|
|
27
|
+
* ``refs_find_symbol`` → ``refs_repo(action=symbols, name=X)`` (the
|
|
28
|
+
point-lookup is just a filter on the list-symbols query).
|
|
29
|
+
|
|
30
|
+
Design rationale: pydantic discriminated unions were considered but
|
|
31
|
+
FastMCP's discriminator support has only one regression test in the
|
|
32
|
+
upstream suite (jlowin/fastmcp tests/utilities/test_json_schema.py
|
|
33
|
+
line 249), so we use the simpler ``action: Literal[...]`` + flat
|
|
34
|
+
optional per-action params. Body code validates per-action requirements.
|
|
35
|
+
Every MCP client renders this shape; discriminator-mapping rendering
|
|
36
|
+
varies.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from __future__ import annotations
|