crumbs-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crumbs/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """crumbs - local, token-efficient cross-repo context for LLMs.
2
+
3
+ crumbs indexes repositories into compact "context crumbs" (file maps and symbol
4
+ signatures, not full file bodies) stored locally. An assistant can query these
5
+ crumbs to understand many repos at once without reading -- and paying tokens for
6
+ -- the entire source tree.
7
+ """
8
+
9
+ __version__ = "0.3.0"
crumbs/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ import sys
2
+
3
+ from .cli import main
4
+
5
+ if __name__ == "__main__":
6
+ sys.exit(main())
crumbs/cli.py ADDED
@@ -0,0 +1,186 @@
1
+ """crumbs command-line interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ import time
9
+ from typing import List, Optional
10
+
11
+ from . import __version__, digest, indexer, query, store
12
+
13
+
14
+ def _fmt_age(ts: float) -> str:
15
+ secs = max(0, int(time.time() - ts))
16
+ for unit, n in (("d", 86400), ("h", 3600), ("m", 60)):
17
+ if secs >= n:
18
+ return f"{secs // n}{unit} ago"
19
+ return "just now"
20
+
21
+
22
def cmd_index(args: argparse.Namespace) -> int:
    """Index every requested path and print a one-line summary for each.

    Falls back to the current directory when no paths are given. Processing
    stops at the first path the indexer rejects; the error goes to stderr.

    Returns:
        ``0`` when every path was indexed, ``1`` on the first failure.
    """
    targets = args.paths if args.paths else ["."]
    for target in targets:
        try:
            crumb = indexer.index_repo(target, name=args.name)
        except (NotADirectoryError, FileNotFoundError) as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        stats = crumb["stats"]
        # The map is rendered only to measure it against the raw source size.
        saved = digest.savings(crumb, digest.repo_map(crumb["id"]))
        summary = (
            f"indexed {crumb['name']} "
            f"{stats['files']} files, {stats['symbols']} symbols "
            f"(map ~{saved['map_tokens']} tok vs ~{saved['source_tokens']} tok source, "
            f"-{saved['saved_pct']}%)"
        )
        print(summary)
    return 0
40
+
41
+
42
def cmd_list(args: argparse.Namespace) -> int:
    """List indexed repos as an aligned table, or as JSON with ``--json``.

    ``--json`` always emits a JSON document -- an empty object when nothing
    is indexed -- so scripted consumers never have to parse the human hint.
    This matches ``cmd_search``, which also honours ``--json`` before its
    "no matches" message.

    Returns:
        Always ``0``.
    """
    reg = store.load_registry()
    if args.json:
        # Checked before the empty-registry hint so stdout stays parseable.
        print(json.dumps(reg or {}, indent=2))
        return 0
    if not reg:
        print("no repos indexed. run: crumbs index <path>")
        return 0
    rows = sorted(reg.items(), key=lambda kv: kv[1]["name"])
    # Pad the name column to the longest repo name so the columns line up.
    name_w = max((len(m["name"]) for _, m in rows), default=4)
    for rid, m in rows:
        st = m["stats"]
        print(
            f"{m['name']:<{name_w}} {rid} "
            f"{st['files']:>4} files {st['symbols']:>5} symbols "
            f"{_fmt_age(m['indexed_at'])}"
        )
    return 0
60
+
61
+
62
def cmd_map(args: argparse.Namespace) -> int:
    """Print the compact map of one indexed repo to stdout.

    With ``--stats`` a token estimate is additionally written to stderr, so
    the map on stdout can still be piped cleanly.

    Returns:
        ``0`` on success, ``1`` when the selector matches no indexed repo.
    """
    rid = store.resolve(args.repo)
    if not rid:
        print(f"error: no indexed repo matches '{args.repo}'", file=sys.stderr)
        return 1

    rendered = digest.repo_map(rid, max_symbols_per_file=args.max_symbols)
    print(rendered)
    if not args.stats:
        return 0

    sav = digest.savings(store.load_repo(rid), rendered)
    print(
        f"\n_~{sav['map_tokens']} tokens (vs ~{sav['source_tokens']} for full source, "
        f"-{sav['saved_pct']}%)_",
        file=sys.stderr,
    )
    return 0
78
+
79
+
80
def cmd_search(args: argparse.Namespace) -> int:
    """Search indexed symbols and print one ``repo:path:line sig`` row per hit.

    With ``--json`` the raw hit list is dumped instead (an empty list when
    nothing matches).

    Returns:
        Always ``0``.
    """
    hits = query.search(args.query, repo=args.repo, limit=args.limit)
    if args.json:
        print(json.dumps(hits, indent=2))
        return 0
    if not hits:
        print("no matches")
        return 0
    for hit in hits:
        # Symbols without a signature fall back to "<kind> <name>".
        signature = hit["sig"] if hit["sig"] else f"{hit['kind']} {hit['name']}"
        suffix = f":{hit['line']}" if hit.get("line") else ""
        print(f"{hit['repo']}:{hit['path']}{suffix} {signature}")
    return 0
93
+
94
+
95
def cmd_context(args: argparse.Namespace) -> int:
    """Print the context slice that ``query.context`` builds for the query.

    Returns:
        Always ``0``.
    """
    slice_text = query.context(args.query, repo=args.repo, limit=args.limit)
    print(slice_text)
    return 0
98
+
99
+
100
def cmd_remove(args: argparse.Namespace) -> int:
    """Remove one repo from the index and confirm it by name.

    Returns:
        ``0`` on success, ``1`` when the selector matches no indexed repo.
    """
    rid = store.resolve(args.repo)
    if not rid:
        print(f"error: no indexed repo matches '{args.repo}'", file=sys.stderr)
        return 1
    # Look the display name up before removal; fall back to the id itself.
    entry = store.load_registry().get(rid, {})
    label = entry.get("name", rid)
    store.remove_repo(rid)
    print(f"removed {label}")
    return 0
109
+
110
+
111
def cmd_mcp(args: argparse.Namespace) -> int:
    """Run the stdio MCP server and return its exit code.

    The ``mcp`` module is imported here rather than at module level, so it is
    only loaded when this subcommand is actually used.
    """
    from . import mcp as mcp_server

    return mcp_server.serve()
114
+
115
+
116
def cmd_refresh(args: argparse.Namespace) -> int:
    """Re-index every repo in the registry under its recorded name.

    Repos whose recorded path is no longer a directory are reported on stderr
    and skipped; they do not affect the exit code.

    Returns:
        Always ``0``.
    """
    registry = store.load_registry()
    if not registry:
        print("nothing to refresh")
        return 0
    # Iterate over a snapshot of the registry entries.
    for meta in list(registry.values()):
        try:
            indexer.index_repo(meta["path"], name=meta["name"])
            print(f"refreshed {meta['name']}")
        except (NotADirectoryError, FileNotFoundError):
            print(f"skip {meta['name']} (path missing: {meta['path']})", file=sys.stderr)
    return 0
128
+
129
+
130
def build_parser() -> argparse.ArgumentParser:
    """Build the ``crumbs`` argument parser with all of its subcommands.

    Every subparser stores its handler in ``func`` via ``set_defaults`` so the
    caller can dispatch with ``args.func(args)``. A subcommand is required.
    """
    parser = argparse.ArgumentParser(
        prog="crumbs",
        description="Local, token-efficient cross-repo context for LLMs.",
    )
    parser.add_argument("--version", action="version", version=f"crumbs {__version__}")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # index
    index_p = commands.add_parser("index", help="index one or more repos")
    index_p.add_argument("paths", nargs="*", help="repo paths (default: .)")
    index_p.add_argument("--name", help="override repo name")
    index_p.set_defaults(func=cmd_index)

    # list
    list_p = commands.add_parser("list", help="list indexed repos")
    list_p.add_argument("--json", action="store_true")
    list_p.set_defaults(func=cmd_list)

    # map
    map_p = commands.add_parser("map", help="print compact map of a repo")
    map_p.add_argument("repo", help="repo name, id, or path")
    map_p.add_argument("--max-symbols", type=int, default=12)
    map_p.add_argument("--stats", action="store_true", help="print token estimate to stderr")
    map_p.set_defaults(func=cmd_map)

    # search
    search_p = commands.add_parser("search", help="search symbols across repos")
    search_p.add_argument("query")
    search_p.add_argument("--repo", help="limit to one repo")
    search_p.add_argument("--limit", type=int, default=30)
    search_p.add_argument("--json", action="store_true")
    search_p.set_defaults(func=cmd_search)

    # context
    context_p = commands.add_parser("context", help="LLM-ready context slice for a query")
    context_p.add_argument("query")
    context_p.add_argument("--repo", help="limit to one repo")
    context_p.add_argument("--limit", type=int, default=20)
    context_p.set_defaults(func=cmd_context)

    # remove
    remove_p = commands.add_parser("remove", help="remove a repo from the index")
    remove_p.add_argument("repo")
    remove_p.set_defaults(func=cmd_remove)

    # refresh
    refresh_p = commands.add_parser("refresh", help="re-index all known repos")
    refresh_p.set_defaults(func=cmd_refresh)

    # mcp
    mcp_p = commands.add_parser("mcp", help="run as an MCP server over stdio")
    mcp_p.set_defaults(func=cmd_mcp)

    return parser
177
+
178
+
179
def main(argv: Optional[List[str]] = None) -> int:
    """Parse ``argv`` and dispatch to the selected subcommand handler.

    Args:
        argv: Argument list without the program name; ``None`` lets argparse
            read ``sys.argv``.

    Returns:
        The exit code returned by the subcommand handler.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
183
+
184
+
185
+ if __name__ == "__main__":
186
+ sys.exit(main())
crumbs/digest.py ADDED
@@ -0,0 +1,75 @@
1
+ """Render a compact, token-efficient map of an indexed repo."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List
6
+
7
+ from . import store
8
+
9
+
10
+ def _est_tokens(chars: int) -> int:
11
+ """Rough token estimate (~4 chars/token)."""
12
+ return chars // 4
13
+
14
+
15
def loc(sym: Dict[str, Any]) -> str:
    """Compact source location tag, e.g. ``L40-92`` or ``L40``.

    Args:
        sym: A symbol dict; ``line`` and ``end_line`` are read if present.

    Returns:
        ``""`` when the symbol has no (truthy) ``line``; ``L<start>`` when
        the end is missing, empty or equal to the start; otherwise
        ``L<start>-<end>``.
    """
    start = sym.get("line")
    if not start:
        return ""
    # ``or start`` also covers an explicit None/0 end_line, which would
    # otherwise render as "L40-None".
    end = sym.get("end_line") or start
    return f"L{start}" if end == start else f"L{start}-{end}"
22
+
23
+
24
def repo_map(rid: str, max_symbols_per_file: int = 12) -> str:
    """Render the stored crumbs of one repo as compact markdown.

    The output is a title, optional git remote/branch, a stats line, an
    optional quoted README excerpt, then one ``###`` section per file that
    has symbols. Each symbol line carries its signature, a ``[L..]`` location
    tag and a one-line doc when available.

    Args:
        rid: Repo id as used by the store.
        max_symbols_per_file: Symbols listed per file before the rest are
            collapsed into a "+N more" line.

    Returns:
        The markdown text, or ``""`` when the store has no data for ``rid``.
    """
    data = store.load_repo(rid)
    if not data:
        return ""
    lines: List[str] = []
    g = data.get("git", {})
    lines.append(f"# {data['name']}")
    meta = []
    if g.get("remote"):
        meta.append(g["remote"])
    if g.get("branch"):
        meta.append(f"@{g['branch']}")
    if meta:
        lines.append(" ".join(meta))
    st = data["stats"]
    lines.append(f"_{st['files']} files, {st['symbols']} symbols indexed_")
    lines.append("")
    if data.get("readme"):
        # Collapse blank lines, then quote every line of the excerpt.
        excerpt = data["readme"].strip().replace("\n\n", "\n")
        lines.append("> " + excerpt.replace("\n", "\n> "))
        lines.append("")

    for f in data["files"]:
        syms = f["symbols"]
        if not syms:
            continue
        lines.append(f"### {f['path']}")
        for sym in syms[:max_symbols_per_file]:
            sig = sym["sig"] or f"{sym['kind']} {sym['name']}"
            tag = loc(sym)
            where = f" [{tag}]" if tag else ""
            doc = f" — {sym['doc']}" if sym.get("doc") else ""
            lines.append(f"- {sig}{where}{doc}")
        if len(syms) > max_symbols_per_file:
            # Horizontal ellipsis, written as an escape so it cannot be
            # mangled by a wrong source encoding again.
            lines.append(f"- \u2026 +{len(syms) - max_symbols_per_file} more")
        lines.append("")

    return "\n".join(lines)
65
+
66
+
67
def savings(data: Dict[str, Any], map_text: str) -> Dict[str, int]:
    """Compare the estimated token cost of a map against the full source.

    Args:
        data: Crumb data; ``data["stats"]["source_bytes"]`` is read.
        map_text: The rendered map whose length is measured.

    Returns:
        A dict with ``source_tokens``, ``map_tokens`` and ``saved_pct`` (the
        rounded percentage saved; ``0`` when the source estimate is zero).
    """
    source = _est_tokens(data["stats"]["source_bytes"])
    rendered = _est_tokens(len(map_text))
    if source == 0:
        saved = 0
    else:
        saved = round(100 * (1 - rendered / source))
    return dict(source_tokens=source, map_tokens=rendered, saved_pct=saved)
crumbs/extractors.py ADDED
@@ -0,0 +1,255 @@
1
+ """Extract compact symbol signatures from source files.
2
+
3
+ The goal is a high-signal, low-token summary of what a file *contains* and
4
+ *exposes* -- function/class/type signatures and one-line docs -- never the full
5
+ bodies. Python is parsed with the stdlib ``ast`` for accuracy; other languages
6
+ use lightweight regex that captures declarations without trying to be a parser.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import ast
12
+ import re
13
+ from typing import Dict, List
14
+
15
# Map file extension -> language label used in output.
LANGS: Dict[str, str] = {
    # scripting / web
    ".py": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    # compiled / systems
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".rb": "ruby",
    ".php": "php",
    ".c": "c",
    ".h": "c",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".hpp": "cpp",
    ".cs": "csharp",
    ".swift": "swift",
    ".kt": "kotlin",
    # docs
    ".md": "markdown",
}


def lang_for(filename: str) -> str:
    """Return the language label for ``filename``, or ``""`` if unknown.

    The name is matched case-sensitively against the suffixes in ``LANGS``,
    in table order; the first suffix the name ends with wins.
    """
    return next(
        (label for suffix, label in LANGS.items() if filename.endswith(suffix)),
        "",
    )
46
+
47
+
48
+ def _first_line(text: str) -> str:
49
+ text = (text or "").strip()
50
+ return text.splitlines()[0].strip() if text else ""
51
+
52
+
53
def extract(path: str, text: str) -> List[Dict[str, str]]:
    """Return the list of symbols found in ``text``.

    Each symbol is ``{kind, name, sig, doc, line, end_line}`` where ``line`` /
    ``end_line`` are 1-based source line numbers, so a reader can open just
    the symbol's slice (e.g. ``path:line-end_line``) instead of the whole file.

    The extractor is chosen from the language ``lang_for(path)`` reports:
    dedicated ones for Python, JavaScript/TypeScript, Go, Rust and Markdown,
    the generic regex extractor for any other known language, and an empty
    list when the language is unknown.
    """
    lang = lang_for(path)
    if not lang:
        return []
    dedicated = {
        "python": _python,
        "javascript": _js_ts,
        "typescript": _js_ts,
        "go": _go,
        "rust": _rust,
        "markdown": _markdown,
    }
    extractor = dedicated.get(lang, _generic)
    return extractor(text)
74
+
75
+
76
+ # --------------------------------------------------------------------------- #
77
+ # Python (AST-based, accurate)
78
+ # --------------------------------------------------------------------------- #
79
+ def _unparse(node) -> str:
80
+ """Best-effort source for an annotation/default node (3.9+ has ast.unparse)."""
81
+ if node is None:
82
+ return ""
83
+ if hasattr(ast, "unparse"):
84
+ try:
85
+ return ast.unparse(node)
86
+ except Exception:
87
+ return ""
88
+ return "" # Python 3.8: omit annotation rather than guess
89
+
90
+
91
def _arg(arg: ast.arg, default=None) -> str:
    """Render one parameter as ``name``, ``name: ann``, ``name=default`` or
    ``name: ann = default``.

    The annotation and the default are included only when they can be
    unparsed to non-empty text.
    """
    annotation = _unparse(getattr(arg, "annotation", None))
    rendered = f"{arg.arg}: {annotation}" if annotation else arg.arg
    if default is None:
        return rendered
    value = _unparse(default)
    if not value:
        return rendered
    # Spaces around "=" only when an annotation precedes it.
    joiner = " = " if annotation else "="
    return rendered + joiner + value
101
+
102
+
103
def _py_args(node: ast.AST) -> str:
    """Render the parameter list (and return annotation) of a function node.

    Produces text such as ``(a, /, b: int = 1, *args, c, **kw) -> str``.
    Nodes without an ``args`` attribute render as ``()``.
    """
    if not hasattr(node, "args"):
        return "()"
    spec = node.args  # type: ignore[attr-defined]

    positional = [*spec.posonlyargs, *spec.args]
    # Defaults align to the tail of the positional parameters.
    first_with_default = len(positional) - len(spec.defaults)
    rendered: List[str] = [
        _arg(
            param,
            spec.defaults[i - first_with_default] if i >= first_with_default else None,
        )
        for i, param in enumerate(positional)
    ]
    if spec.posonlyargs:
        rendered.insert(len(spec.posonlyargs), "/")

    if spec.vararg:
        rendered.append("*" + _arg(spec.vararg))
    elif spec.kwonlyargs:
        # A bare "*" is needed to introduce keyword-only parameters.
        rendered.append("*")
    rendered.extend(
        _arg(param, default)
        for param, default in zip(spec.kwonlyargs, spec.kw_defaults)
    )
    if spec.kwarg:
        rendered.append("**" + _arg(spec.kwarg))

    signature = "(" + ", ".join(rendered) + ")"
    returns = _unparse(getattr(node, "returns", None))
    return f"{signature} -> {returns}" if returns else signature
129
+
130
+
131
def _python(text: str) -> List[Dict[str, str]]:
    """Extract top-level functions and classes from Python source via ``ast``.

    Functions get their full signature; classes get their base list and a
    doc that appends up to 12 public method names. Only module-level
    definitions are reported. Source that cannot be parsed falls back to the
    generic regex extractor.
    """
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError):
        # ValueError: older interpreters raise it for source containing NUL
        # bytes, which the 2 KiB binary sniff in the indexer can miss.
        return _generic(text)
    out: List[Dict[str, str]] = []
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            prefix = "async def" if isinstance(node, ast.AsyncFunctionDef) else "def"
            out.append({
                "kind": "function",
                "name": node.name,
                "sig": f"{prefix} {node.name}{_py_args(node)}",
                "doc": _first_line(ast.get_docstring(node) or ""),
                "line": node.lineno,
                "end_line": getattr(node, "end_lineno", node.lineno),
            })
        elif isinstance(node, ast.ClassDef):
            # Plain names are read directly (works on every Python version);
            # dotted/subscripted bases such as ``abc.ABC`` or ``Generic[T]``
            # are unparsed when possible instead of being dropped.
            base_names = [
                b.id if isinstance(b, ast.Name) else _unparse(b)
                for b in node.bases
            ]
            bases = ", ".join(b for b in base_names if b)
            sig = f"class {node.name}" + (f"({bases})" if bases else "")
            methods = [
                n.name
                for n in node.body
                if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
                and not n.name.startswith("_")
            ]
            doc = _first_line(ast.get_docstring(node) or "")
            if methods:
                doc = (doc + " " if doc else "") + "methods: " + ", ".join(methods[:12])
            out.append({
                "kind": "class",
                "name": node.name,
                "sig": sig,
                "doc": doc,
                "line": node.lineno,
                "end_line": getattr(node, "end_lineno", node.lineno),
            })
    return out
171
+
172
+
173
+ # --------------------------------------------------------------------------- #
174
+ # Regex-based extractors for other languages
175
+ # --------------------------------------------------------------------------- #
176
+ def _collect(text: str, patterns: List[tuple]) -> List[Dict[str, str]]:
177
+ out: List[Dict[str, str]] = []
178
+ seen = set()
179
+ for kind, rx in patterns:
180
+ for m in rx.finditer(text):
181
+ name = m.group("name")
182
+ if not name or name in seen:
183
+ continue
184
+ seen.add(name)
185
+ sig = m.group(0).strip().rstrip("{(=").strip()
186
+ sig = re.sub(r"\s+", " ", sig)[:120]
187
+ line = text.count("\n", 0, m.start()) + 1
188
+ out.append({
189
+ "kind": kind, "name": name, "sig": sig, "doc": "",
190
+ "line": line, "end_line": line,
191
+ })
192
+ return out
193
+
194
+
195
# (kind, pattern) tables consumed by ``_collect``. Exported declarations are
# listed before their plain counterparts; every pattern is line-anchored and
# captures the declared identifier as ``name``.
_JS_TS = [
    (
        "function",
        re.compile(r"^\s*export\s+(?:default\s+)?(?:async\s+)?function\s+(?P<name>\w+)", re.M),
    ),
    (
        "function",
        re.compile(r"^\s*(?:async\s+)?function\s+(?P<name>\w+)", re.M),
    ),
    (
        "class",
        re.compile(r"^\s*export\s+(?:default\s+)?(?:abstract\s+)?class\s+(?P<name>\w+)", re.M),
    ),
    (
        "class",
        re.compile(r"^\s*(?:abstract\s+)?class\s+(?P<name>\w+)", re.M),
    ),
    (
        "const",
        re.compile(r"^\s*export\s+const\s+(?P<name>\w+)", re.M),
    ),
    (
        "type",
        re.compile(r"^\s*export\s+(?:type|interface)\s+(?P<name>\w+)", re.M),
    ),
    (
        "type",
        re.compile(r"^\s*(?:type|interface)\s+(?P<name>\w+)", re.M),
    ),
]

# Go: functions (with an optional receiver) and struct/interface types.
_GO = [
    (
        "function",
        re.compile(r"^\s*func\s+(?:\([^)]*\)\s*)?(?P<name>\w+)\s*\(", re.M),
    ),
    (
        "type",
        re.compile(r"^\s*type\s+(?P<name>\w+)\s+(?:struct|interface)", re.M),
    ),
]

# Rust: optionally ``pub`` fn/struct/enum/trait items.
_RUST = [
    (
        "function",
        re.compile(r"^\s*(?:pub\s+)?(?:async\s+)?fn\s+(?P<name>\w+)", re.M),
    ),
    (
        "struct",
        re.compile(r"^\s*(?:pub\s+)?struct\s+(?P<name>\w+)", re.M),
    ),
    (
        "enum",
        re.compile(r"^\s*(?:pub\s+)?enum\s+(?P<name>\w+)", re.M),
    ),
    (
        "trait",
        re.compile(r"^\s*(?:pub\s+)?trait\s+(?P<name>\w+)", re.M),
    ),
]


def _js_ts(text: str) -> List[Dict[str, str]]:
    """Collect JavaScript/TypeScript declarations using ``_JS_TS``."""
    return _collect(text, _JS_TS)


def _go(text: str) -> List[Dict[str, str]]:
    """Collect Go declarations using ``_GO``."""
    return _collect(text, _GO)


def _rust(text: str) -> List[Dict[str, str]]:
    """Collect Rust declarations using ``_RUST``."""
    return _collect(text, _RUST)
228
+
229
+
230
+ _HEADING = re.compile(r"^(#{1,3})\s+(?P<name>.+?)\s*#*$", re.M)
231
+
232
+
233
+ def _markdown(text: str) -> List[Dict[str, str]]:
234
+ out: List[Dict[str, str]] = []
235
+ for m in _HEADING.finditer(text):
236
+ level = len(m.group(1))
237
+ line = text.count("\n", 0, m.start()) + 1
238
+ out.append({
239
+ "kind": f"h{level}",
240
+ "name": m.group("name").strip(),
241
+ "sig": "",
242
+ "doc": "",
243
+ "line": line,
244
+ "end_line": line,
245
+ })
246
+ return out[:30]
247
+
248
+
249
# Fallback for languages without a dedicated table: any line that, after
# optional visibility/static modifiers, declares something with a common
# function keyword.
_GENERIC = [
    (
        "def",
        re.compile(
            r"^\s*(?:public|private|protected|static|\s)*\b(?:func|function|def|fn|sub|method)\s+(?P<name>\w+)",
            re.M,
        ),
    ),
]


def _generic(text: str) -> List[Dict[str, str]]:
    """Collect function-like declarations using the ``_GENERIC`` pattern."""
    return _collect(text, _GENERIC)
crumbs/indexer.py ADDED
@@ -0,0 +1,133 @@
1
+ """Walk a repository and build its compact crumb data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import subprocess
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ from . import extractors, store
11
+
12
+ # Directories never worth indexing.
13
+ SKIP_DIRS = {
14
+ ".git", "node_modules", "__pycache__", ".venv", "venv", "env",
15
+ "dist", "build", "target", ".next", ".nuxt", "out", "vendor",
16
+ ".idea", ".vscode", "coverage", ".pytest_cache", ".mypy_cache",
17
+ ".ruff_cache", "site-packages", ".tox", "bin", "obj", ".cache",
18
+ ".remember", ".crumbs",
19
+ }
20
+
21
+ # Files to skip by name.
22
+ SKIP_FILES = {"package-lock.json", "yarn.lock", "poetry.lock", "Cargo.lock", "pnpm-lock.yaml"}
23
+
24
+ MAX_FILE_BYTES = 1_500_000 # skip files larger than this (likely generated/binary)
25
+ DOC_NAMES = {"readme.md", "readme.rst", "readme.txt", "readme"}
26
+
27
+
28
+ def _is_text(path: Path) -> bool:
29
+ try:
30
+ with path.open("rb") as f:
31
+ chunk = f.read(2048)
32
+ return b"\x00" not in chunk
33
+ except OSError:
34
+ return False
35
+
36
+
37
+ def _git_info(root: Path) -> Dict[str, str]:
38
+ info: Dict[str, str] = {}
39
+ try:
40
+ remote = subprocess.run(
41
+ ["git", "-C", str(root), "remote", "get-url", "origin"],
42
+ capture_output=True, text=True, timeout=5,
43
+ )
44
+ if remote.returncode == 0:
45
+ info["remote"] = remote.stdout.strip()
46
+ branch = subprocess.run(
47
+ ["git", "-C", str(root), "rev-parse", "--abbrev-ref", "HEAD"],
48
+ capture_output=True, text=True, timeout=5,
49
+ )
50
+ if branch.returncode == 0:
51
+ info["branch"] = branch.stdout.strip()
52
+ except (OSError, subprocess.SubprocessError):
53
+ pass
54
+ return info
55
+
56
+
57
def index_repo(path: str, name: Optional[str] = None) -> Dict[str, Any]:
    """Index a repository at ``path`` and persist its crumbs.

    Walks the tree (pruning ``SKIP_DIRS`` and hidden directories other than
    ``.github``), skips lock files and files above ``MAX_FILE_BYTES``,
    extracts symbols from every text file with a known language, and saves
    the result through the store.

    Args:
        path: Repository directory; ``~`` is expanded and the path resolved.
        name: Display name; defaults to the directory name.

    Returns:
        The crumb data dict that was saved.

    Raises:
        NotADirectoryError: If ``path`` does not resolve to a directory.
    """
    root = Path(path).expanduser().resolve()
    if not root.is_dir():
        raise NotADirectoryError(f"not a directory: {root}")

    rid = store.repo_id(str(root))
    name = name or root.name
    files: List[Dict[str, Any]] = []
    total_source_bytes = 0
    readme_excerpt = ""

    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk never descends into skipped directories.
        dirnames[:] = [
            d for d in dirnames
            if d == ".github" or (d not in SKIP_DIRS and not d.startswith("."))
        ]
        # Compare directories instead of looking for "/" in the relative
        # path: on Windows the separator is "\\", so the string test would
        # treat every nested README as top-level.
        at_top_level = Path(dirpath) == root
        # Sorted so the README pick does not depend on directory listing order.
        for fn in sorted(filenames):
            if fn in SKIP_FILES:
                continue
            fpath = Path(dirpath) / fn
            rel = str(fpath.relative_to(root))
            lang = extractors.lang_for(fn)

            try:
                size = fpath.stat().st_size
            except OSError:
                continue
            if size > MAX_FILE_BYTES:
                continue

            # Capture a top-level README excerpt for the repo summary.
            if at_top_level and not readme_excerpt and fn.lower() in DOC_NAMES:
                if _is_text(fpath):
                    readme_excerpt = _read(fpath)[:600]

            if not lang:
                continue
            if not _is_text(fpath):
                continue

            text = _read(fpath)
            # Measured in decoded characters, which is what the ~4 chars per
            # token estimate in the digest expects.
            total_source_bytes += len(text)
            symbols = extractors.extract(rel, text)
            files.append({
                "path": rel,
                "lang": lang,
                "loc": text.count("\n") + 1,
                "symbols": symbols,
            })

    files.sort(key=lambda f: f["path"])
    sym_count = sum(len(f["symbols"]) for f in files)

    data: Dict[str, Any] = {
        "id": rid,
        "name": name,
        "path": str(root),
        "indexed_at": store.now(),
        "git": _git_info(root),
        "readme": readme_excerpt,
        "files": files,
        "stats": {
            "files": len(files),
            "symbols": sym_count,
            "source_bytes": total_source_bytes,
        },
    }
    store.save_repo(rid, data)
    return data
127
+
128
+
129
+ def _read(path: Path) -> str:
130
+ try:
131
+ return path.read_text(encoding="utf-8", errors="ignore")
132
+ except OSError:
133
+ return ""
crumbs/mcp.py ADDED
@@ -0,0 +1,291 @@
1
+ """A minimal MCP (Model Context Protocol) server for crumbs.
2
+
3
+ This speaks the MCP wire protocol directly over stdio with **zero
4
+ dependencies** -- no SDK -- to keep crumbs pure-stdlib. An MCP host (Claude
5
+ Code, Claude Desktop, or any MCP client) launches ``crumbs mcp`` as a
6
+ subprocess and talks to it in JSON-RPC 2.0 over stdin/stdout.
7
+
8
+ Wire format (stdio transport): newline-delimited JSON. Each message is one
9
+ JSON object on its own line. stdout is reserved for protocol traffic only;
10
+ all logging goes to stderr.
11
+
12
+ Lifecycle:
13
+ client -> initialize -> server: capabilities + serverInfo
14
+ client -> notifications/initialized (no response)
15
+ client -> tools/list -> server: the tool catalog
16
+ client -> tools/call -> server: the tool's output
17
+
18
+ The tools are thin adapters over the existing crumbs modules; the MCP layer
19
+ only translates JSON-RPC <-> Python calls and formats results as text.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import json
25
+ import sys
26
+ from typing import Any, Callable, Dict, List, Optional
27
+
28
+ from . import __version__, digest, indexer, query, store
29
+
30
+ # Protocol version we default to if the client doesn't propose one. We echo
31
+ # the client's requested version when present for forward compatibility.
32
+ DEFAULT_PROTOCOL_VERSION = "2025-06-18"
33
+
34
+ # JSON-RPC error codes we use.
35
+ PARSE_ERROR = -32700
36
+ INVALID_REQUEST = -32600
37
+ METHOD_NOT_FOUND = -32601
38
+ INVALID_PARAMS = -32602
39
+ INTERNAL_ERROR = -32603
40
+
41
+
42
def _log(msg: str) -> None:
    """Write a prefixed diagnostic line to stderr and flush it.

    stdout carries protocol traffic only, so logging must never go there.
    """
    sys.stderr.write(f"[crumbs-mcp] {msg}\n")
    sys.stderr.flush()
44
+
45
+
46
+ # --------------------------------------------------------------------------- #
47
+ # Tool implementations -- each returns a plain string (rendered as text).
48
+ # --------------------------------------------------------------------------- #
49
def _tool_index(args: Dict[str, Any]) -> str:
    """Index the requested repo path(s) and return one summary line per repo.

    Args:
        args: Tool arguments. ``paths`` (list) takes precedence over ``path``
            (single string); with neither, the current directory is indexed.
            ``name`` optionally overrides the repo name.

    Returns:
        Newline-joined summary lines.
    """
    paths = args.get("paths") or ([args["path"]] if args.get("path") else ["."])
    if isinstance(paths, str):
        # A caller that sends a bare string for "paths" means one path;
        # iterating the string would index it character by character.
        paths = [paths]
    name = args.get("name")
    out: List[str] = []
    for p in paths:
        data = indexer.index_repo(p, name=name)
        st = data["stats"]
        m = digest.repo_map(data["id"])
        sav = digest.savings(data, m)
        out.append(
            f"indexed {data['name']}: {st['files']} files, {st['symbols']} symbols "
            f"(map ~{sav['map_tokens']} tok vs ~{sav['source_tokens']} source, -{sav['saved_pct']}%)"
        )
    return "\n".join(out)
63
+
64
+
65
def _tool_list(args: Dict[str, Any]) -> str:
    """Return one line per indexed repo (name, id, file and symbol counts).

    Repos are sorted by name; ``args`` is unused. When nothing is indexed, a
    hint pointing at ``crumbs_index`` is returned instead.
    """
    registry = store.load_registry()
    if not registry:
        return "No repos indexed yet. Use crumbs_index with a path first."
    ordered = sorted(registry.items(), key=lambda item: item[1]["name"])
    return "\n".join(
        f"{meta['name']} ({rid}): {meta['stats']['files']} files, "
        f"{meta['stats']['symbols']} symbols"
        for rid, meta in ordered
    )
75
+
76
+
77
def _resolve_or_index(selector: str) -> Optional[str]:
    """Resolve a repo selector; if it's an unindexed path, index it first.

    Returns:
        The repo id, or ``None`` when the selector neither resolves nor is
        an indexable directory.
    """
    known = store.resolve(selector)
    if known:
        return known
    try:
        indexer.index_repo(selector)
    except (NotADirectoryError, FileNotFoundError):
        return None
    # Resolve again now that the path has been indexed.
    return store.resolve(selector)
87
+
88
+
89
def _tool_map(args: Dict[str, Any]) -> str:
    """Return the compact map of ``args["repo"]``, indexing it if needed.

    ``max_symbols`` (default 12) caps the symbols listed per file. A
    selector that cannot be resolved or indexed yields an explanatory message.
    """
    selector = args["repo"]
    rid = _resolve_or_index(selector)
    if rid:
        per_file = int(args.get("max_symbols", 12))
        return digest.repo_map(rid, max_symbols_per_file=per_file)
    return f"No indexed repo matches '{selector}' (and it is not an indexable path)."
95
+
96
+
97
def _tool_search(args: Dict[str, Any]) -> str:
    """Search symbols and return ``repo:path:line sig`` rows, one per hit.

    When ``repo`` is given it is resolved (and indexed on first use) before
    the search runs. ``limit`` defaults to 30.
    """
    selector = args.get("repo")
    if selector:
        _resolve_or_index(selector)
    hits = query.search(args["query"], repo=selector, limit=int(args.get("limit", 30)))
    if not hits:
        return "No matches."

    def _row(hit: Dict[str, Any]) -> str:
        # Symbols without a signature fall back to "<kind> <name>".
        signature = hit["sig"] or f"{hit['kind']} {hit['name']}"
        suffix = f":{hit['line']}" if hit.get("line") else ""
        return f"{hit['repo']}:{hit['path']}{suffix} {signature}"

    return "\n".join(_row(hit) for hit in hits)
110
+
111
+
112
def _tool_context(args: Dict[str, Any]) -> str:
    """Return the context slice for ``args["query"]``.

    When ``repo`` is given it is resolved (and indexed on first use) before
    the slice is built. ``limit`` defaults to 20.
    """
    selector = args.get("repo")
    if selector:
        _resolve_or_index(selector)
    max_symbols = int(args.get("limit", 20))
    return query.context(args["query"], repo=selector, limit=max_symbols)
117
+
118
+
119
+ # --------------------------------------------------------------------------- #
120
+ # Tool catalog: name -> {description, inputSchema, handler}. The description and
121
+ # schema are what the model uses to decide *whether* and *how* to call a tool.
122
+ # --------------------------------------------------------------------------- #
123
+ def _str(desc: str) -> Dict[str, str]:
124
+ return {"type": "string", "description": desc}
125
+
126
+
127
+ TOOLS: Dict[str, Dict[str, Any]] = {
128
+ "crumbs_map": {
129
+ "description": (
130
+ "Get a compact, token-efficient map of a repository: every file with "
131
+ "its typed function/class signatures, one-line docs, and source line "
132
+ "ranges (e.g. [L40-92]) -- but NOT the file bodies. Use this FIRST to "
133
+ "orient yourself in a repo instead of reading files; then open only the "
134
+ "line ranges it points to. Indexes the repo automatically if needed."
135
+ ),
136
+ "inputSchema": {
137
+ "type": "object",
138
+ "properties": {
139
+ "repo": _str("Repo name, id, or filesystem path."),
140
+ "max_symbols": {"type": "integer", "description": "Max symbols shown per file (default 12)."},
141
+ },
142
+ "required": ["repo"],
143
+ },
144
+ "handler": _tool_map,
145
+ },
146
+ "crumbs_search": {
147
+ "description": (
148
+ "Search for symbols (functions, classes, types) by keyword across all "
149
+ "indexed repos, ranked by relevance. Returns repo:path:line plus the "
150
+ "signature for each hit, so you can open the exact slice. Use to find "
151
+ "where something lives across one or many repos."
152
+ ),
153
+ "inputSchema": {
154
+ "type": "object",
155
+ "properties": {
156
+ "query": _str("Keywords to search for, e.g. 'auth token'."),
157
+ "repo": _str("Optional: limit to one repo (name, id, or path)."),
158
+ "limit": {"type": "integer", "description": "Max results (default 30)."},
159
+ },
160
+ "required": ["query"],
161
+ },
162
+ "handler": _tool_search,
163
+ },
164
+ "crumbs_context": {
165
+ "description": (
166
+ "Build an LLM-ready context slice for a topic: the most relevant symbols "
167
+ "across indexed repos, grouped by repo and file, with signatures, docs, "
168
+ "and line ranges. Use when you want focused context on a topic rather "
169
+ "than a whole repo map."
170
+ ),
171
+ "inputSchema": {
172
+ "type": "object",
173
+ "properties": {
174
+ "query": _str("Topic to gather context for, e.g. 'rate limiting'."),
175
+ "repo": _str("Optional: limit to one repo."),
176
+ "limit": {"type": "integer", "description": "Max symbols (default 20)."},
177
+ },
178
+ "required": ["query"],
179
+ },
180
+ "handler": _tool_context,
181
+ },
182
+ "crumbs_index": {
183
+ "description": (
184
+ "Index one or more repositories so their maps/searches are available. "
185
+ "Usually unnecessary -- the other tools auto-index a path on first use -- "
186
+ "but call this to (re)index explicitly."
187
+ ),
188
+ "inputSchema": {
189
+ "type": "object",
190
+ "properties": {
191
+ "paths": {"type": "array", "items": {"type": "string"}, "description": "Repo paths to index."},
192
+ "path": _str("A single repo path (alternative to 'paths')."),
193
+ "name": _str("Optional override name for the repo."),
194
+ },
195
+ },
196
+ "handler": _tool_index,
197
+ },
198
+ "crumbs_list": {
199
+ "description": "List all indexed repositories with their file and symbol counts.",
200
+ "inputSchema": {"type": "object", "properties": {}},
201
+ "handler": _tool_list,
202
+ },
203
+ }
204
+
205
+
206
+ # --------------------------------------------------------------------------- #
207
+ # JSON-RPC plumbing
208
+ # --------------------------------------------------------------------------- #
209
+ def _result(req_id: Any, result: Any) -> Dict[str, Any]:
210
+ return {"jsonrpc": "2.0", "id": req_id, "result": result}
211
+
212
+
213
+ def _error(req_id: Any, code: int, message: str) -> Dict[str, Any]:
214
+ return {"jsonrpc": "2.0", "id": req_id, "error": {"code": code, "message": message}}
215
+
216
+
217
def _handle(msg: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Process one JSON-RPC message; return a response, or None for notifications.

    Handles ``initialize``, the ``initialized`` notification, ``ping``,
    ``tools/list`` and ``tools/call``. Tool failures are reported inside a
    successful result with ``isError`` set; protocol-level problems use
    JSON-RPC errors:

    * a message that is not a JSON object -> ``INVALID_REQUEST``
    * non-object ``params`` where an object is required, or an unknown tool
      name -> ``INVALID_PARAMS``
    * an unknown method on a request -> ``METHOD_NOT_FOUND``
    """
    if not isinstance(msg, dict):
        # Valid JSON that is not an object (array, number, string ...) has
        # no method or id to look up.
        return _error(None, INVALID_REQUEST, "request must be a JSON object")

    method = msg.get("method")
    req_id = msg.get("id")
    is_notification = "id" not in msg
    params = msg.get("params") or {}
    # Only the branches that read named params need them to be an object.
    params_ok = isinstance(params, dict)

    if method == "initialize":
        if not params_ok:
            return _error(req_id, INVALID_PARAMS, "params must be an object")
        # Echo the client's protocol version when it proposes one.
        proto = params.get("protocolVersion", DEFAULT_PROTOCOL_VERSION)
        return _result(req_id, {
            "protocolVersion": proto,
            "capabilities": {"tools": {"listChanged": False}},
            "serverInfo": {"name": "crumbs", "version": __version__},
        })

    if method in ("notifications/initialized", "initialized"):
        return None  # notification: acknowledge by doing nothing

    if method == "ping":
        return _result(req_id, {})

    if method == "tools/list":
        # The handler callables are internal; expose only the description
        # and the input schema.
        tools = [
            {"name": name, "description": t["description"], "inputSchema": t["inputSchema"]}
            for name, t in TOOLS.items()
        ]
        return _result(req_id, {"tools": tools})

    if method == "tools/call":
        if not params_ok:
            return _error(req_id, INVALID_PARAMS, "params must be an object")
        name = params.get("name")
        arguments = params.get("arguments") or {}
        tool = TOOLS.get(name)
        if not tool:
            return _error(req_id, INVALID_PARAMS, f"unknown tool: {name}")
        try:
            text = tool["handler"](arguments)
        except KeyError as e:
            # a required argument was missing -- report as a tool error, not a crash
            return _result(req_id, {
                "content": [{"type": "text", "text": f"missing argument: {e}"}],
                "isError": True,
            })
        except Exception as e:  # noqa: BLE001 -- surface any tool failure to the client
            _log(f"tool {name} failed: {e}")
            return _result(req_id, {
                "content": [{"type": "text", "text": f"error: {e}"}],
                "isError": True,
            })
        return _result(req_id, {"content": [{"type": "text", "text": text}], "isError": False})

    if is_notification:
        return None  # ignore unknown notifications
    return _error(req_id, METHOD_NOT_FOUND, f"method not found: {method}")
270
+
271
+
272
def serve(stdin=None, stdout=None) -> int:
    """Run the stdio MCP server loop until stdin closes.

    Reads newline-delimited JSON from ``stdin`` and writes one JSON response
    per line to ``stdout`` (flushed after each message). A bad message never
    ends the loop:

    * unparseable JSON -> ``PARSE_ERROR``
    * JSON that is not an object -> ``INVALID_REQUEST``
    * an unexpected exception while handling -> ``INTERNAL_ERROR`` (logged;
      no reply is sent when the message was a notification)

    Args:
        stdin: Line iterable to read from; defaults to ``sys.stdin``.
        stdout: Writable text stream; defaults to ``sys.stdout``.

    Returns:
        ``0`` once the input is exhausted.
    """
    stdin = stdin or sys.stdin
    stdout = stdout or sys.stdout

    def _send(payload: Dict[str, Any]) -> None:
        stdout.write(json.dumps(payload) + "\n")
        stdout.flush()

    _log(f"crumbs {__version__} MCP server ready (stdio)")
    for line in stdin:
        line = line.strip()
        if not line:
            continue
        try:
            msg = json.loads(line)
        except json.JSONDecodeError:
            _send(_error(None, PARSE_ERROR, "invalid JSON"))
            continue
        if not isinstance(msg, dict):
            _send(_error(None, INVALID_REQUEST, "request must be a JSON object"))
            continue
        try:
            response = _handle(msg)
        except Exception as e:  # noqa: BLE001 -- one bad message must not kill the server
            _log(f"unhandled error: {e}")
            if "id" not in msg:
                continue  # notifications never get a reply
            response = _error(msg.get("id"), INTERNAL_ERROR, f"internal error: {e}")
        if response is not None:
            _send(response)
    return 0
crumbs/query.py ADDED
@@ -0,0 +1,80 @@
1
+ """Search across indexed repos and build LLM-ready context slices."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from . import digest, store
9
+
10
+
11
+ def _tokens(q: str) -> List[str]:
12
+ return [t for t in re.split(r"[^A-Za-z0-9_]+", q.lower()) if t]
13
+
14
+
15
+ def _score(terms: List[str], path: str, sym: Dict[str, str]) -> int:
16
+ hay = f"{path} {sym['name']} {sym['sig']} {sym.get('doc', '')}".lower()
17
+ name = sym["name"].lower()
18
+ score = 0
19
+ for t in terms:
20
+ if not t:
21
+ continue
22
+ if t == name:
23
+ score += 10
24
+ elif t in name:
25
+ score += 5
26
+ if t in hay:
27
+ score += 1
28
+ return score
29
+
30
+
31
def search(query: str, repo: Optional[str] = None, limit: int = 30) -> List[Dict[str, Any]]:
    """Return ranked symbol matches across indexed repos.

    ``query`` is tokenised with ``_tokens``; a query without tokens yields
    ``[]``. When ``repo`` is given only that repo (as resolved by
    ``store.resolve``) is searched, otherwise every registered repo is. Each
    hit is the symbol's own fields plus ``repo``, ``path``, ``lang`` and
    ``score``. Hits are ordered by descending score and cut to ``limit``.
    """
    terms = _tokens(query)
    if not terms:
        return []
    candidates = [store.resolve(repo)] if repo else store.all_repos()
    hits: List[Dict[str, Any]] = []
    # filter(None, ...) drops an unresolved selector (None) or empty ids.
    for rid in filter(None, candidates):
        data = store.load_repo(rid)
        if not data:
            continue
        for entry in data["files"]:
            for sym in entry["symbols"]:
                points = _score(terms, entry["path"], sym)
                if points <= 0:
                    continue
                hit = {
                    "repo": data["name"],
                    "path": entry["path"],
                    "lang": entry["lang"],
                    "score": points,
                }
                # Symbol fields go last so they win on any key collision.
                hit.update(sym)
                hits.append(hit)
    ranked = sorted(hits, key=lambda h: h["score"], reverse=True)
    return ranked[:limit]
56
+
57
+
58
def context(query: str, repo: Optional[str] = None, limit: int = 20) -> str:
    """Format the most relevant crumbs for a query as compact markdown.

    Runs ``search(query, repo=repo, limit=limit)`` and renders the hits as a
    ``#`` title, one ``##`` section per repo, one bullet per file and one
    nested bullet per symbol (signature, optional location tag from
    ``digest.loc``, optional doc). With no hits a short "no matches" document
    is returned instead.
    """
    hits = search(query, repo=repo, limit=limit)
    if not hits:
        return f"# crumbs context: {query}\n\n_No matches across indexed repos._\n"

    # repo -> path -> hits. Hits arrive ordered by score, so two symbols from
    # the same file need not be adjacent; grouping by path as well prints each
    # file header once per repo. Dicts keep insertion order, so repos and
    # files appear in order of their first hit.
    grouped: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
    for h in hits:
        grouped.setdefault(h["repo"], {}).setdefault(h["path"], []).append(h)

    lines = [f"# crumbs context: {query}", ""]
    for repo_name, by_path in grouped.items():
        lines.append(f"## {repo_name}")
        for path, items in by_path.items():
            lines.append(f"- `{path}`")
            for it in items:
                # Fall back to "<kind> <name>" when the symbol has no signature.
                sig = it["sig"] or f"{it['kind']} {it['name']}"
                tag = digest.loc(it)
                where = f" [{tag}]" if tag else ""
                doc = f" — {it['doc']}" if it.get("doc") else ""
                lines.append(f"  - {sig}{where}{doc}")
        lines.append("")
    return "\n".join(lines)
crumbs/store.py ADDED
@@ -0,0 +1,117 @@
1
+ """Local on-disk store for crumb data.
2
+
3
+ Layout (default ~/.crumbs, override with CRUMBS_HOME):
4
+
5
+ <home>/
6
+ registry.json # id -> {name, path, indexed_at, stats}
7
+ repos/<id>.json # full crumb data for one repo
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import hashlib
13
+ import json
14
+ import os
15
+ import time
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Optional
18
+
19
+
20
def home() -> Path:
    """Return the crumbs home directory, creating it if needed.

    The directory is taken from the ``CRUMBS_HOME`` environment variable, with
    a leading ``~`` expanded; when the variable is unset or empty it defaults
    to ``~/.crumbs``. The ``repos`` subdirectory is created as well.
    """
    override = os.environ.get("CRUMBS_HOME")
    if override:
        root = Path(override).expanduser()
    else:
        # An empty value would otherwise become Path(""), i.e. the current
        # working directory, scattering crumb data wherever the tool is run.
        root = Path.home() / ".crumbs"
    (root / "repos").mkdir(parents=True, exist_ok=True)
    return root
25
+
26
+
27
def repo_id(path: str) -> str:
    """Return a short id for a repo: 12 hex chars of the SHA-1 of its resolved path."""
    resolved = Path(path).expanduser().resolve()
    hasher = hashlib.sha1(str(resolved).encode())
    return hasher.hexdigest()[:12]
31
+
32
+
33
def _registry_path() -> Path:
    """Return the location of ``registry.json`` inside the crumbs home directory."""
    return home().joinpath("registry.json")
35
+
36
+
37
def load_registry() -> Dict[str, Any]:
    """Load the registry mapping, or ``{}`` when it is missing or unusable.

    A missing, unreadable, undecodable or malformed file yields ``{}``, and so
    does a file whose top-level JSON value is not an object, so callers can
    always treat the result as a dict.
    """
    try:
        # Read directly instead of checking exists() first: a missing file is
        # an OSError, and there is no window between the check and the read.
        reg = json.loads(_registry_path().read_text())
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return reg if isinstance(reg, dict) else {}
45
+
46
+
47
def save_registry(reg: Dict[str, Any]) -> None:
    """Write the registry to disk as pretty-printed, key-sorted JSON.

    The data is written to a temporary sibling file and then moved over the
    real one with ``os.replace``, so an interrupted write cannot leave a
    truncated ``registry.json`` behind.
    """
    target = _registry_path()
    payload = json.dumps(reg, indent=2, sort_keys=True)
    # The pid keeps two processes saving at once from sharing a temp file.
    tmp = target.with_name(f"{target.name}.{os.getpid()}.tmp")
    try:
        tmp.write_text(payload)
        os.replace(tmp, target)
    except OSError:
        # Best-effort cleanup of the temp file; the original error is re-raised.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
49
+
50
+
51
def save_repo(rid: str, data: Dict[str, Any]) -> None:
    """Persist one repo's crumb data and update the registry.

    Writes ``data`` as JSON to ``repos/<rid>.json`` under the crumbs home,
    then stores the ``name``, ``path``, ``indexed_at`` and ``stats`` fields of
    ``data`` in the registry under ``rid``.
    """
    crumb_file = home() / "repos" / f"{rid}.json"
    crumb_file.write_text(json.dumps(data))

    registry = load_registry()
    registry[rid] = {
        field: data[field] for field in ("name", "path", "indexed_at", "stats")
    }
    save_registry(registry)
62
+
63
+
64
def load_repo(rid: str) -> Optional[Dict[str, Any]]:
    """Load the crumb data stored for ``rid``, or ``None`` when unavailable.

    ``None`` is returned when ``repos/<rid>.json`` is missing, unreadable,
    undecodable or malformed, and also when its top-level JSON value is not an
    object, so a non-None result can always be indexed by key.
    """
    try:
        # Read directly instead of checking exists() first: a missing file is
        # an OSError, and there is no window between the check and the read.
        data = json.loads((home() / "repos" / f"{rid}.json").read_text())
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return None
    return data if isinstance(data, dict) else None
72
+
73
+
74
def remove_repo(rid: str) -> bool:
    """Delete a repo's crumb file and its registry entry.

    Returns True when the crumb file existed, False otherwise. The registry
    entry is dropped either way, and the registry is only rewritten when an
    entry was actually present.
    """
    crumb_file = home() / "repos" / f"{rid}.json"
    had_file = crumb_file.exists()
    if had_file:
        crumb_file.unlink()

    registry = load_registry()
    if rid in registry:
        registry.pop(rid)
        save_registry(registry)
    return had_file
84
+
85
+
86
def resolve(selector: str) -> Optional[str]:
    """Resolve a user-supplied selector to a repo id.

    Accepts an exact id, a repo name, or a filesystem path. Tried in order,
    first match wins:

    1. ``selector`` is a registered id;
    2. exactly one repo has ``selector`` as its name;
    3. ``selector`` is a path whose derived id is registered;
    4. exactly one repo name starts with ``selector``.

    Returns None when nothing matches or a name/prefix is ambiguous.
    """
    reg = load_registry()
    if selector in reg:
        return selector

    names = {rid: meta["name"] for rid, meta in reg.items()}

    # by exact name
    exact = [rid for rid, name in names.items() if name == selector]
    if len(exact) == 1:
        return exact[0]

    # by path
    try:
        rid = repo_id(selector)
    except (OSError, RuntimeError, ValueError):
        # The selector is not usable as a path. pathlib documents RuntimeError
        # when a "~user" home cannot be determined; ValueError is presumably
        # what malformed paths (e.g. an embedded NUL) raise on some platforms.
        # Either way, fall through to the prefix match instead of crashing.
        rid = None
    if rid in reg:
        return rid

    # by unique name prefix
    pref = [rid for rid, name in names.items() if name.startswith(selector)]
    if len(pref) == 1:
        return pref[0]
    return None
110
+
111
+
112
def now() -> float:
    """Return the current time in seconds since the epoch, as ``time.time()`` reports it."""
    stamp = time.time()
    return stamp
114
+
115
+
116
def all_repos() -> List[str]:
    """Return the ids of every repo currently in the registry."""
    registry = load_registry()
    # Iterating a dict yields its keys, so this lists the registered ids.
    return list(registry)
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: crumbs-cli
3
+ Version: 0.3.0
4
+ Summary: Local, token-efficient cross-repo context for LLMs. CLI + MCP server.
5
+ Author: crumbs
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/crumbs1505/crumbs
8
+ Project-URL: Repository, https://github.com/crumbs1505/crumbs
9
+ Project-URL: Issues, https://github.com/crumbs1505/crumbs/issues
10
+ Keywords: llm,context,claude,code,repo,tokens,mcp
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Classifier: Topic :: Software Development :: Documentation
20
+ Requires-Python: >=3.8
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Dynamic: license-file
24
+
25
+ # crumbs
26
+
27
+ **Local, token-efficient cross-repo context for LLMs.**
28
+
29
+ `crumbs` indexes your repositories into compact *context crumbs* — file maps and
30
+ symbol signatures (typed function/class/type declarations + one-line docs + line
31
+ ranges), **never the full file bodies**. An assistant like Claude can then
32
+ understand many repos at once by reading a tiny map instead of paying tokens to
33
+ read the entire source tree.
34
+
35
+ Indexing this very tool produces a map of **~1,200 tokens** standing in for
36
+ **~8,400 tokens** of source — an **~86% reduction** — while still naming every
37
+ file and symbol. Each symbol carries its full type signature and a source line
38
+ range (e.g. `def build_parser() -> ArgumentParser [L125-168]`), so the assistant
39
+ can open *just that slice* of a file rather than the whole thing.
40
+
41
+ - 🪶 **Zero dependencies.** Pure Python 3.8+ stdlib. Runs on any device.
42
+ - 🔒 **Fully local.** Crumbs live in `~/.crumbs`. Nothing leaves your machine.
43
+ - 🧠 **Cross-repo.** Search and pull context across every repo you've indexed.
44
+ - 🎯 **High signal.** Python is parsed via `ast`; JS/TS/Go/Rust/etc. via fast
45
+ regex. Skips `node_modules`, `.git`, build dirs, lockfiles, and binaries.
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ pip install -e . # provides the `crumbs` command
51
+ # or run without installing:
52
+ python3 -m crumbs --help
53
+ ```
54
+
55
+ ## Usage
56
+
57
+ ```bash
58
+ crumbs index ~/code/my-api ~/code/my-web # index one or more repos
59
+ crumbs list # show indexed repos + stats
60
+ crumbs map my-api --stats # compact map of one repo (+ token estimate)
61
+ crumbs search "auth token" # rank matching symbols across all repos
62
+ crumbs context "rate limiting" --repo my-api # LLM-ready context slice
63
+ crumbs refresh # re-index everything
64
+ crumbs remove my-web # drop a repo from the index
65
+ ```
66
+
67
+ A repo can be referenced by name, id, or path.
68
+
69
+ ## Workflow with Claude
70
+
71
+ 1. `crumbs index` the repos you work across (once, or on a `crumbs refresh` cron).
72
+ 2. Ask Claude to run `crumbs map <repo>` or `crumbs context "<topic>"` instead of
73
+ reading whole files. It gets the structure and the relevant symbols for a
74
+ fraction of the tokens, then reads full files only where it actually needs to.
75
+
76
+ ## How it stays cheap
77
+
78
+ | | Full repo read | `crumbs map` |
79
+ |---|---|---|
80
+ | What | every byte of every file | file tree + typed signatures + 1-line docs + line ranges |
81
+ | Bodies | yes | no |
82
+ | Cost | grows with codebase | grows with *interface* size |
83
+
84
+ Because every symbol records its line range, the follow-up step is cheap too: the
85
+ assistant reads `path:start-end` for the one function it needs instead of opening
86
+ the entire file.
87
+
88
+ Storage layout (`~/.crumbs`, override with `CRUMBS_HOME`):
89
+
90
+ ```
91
+ registry.json # id -> {name, path, indexed_at, stats}
92
+ repos/<id>.json # full crumb data for one repo
93
+ ```
94
+
95
+ ## Supported languages
96
+
97
+ Python (AST), JavaScript/TypeScript, Go, Rust, and a generic declaration
98
+ matcher for Java, Ruby, PHP, C/C++, C#, Swift, Kotlin. Markdown is indexed by
99
+ heading. Files in any other language are skipped during symbol extraction and
100
+ otherwise ignored safely.
101
+
102
+ ## Tests
103
+
104
+ ```bash
105
+ python3 -m unittest discover -s tests -v
106
+ ```
107
+
108
+ ## License
109
+
110
+ MIT
@@ -0,0 +1,15 @@
1
+ crumbs/__init__.py,sha256=q35zpXLx5u_N4VcaLi8c9MtXCaiqWvLOEYy1XHNHTAs,359
2
+ crumbs/__main__.py,sha256=4JMK66Wj4uLZTKbF-sT3LAxOsr6buig77PmOkJCRRxw,83
3
+ crumbs/cli.py,sha256=EB8MufECSES4I1mlRQNzbvZgE1DfxdDX53OWc13Qb1g,5903
4
+ crumbs/digest.py,sha256=57U4aBx9DRDdRm-bzyisgNBElTi3gYg5P6qbyZp7FqM,2255
5
+ crumbs/extractors.py,sha256=SFGQeeHyTU29Vp1YXia0uY2xhOXb1yL81JyMTw0xJoQ,8265
6
+ crumbs/indexer.py,sha256=udba2FnVg_cegc7PO9pEYBKQiPPRmjtpGoBG7rjw2JM,4069
7
+ crumbs/mcp.py,sha256=yDXRTj9JzxuhQJd7V89Xt_KBFXsL3RWB3t5dwhTqWM4,11211
8
+ crumbs/query.py,sha256=qImdrwV_3uGYTX9seBjZ1RlUOG_2aN_15ASMh3T4imU,2686
9
+ crumbs/store.py,sha256=msocCNaLpoAprhwx40t-0LXcvyb3kFwAM3A_xHL9SL0,2976
10
+ crumbs_cli-0.3.0.dist-info/licenses/LICENSE,sha256=VKhnYSB3LGOqeE0zQqoresvhyX33yvdZ5MUA0g8ReSE,1068
11
+ crumbs_cli-0.3.0.dist-info/METADATA,sha256=nZscACFHl1QGIDWMZGsMipFxOflGtS6UUXRmDn7pwoU,4174
12
+ crumbs_cli-0.3.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
13
+ crumbs_cli-0.3.0.dist-info/entry_points.txt,sha256=b8-FzjddBJ7krJaLq301KWLvmOvyMIZoaiSYTM5qsd4,43
14
+ crumbs_cli-0.3.0.dist-info/top_level.txt,sha256=l3c3J2z_MFKJW73ZMrHj8sF9XlsHdUpNm8how5Of7sY,7
15
+ crumbs_cli-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ crumbs = crumbs.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 SufyanShaik
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ crumbs