crumbs-cli 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 SufyanShaik
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: crumbs-cli
3
+ Version: 0.3.0
4
+ Summary: Local, token-efficient cross-repo context for LLMs. CLI + MCP server.
5
+ Author: crumbs
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/crumbs1505/crumbs
8
+ Project-URL: Repository, https://github.com/crumbs1505/crumbs
9
+ Project-URL: Issues, https://github.com/crumbs1505/crumbs/issues
10
+ Keywords: llm,context,claude,code,repo,tokens,mcp
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Classifier: Topic :: Software Development :: Documentation
20
+ Requires-Python: >=3.8
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Dynamic: license-file
24
+
25
+ # crumbs
26
+
27
+ **Local, token-efficient cross-repo context for LLMs.**
28
+
29
+ `crumbs` indexes your repositories into compact *context crumbs* — file maps and
30
+ symbol signatures (typed function/class/type declarations + one-line docs + line
31
+ ranges), **never the full file bodies**. An assistant like Claude can then
32
+ understand many repos at once by reading a tiny map instead of paying tokens to
33
+ read the entire source tree.
34
+
35
+ Indexing this very tool produces a map of **~1,200 tokens** standing in for
36
+ **~8,400 tokens** of source — an **~86% reduction** — while still naming every
37
+ file and symbol. Each symbol carries its full type signature and a source line
38
+ range (e.g. `def build_parser() -> ArgumentParser [L125-168]`), so the assistant
39
+ can open *just that slice* of a file rather than the whole thing.
40
+
41
+ - 🪶 **Zero dependencies.** Pure Python 3.8+ stdlib. Runs on any device.
42
+ - 🔒 **Fully local.** Crumbs live in `~/.crumbs`. Nothing leaves your machine.
43
+ - 🧠 **Cross-repo.** Search and pull context across every repo you've indexed.
44
+ - 🎯 **High signal.** Python is parsed via `ast`; JS/TS/Go/Rust/etc. via fast
45
+ regex. Skips `node_modules`, `.git`, build dirs, lockfiles, and binaries.
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ pip install -e . # provides the `crumbs` command
51
+ # or run without installing:
52
+ python3 -m crumbs --help
53
+ ```
54
+
55
+ ## Usage
56
+
57
+ ```bash
58
+ crumbs index ~/code/my-api ~/code/my-web # index one or more repos
59
+ crumbs list # show indexed repos + stats
60
+ crumbs map my-api --stats # compact map of one repo (+ token estimate)
61
+ crumbs search "auth token" # rank matching symbols across all repos
62
+ crumbs context "rate limiting" --repo my-api # LLM-ready context slice
63
+ crumbs refresh # re-index everything
64
+ crumbs remove my-web # drop a repo from the index
65
+ ```
66
+
67
+ A repo can be referenced by name, id, or path.
68
+
69
+ ## Workflow with Claude
70
+
71
+ 1. `crumbs index` the repos you work across (once, or on a `crumbs refresh` cron).
72
+ 2. Ask Claude to run `crumbs map <repo>` or `crumbs context "<topic>"` instead of
73
+ reading whole files. It gets the structure and the relevant symbols for a
74
+ fraction of the tokens, then reads full files only where it actually needs to.
75
+
76
+ ## How it stays cheap
77
+
78
+ | | Full repo read | `crumbs map` |
79
+ |---|---|---|
80
+ | What | every byte of every file | file tree + typed signatures + 1-line docs + line ranges |
81
+ | Bodies | yes | no |
82
+ | Cost | grows with codebase | grows with *interface* size |
83
+
84
+ Because every symbol records its line range, the follow-up step is cheap too: the
85
+ assistant reads `path:start-end` for the one function it needs instead of opening
86
+ the entire file.
87
+
88
+ Storage layout (`~/.crumbs`, override with `CRUMBS_HOME`):
89
+
90
+ ```
91
+ registry.json # id -> {name, path, indexed_at, stats}
92
+ repos/<id>.json # full crumb data for one repo
93
+ ```
94
+
95
+ ## Supported languages
96
+
97
+ Python (AST), JavaScript/TypeScript, Go, Rust, and a generic declaration
98
+ matcher for Java, Ruby, PHP, C/C++, C#, Swift, Kotlin. Markdown is indexed by
99
+ heading. Anything else is skipped from symbol extraction but still ignored
100
+ safely.
101
+
102
+ ## Tests
103
+
104
+ ```bash
105
+ python3 -m unittest discover -s tests -v
106
+ ```
107
+
108
+ ## License
109
+
110
+ MIT
@@ -0,0 +1,86 @@
1
+ # crumbs
2
+
3
+ **Local, token-efficient cross-repo context for LLMs.**
4
+
5
+ `crumbs` indexes your repositories into compact *context crumbs* — file maps and
6
+ symbol signatures (typed function/class/type declarations + one-line docs + line
7
+ ranges), **never the full file bodies**. An assistant like Claude can then
8
+ understand many repos at once by reading a tiny map instead of paying tokens to
9
+ read the entire source tree.
10
+
11
+ Indexing this very tool produces a map of **~1,200 tokens** standing in for
12
+ **~8,400 tokens** of source — an **~86% reduction** — while still naming every
13
+ file and symbol. Each symbol carries its full type signature and a source line
14
+ range (e.g. `def build_parser() -> ArgumentParser [L125-168]`), so the assistant
15
+ can open *just that slice* of a file rather than the whole thing.
16
+
17
+ - 🪶 **Zero dependencies.** Pure Python 3.8+ stdlib. Runs on any device.
18
+ - 🔒 **Fully local.** Crumbs live in `~/.crumbs`. Nothing leaves your machine.
19
+ - 🧠 **Cross-repo.** Search and pull context across every repo you've indexed.
20
+ - 🎯 **High signal.** Python is parsed via `ast`; JS/TS/Go/Rust/etc. via fast
21
+ regex. Skips `node_modules`, `.git`, build dirs, lockfiles, and binaries.
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ pip install -e . # provides the `crumbs` command
27
+ # or run without installing:
28
+ python3 -m crumbs --help
29
+ ```
30
+
31
+ ## Usage
32
+
33
+ ```bash
34
+ crumbs index ~/code/my-api ~/code/my-web # index one or more repos
35
+ crumbs list # show indexed repos + stats
36
+ crumbs map my-api --stats # compact map of one repo (+ token estimate)
37
+ crumbs search "auth token" # rank matching symbols across all repos
38
+ crumbs context "rate limiting" --repo my-api # LLM-ready context slice
39
+ crumbs refresh # re-index everything
40
+ crumbs remove my-web # drop a repo from the index
41
+ ```
42
+
43
+ A repo can be referenced by name, id, or path.
44
+
45
+ ## Workflow with Claude
46
+
47
+ 1. `crumbs index` the repos you work across (once, or on a `crumbs refresh` cron).
48
+ 2. Ask Claude to run `crumbs map <repo>` or `crumbs context "<topic>"` instead of
49
+ reading whole files. It gets the structure and the relevant symbols for a
50
+ fraction of the tokens, then reads full files only where it actually needs to.
51
+
52
+ ## How it stays cheap
53
+
54
+ | | Full repo read | `crumbs map` |
55
+ |---|---|---|
56
+ | What | every byte of every file | file tree + typed signatures + 1-line docs + line ranges |
57
+ | Bodies | yes | no |
58
+ | Cost | grows with codebase | grows with *interface* size |
59
+
60
+ Because every symbol records its line range, the follow-up step is cheap too: the
61
+ assistant reads `path:start-end` for the one function it needs instead of opening
62
+ the entire file.
63
+
64
+ Storage layout (`~/.crumbs`, override with `CRUMBS_HOME`):
65
+
66
+ ```
67
+ registry.json # id -> {name, path, indexed_at, stats}
68
+ repos/<id>.json # full crumb data for one repo
69
+ ```
70
+
71
+ ## Supported languages
72
+
73
+ Python (AST), JavaScript/TypeScript, Go, Rust, and a generic declaration
74
+ matcher for Java, Ruby, PHP, C/C++, C#, Swift, Kotlin. Markdown is indexed by
75
+ heading. Anything else is skipped from symbol extraction but still ignored
76
+ safely.
77
+
78
+ ## Tests
79
+
80
+ ```bash
81
+ python3 -m unittest discover -s tests -v
82
+ ```
83
+
84
+ ## License
85
+
86
+ MIT
@@ -0,0 +1,9 @@
1
+ """crumbs - local, token-efficient cross-repo context for LLMs.
2
+
3
+ crumbs indexes repositories into compact "context crumbs" (file maps and symbol
4
+ signatures, not full file bodies) stored locally. An assistant can query these
5
+ crumbs to understand many repos at once without reading -- and paying tokens for
6
+ -- the entire source tree.
7
+ """
8
+
9
+ __version__ = "0.3.0"
@@ -0,0 +1,6 @@
1
+ import sys
2
+
3
+ from .cli import main
4
+
5
# Entry point for ``python -m crumbs``: run the CLI and use its return value
# as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,186 @@
1
+ """crumbs command-line interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ import time
9
+ from typing import List, Optional
10
+
11
+ from . import __version__, digest, indexer, query, store
12
+
13
+
14
def _fmt_age(ts: float) -> str:
    """Describe how long ago the epoch timestamp ``ts`` was, in one coarse unit.

    The elapsed time is measured against ``time.time()`` and truncated to whole
    seconds. The largest unit that fits (days, then hours, then minutes) is
    reported; anything under a minute, including timestamps in the future,
    yields ``"just now"``.
    """
    elapsed = max(0, int(time.time() - ts))
    if elapsed >= 86400:
        return f"{elapsed // 86400}d ago"
    if elapsed >= 3600:
        return f"{elapsed // 3600}h ago"
    if elapsed >= 60:
        return f"{elapsed // 60}m ago"
    return "just now"
20
+
21
+
22
def cmd_index(args: argparse.Namespace) -> int:
    """Index every requested path and print a one-line summary for each.

    When no paths were given the current directory (``"."``) is indexed. The
    summary reports file and symbol counts plus the estimated token cost of
    the rendered map versus the full source.

    Returns:
        0 when every path was indexed; 1 as soon as one path raises
        ``NotADirectoryError`` or ``FileNotFoundError`` (the error is written
        to stderr and the remaining paths are not processed).
    """
    targets = args.paths if args.paths else ["."]
    for target in targets:
        try:
            repo = indexer.index_repo(target, name=args.name)
        except (NotADirectoryError, FileNotFoundError) as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1

        stats = repo["stats"]
        rendered = digest.repo_map(repo["id"])
        saved = digest.savings(repo, rendered)
        summary = (
            f"indexed {repo['name']} "
            f"{stats['files']} files, {stats['symbols']} symbols "
            f"(map ~{saved['map_tokens']} tok vs ~{saved['source_tokens']} tok source, "
            f"-{saved['saved_pct']}%)"
        )
        print(summary)
    return 0
40
+
41
+
42
def cmd_list(args: argparse.Namespace) -> int:
    """Print the registry of indexed repos, as JSON or as an aligned table.

    With ``--json`` the registry is always emitted as JSON, even when it is
    empty, so that machine consumers never receive the human-readable hint
    instead of a parseable document. Otherwise an empty registry prints a
    hint, and a non-empty one prints one row per repo (name, id, file count,
    symbol count, age of the index) sorted by repo name.

    Returns:
        Always 0.
    """
    registry = store.load_registry()

    # JSON mode is checked first (as cmd_search does) so that its output stays
    # parseable regardless of whether anything has been indexed yet.
    if args.json:
        print(json.dumps(registry, indent=2))
        return 0

    if not registry:
        print("no repos indexed. run: crumbs index <path>")
        return 0

    rows = sorted(registry.items(), key=lambda kv: kv[1]["name"])
    # Pad every name to the longest one so the columns line up.
    name_w = max((len(meta["name"]) for _, meta in rows), default=4)
    for rid, meta in rows:
        stats = meta["stats"]
        print(
            f"{meta['name']:<{name_w}} {rid} "
            f"{stats['files']:>4} files {stats['symbols']:>5} symbols "
            f"{_fmt_age(meta['indexed_at'])}"
        )
    return 0
60
+
61
+
62
def cmd_map(args: argparse.Namespace) -> int:
    """Print the compact map of one repo, optionally with a token estimate.

    ``args.repo`` is resolved through ``store.resolve``. The map goes to
    stdout; with ``--stats`` the token comparison goes to stderr so that it
    does not pollute a piped map.

    Returns:
        0 on success, 1 when no indexed repo matches ``args.repo``.
    """
    rid = store.resolve(args.repo)
    if not rid:
        print(f"error: no indexed repo matches '{args.repo}'", file=sys.stderr)
        return 1

    text = digest.repo_map(rid, max_symbols_per_file=args.max_symbols)
    print(text)

    if args.stats:
        data = store.load_repo(rid)
        # digest.repo_map() treats a falsy load_repo() result as "no data" and
        # returns an empty map; mirror that here instead of letting
        # digest.savings() fail while subscripting the missing data.
        if data:
            sav = digest.savings(data, text)
            print(
                f"\n_~{sav['map_tokens']} tokens (vs ~{sav['source_tokens']} for full source, "
                f"-{sav['saved_pct']}%)_",
                file=sys.stderr,
            )
    return 0
78
+
79
+
80
def cmd_search(args: argparse.Namespace) -> int:
    """Run a symbol search and print the hits as JSON or one line per hit.

    Each text line has the form ``repo:path[:line] signature``; when a hit has
    no signature, ``"<kind> <name>"`` is shown in its place. An empty result
    prints ``no matches`` (or an empty JSON document in ``--json`` mode).

    Returns:
        Always 0.
    """
    results = query.search(args.query, repo=args.repo, limit=args.limit)

    if args.json:
        print(json.dumps(results, indent=2))
    elif not results:
        print("no matches")
    else:
        for hit in results:
            signature = hit["sig"] if hit["sig"] else f"{hit['kind']} {hit['name']}"
            line_no = hit.get("line")
            suffix = f":{line_no}" if line_no else ""
            print(f"{hit['repo']}:{hit['path']}{suffix} {signature}")
    return 0
93
+
94
+
95
def cmd_context(args: argparse.Namespace) -> int:
    """Print the context slice that ``query.context`` builds for the query.

    The query text, the optional repo filter and the result limit are passed
    through unchanged.

    Returns:
        Always 0.
    """
    rendered = query.context(args.query, repo=args.repo, limit=args.limit)
    print(rendered)
    return 0
98
+
99
+
100
def cmd_remove(args: argparse.Namespace) -> int:
    """Drop one repo from the index and confirm the removal by name.

    The display name is read from the registry before the repo is removed;
    the repo id is used as a fallback when the registry has no name for it.

    Returns:
        0 on success, 1 when no indexed repo matches ``args.repo``.
    """
    rid = store.resolve(args.repo)
    if rid:
        entry = store.load_registry().get(rid, {})
        label = entry.get("name", rid)
        store.remove_repo(rid)
        print(f"removed {label}")
        return 0

    print(f"error: no indexed repo matches '{args.repo}'", file=sys.stderr)
    return 1
109
+
110
+
111
def cmd_mcp(args: argparse.Namespace) -> int:
    """Hand control to the MCP server and return whatever ``serve()`` returns.

    The ``mcp`` module is imported inside the function, so it is only loaded
    when this subcommand is actually invoked.
    """
    from . import mcp as server

    return server.serve()
114
+
115
+
116
def cmd_refresh(args: argparse.Namespace) -> int:
    """Re-index every repo recorded in the registry under its stored name.

    Repos whose path raises ``NotADirectoryError`` or ``FileNotFoundError``
    are reported on stderr and skipped; the others are confirmed on stdout.

    Returns:
        Always 0, including when some repos were skipped.
    """
    registry = store.load_registry()
    if not registry:
        print("nothing to refresh")
        return 0

    # Iterate over a snapshot of the entries taken before any re-indexing.
    for entry in list(registry.values()):
        try:
            indexer.index_repo(entry["path"], name=entry["name"])
        except (NotADirectoryError, FileNotFoundError):
            print(
                f"skip {entry['name']} (path missing: {entry['path']})",
                file=sys.stderr,
            )
        else:
            print(f"refreshed {entry['name']}")
    return 0
128
+
129
+
130
def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``crumbs`` argument parser with all of its subcommands.

    A subcommand is mandatory. Each subparser stores its handler in ``func``
    so the caller can dispatch with ``args.func(args)``. Subcommands defined
    here: ``index``, ``list``, ``map``, ``search``, ``context``, ``remove``,
    ``refresh`` and ``mcp``.
    """
    parser = argparse.ArgumentParser(
        prog="crumbs",
        description="Local, token-efficient cross-repo context for LLMs.",
    )
    parser.add_argument("--version", action="version", version=f"crumbs {__version__}")
    commands = parser.add_subparsers(dest="cmd", required=True)

    # index
    index_cmd = commands.add_parser("index", help="index one or more repos")
    index_cmd.add_argument("paths", nargs="*", help="repo paths (default: .)")
    index_cmd.add_argument("--name", help="override repo name")
    index_cmd.set_defaults(func=cmd_index)

    # list
    list_cmd = commands.add_parser("list", help="list indexed repos")
    list_cmd.add_argument("--json", action="store_true")
    list_cmd.set_defaults(func=cmd_list)

    # map
    map_cmd = commands.add_parser("map", help="print compact map of a repo")
    map_cmd.add_argument("repo", help="repo name, id, or path")
    map_cmd.add_argument("--max-symbols", type=int, default=12)
    map_cmd.add_argument("--stats", action="store_true", help="print token estimate to stderr")
    map_cmd.set_defaults(func=cmd_map)

    # search
    search_cmd = commands.add_parser("search", help="search symbols across repos")
    search_cmd.add_argument("query")
    search_cmd.add_argument("--repo", help="limit to one repo")
    search_cmd.add_argument("--limit", type=int, default=30)
    search_cmd.add_argument("--json", action="store_true")
    search_cmd.set_defaults(func=cmd_search)

    # context
    context_cmd = commands.add_parser("context", help="LLM-ready context slice for a query")
    context_cmd.add_argument("query")
    context_cmd.add_argument("--repo", help="limit to one repo")
    context_cmd.add_argument("--limit", type=int, default=20)
    context_cmd.set_defaults(func=cmd_context)

    # remove
    remove_cmd = commands.add_parser("remove", help="remove a repo from the index")
    remove_cmd.add_argument("repo")
    remove_cmd.set_defaults(func=cmd_remove)

    # refresh
    refresh_cmd = commands.add_parser("refresh", help="re-index all known repos")
    refresh_cmd.set_defaults(func=cmd_refresh)

    # mcp
    mcp_cmd = commands.add_parser("mcp", help="run as an MCP server over stdio")
    mcp_cmd.set_defaults(func=cmd_mcp)

    return parser
177
+
178
+
179
def main(argv: Optional[List[str]] = None) -> int:
    """Parse ``argv`` and run the selected subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read the
            process arguments.

    Returns:
        The value returned by the subcommand handler, used as the exit status.
    """
    args = build_parser().parse_args(argv)
    handler = args.func
    return handler(args)
183
+
184
+
185
# Run the CLI when this module is executed as the main program, using the
# return value of main() as the process exit status.
# NOTE(review): this module uses package-relative imports, so this guard
# presumably only works via ``python -m crumbs.cli`` -- confirm.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,75 @@
1
+ """Render a compact, token-efficient map of an indexed repo."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List
6
+
7
+ from . import store
8
+
9
+
10
def _est_tokens(chars: int) -> int:
    """Approximate a token count from a character count.

    Uses a flat ratio of about four characters per token, rounding down.
    """
    tokens, _remainder = divmod(chars, 4)
    return tokens
13
+
14
+
15
def loc(sym: Dict[str, Any]) -> str:
    """Compact source location tag, e.g. ``L40-92`` or ``L40``.

    Args:
        sym: Symbol record; ``line`` is the start line and ``end_line`` the
            optional end line.

    Returns:
        ``""`` when the symbol has no (truthy) start line, ``L<start>`` when
        the end line is missing, empty or equal to the start, and
        ``L<start>-<end>`` otherwise.
    """
    start = sym.get("line")
    if not start:
        return ""
    # ``end_line`` may be present but empty (None / 0); fall back to the start
    # line in that case rather than rendering a tag such as "L40-None".
    end = sym.get("end_line") or start
    return f"L{start}" if end == start else f"L{start}-{end}"
22
+
23
+
24
def repo_map(rid: str, max_symbols_per_file: int = 12) -> str:
    """Render the stored crumb data of one repo as a compact Markdown-style map.

    The map consists of a title line, an optional git remote/branch line, a
    file/symbol count, an optional quoted README excerpt, and then one section
    per file that has symbols. Each symbol line shows the signature (or
    ``"<kind> <name>"`` when no signature is stored), its line-range tag and
    its one-line doc when available.

    Args:
        rid: Repo id passed to ``store.load_repo``.
        max_symbols_per_file: Maximum number of symbols listed per file; the
            remainder is summarised as ``… +N more``. Negative values are
            treated as 0.

    Returns:
        The rendered map, or ``""`` when ``store.load_repo`` returns nothing.
    """
    data = store.load_repo(rid)
    if not data:
        return ""

    # A negative limit would make the slice below drop symbols from the END of
    # the list and overstate the "+N more" count, so clamp it to zero.
    limit = max(0, max_symbols_per_file)

    lines: List[str] = [f"# {data['name']}"]

    git = data.get("git", {})
    meta = []
    if git.get("remote"):
        meta.append(git["remote"])
    if git.get("branch"):
        meta.append(f"@{git['branch']}")
    if meta:
        lines.append(" ".join(meta))

    st = data["stats"]
    lines.append(f"_{st['files']} files, {st['symbols']} symbols indexed_")
    lines.append("")

    if data.get("readme"):
        # Collapse blank lines and render the excerpt as a Markdown blockquote.
        excerpt = data["readme"].strip().replace("\n\n", "\n")
        lines.append("> " + excerpt.replace("\n", "\n> "))
        lines.append("")

    for f in data["files"]:
        syms = f["symbols"]
        if not syms:
            continue  # files without symbols are left out of the map
        lines.append(f"### {f['path']}")
        for sym in syms[:limit]:
            sig = sym["sig"] or f"{sym['kind']} {sym['name']}"
            tag = loc(sym)
            where = f" [{tag}]" if tag else ""
            doc = f" — {sym['doc']}" if sym.get("doc") else ""
            lines.append(f"- {sig}{where}{doc}")
        hidden = len(syms) - limit
        if hidden > 0:
            # "\u2026" is the horizontal ellipsis; written as an escape so the
            # marker cannot be mangled by a wrong source-file encoding.
            lines.append(f"- \u2026 +{hidden} more")
        lines.append("")

    return "\n".join(lines)
65
+
66
+
67
def savings(data: Dict[str, Any], map_text: str) -> Dict[str, int]:
    """Compare the estimated token cost of a rendered map with the full source.

    Args:
        data: Repo crumb data; ``data["stats"]["source_bytes"]`` supplies the
            source size.
        map_text: The rendered map whose length is measured.

    Returns:
        A dict with ``source_tokens``, ``map_tokens`` and ``saved_pct`` (the
        rounded percentage saved; 0 when the source estimate is 0).
    """
    source_estimate = _est_tokens(data["stats"]["source_bytes"])
    map_estimate = _est_tokens(len(map_text))

    if source_estimate == 0:
        # Avoid dividing by zero for an empty source tree.
        saved = 0
    else:
        saved = round(100 * (1 - map_estimate / source_estimate))

    return {
        "source_tokens": source_estimate,
        "map_tokens": map_estimate,
        "saved_pct": saved,
    }